diff --git a/CMakeLists.txt b/CMakeLists.txt index 8879913..10a75d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ set(CMAKE_LIBRARY_PATH "/home/$ENV{USER}/local/lib/") set(CMAKE_INSTALL_PREFIX "/home/$ENV{USER}/local/") # Project configuration specific parameters -set(STOSYS_M45 OFF) +set(STOSYS_M45 ON) set(STOSYS_CMAKE_DEBUG OFF) set(STOSYS_ASAN ON) @@ -146,4 +146,4 @@ if(STOSYS_CMAKE_DEBUG) foreach (_variableName ${_variableNames}) message(STATUS "${_variableName}=${${_variableName}}") endforeach() -endif() +endif() \ No newline at end of file diff --git a/src/common/nvmeprint.cpp b/src/common/nvmeprint.cpp index 063cebe..31b6982 100644 --- a/src/common/nvmeprint.cpp +++ b/src/common/nvmeprint.cpp @@ -311,25 +311,25 @@ const char *ss_nvme_status_to_string(__u16 status) { } } -static const char *nvme_feature_lba_type_to_string(__u8 type) { - switch (type) { - case 0: - return "Reserved"; - case 1: - return "Filesystem"; - case 2: - return "RAID"; - case 3: - return "Cache"; - case 4: - return "Page / Swap file"; - default: - if (type >= 0x05 && type <= 0x7f) - return "Reserved"; - else - return "Vendor Specific"; - } -} +// static const char *nvme_feature_lba_type_to_string(__u8 type) { +// switch (type) { +// case 0: +// return "Reserved"; +// case 1: +// return "Filesystem"; +// case 2: +// return "RAID"; +// case 3: +// return "Cache"; +// case 4: +// return "Page / Swap file"; +// default: +// if (type >= 0x05 && type <= 0x7f) +// return "Reserved"; +// else +// return "Vendor Specific"; +// } +// } static void nvme_show_id_ns_nsfeat(__u8 nsfeat) { __u8 rsvd = (nsfeat & 0xE0) >> 5; diff --git a/src/m1/device.cpp b/src/m1/device.cpp index 92a49a8..5412ec6 100644 --- a/src/m1/device.cpp +++ b/src/m1/device.cpp @@ -20,41 +20,43 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include +#include +#include +#include +#include #include +#include #include -#include -#include -#include #include #include #include "device.h" #include "../common/nvmeprint.h" -// Examples lifted from, https://github.com/linux-nvme/libnvme/blob/667334ff8c53dbbefa51948bbe2e086624bf4d0d/test/cpp.cc -int count_and_show_all_nvme_devices() { - nvme_root_t r; +// Examples lifted from, +// https://github.com/linux-nvme/libnvme/blob/667334ff8c53dbbefa51948bbe2e086624bf4d0d/test/cpp.cc +int count_and_show_all_nvme_devices() +{ nvme_host_t h; nvme_subsystem_t s; nvme_ctrl_t c; nvme_path_t p; nvme_ns_t n; int count = 0; - - r = nvme_scan(nullptr); + nvme_root_t r = nvme_scan(nullptr); if (!r) return -1; - nvme_for_each_host(r, h) { nvme_for_each_subsystem(h, s) { std::cout << nvme_subsystem_get_name(s) << " - NQN=" << nvme_subsystem_get_nqn(s) - << "\n"; + << std::endl; nvme_subsystem_for_each_ctrl(s, c) { std::cout << " `- " << nvme_ctrl_get_name(c) << " " << nvme_ctrl_get_transport(c) << " " << nvme_ctrl_get_address(c) << " " << nvme_ctrl_get_state(c) - << "\n"; + << std::endl; nvme_ctrl_for_each_ns(c, n) { std::cout << " `- " << nvme_ns_get_name(n) @@ -62,66 +64,76 @@ int count_and_show_all_nvme_devices() { << nvme_ns_get_lba_size(n) << " lba max:" << nvme_ns_get_lba_count(n) - << "\n"; + << std::endl; } nvme_ctrl_for_each_path(c, p) { std::cout << " `- " << nvme_path_get_name(p) << " " << nvme_path_get_ana_state(p) - << "\n"; + << std::endl; } - count++; + ++count; } } } - std::cout << "\n"; + std::cout << std::endl; nvme_free_tree(r); return count; } extern "C" { -int scan_and_identify_zns_devices(struct ss_nvme_ns *list){ - int ret; +int scan_and_identify_zns_devices(ss_nvme_ns *list) +{ int ns_counter = 0; - nvme_root_t root; nvme_host_t h; nvme_subsystem_t subsystem; nvme_ctrl_t controller; nvme_ns_t nspace; - nvme_id_ns ns{}; - - root = nvme_scan(nullptr /* for now the config file is NULL */); - if (!root){ - printf("nvme_scan call failed with errno %d , null pointer returned in the scan call\n", -errno); + nvme_id_ns ns; + nvme_root_t root = nvme_scan(nullptr /* for now the config file is NULL */); + if (!root) { + std::cout << "nvme_scan call failed with errno " + << -errno + << " , null pointer returned in the scan call" + << std::endl; return -1; } nvme_for_each_host(root, h) { nvme_for_each_subsystem(h, subsystem) { - printf("root (%d) |- name: %s sysfs_dir %s subsysqn %s \n", ns_counter, - nvme_subsystem_get_name(subsystem), - nvme_subsystem_get_sysfs_dir(subsystem), nvme_subsystem_get_nqn(subsystem)); + std::cout << "root (" << ns_counter + << ") |- name: " << nvme_subsystem_get_name(subsystem) + << " sysfs_dir " + << nvme_subsystem_get_sysfs_dir(subsystem) + << " subsysqn " << nvme_subsystem_get_nqn(subsystem) + << std::endl; nvme_subsystem_for_each_ctrl(subsystem, controller) { - printf("\t|- controller : name %s (more to follow) \n ", nvme_ctrl_get_name(controller)); + std::cout << "\t|- controller : name " + << nvme_ctrl_get_name(controller) + << " (more to follow)" << std::endl; nvme_ctrl_for_each_ns(controller, nspace) { - printf("\t\t|- namespace : name %s and command set identifier (csi) is %d (= 0 NVMe, 2 = ZNS), more to follow) \n ", - nvme_ns_get_name(nspace), nvme_ns_get_csi(nspace)); + std::cout << "\t\t|- namespace : name " + << nvme_ns_get_name(nspace) + << " and command set identifier (csi) is " + << nvme_ns_get_csi(nspace) + << " (= 0 NVMe, 2 = ZNS), more to follow)" + << std::endl; list[ns_counter].ctrl_name = strdup(nvme_ns_get_name(nspace)); - if (nvme_ns_get_csi(nspace) == NVME_CSI_ZNS) { + if (nvme_ns_get_csi(nspace) == NVME_CSI_ZNS) list[ns_counter].supports_zns = true; - } else{ + else list[ns_counter].supports_zns = false; - } // for convenience - nvme_get_nsid(nvme_ns_get_fd(nspace), &list[ns_counter].nsid); - ret = nvme_ns_identify(nspace, &ns); + nvme_get_nsid(nvme_ns_get_fd(nspace), + &list[ns_counter].nsid); + int ret = nvme_ns_identify(nspace, &ns); if (ret) { - printf("ERROR : failed to identify the namespace with %d and errno %d \n", ret, errno); + std::cout << "ERROR : failed to identify the namespace \ +with " << ret << " and errno " << errno << std::endl; return ret; } - //nvme_show_id_ns(&ns); - ns_counter++; + ++ns_counter; } } } @@ -130,196 +142,205 @@ int scan_and_identify_zns_devices(struct ss_nvme_ns *list){ return 0; } -int show_zns_zone_status(int fd, int nsid, struct zone_to_test *ztest){ +int show_zns_zone_status(const int &fd, const unsigned &nsid, + zone_to_test &ztest) +{ // ZNS specific data structures as specified in the TP 4053 - struct nvme_zns_id_ns s_zns_nsid{}; - struct nvme_zns_id_ctrl s_zns_ctrlid{}; - struct nvme_zone_report zns_report{}; - struct nvme_zns_desc *desc = nullptr, *_ztest = nullptr; // standard NVMe structures - struct nvme_id_ns s_nsid{}; - int ret; - uint64_t num_zones; - // lets first get the NVMe ns identify structure (again), we need some information from it to complement the + nvme_id_ns s_nsid; + // lets first get the NVMe ns identify structure (again), + // we need some information from it to complement the // information present in the ZNS ns identify structure - ret = nvme_identify_ns(fd, nsid, &s_nsid); - if(ret != 0){ - fprintf(stderr, "failed to identify NVMe namespace, ret %d \n", ret); + int ret = nvme_identify_ns(fd, nsid, &s_nsid); + if (ret) { + std::cerr << "failed to identify NVMe namespace, ret " + << ret << std::endl; return ret; } // see figure 8, section 3.1.1 in the ZNS specification + nvme_zns_id_ns s_zns_nsid; ret = nvme_zns_identify_ns(fd, nsid, &s_zns_nsid); - if (ret != 0) { - fprintf(stderr, "failed to identify ZNS namespace, ret %d \n", ret); + if (ret) { + std::cerr << "failed to identify ZNS namespace, ret " + << ret << std::endl; return -ret; } ss_nvme_show_zns_id_ns(&s_zns_nsid, &s_nsid); - // 3.1.2, figure 10 in the ZNS specification + nvme_zns_id_ctrl s_zns_ctrlid; ret = nvme_zns_identify_ctrl(fd, &s_zns_ctrlid); - if (ret != 0) { - fprintf(stderr, "failed to identify ZNS controller, ret %d \n", ret); + if (ret) { + std::cerr << "failed to identify ZNS controller, ret " + << ret << std::endl; return ret; } ss_nvme_show_zns_id_ctrl(&s_zns_ctrlid); - - // now we send the management related commands - see section 4.3 and 4.4 in TP 4053 - // we are now trying to retrieve the number of zones with other information present in the zone report - // the following function takes arguments that are required to filled the command structure as shown + // now we send the management related commands - + // see section 4.3 and 4.4 in TP 4053 + // we are now trying to retrieve the number of zones + // with other information present in the zone report + // the following function takes arguments + // that are required to filled the command structure as shown // in the figures 33-36 // * SLBA goes into CDW 10 and 11, as shown in Figure 34 - // * zras is Zone Receive Action Specific Features, see figure 36 for details - // * NVME_ZNS_ZRA_REPORT_ZONES and NVME_ZNS_ZRAS_REPORT_ALL are shown in Figure 36 CDW 13 + // * zras is Zone Receive Action Specific Features, + // see figure 36 for details + // * NVME_ZNS_ZRA_REPORT_ZONES and NVME_ZNS_ZRAS_REPORT_ALL + // are shown in Figure 36 CDW 13 - // Pay attention what is being passed in the zns_report pointer and size, I am passing a structure - // _WITHOUT_ its entries[] field initialized because we do not know how many zones does this namespace - // hence we first get the number of zones, and then try again to get the full report - ret = nvme_zns_mgmt_recv(fd, nsid, 0, - NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, - 0, sizeof(zns_report), (void *)&zns_report); - if(ret != 0) { - fprintf(stderr, "failed to report zones, ret %d \n", ret); + // Pay attention what is being passed in the zns_report pointer and size, + // I am passing a structure + // _WITHOUT_ its entries[] field initialized + // because we do not know how many zones does this namespace + // hence we first get the number of zones, + // and then try again to get the full report + nvme_zone_report zns_report; + ret = nvme_zns_mgmt_recv(fd, nsid, 0ULL, NVME_ZNS_ZRA_REPORT_ZONES, + NVME_ZNS_ZRAS_REPORT_ALL, false, + sizeof(zns_report), &zns_report); + if (ret) { + std::cerr << "failed to report zones, ret " << ret << std::endl; return ret; } // see figures 37-38-39 in section 4.4.1 - num_zones = le64_to_cpu(zns_report.nr_zones); + uint64_t num_zones = le64_to_cpu(zns_report.nr_zones); printf("nr_zones:%" PRIu64"\n", num_zones); - // lets get more information about the zones - the total metadata size would be + // lets get more information about the zones - + // the total metadata size would be // see the figure 37 in the ZNS description - // so we allocated an structure with a flat memory and point the zone_reports to it - // An alternate strategy would have been just allocate a 4kB page and get some numbers of zone reports whatever can + // so we allocated an structure with a flat memory + // and point the zone_reports to it + // An alternate strategy would have been just allocate a 4kB page + // and get some numbers of zone reports whatever can // fit in that in a loop. - uint64_t total_size = sizeof(zns_report) + (num_zones * sizeof(struct nvme_zns_desc)); - char *zone_reports = (char*) calloc (1, total_size); - ret = nvme_zns_mgmt_recv(fd, nsid, 0, - NVME_ZNS_ZRA_REPORT_ZONES, NVME_ZNS_ZRAS_REPORT_ALL, - 1, total_size, (void *)zone_reports); - if(ret !=0) { - fprintf(stderr, "failed to report zones, ret %d \n", ret); + uint64_t total_size = sizeof(zns_report) + + num_zones * sizeof(nvme_zns_desc); + std::unique_ptr zone_reports(new char[total_size]()); + ret = nvme_zns_mgmt_recv(fd, nsid, 0ULL, NVME_ZNS_ZRA_REPORT_ZONES, + NVME_ZNS_ZRAS_REPORT_ALL, false, + total_size, zone_reports.get()); + if (ret) { + std::cerr << "failed to report zones, ret " << ret << std::endl; return ret; } - desc = ((struct nvme_zone_report*) zone_reports)->entries; - num_zones = le64_to_cpu(((struct nvme_zone_report*) zone_reports)->nr_zones); + nvme_zns_desc *desc = ((nvme_zone_report *)zone_reports.get())->entries; + num_zones = le64_to_cpu(((nvme_zone_report *)zone_reports.get())->nr_zones); // otherwise we got all our reports, check again - printf("With the reports we have num_zones %lu (for which data transfer happened) \n", num_zones); - for(uint64_t i = 0; i < num_zones; i++){ + std::cout << "With the reports we have num_zones " << num_zones + << " (for which data transfer happened)" << std::endl; + nvme_zns_desc *_ztest = nullptr; + for (uint64_t i = 0; i < num_zones; ++i) { // see figure 39 for description of these fields - printf("\t SLBA: 0x%-8" PRIx64" WP: 0x%-8" PRIx64" Cap: 0x%-8" PRIx64" State: %-12s Type: %-14s Attrs: 0x%-x\n", - (uint64_t)le64_to_cpu(desc->zslba), (uint64_t)le64_to_cpu(desc->wp), - (uint64_t)le64_to_cpu(desc->zcap), ss_zone_state_to_string(desc->zs >> 4), - ss_zone_type_to_string(desc->zt), desc->za); - if(_ztest == nullptr && (desc->zs >> 4) == NVME_ZNS_ZS_EMPTY){ - // pick the first zone which is empty to do I/O experiments - nothing clever here + printf("\t SLBA: 0x%-8" PRIx64, le64_to_cpu(desc->zslba)); + printf(" WP: 0x%-8" PRIx64, le64_to_cpu(desc->wp)); + printf(" Cap: 0x%-8" PRIx64, le64_to_cpu(desc->zcap)); + printf(" State: %-12s", ss_zone_state_to_string(desc->zs >> 4)); + printf(" Type: %-14s", ss_zone_type_to_string(desc->zt)); + printf(" Attrs: 0x%-x\n", desc->za); + // pick the first zone which is empty to do I/O experiments + if (!_ztest && desc->zs >> 4 == NVME_ZNS_ZS_EMPTY) _ztest = desc; - } - desc++; + ++desc; } // if could be the case we did not find any empty zone - if(_ztest != nullptr){ + if (_ztest) { ret = 0; - memcpy(&ztest->desc, _ztest, sizeof(*_ztest)); + memcpy(&ztest.desc, _ztest, sizeof(*_ztest)); } else { - printf("Error: I could not find a free empty zone to test, perhaps reset the zones with: sudo nvme zns reset-zone -a /dev/nvme0n1 \n"); + std::cout << "Error: I could not find a free empty zone to test, \ +perhaps reset the zones with: sudo nvme zns reset-zone -a /dev/nvme0n1" + << std::endl; ret = -ENOENT; } // now we copy and return the zone values to do experiment on - free(zone_reports); return ret; } -int ss_nvme_device_io_with_mdts(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size, - uint64_t lba_size, uint64_t mdts_size, bool read){ + +int ss_nvme_device_io_with_mdts(const int &fd, const unsigned &nsid, + unsigned long long slba, + void *buffer, uint64_t buf_size, + const uint32_t &lba_size, + const uint32_t &mdts_size, const bool &read) +{ //FIXME: - int errno; - int current_lba = slba; - uint64_t completed_size = 0; - void *temp = malloc(mdts_size); - int iteration = 0; - errno = 0; - while((errno == 0) && (completed_size < buf_size)) { - uint64_t size = buf_size-completed_size < mdts_size ? buf_size-completed_size : mdts_size; - int no_blocks = floor(size/lba_size); - memcpy(temp, buffer+(iteration*mdts_size), size); - if (!read) - errno = ss_nvme_device_write(fd, nsid, current_lba, no_blocks, temp, size); - if (read) { - errno = ss_nvme_device_read(fd,nsid,current_lba,no_blocks,temp,size); - memcpy(buffer+(iteration*mdts_size),temp, size); - } - completed_size += size; - current_lba += no_blocks; - iteration++; + while (buf_size) { + unsigned size = buf_size < (mdts_size - 1U) * lba_size ? + buf_size : (mdts_size - 1U) * lba_size; + unsigned short no_blocks = size / lba_size; + if (read) + ss_nvme_device_read(fd, nsid, slba, no_blocks, buffer, size); + else + ss_nvme_device_write(fd, nsid, slba, no_blocks, buffer, size); + if (errno) + return errno; + slba += no_blocks; + buffer = (char *)buffer + size; + buf_size -= size; } - free(temp); return errno; } -int ss_nvme_device_read(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size) { +int ss_nvme_device_read(const int &fd, const unsigned &nsid, + const unsigned long long &slba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size) +{ //FIXME: - int errno; - void *mbuffer = NULL; - uint16_t control = 0, apptag = 0, appmask = 0; - uint32_t dsmgmt = 0, reftag = 0; - long long mbuffer_size = 0; - errno = nvme_read(fd, nsid, slba, numbers-1, control, dsmgmt, reftag, apptag, appmask, (long long) buf_size, buffer, mbuffer_size, - mbuffer); - ss_nvme_show_status(errno); + nvme_read(fd, nsid, slba, numbers - 1, 0U, 0U, 0U, 0U, 0U, + buf_size, buffer, 0U, nullptr); return errno; } -int ss_nvme_device_write(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size) { +int ss_nvme_device_write(const int &fd, const unsigned &nsid, + const unsigned long long &slba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size) +{ //FIXME: - int errno; - void *mbuffer = NULL; - uint16_t control = 0, apptag = 0, appmask = 0; - uint32_t dsmgmt = 0, reftag = 0; - long long mbuffer_size = 0; - - errno = nvme_write(fd, nsid, slba, numbers-1, control, dsmgmt, 0, reftag, apptag, appmask, (long long) buf_size, buffer, mbuffer_size, - mbuffer); - ss_nvme_show_status(errno); + nvme_write(fd, nsid, slba, numbers - 1, 0U, 0U, 0U, 0U, 0U, 0U, + buf_size, buffer, 0U, nullptr); return errno; } -int ss_zns_device_zone_reset(int fd, uint32_t nsid, uint64_t slba) { +int ss_zns_device_zone_reset(const int &fd, const unsigned &nsid, + const unsigned long long &slba) +{ //FIXME: - int errno; - errno = nvme_zns_mgmt_send(fd, nsid, slba, false, NVME_ZNS_ZSA_RESET, 0, NULL); - ss_nvme_show_status(errno); + nvme_zns_mgmt_send(fd, nsid, slba, true, NVME_ZNS_ZSA_RESET, 0U, nullptr); return errno; } // this does not take slba because it will return that -int ss_zns_device_zone_append(int fd, uint32_t nsid, uint64_t zslba, int numbers, void *buffer, uint32_t buf_size, uint64_t *written_slba){ +int ss_zns_device_zone_append(const int &fd, const unsigned &nsid, + const unsigned long long &zslba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size, + unsigned long long *written_slba) +{ //FIXME: - int errno; - void *mbuffer = NULL; - errno = nvme_zns_append(fd, nsid, zslba, numbers-1, 0, - 0, 0, 0, buf_size, buffer, 0, mbuffer,(long long unsigned int *) written_slba); - ss_nvme_show_status(errno); + nvme_zns_append(fd, nsid, zslba, numbers - 1, 0U, 0U, 0U, 0U, + buf_size, buffer, 0U, nullptr, written_slba); return errno; } -void update_lba(uint64_t &write_lba, const uint32_t lba_size, const int count){ - //assert(false); - write_lba = write_lba + count; +void update_lba(unsigned long long &write_lba, const int &count) +{ + write_lba += count; } // see 5.15.2.2 Identify Controller data structure (CNS 01h) -uint64_t get_mdts_size(int fd){ - //FIXME: - uint64_t size, mpsmin; - struct nvme_id_ctrl ctrl; - +uint32_t get_mdts_size(const int &fd) +{ + //FIXME: + nvme_id_ctrl ctrl; //Identify MDTS - nvme_identify_ctrl(fd,&ctrl); - //printf("MDTS : %d\n",ctrl.mdts); - + nvme_identify_ctrl(fd, &ctrl); //Identify MPSMIN - void *regs; - regs = mmap(NULL,getpagesize(),PROT_READ,MAP_SHARED,fd,0); - mpsmin = NVME_CAP_MPSMIN(nvme_mmio_read64(regs+0)); - - size = pow(2,mpsmin) * pow(2,ctrl.mdts); + void *regs = mmap(nullptr, getpagesize(), PROT_READ, MAP_SHARED, fd, 0L); + uint32_t mpsmin = NVME_CAP_MPSMIN(nvme_mmio_read64(regs)); + munmap(regs, getpagesize()); + uint32_t size = 1U << (mpsmin + ctrl.mdts); return size; } + } diff --git a/src/m1/device.h b/src/m1/device.h index e9c4997..73eb26d 100644 --- a/src/m1/device.h +++ b/src/m1/device.h @@ -24,10 +24,11 @@ SOFTWARE. #ifndef STOSYS_PROJECT_DEVICE_H #define STOSYS_PROJECT_DEVICE_H -#include +#include extern "C" { -// we will use an ss_ extension to differentiate our struct definitions from the standard library +// we will use an ss_ extension +// to differentiate our struct definitions from the standard library // In C++ we should use namespaces, but I am lazy struct ss_nvme_ns { char *ctrl_name; @@ -36,28 +37,41 @@ struct ss_nvme_ns { }; struct zone_to_test { - struct nvme_zns_desc desc; - uint64_t lba_size_in_use; + nvme_zns_desc desc; + uint32_t lba_size_in_use; }; // these three function examples are given to you int count_and_show_all_nvme_devices(); -int scan_and_identify_zns_devices(struct ss_nvme_ns *list); -int show_zns_zone_status(int fd, int nsid, struct zone_to_test *ztest); - -// these follow nvme specification I added ss_ prefix to avoid namespace collision with other lbnvme functions -int ss_nvme_device_io_with_mdts(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size, - uint64_t lba_size, uint64_t mdts_size, bool read); -int ss_nvme_device_read(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size); -int ss_nvme_device_write(int fd, uint32_t nsid, uint64_t slba, uint16_t numbers, void *buffer, uint64_t buf_size); - +int scan_and_identify_zns_devices(ss_nvme_ns *list); +int show_zns_zone_status(const int &fd, const unsigned &nsid, + zone_to_test &ztest); +// these follow nvme specification I added ss_ prefix +// to avoid namespace collision with other lbnvme functions +int ss_nvme_device_io_with_mdts(const int &fd, const unsigned &nsid, + unsigned long long slba, + void *buffer, uint64_t buf_size, + const uint32_t &lba_size, + const uint32_t &mdts_size, const bool &read); +int ss_nvme_device_read(const int &fd, const unsigned &nsid, + const unsigned long long &slba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size); +int ss_nvme_device_write(const int &fd, const unsigned &nsid, + const unsigned long long &slba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size); // these are ZNS specific commands -int ss_zns_device_zone_reset(int fd, uint32_t nsid, uint64_t slba); -int ss_zns_device_zone_append(int fd, uint32_t nsid, uint64_t zslba, int numbers, void *buffer, uint32_t buf_size, - uint64_t *written_slba); +int ss_zns_device_zone_reset(const int &fd, const unsigned &nsid, + const unsigned long long &slba); +int ss_zns_device_zone_append(const int &fd, const unsigned &nsid, + const unsigned long long &zslba, + const unsigned short &numbers, + void *buffer, const unsigned &buf_size, + unsigned long long *written_slba); +void update_lba(unsigned long long &write_lba, const int &count); +uint32_t get_mdts_size(const int &fd); -void update_lba(uint64_t &write_lba, const uint32_t lba_size, const int count); -uint64_t get_mdts_size(int fd); } #endif //STOSYS_PROJECT_DEVICE_H diff --git a/src/m1/m1.cpp b/src/m1/m1.cpp index 6e914c0..a87a22f 100644 --- a/src/m1/m1.cpp +++ b/src/m1/m1.cpp @@ -20,58 +20,60 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include +#include #include - -#include #include -#include - +#include +#include +#include #include "device.h" #include "../common/nvmeprint.h" #include "../common/utils.h" extern "C" { -static int test1_lba_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest){ - struct nvme_id_ns *s_nsid = nullptr; - int ret; - uint64_t test_lba_address = le64_to_cpu(ztest->desc.zslba); - - ret = nvme_identify_ns(zfd, nsid, s_nsid); - if(ret != 0){ - printf("Failed to identify the controller \n"); +static int test1_lba_io_test(const int &zfd, const unsigned &nsid, + const zone_to_test &ztest) +{ + nvme_id_ns *s_nsid = nullptr; + uint64_t test_lba_address = le64_to_cpu(ztest.desc.zslba); + int ret = nvme_identify_ns(zfd, nsid, s_nsid); + if (ret) { + std::cout << "Failed to identify the controller" << std::endl; return -1; } - // we know the Zone SIZE and CAPACITY, see https://zonedstorage.io/introduction/zns/ + // we know the Zone SIZE and CAPACITY, + // see https://zonedstorage.io/introduction/zns/ // (the difference between size and capacity) // Step 0: prepare the test pattern buffer - char *w_pattern = (char *) calloc (1, ztest->lba_size_in_use); - char *r_pattern = (char *) calloc (1, ztest->lba_size_in_use); - - assert(w_pattern != nullptr); - assert(r_pattern != nullptr); - - write_pattern(w_pattern, ztest->lba_size_in_use); - // Step 1: this is an empty zone because we choose to pick so, lets write the first LBA - ret = ss_nvme_device_write(zfd, nsid, test_lba_address, 1, w_pattern, ztest->lba_size_in_use); - if(ret != 0){ - printf("ERROR: writing failed on the zone? ret %d \n", ret); + std::unique_ptr w_pattern(new char[ztest.lba_size_in_use]()); + std::unique_ptr r_pattern(new char[ztest.lba_size_in_use]()); + assert(w_pattern); + assert(r_pattern); + write_pattern(w_pattern.get(), ztest.lba_size_in_use); + // Step 1: this is an empty zone because we choose to pick so, + // lets write the first LBA + ret = ss_nvme_device_write(zfd, nsid, test_lba_address, 1U, + w_pattern.get(), ztest.lba_size_in_use); + if (ret) { + std::cout << "ERROR: writing failed on the zone? ret " + << ret << std::endl; goto done; } - printf("OK, success in writing the zone \n"); + std::cout << "OK, success in writing the zone" << std::endl; // step 2: read the pattern, the same logic - ret = ss_nvme_device_read(zfd, nsid, test_lba_address, 1, r_pattern, ztest->lba_size_in_use); - if(ret != 0){ - printf("ERROR: reading failed on the zone? ret %d \n", ret); + ret = ss_nvme_device_read(zfd, nsid, test_lba_address, 1U, + r_pattern.get(), ztest.lba_size_in_use); + if (ret) { + std::cout << "ERROR: reading failed on the zone? ret " + << ret << std::endl; goto done; } - printf("OK, success in reading the zone \n"); - printf("Matching pattern ...\n"); - match_pattern(r_pattern, ztest->lba_size_in_use); - printf("SUCCESS: pattern matched for a simple R/W test \n"); + std::cout << "OK, success in reading the zone" << std::endl; + std::cout << "Matching pattern ..." << std::endl; + match_pattern(r_pattern.get(), ztest.lba_size_in_use); + std::cout << "SUCCESS: pattern matched for a simple R/W test" << std::endl; // starting a looping test with zone reset // this test // step 1: resets a zone @@ -81,182 +83,218 @@ static int test1_lba_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest) // step 5: read all 5 and match the pattern do { // step 1: reset the whole zone - uint64_t write_lba = le64_to_cpu(ztest->desc.zslba), zone_slba = le64_to_cpu(ztest->desc.zslba); - uint64_t returned_slba = -1; + unsigned long long write_lba = le64_to_cpu(ztest.desc.zslba); + unsigned long long zone_slba = le64_to_cpu(ztest.desc.zslba); + unsigned long long returned_slba = -1; ret = ss_zns_device_zone_reset(zfd, nsid, zone_slba); - assert(ret == 0); - printf("zone at 0x%lx is reset successfully \n", zone_slba); + assert(!ret); + std::cout << "zone at 0x" << std::hex << zone_slba << std::dec + << " is reset successfully" << std::endl; // step 2: write 2x blocks, hence 2x the buffer size - char *w_pattern2 = (char *) calloc (2 , ztest->lba_size_in_use); - // I am writing these patterns in two stages so that I can test them independently. - // nothing smart here, actually more like a dumb idea. But I like dumb working code :) - write_pattern(w_pattern2, ztest->lba_size_in_use); - write_pattern(w_pattern2 + ztest->lba_size_in_use, ztest->lba_size_in_use); - ret = ss_nvme_device_write(zfd, nsid, le64_to_cpu(ztest->desc.zslba), 2, w_pattern2, 2 * ztest->lba_size_in_use); - assert(ret == 0); - printf("zone is written 2x successfully \n"); - update_lba(write_lba, ztest->lba_size_in_use, 2); + std::unique_ptr w_pattern2(new char[2UL * + ztest.lba_size_in_use]()); + // I am writing these patterns in two stages + // so that I can test them independently. + // nothing smart here, actually more like a dumb idea. + // But I like dumb working code :) + write_pattern(w_pattern2.get(), ztest.lba_size_in_use); + write_pattern(w_pattern2.get() + ztest.lba_size_in_use, + ztest.lba_size_in_use); + ret = ss_nvme_device_write(zfd, nsid, le64_to_cpu(ztest.desc.zslba), + 2U, w_pattern2.get(), + 2U * ztest.lba_size_in_use); + assert(!ret); + std::cout << "zone is written 2x successfully" << std::endl; + update_lba(write_lba, 2); // step 3: append 2x LBA blocks - ret = ss_zns_device_zone_append(zfd, nsid, zone_slba, 2, w_pattern2, - 2 * ztest->lba_size_in_use, &returned_slba); - assert(ret == 0); - printf("zone is APPENDED 2x successfully, returned pointer is at %lx (to match %lx) \n", returned_slba, write_lba); - // match that the returned pointer - which should be the original write ptr location. - // returned pointer is where the data is appended (not where the write pointer _is_) - assert(returned_slba == write_lba); - // move the returned pointer to the +2 LBAs - we can now use the returned pointer - update_lba(returned_slba, ztest->lba_size_in_use, 2); + ret = ss_zns_device_zone_append(zfd, nsid, zone_slba, 2U, + w_pattern2.get(), + 2U * ztest.lba_size_in_use, + &returned_slba); + assert(!ret); + std::cout << "zone is APPENDED 2x successfully, returned pointer is at " + << std::hex << returned_slba << std::dec << " (to match " + << std::hex << write_lba << std::dec << ")" << std::endl; + // match that the returned pointer - + // which should be the original write ptr location. + // returned pointer is where the data is appended + // (not where the write pointer _is_) + assert(returned_slba == write_lba); + // move the returned pointer to the +2 LBAs - + // we can now use the returned pointer + update_lba(returned_slba, 2); // step 4: write the 5th 1x LBA using the returned LBA from the append - ret = ss_nvme_device_write(zfd, nsid, returned_slba, 1, w_pattern, ztest->lba_size_in_use); - assert(ret == 0); - printf("The final write is ok too, we should be at 5x LBAs writes now \n"); + ret = ss_nvme_device_write(zfd, nsid, returned_slba, 1U, + w_pattern.get(), ztest.lba_size_in_use); + assert(!ret); + std::cout << "The final write is ok too, we should be at 5x LBAs \ +writes now" << std::endl; // read all 5 blocks and match their patterns - char *r_pattern2 = (char *) calloc (5, ztest->lba_size_in_use); + std::unique_ptr r_pattern2(new char[5UL * + ztest.lba_size_in_use]()); // read from the start - ret = ss_nvme_device_read(zfd, nsid, zone_slba, 5, r_pattern2, 5 * ztest->lba_size_in_use); - assert(ret == 0); - printf("The final 5x read is ok, matching pattern ... \n"); + ret = ss_nvme_device_read(zfd, nsid, zone_slba, 5U, r_pattern2.get(), + 5U * ztest.lba_size_in_use); + assert(!ret); + std::cout << "The final 5x read is ok, matching pattern ..." + << std::endl; // now test them individually - for(int i = 0 ; i < 5; i++){ - printf("\t testing the %d buffer out of 5...", i); - match_pattern(r_pattern2 + (i * ztest->lba_size_in_use), ztest->lba_size_in_use); - printf(" passed \n"); + for (int i = 0 ; i < 5; ++i) { + std::cout << "\t testing the " << i << " buffer out of 5..."; + match_pattern(r_pattern2.get() + i * ztest.lba_size_in_use, + ztest.lba_size_in_use); + std::cout << " passed" << std::endl; } - free(r_pattern2); - free(w_pattern2); - }while(false); + } while(0); done: - free(w_pattern); - free(r_pattern); - printf("ZNS I/O testing finished, status %d \n", ret); + std::cout << "ZNS I/O testing finished, status " << ret << std::endl; return ret; } -static int test2_zone0_full_io_test(int zfd, uint32_t nsid, struct zone_to_test *ztest){ - uint64_t zone_size_in_bytes = ztest->lba_size_in_use * ztest->desc.zcap; - uint64_t zslba = le64_to_cpu(ztest->desc.zslba); - uint64_t MDTS = get_mdts_size(zfd); - printf("Test 3: testing the max writing capacity of the device, trying to read and write a complete zone of size %lu bytes \n", - zone_size_in_bytes); - uint8_t *data = (uint8_t *) calloc(1, zone_size_in_bytes); - assert(data != nullptr); - - write_pattern((char*) data, zone_size_in_bytes); +static int test2_zone0_full_io_test(const int &zfd, const unsigned &nsid, + const zone_to_test &ztest) +{ + uint64_t zone_size_in_bytes = ztest.lba_size_in_use * ztest.desc.zcap; + unsigned long long zslba = le64_to_cpu(ztest.desc.zslba); + uint32_t MDTS = get_mdts_size(zfd); + std::cout << "Test 3: testing the max writing capacity of the device, \ +trying to read and write a complete zone of size " + << zone_size_in_bytes << " bytes" << std::endl; + std::unique_ptr data(new char[zone_size_in_bytes]()); + assert(data); + write_pattern(data.get(), zone_size_in_bytes); // now reset, and then write the full zone - printf("\t trying to reset the zone at 0x%lx \n", zslba); + std::cout << "\t trying to reset the zone at 0x" + << std::hex << zslba << std::dec << std::endl; int ret = ss_zns_device_zone_reset(zfd, nsid, zslba); - if(ret != 0){ - printf("Error: zone rest on 0x%lx failed, ret %d \n", zslba, ret); + if (ret) { + std::cout << "Error: zone rest on 0x" + << std::hex << zslba << std::dec + << " failed, ret " << ret << std::endl; goto done; } - ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, ztest->desc.zcap, data, zone_size_in_bytes, - ztest->lba_size_in_use, - MDTS, - false); - if(ret != 0){ - printf("Error: zone writing on 0x%lx failed, ret %d \n", zslba, ret); + ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, data.get(), + zone_size_in_bytes, ztest.lba_size_in_use, + MDTS, false); + if (ret) { + std::cout << "Error: zone writing on 0x" + << std::hex << zslba << std::dec + << " failed, ret " << ret << std::endl; goto done; } // now read the zone - bzero(data, zone_size_in_bytes); - ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, ztest->desc.zcap, data, zone_size_in_bytes, - ztest->lba_size_in_use, - MDTS, - true); - if(ret != 0){ - printf("Error: zone reading on 0x%lx failed, ret %d \n", zslba, ret); + bzero(data.get(), zone_size_in_bytes); + ret = ss_nvme_device_io_with_mdts(zfd, nsid, zslba, data.get(), + zone_size_in_bytes, ztest.lba_size_in_use, + MDTS, true); + if (ret) { + std::cout << "Error: zone reading on 0x" + << std::hex << zslba << std::dec + << " failed, ret " << ret << std::endl; goto done; } - printf("\t the whole zone reading done \n"); - match_pattern((char*) data, zone_size_in_bytes); - printf("OK: the whole zone pattern matched \n"); - + std::cout << "\t the whole zone reading done" << std::endl; + match_pattern(data.get(), zone_size_in_bytes); + std::cout << "OK: the whole zone pattern matched" << std::endl; done: - free(data); return ret; } -int main() { - int ret, num_devices, fd, t1, t2; - uint32_t nsid; - struct ss_nvme_ns *my_devices, *zns_device; - struct nvme_id_ns ns{}; - struct zone_to_test ztest{}; - printf("============================================================== \n"); - printf("Welcome to M1. This is lot of ZNS/NVMe exploration \n"); - printf("============================================================== \n"); +int main() +{ + std::cout << "=============================================================\ +=" << std::endl; + std::cout << "Welcome to M1. This is lot of ZNS/NVMe exploration" + << std::endl; + std::cout << "=============================================================\ +=" << std::endl; // scan all NVMe devices in the system - just like nvme list command - ret = count_and_show_all_nvme_devices(); - if(ret < 0){ - printf("the host device scans failed, %d \n", ret); + int ret = count_and_show_all_nvme_devices(); + if (ret < 0) { + std::cout << "the host device scans failed, " << ret << std::endl; return ret; } - // now we are going to allocate scan the returned number of devices to identify a ZNS device - num_devices = ret; - printf("total number of devices in the system is %d \n", num_devices); - if(num_devices == 0){ - printf("Error: failed to open any device, zero devices in the system? \n"); + // now we are going to allocate scan the returned number of devices + // to identify a ZNS device + int num_devices = ret; + std::cout << "total number of devices in the system is " + << num_devices << std::endl; + if (!num_devices) { + std::cout << "Error: failed to open any device, zero devices in the \ +system?" << std::endl; return -ENODEV; } - my_devices = (struct ss_nvme_ns *) calloc (num_devices, sizeof(*my_devices)); - if(!my_devices){ - printf("failed calloc, -ENOMEM \n"); - return -12; - } - ret = scan_and_identify_zns_devices(my_devices); - if(ret < 0){ - printf("scanning of the devices failed %d\n", ret); + std::unique_ptr my_devices(new ss_nvme_ns[num_devices]()); + ret = scan_and_identify_zns_devices(my_devices.get()); + if (ret < 0) { + std::cout << "scanning of the devices failed" << std::endl; return ret; } - for(int i = 0; i < num_devices; i++){ - printf("namespace: %s and zns %s \n", my_devices[i].ctrl_name, (my_devices[i].supports_zns ? "YES" : "NO")); - if(my_devices[i].supports_zns) { - // with this we will just pick the last ZNS device to work with + ss_nvme_ns *zns_device = nullptr; + for (int i = 0; i < num_devices; ++i) { + std::cout << "namespace: " << my_devices[i].ctrl_name + << " and zns " << (my_devices[i].supports_zns ? "YES" : "NO") + << std::endl; + // with this we will just pick the last ZNS device to work with + if (my_devices[i].supports_zns) zns_device = &my_devices[i]; - } } - printf("Opening the device at %s \n", zns_device->ctrl_name); - fd = nvme_open(zns_device->ctrl_name); - if(fd < 0){ - printf("device %s opening failed %d errno %d \n", zns_device->ctrl_name, fd, errno); + std::cout << "Opening the device at " << zns_device->ctrl_name << std::endl; + int fd = nvme_open(zns_device->ctrl_name); + if (fd < 0) { + std::cout << "device " << zns_device->ctrl_name + << " opening failed " << fd + << " errno " << errno << std::endl; return -fd; } - printf("device %s opened successfully %d \n", zns_device->ctrl_name, fd); + std::cout << "device " << zns_device->ctrl_name + << " opened successfully " << fd << std::endl; // now try to retrieve the NVMe namespace details - step 1 get the id + unsigned nsid = 0U; ret = nvme_get_nsid(fd, &nsid); - if(ret != 0){ - printf("ERROR: failed to retrieve the nsid %d \n", ret); + if (ret) { + std::cout << "ERROR: failed to retrieve the nsid " << ret << std::endl; return ret; } - // with the id now we can query the identify namespace - see figure 249, section 5.15.2 in the NVMe specification + // with the id now we can query the identify namespace - + // see figure 249, section 5.15.2 in the NVMe specification + nvme_id_ns ns; ret = nvme_identify_ns(fd, nsid, &ns); - if(ret){ - printf("ERROR: failed to retrieve the nsid %d \n", ret); + if (ret) { + std::cout << "ERROR: failed to retrieve the nsid " << ret << std::endl; return ret; } ss_nvme_show_id_ns(&ns); - printf("number of LBA formats? %d (a zero based value) \n", ns.nlbaf); - // extract the in-use LBA size, it could be the case that the device supports multiple LBA size - ztest.lba_size_in_use = 1 << ns.lbaf[(ns.flbas & 0xf)].ds; - printf("the LBA size is %lu bytes \n", ztest.lba_size_in_use); - // this function shows the zone status and then return the first empty zone to do experiments on in ztest - ret = show_zns_zone_status(fd, nsid, &ztest); - if ( ret != 0) { - printf("failed to get a workable zone, ret %d \n", ret); + std::cout << "number of LBA formats? " << ns.nlbaf + << " (a zero based value)" << std::endl; + // extract the in-use LBA size, + // it could be the case that the device supports multiple LBA size + zone_to_test ztest; + ztest.lba_size_in_use = 1U << ns.lbaf[(ns.flbas & 0xf)].ds; + std::cout << "the LBA size is " << ztest.lba_size_in_use + << " bytes" << std::endl; + // this function shows the zone status + // and then return the first empty zone to do experiments on in ztest + ret = show_zns_zone_status(fd, nsid, ztest); + if (ret) { + std::cout << "failed to get a workable zone, ret " << ret << std::endl; return ret; } - t1 = test1_lba_io_test(fd, nsid, &ztest); - t2 = test2_zone0_full_io_test(fd, nsid, &ztest); - printf("====================================================================\n"); - printf("Milestone 1 results \n"); - printf("Test 1 (read, write, append, reset) : %s \n", (t1 == 0 ? " Passed" : " Failed")); - printf("Test 2 (Large zone read, write) : %s \n", (t2 == 0 ? " Passed" : " Failed")); - printf("====================================================================\n"); - for(int i = 0; i < num_devices; i++) { + int t1 = test1_lba_io_test(fd, nsid, ztest); + int t2 = test2_zone0_full_io_test(fd, nsid, ztest); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "Milestone 1 results" << std::endl; + std::cout << "Test 1 (read, write, append, reset) : " + << (!t1 ? " Passed" : " Failed") << std::endl; + std::cout << "Test 2 (Large zone read, write) : " + << (!t2 ? " Passed" : " Failed") << std::endl; + std::cout << "=============================================================\ +=======" << std::endl; + for(int i = 0; i < num_devices; ++i) free(my_devices[i].ctrl_name); - } - free(my_devices); return 0; } + } diff --git a/src/m23-ftl/m2.cpp b/src/m23-ftl/m2.cpp index cc694e2..4d6cf93 100644 --- a/src/m23-ftl/m2.cpp +++ b/src/m23-ftl/m2.cpp @@ -20,103 +20,130 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include - -#include -#include #include -#include +#include #include +#include +#include +#include +#include #include "./zns_device.h" #include "../common/utils.h" extern "C" { -static int write_read_random_lbas(struct user_zns_device *my_dev, void *buf, uint32_t buf_size, uint64_t max_lbas_to_test){ - int ret = -1; - uint32_t max_lba_entries = my_dev->capacity_bytes / my_dev->lba_size_bytes; - if(max_lba_entries < max_lbas_to_test){ - printf("Error: not sufficient LBAs available, pass a smaller number \n"); +static int write_read_random_lbas(const user_zns_device &my_dev, void *buf, + const uint32_t &buf_size, + const uint64_t &max_lbas_to_test) +{ + uint32_t max_lba_entries = my_dev.capacity_bytes / my_dev.lba_size_bytes; + if (max_lba_entries < max_lbas_to_test) { + std::cout << "Error: not sufficient LBAs available, pass a smaller \ +number" << std::endl; return -1; } - const uint64_t max_lba_to_generate = (max_lba_entries - max_lbas_to_test); + const uint64_t max_lba_to_generate = max_lba_entries - max_lbas_to_test; // lets pick a random start offset - const uint64_t start_lba = (0 + (rand() % (max_lba_to_generate - 0))); - // now starting from "s" lba, we are going to write out max_lbas_to_test LBAs - for(uint64_t i = start_lba; i < (start_lba + max_lbas_to_test); i++){ + const uint64_t start_lba = 0UL + rand() % (max_lba_to_generate - 0UL); + // now starting from "s" lba, + // we are going to write out max_lbas_to_test LBAs + for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { // make a unique pattern for each write - ith iteration - write_pattern_with_start((char*) buf, buf_size, i); - ret = zns_udevice_write(my_dev, (i * my_dev->lba_size_bytes), buf, buf_size); - if(ret != 0){ - printf("Error: writing the device failed at address 0x%lx [index %lu] \n", - (i * my_dev->lba_size_bytes), (i - start_lba)); + write_pattern_with_start(static_cast(buf), buf_size, i); + int ret = zns_udevice_write(const_cast(&my_dev), + i * my_dev.lba_size_bytes, buf, buf_size); + if (ret) { + std::cout << "Error: writing the device failed at address 0x" + << std::hex << i * my_dev.lba_size_bytes << std::dec + << " [index " << i - start_lba << "]" << std::endl; return ret; } } - printf("Writing of %lu unique LBAs OK \n", max_lbas_to_test); + std::cout << "Writing of " << max_lbas_to_test + << " unique LBAs OK" << std::endl; // otherwise all writes passed - now we test reading - for(uint64_t i = start_lba; i < (start_lba + max_lbas_to_test); i++){ + for (uint64_t i = start_lba; i < start_lba + max_lbas_to_test; ++i) { // make a unique pattern for each write - bzero((char*) buf, buf_size); - ret = zns_udevice_read(my_dev, (i * my_dev->lba_size_bytes), buf, buf_size); - if(ret != 0){ - printf("Error: writing the device failed at address 0x%lx [index %lu] \n", - (i * my_dev->lba_size_bytes), (i - start_lba)); + bzero(static_cast(buf), buf_size); + int ret = zns_udevice_read(const_cast(&my_dev), + i * my_dev.lba_size_bytes, buf, buf_size); + if (ret) { + std::cout << "Error: writing the device failed at address 0x" + << std::hex << i * my_dev.lba_size_bytes << std::dec + << " [index " << i - start_lba << "]" << std::endl; return ret; } // now we match - for ith pattern - if it fails it asserts - match_pattern_with_start((char*) buf, buf_size, i); + match_pattern_with_start(static_cast(buf), buf_size, i); } - printf("Reading and matching of %lu unique LBAs OK \n", max_lbas_to_test); + std::cout << "Reading and matching of " << max_lbas_to_test + << " unique LBAs OK" << std::endl; return 0; } -static int write_read_lba0(struct user_zns_device *dev, void *buf, uint32_t buf_size){ - write_pattern((char*) buf, buf_size); - uint64_t test_lba = 0; - int ret = zns_udevice_write(dev, test_lba, buf, buf_size); - if(ret != 0){ - printf("Error: writing the device failed at address 0x%lx \n", test_lba); +static int write_read_lba0(const user_zns_device &dev, + void *buf, const uint32_t &buf_size) +{ + write_pattern(static_cast(buf), buf_size); + uint64_t test_lba = 0UL; + int ret = zns_udevice_write(const_cast(&dev), test_lba, + buf, buf_size); + if (ret) { + std::cout << "Error: writing the device failed at address 0x" + << std::hex << test_lba << std::dec << std::endl; return ret; } - printf("%u bytes written successfully on lba 0x%lx \n", buf_size, test_lba); + std::cout << buf_size << " bytes written successfully on lba 0x" + << std::hex << test_lba << std::dec << std::endl; // zero it out bzero(buf, buf_size); - ret = zns_udevice_read(dev, test_lba, buf, buf_size); - if(ret != 0){ - printf("Error: reading the device failed at address 0x%lx \n", test_lba); + ret = zns_udevice_read(const_cast(&dev), test_lba, + buf, buf_size); + if (ret) { + std::cout << "Error: reading the device failed at address 0x" + << std::hex << test_lba << std::dec << std::endl; return ret; } - printf("%u bytes read successfully on lba 0x%lx \n", buf_size, test_lba); - match_pattern((char*) buf, buf_size); + std::cout << buf_size << " bytes read successfully on lba 0x" + << std::hex << test_lba << std::dec << std::endl; + match_pattern(static_cast(buf), buf_size); return 0; } -static int show_help(){ - printf("Usage: m2 -d device_name -h -r \n"); - printf("-d : /dev/nvmeXpY - in this format with the full path \n"); - printf("-r : resume if the FTL can. \n"); - printf("-l : the number of zones to use for log/metadata (default, minimum = 3). \n"); - printf("-h : shows help, and exits with success. No argument needed\n"); +static int show_help() +{ + std::cout << "Usage: m2 -d device_name -h -r" << std::endl; + std::cout << "-d : /dev/nvmeXpY - in this format with the full path" + << std::endl; + std::cout << "-r : resume if the FTL can." << std::endl; + std::cout << "-l : the number of zones to use for log/metadata \ +(default, minimum = 3)." << std::endl; + std::cout << "-h : shows help, and exits with success. No argument needed" + << std::endl; return 0; } -int main(int argc, char **argv) { - uint64_t start, end; - start = microseconds_since_epoch(); - srand( (unsigned) time(NULL) * getpid()); - int ret, c; - char *zns_device_name = (char*) "nvme0n1", *test_buf = nullptr, *str1 = nullptr; - struct user_zns_device *my_dev = nullptr; - struct zdev_init_params params; - params.force_reset = true; - params.log_zones = 3; - params.gc_wmark = 1; - - uint64_t max_num_lba_to_test = 0; - printf("===================================================================================== \n"); - printf("This is M2. The goal of this milestone is to implement a hybrid log-structure ZTL (Zone Translation Layer) on top of the ZNS (no GC) \n"); - printf("===================================================================================== \n"); +int main(int argc, char *argv[]) +{ + uint64_t start = microseconds_since_epoch(); + srand(static_cast(time(nullptr)) * getpid()); + zdev_init_params params = { + .name = nullptr, + .log_zones = 3, + .gc_wmark = 1, + .force_reset = true + }; + uint64_t max_num_lba_to_test = 0UL; + std::cout << "=============================================================\ +========================" << std::endl; + std::cout << "This is M2. The goal of this milestone is to implement a \ +hybrid log-structure ZTL (Zone Translation Layer) on top of the ZNS (no GC)" + << std::endl; + std::cout << "=============================================================\ +========================" << std::endl; + int c = 0; + char *zns_device_name = const_cast("nvme0n1"); + char *str1 = nullptr; while ((c = getopt(argc, argv, "l:d:hr")) != -1) { switch (c) { case 'h': @@ -128,14 +155,14 @@ int main(int argc, char **argv) { case 'd': str1 = strdupa(optarg); if (!str1) { - printf("Could not parse the arguments for the device %s '\n", optarg); + std::cout << "Could not parse the arguments for the device " + << optarg << std::endl; exit(EXIT_FAILURE); } - for (int j = 1; ; j++) { + for (int j = 1; ; ++j) { char *token = strsep(&str1, "/"); // delimited is "/" - if (token == nullptr) { + if (!token) break; - } // if there was a valid parse, just save it zns_device_name = token; } @@ -143,8 +170,10 @@ int main(int argc, char **argv) { break; case 'l': params.log_zones = atoi(optarg); - if (params.log_zones < 3){ - printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); + if (params.log_zones < 3) { + std::cout << "you need 3 or more zones for the log area \ +(metadata (think: milestone 5) + log). You passed " + << params.log_zones << std::endl; exit(-1); } break; @@ -154,30 +183,46 @@ int main(int argc, char **argv) { } } params.name = strdup(zns_device_name); - printf("parameter settings are: device-name %s log_zones %d gc-watermark %d force-reset %s\n", - params.name,params.log_zones,params.gc_wmark,params.force_reset==1?"yes":"no"); - ret = init_ss_zns_device(¶ms, &my_dev); - assert (ret == 0); - assert(my_dev->lba_size_bytes != 0); - assert(my_dev->capacity_bytes != 0); - max_num_lba_to_test = (params.log_zones - 1) * (my_dev->tparams.zns_zone_capacity / my_dev->tparams.zns_lba_size); - printf("The amount of new pages to be written would be the number of (zones - 1) / lba_size : %lu \n", max_num_lba_to_test); - printf("Why? we assume one zone will eventually be taken for writing metadata, and the rest will be used for the FTL log \n"); - test_buf = static_cast(calloc(1, my_dev->lba_size_bytes)); - int t1 = write_read_lba0(my_dev, test_buf, my_dev->lba_size_bytes); + std::cout << "parameter settings are: device-name " << params.name + << " log_zones " << params.log_zones + << " gc-watermark " << params.gc_wmark + << " force-reset " << (params.force_reset ? "yes" : "no") + << std::endl; + user_zns_device *my_dev = nullptr; + int ret = init_ss_zns_device(¶ms, &my_dev); + assert (!ret); + assert(my_dev->lba_size_bytes); + assert(my_dev->capacity_bytes); + max_num_lba_to_test = (params.log_zones - 1) * + (my_dev->tparams.zns_zone_capacity / + my_dev->tparams.zns_lba_size); + std::cout << "The amount of new pages to be written would be the number of \ +(zones - 1) / lba_size : " << max_num_lba_to_test << std::endl; + std::cout << "Why? we assume one zone will eventually be taken for writing \ +metadata, and the rest will be used for the FTL log" << std::endl; + std::unique_ptr test_buf(new char[my_dev->lba_size_bytes]()); + int t1 = write_read_lba0(*my_dev, test_buf.get(), my_dev->lba_size_bytes); // -1 because we have already written one LBA. - int t2 = write_read_random_lbas(my_dev, test_buf, my_dev->lba_size_bytes, (max_num_lba_to_test - 1)); - free(test_buf); + int t2 = write_read_random_lbas(*my_dev, test_buf.get(), + my_dev->lba_size_bytes, + max_num_lba_to_test - 1UL); ret = deinit_ss_zns_device(my_dev); free(params.name); - end = microseconds_since_epoch(); - printf("====================================================================\n"); - printf("Milestone 2 results \n"); - printf("[stosys-result] Test 1 (write, read, and match on LBA0) : %s \n", (t1 == 0 ? " Passed" : " Failed")); - printf("[stosys-result] Test 2 (%-3lu LBA write, read, match) : %s \n", max_num_lba_to_test, (t2 == 0 ? " Passed" : " Failed")); - printf("====================================================================\n"); - printf("[stosys-stats] The elapsed time is %lu milliseconds \n", ((end - start)/1000)); - printf("====================================================================\n"); + uint64_t end = microseconds_since_epoch(); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "Milestone 2 results" << std::endl; + std::cout << "[stosys-result] Test 1 (write, read, and match on LBA0) : " + << (!t1 ? " Passed" : " Failed") << std::endl; + printf("[stosys-result] Test 2 (%-3lu LBA write, read, match) : %s \n", + max_num_lba_to_test, (!t2 ? " Passed" : " Failed")); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "[stosys-stats] The elapsed time is " + << (end - start) / 1000UL << " milliseconds" << std::endl; + std::cout << "=============================================================\ +=======" << std::endl; return ret; } -} \ No newline at end of file + +} diff --git a/src/m23-ftl/m3.cpp b/src/m23-ftl/m3.cpp index 8b3aed3..3b1e64a 100644 --- a/src/m23-ftl/m3.cpp +++ b/src/m23-ftl/m3.cpp @@ -20,211 +20,246 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include - -#include +#include #include -#include +#include #include -#include -#include #include +#include #include +#include +#include #include - #include "zns_device.h" #include "../common/utils.h" - -static int get_sequence_as_array (uint64_t capacity, uint64_t **arr, bool shuffle) { +static int get_sequence_as_array(const uint64_t &capacity, uint64_t *&arr, + const bool &shuffle) +{ std::vector myvector; + // set some values: + for (uint64_t i = 0; i < capacity; ++i) + myvector.emplace_back(i); std::random_device rd; std::mt19937 g(rd()); - uint64_t *tmp = nullptr; - // set some values: - for (uint64_t i = 0; i < capacity; i++) { - myvector.push_back(i); - } - if(shuffle) { + if(shuffle) std::shuffle(myvector.begin(), myvector.end(), g); - } - tmp = new uint64_t[capacity]; - for(uint64_t i = 0; i < capacity; i++){ - tmp[i] = myvector[i]; - } - *arr = tmp; + arr = new uint64_t[capacity]; + for (uint64_t i = 0; i < capacity; ++i) + arr[i] = myvector[i]; return 0; } extern "C" { -static int _complete_file_io(int fd, uint64_t offset, void *buf, int sz, int is_read){ - int ret; - uint64_t written_so_far = 0; - uintptr_t ptr = (uintptr_t) buf; - while (written_so_far < (uint64_t) sz) { - if(is_read == 1) { - ret = pread(fd, (void *) (ptr + written_so_far), sz - written_so_far, offset + written_so_far); - } else { - ret = pwrite(fd, (void *) (ptr + written_so_far), sz - written_so_far, offset + written_so_far); - } - if(ret < 0){ - printf("file writing failed %d \n", ret); +static int _complete_file_io(const int &fd, const uint64_t &offset, + void *buf, const uint32_t &sz, const bool &is_read) +{ + uint32_t written_so_far = 0; + uintptr_t ptr = reinterpret_cast(buf); + while (written_so_far < sz) { + int ret = 0; + if(is_read) + ret = pread(fd, reinterpret_cast(ptr + written_so_far), + sz - written_so_far, offset + written_so_far); + else + ret = pwrite(fd, reinterpret_cast + (ptr + written_so_far), + sz - written_so_far, offset + written_so_far); + if (ret < 0) { + std::cout << "file writing failed " << ret << std::endl; return ret; } //other add and move along - written_so_far+=ret; + written_so_far += ret; } return 0; } -static int write_complete_file(int fd, uint64_t offset, void *buf, int sz){ - return _complete_file_io(fd, offset, buf, sz, 0); +static int write_complete_file(const int &fd, const uint64_t &offset, + void *buf, const uint32_t &sz) +{ + return _complete_file_io(fd, offset, buf, sz, false); } -static int read_complete_file(int fd, uint64_t offset, void *buf, int sz){ - return _complete_file_io(fd, offset, buf, sz, 1); +static int read_complete_file(const int &fd, const uint64_t &offset, + void *buf, const uint32_t &sz) +{ + return _complete_file_io(fd, offset, buf, sz, true); } /* - * Based on if the addr_list was in sequence or randomized - we will do sequential or random I/O + * Based on if the addr_list was in sequence or randomized - + * we will do sequential or random I/O * -- - * So the idea of this test is to write a parallel file on the side which has the same content, and the + * So the idea of this test is to write a parallel file on the side + * which has the same content, and the * ZNS device content should match with this file. * * addr_list = list of LBAs how they should be accessed * list_size = size of the address list - * max_hammer_io = a random number, for how many times I should randomly do a write on a random LBA + * max_hammer_io = a random number, + * for how many times I should randomly do a write on a random LBA */ -static int wr_full_device_verify(struct user_zns_device *dev, - const uint64_t *addr_list, const uint32_t list_size, - const uint32_t max_hammer_io){ - int ret; +static int wr_full_device_verify(const user_zns_device &dev, + const uint64_t *addr_list, + const uint32_t &list_size, + const uint32_t &max_hammer_io) +{ + std::unique_ptr b1(new char[dev.lba_size_bytes]()); + std::unique_ptr b2(new char[dev.lba_size_bytes]()); + assert(b1); + assert(b2); + write_pattern(b1.get(), dev.lba_size_bytes); const char *tmp_file = "./tmp-output-fulld"; - char *b1 = (char*) calloc(1, dev->lba_size_bytes); - char *b2 = (char*) calloc(1, dev->lba_size_bytes); - assert(b1 != nullptr); - assert(b2 != nullptr); - - write_pattern(b1, dev->lba_size_bytes); int fd = open(tmp_file, O_RDWR|O_CREAT, 0666); if (fd < 0) { - printf("Error: opening of the temp file failed, ret %d ", fd); + std::cout << "Error: opening of the temp file failed, ret " << fd; return -1; } // allocate this side file to the full capacity - ret = posix_fallocate(fd, 0, dev->capacity_bytes); - if(ret){ - printf("Error: fallocate failed, ret %d ", ret); + int ret = posix_fallocate(fd, 0, dev.capacity_bytes); + if (ret) { + std::cout << "Error: fallocate failed, ret " << ret; return -1; } - printf("fallocate OK with %s and size 0x%lx \n", tmp_file, dev->capacity_bytes); + std::cout << "fallocate OK with " << tmp_file << "s and size 0x" + << std::hex << dev.capacity_bytes << std::dec << std::endl; // https://stackoverflow.com/questions/29381843/generate-random-number-in-range-min-max const int min = 0; - const int max = dev->lba_size_bytes; - //initialize the device, otherwise we may have indexes where there is random garbage in both cases - for(uint32_t i = 0; i < list_size; i++){ - uint64_t woffset = (addr_list[i]) * dev->lba_size_bytes; - //random offset within the page and just write some random stuff = this is to make a unique I/O pattern - b1[(min + (rand() % (max - min)))] = (char) rand(); - // now we need to write the buffer in parallel to the zns device, and the file - ret = zns_udevice_write(dev, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ - printf("Error: ZNS device writing failed at offset 0x%lx \n", woffset); + const int max = dev.lba_size_bytes; + //initialize the device, otherwise we may have indexes + // where there is random garbage in both cases + for (uint32_t i = 0; i < list_size; ++i) { + uint64_t woffset = addr_list[i] * dev.lba_size_bytes; + //random offset within the page and just write some random stuff = + // this is to make a unique I/O pattern + b1[(min + rand() % (max - min))] = (char) rand(); + // now we need to write the buffer in parallel to the zns device + // and the file + ret = zns_udevice_write(const_cast(&dev), woffset, + b1.get(), dev.lba_size_bytes); + if (ret) { + std::cout << "Error: ZNS device writing failed at offset 0x" + << std::hex << woffset << std::dec << std::endl; goto done; } - ret = write_complete_file(fd, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ - printf("Error: file writing failed at offset 0x%lx \n", woffset); + ret = write_complete_file(fd, woffset, b1.get(), dev.lba_size_bytes); + if (ret) { + std::cout << "Error: file writing failed at offset 0x" + << std::hex << woffset << std::dec << std::endl; goto done; } } - printf("the ZNS user device has been written (ONCE) completely OK\n"); - if(max_hammer_io > 0){ - printf("Hammering some random LBAs %d times \n", max_hammer_io); - for(uint32_t i = 0; i < max_hammer_io; i++){ + std::cout << "the ZNS user device has been written (ONCE) completely OK" + << std::endl; + if (max_hammer_io > 0) { + std::cout << "Hammering some random LBAs " << max_hammer_io << " times" + << std::endl; + for (uint32_t i = 0; i < max_hammer_io; ++i) { // we should not generate offset which is within the list_size - uint64_t woffset = (addr_list[ 0 + (rand() % (list_size - 0))]) * dev->lba_size_bytes; - //random offset within the page and just write some random stuff, like i - b1[(min + (rand() % (max - min)))] = (char) rand(); - // now we need to write the buffer in parallel to the zns device, and the file - ret = zns_udevice_write(dev, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ - printf("Error: ZNS device writing failed at offset 0x%lx \n", woffset); + uint64_t woffset = addr_list[0 + rand() % (list_size - 0)] * + dev.lba_size_bytes; + //random offset within the page and just write some random stuff, + // like i + b1[(min + rand() % (max - min))] = static_cast(rand()); + // now we need to write the buffer in parallel to the zns device, + // and the file + ret = zns_udevice_write(const_cast(&dev), + woffset, b1.get(), dev.lba_size_bytes); + if (ret) { + std::cout << "Error: ZNS device writing failed at offset 0x" + << std::hex << woffset << std::dec << std::endl; goto done; } - ret = write_complete_file(fd, woffset, b1, dev->lba_size_bytes); - if(ret != 0){ - printf("Error: file writing failed at offset 0x%lx \n", woffset); + ret = write_complete_file(fd, woffset, + b1.get(), dev.lba_size_bytes); + if (ret) { + std::cout << "Error: file writing failed at offset 0x" + << std::hex << woffset << std::dec << std::endl; goto done; } } - printf("Hammering done, OK for %d times \n", max_hammer_io); + std::cout << "Hammering done, OK for " << max_hammer_io << " times" + << std::endl; } - printf("verifying the content of the ZNS device ....\n"); + std::cout << "verifying the content of the ZNS device ...." << std::endl; // reset the buffers - write_pattern(b1, dev->lba_size_bytes); - write_pattern(b2, dev->lba_size_bytes); + write_pattern(b1.get(), dev.lba_size_bytes); + write_pattern(b2.get(), dev.lba_size_bytes); // and now read the whole device and compare the content with the file - for(uint32_t i = 0; i < list_size; i++){ - uint64_t roffset = (addr_list[i]) * dev->lba_size_bytes; - // now we need to write the buffer in parallel to the zns device, and the file - ret = zns_udevice_read(dev, roffset, b1, dev->lba_size_bytes); - assert(ret == 0); - ret = read_complete_file(fd, roffset, b2, dev->lba_size_bytes); - assert(ret == 0); + for (uint32_t i = 0; i < list_size; ++i) { + uint64_t roffset = addr_list[i] * dev.lba_size_bytes; + // now we need to write the buffer in parallel to the zns device, + // and the file + ret = zns_udevice_read(const_cast(&dev), roffset, + b1.get(), dev.lba_size_bytes); + assert(!ret); + ret = read_complete_file(fd, roffset, b2.get(), dev.lba_size_bytes); + assert(!ret); //now both of these should match - for(uint32_t j = 0; j < dev->lba_size_bytes; j++) - if(b1[j] != b2[j]){ - printf("ERROR: buffer mismatch at i %d and j %d , address is 0%lx expecting %x found %x \n", - i, j, roffset, b2[j], b1[j]); + for(uint32_t j = 0; j < dev.lba_size_bytes; ++j) + if (b1[j] != b2[j]) { + std::cout << "ERROR: buffer mismatch at i " << i + << " and j " << j << " , address is 0" + << std::hex << roffset << " expecting " << b2[j] + << " found " << b1[j] << std::dec << std::endl; ret = -EINVAL; goto done; } } - printf("Verification passed on the while device \n"); - + std::cout << "Verification passed on the while device" << std::endl; done: - free(b1); - free(b2); close(fd); ret = remove(tmp_file); - if(ret != 0){ - printf("Error: file deleting failed with ret %d \n", ret); + if (ret) { + std::cout << "Error: file deleting failed with ret " << ret + << std::endl; } return ret; } -static int show_help(){ - printf("Usage: m2 -d device_name -h -r \n"); - printf("-d : /dev/nvmeXpY - in this format with the full path \n"); - printf("-r : resume if the FTL can. \n"); - printf("-l : the number of zones to use for log/metadata (default, minimum = 3). \n"); - printf("-w : watermark threshold, the number of free zones when to trigger the gc (default, minimum = 1). \n"); - printf("-o : overwrite so [int] times (default, 10,000). \n"); - printf("-h : shows help, and exits with success. No argument needed\n"); +static int show_help() +{ + std::cout << "Usage: m2 -d device_name -h -r" << std::endl; + std::cout << "-d : /dev/nvmeXpY - in this format with the full path" + << std::endl; + std::cout << "-r : resume if the FTL can." << std::endl; + std::cout << "-l : the number of zones to use for log/metadata (default, \ +minimum = 3)." << std::endl; + std::cout << "-w : watermark threshold, the number of free zones when to \ +trigger the gc (default, minimum = 1)." << std::endl; + std::cout << "-o : overwrite so [int] times (default, 10,000)." + << std::endl; + std::cout << "-h : shows help, and exits with success. No argument needed" + << std::endl; return 0; } -int main(int argc, char **argv) { - uint64_t start, end; - start = microseconds_since_epoch(); - srand( (unsigned) time(NULL) * getpid()); - int ret, c; - char *zns_device_name = (char*) "nvme0n1", *str1 = nullptr; - struct user_zns_device *my_dev = nullptr; - uint64_t *seq_addresses = nullptr, *random_addresses = nullptr; - uint32_t to_hammer_lba = 10000; - - struct zdev_init_params params; - params.force_reset = true; - params.log_zones = 3; - params.gc_wmark = 1; - - printf("===================================================================================== \n"); - printf("This is M3. The goal of this milestone is to implement a hybrid log-structure ZTL (Zone Translation Layer) on top of the ZNS WITH a GC \n"); - printf(" ^^^^^^^^^ \n"); - printf("===================================================================================== \n"); +int main(int argc, char *argv[]) +{ + uint64_t start = microseconds_since_epoch(); + srand(static_cast(time(NULL)) * getpid()); + std::cout << "=============================================================\ +========================" << std::endl; + std::cout << "This is M3. The goal of this milestone is to implement a \ +hybrid log-structure ZTL (Zone Translation Layer) on top of the ZNS WITH a GC" + << std::endl; + std::cout << " \ + ^^^^^^^^^" + << std::endl; + std::cout << "=============================================================\ +========================" << std::endl; + int c = 0; + char *zns_device_name = const_cast("nvme0n1"); + char *str1 = nullptr; + uint32_t to_hammer_lba = 10000U; + zdev_init_params params = { + .name = nullptr, + .log_zones = 3, + .gc_wmark = 1, + .force_reset = true + }; while ((c = getopt(argc, argv, "o:m:l:d:w:hr")) != -1) { switch (c) { case 'h': @@ -239,14 +274,14 @@ int main(int argc, char **argv) { case 'd': str1 = strdupa(optarg); if (!str1) { - printf("Could not parse the arguments for the device %s '\n", optarg); + std::cout << "Could not parse the arguments for the device " + << optarg << std::endl; exit(EXIT_FAILURE); } for (int j = 1; ; j++) { char *token = strsep(&str1, "/"); // delimited is "/" - if (token == nullptr) { + if (!token) break; - } // if there was a valid parse, just save it zns_device_name = token; } @@ -254,15 +289,18 @@ int main(int argc, char **argv) { break; case 'l': params.log_zones = atoi(optarg); - if (params.log_zones < 3){ - printf("you need 3 or more zones for the log area (metadata (think: milestone 5) + log). You passed %d \n", params.log_zones); + if (params.log_zones < 3) { + std::cout << "you need 3 or more zones for the log area \ +(metadata (think: milestone 5) + log). You passed " << params.log_zones + << std::endl; exit(-1); } break; case 'w': params.gc_wmark = atoi(optarg); - if (params.gc_wmark < 1){ - printf("you need 1 or more free zones for continuous working of the FTL. You passed %d \n", params.gc_wmark); + if (params.gc_wmark < 1) { + std::cout << "you need 1 or more free zones for continuous \ +working of the FTL. You passed " << params.gc_wmark << std::endl; exit(-1); } break; @@ -272,39 +310,58 @@ int main(int argc, char **argv) { } } params.name = strdup(zns_device_name); - printf("parameter settings are: device-name %s log_zones %d gc-watermark %d force-reset %s hammer-time %d \n", - params.name,params.log_zones,params.gc_wmark,params.force_reset==1?"yes":"no", to_hammer_lba); - - ret = init_ss_zns_device(¶ms, &my_dev); - assert (ret == 0); - assert(my_dev->lba_size_bytes != 0); - assert(my_dev->capacity_bytes != 0); + std::cout << "parameter settings are: device-name " << params.name + << " log_zones " << params.log_zones + << " gc-watermark " << params.gc_wmark + << " force-reset " << (params.force_reset ? "yes" : "no") + << " hammer-time " << to_hammer_lba << std::endl; + user_zns_device *my_dev = nullptr; + int ret = init_ss_zns_device(¶ms, &my_dev); + assert (!ret); + assert(my_dev->lba_size_bytes); + assert(my_dev->capacity_bytes); uint32_t max_lba_entries = my_dev->capacity_bytes / my_dev->lba_size_bytes; // get a sequential LBA address list - get_sequence_as_array(max_lba_entries, &seq_addresses, false); + uint64_t *seq_addresses = nullptr; + get_sequence_as_array(max_lba_entries, seq_addresses, false); // get a randomized LBA address list - get_sequence_as_array(max_lba_entries, &random_addresses, true); + uint64_t *random_addresses = nullptr; + get_sequence_as_array(max_lba_entries, random_addresses, true); // now we start the test - printf("device %s is opened and initialized, reported LBA size is %u and capacity %lu , max total LBA %u to_hammer %u \n", - params.name, my_dev->lba_size_bytes, my_dev->capacity_bytes, max_lba_entries, to_hammer_lba); - int t1 = wr_full_device_verify(my_dev, seq_addresses, max_lba_entries, 0); - int t2 = wr_full_device_verify(my_dev, random_addresses, max_lba_entries, 0); - int t3 = wr_full_device_verify(my_dev, random_addresses, max_lba_entries, to_hammer_lba); + std::cout << "device " << params.name + << " is opened and initialized, reported LBA size is " + << my_dev->lba_size_bytes + << " and capacity " << my_dev->capacity_bytes + << " , max total LBA " << max_lba_entries + << " to_hammer " << to_hammer_lba << std::endl; + int t1 = wr_full_device_verify(*my_dev, seq_addresses, max_lba_entries, 0U); + int t2 = wr_full_device_verify(*my_dev, random_addresses, max_lba_entries, + 0U); + int t3 = wr_full_device_verify(*my_dev, random_addresses, max_lba_entries, + to_hammer_lba); // clean up ret = deinit_ss_zns_device(my_dev); // free all delete[] seq_addresses; delete[] random_addresses; - end = microseconds_since_epoch(); - printf("====================================================================\n"); - printf("Milestone 3 results \n"); - printf("[stosys-result] Test 1 sequential write, read, and match (full device) : %s \n", (t1 == 0 ? " Passed" : " Failed")); - printf("[stosys-result] Test 2 randomized write, read, and match (full device) : %s \n", (t2 == 0 ? " Passed" : " Failed")); - printf("[stosys-result] Test 3 randomized write, read, and match (full device, hammer %-6u) : %s \n", to_hammer_lba, (t3 == 0 ? " Passed" : " Failed")); - printf("====================================================================\n"); - printf("[stosys-stats] The elapsed time is %lu milliseconds \n", ((end - start)/1000)); - printf("====================================================================\n"); + uint64_t end = microseconds_since_epoch(); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "Milestone 3 results" << std::endl; + std::cout << "[stosys-result] Test 1 sequential write, read, and match \ +(full device) : " << (!t1 ? " Passed" : " Failed") << std::endl; + std::cout << "[stosys-result] Test 2 randomized write, read, and match \ +(full device) : " << (!t2 ? " Passed" : " Failed") << std::endl; + printf("[stosys-result] Test 3 randomized write, read, and match (full \ +device, hammer %-6u) : %s \n", to_hammer_lba, + (!t3 ? " Passed" : " Failed")); + std::cout << "=============================================================\ +=======" << std::endl; + std::cout << "[stosys-stats] The elapsed time is " + << (end - start) / 1000UL << " milliseconds" << std::endl; + std::cout << "=============================================================\ +=======" << std::endl; return ret; } -} +} diff --git a/src/m23-ftl/zns_device.cpp b/src/m23-ftl/zns_device.cpp index 23b805f..dfdc597 100644 --- a/src/m23-ftl/zns_device.cpp +++ b/src/m23-ftl/zns_device.cpp @@ -20,23 +20,985 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "zns_device.h" #include +#include +#include +#include +#include +#include +#include +#include "zns_device.h" extern "C" { -int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev) { - return -ENOSYS; +enum { + user_read = 0x1, + gc_read = 0x2, + dev_read = 0x4, + sb_read = user_read | gc_read | dev_read, // user or gc is reading + user_write = 0x10, + gc_write = 0x20, + dev_write = 0x40, + sb_write = user_write | gc_write | dev_write // user or gc is writing +}; + +// zone in zns +typedef struct zone_info { + unsigned long long saddr; // starting physical address + uint32_t num_valid_pages; // the number of valid pages (used for log zone) + pthread_mutex_t num_valid_pages_lock; + uint32_t write_ptr; // writer pointer (used for data zone) + pthread_mutex_t write_ptr_lock; + struct zone_info *next; // linked in used_log_zones and free_zones +} zone_info; + +// page map for log zones +typedef struct page_map { + unsigned long long page_addr; // logical page address + unsigned long long physical_addr; // phisical address + zone_info *zone; // the zone this page map in + struct page_map *next; +} page_map; + +// Contains data in log zone (page map) and data in data zone (block map) +typedef struct logical_block { + unsigned long long s_page_addr; // starting logical page address + uint8_t *bitmap; + page_map *page_maps; // page mapping for this logical block (log zone) + page_map *old_page_maps; // temporily store old page maps while gc + page_map *page_maps_tail; + zone_info *data_zone; // block mapping for this logical block (data zone) + pthread_mutex_t lock; +} logical_block; + +typedef struct zns_info { + // information of device + int fd; + unsigned nsid; + uint32_t page_size; + uint32_t zone_num_pages; + uint32_t num_zones; + uint32_t num_log_zones; + uint32_t num_data_zones; + // max data transfer size (read + append limit) + uint32_t mdts; + // zone append size limit (append limit) + uint32_t zasl; + // load balancing varaible + uint32_t free_transfer_size; + uint32_t free_append_size; + uint8_t used_status; + pthread_mutex_t size_limit_lock; + // logical block corresponding to each data zone + logical_block *logical_blocks; + uint32_t bitmap_size; + // current log zone + zone_info *curr_log_zone; + // used log zone + zone_info *used_log_zones; + zone_info *used_log_zones_tail; + uint32_t num_used_log_zones; + // Free zones + zone_info *free_zones; + zone_info *free_zones_tail; + uint32_t num_free_zones; + // Lock for changing used_log_zone and free_zone + pthread_mutex_t zones_lock; + // garbage collection variable + uint32_t gc_wmark; + bool run_gc; + pthread_t gc_thread; +} zns_info; + +static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages); +static inline void decrease_num_valid_page(zone_info *zone, uint32_t num_pages); +static inline void increase_write_ptr(zone_info *zone, uint32_t num_pages); +static inline void decrease_write_ptr(zone_info *zone, uint32_t num_pages); +static inline uint32_t get_block_index(unsigned long long page_addr, + uint32_t zone_num_pages); +static inline uint32_t get_data_offset(unsigned long long page_addr, + uint32_t zone_num_pages); +static bool read_bitmap(const uint8_t bitmap[], + uint32_t offset, uint32_t num_pages); +static void write_bitmap(uint8_t bitmap[], uint32_t offset, uint32_t num_pages); +static void change_log_zone(zns_info *info); +static void update_page_map(zns_info *info, unsigned long long page_addr, + unsigned long long physical_addr, + uint32_t num_pages); +static unsigned request_transfer_size(zns_info *info, uint8_t type); +static void release_transfer_size(zns_info *info, uint8_t type, unsigned size); +static int read_from_zns(zns_info *info, unsigned long long physical_addr, + void *buffer, uint64_t size, uint8_t type); +static int append_to_data_zone(zns_info *info, zone_info *zone, + void *buffer, uint64_t size, uint8_t type); +static int append_to_log_zone(zns_info *info, unsigned long long page_addr, + void *buffer, uint32_t size); +static int read_logical_block(zns_info *info, logical_block *block, + void *buffer); +static void merge(zns_info *info, logical_block *block); +static void *garbage_collection(void *info_ptr); + +int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev) +{ + *my_dev = (user_zns_device *)calloc(1UL, sizeof(user_zns_device)); + (*my_dev)->_private = calloc(1UL, sizeof(zns_info)); + zns_info *info = (zns_info *)(*my_dev)->_private; + // set fd + info->fd = nvme_open(params->name); + if (info->fd < 0) { + printf("Dev %s opened failed %d\n", params->name, info->fd); + return errno; + } + // set nsid + int ret = nvme_get_nsid(info->fd, &info->nsid); + if (ret) { + printf("Error: failed to retrieve the namespace id %d\n", ret); + return ret; + } + // set zns_lba_size or page_size : Its same for now! + nvme_id_ns ns; + ret = nvme_identify_ns(info->fd, info->nsid, &ns); + if (ret) { + printf("Failed to retrieve the nvme identify namespace %d\n", ret); + return ret; + } + info->page_size = 1U << ns.lbaf[ns.flbas & 0xF].ds; + (*my_dev)->tparams.zns_lba_size = info->page_size; + (*my_dev)->lba_size_bytes = info->page_size; + // set zone_num_pages + nvme_zns_id_ns data; + nvme_zns_identify_ns(info->fd, info->nsid, &data); + info->zone_num_pages = data.lbafe[ns.flbas & 0xF].zsze; + // set zns_zone_capacity = zone_num_pages * page_size + (*my_dev)->tparams.zns_zone_capacity = info->zone_num_pages * + info->page_size; + // set num_zones + unsigned zns_report_size = sizeof(nvme_zone_report) + sizeof(nvme_zns_desc); + nvme_zone_report *zns_report = (nvme_zone_report *)calloc(1UL, + zns_report_size); + ret = nvme_zns_mgmt_recv(info->fd, info->nsid, 0ULL, + NVME_ZNS_ZRA_REPORT_ZONES, + NVME_ZNS_ZRAS_REPORT_ALL, false, + zns_report_size, zns_report); + if (ret) { + printf("Failed to report zones, ret %d\n", ret); + return ret; + } + info->num_zones = le64_to_cpu(zns_report->nr_zones); + uint32_t blocks_info_size = le64_to_cpu(zns_report->entries[0].wp) * + info->page_size; + free(zns_report); + (*my_dev)->tparams.zns_num_zones = info->num_zones; + // set num_log_zones + info->num_log_zones = params->log_zones > 0 ? params->log_zones : 0U; + // set num_data_zones = num_zones - num_log_zones + info->num_data_zones = info->num_zones - info->num_log_zones; + // set user capacity bytes = num_data_zones * zone_capacity + (*my_dev)->capacity_bytes = (info->num_data_zones) * + (*my_dev)->tparams.zns_zone_capacity; + // set max_data_transfer_size and free_transfer_size + nvme_id_ctrl id0; + nvme_identify_ctrl(info->fd, &id0); + info->mdts = ((1U << id0.mdts) - 2U) * info->page_size; + info->free_transfer_size = info->mdts; + // set zone_append_size_limit and free_append_size + nvme_zns_id_ctrl id1; + nvme_zns_identify_ctrl(info->fd, &id1); + info->zasl = ((1U << id1.zasl) - 2U) * info->page_size; + info->free_append_size = info->zasl; + // initialise size_limit_lock + pthread_mutex_init(&info->size_limit_lock, NULL); + uint8_t *used_zones_index = (uint8_t *) + calloc(((info->num_zones - 1U) >> 3U) + 1U, + sizeof(uint8_t)); + uint32_t num_used_zones = 0U; + // set log zone page mapped hashmap size to num_data_zones + info->logical_blocks = (logical_block *)calloc(info->num_data_zones, + sizeof(logical_block)); + info->bitmap_size = ((info->zone_num_pages - 1U) >> 3U) + 1U; + if (params->force_reset || !blocks_info_size) { + // reset device + if (params->force_reset) { + ret = nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, true, + NVME_ZNS_ZSA_RESET, 0U, NULL); + if (ret) { + printf("Zone reset failed %d\n", ret); + return ret; + } + } + // set logical block + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + info->logical_blocks[i].s_page_addr = i * info->zone_num_pages; + info->logical_blocks[i].bitmap = (uint8_t *) + calloc(info->bitmap_size, + sizeof(uint8_t)); + pthread_mutex_init(&info->logical_blocks[i].lock, NULL); + } + } else { + uint8_t *blocks_info = (uint8_t *)calloc(blocks_info_size, + sizeof(uint8_t)); + // read blocks information + read_from_zns(info, 0ULL, blocks_info, blocks_info_size, dev_read); + info->used_status &= ~dev_read; + // reset the first zone + nvme_zns_mgmt_send(info->fd, info->nsid, 0ULL, false, + NVME_ZNS_ZSA_RESET, 0U, NULL); + // set logical block + uint8_t *ptr = blocks_info; + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + logical_block *block = &info->logical_blocks[i]; + block->s_page_addr = i * info->zone_num_pages; + block->bitmap = (uint8_t *)calloc(info->bitmap_size, + sizeof(uint8_t)); + memcpy(block->bitmap, ptr, info->bitmap_size); + ptr += info->bitmap_size; + // if logical block has a data zone + if (*ptr) { + ptr += sizeof(uint8_t); + block->data_zone = (zone_info *)calloc(1UL, sizeof(zone_info)); + memcpy(&block->data_zone->saddr, ptr, + sizeof(unsigned long long)); + ptr += sizeof(unsigned long long); + memcpy(&block->data_zone->write_ptr, ptr, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + pthread_mutex_init(&block->data_zone->num_valid_pages_lock, + NULL); + pthread_mutex_init(&block->data_zone->write_ptr_lock, NULL); + write_bitmap(used_zones_index, + block->data_zone->saddr / info->zone_num_pages, + 1U); + ++num_used_zones; + } else { + ptr += sizeof(uint8_t) + sizeof(unsigned long long) + + sizeof(uint32_t); + } + pthread_mutex_init(&block->lock, NULL); + } + free(blocks_info); + } + // set rest zone to free_zones + info->free_zones = (zone_info *)calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones; + pthread_mutex_init(&info->free_zones->num_valid_pages_lock, NULL); + pthread_mutex_init(&info->free_zones->write_ptr_lock, NULL); + for (uint32_t i = 1U; i < info->num_zones; ++i) { + if (!read_bitmap(used_zones_index, i, 1U)) { + info->free_zones_tail->next = (zone_info *) + calloc(1UL, sizeof(zone_info)); + info->free_zones_tail = info->free_zones_tail->next; + info->free_zones_tail->saddr = i * info->zone_num_pages; + pthread_mutex_init(&info->free_zones_tail->num_valid_pages_lock, + NULL); + pthread_mutex_init(&info->free_zones_tail->write_ptr_lock, + NULL); + } + } + free(used_zones_index); + // set num_free_zones + info->num_free_zones = info->num_zones - num_used_zones; + //Set current log zone to 0th zone + info->curr_log_zone = info->free_zones; + info->free_zones = info->free_zones->next; + if (!info->free_zones) + info->free_zones_tail = NULL; + info->curr_log_zone->next = NULL; + --info->num_free_zones; + // init zones_lock + pthread_mutex_init(&info->zones_lock, NULL); + // set gc_wmark + info->gc_wmark = params->gc_wmark > 0 ? params->gc_wmark : 0U; + //Start GC + info->run_gc = true; + pthread_create(&info->gc_thread, NULL, &garbage_collection, info); + return 0; +} + +int zns_udevice_read(user_zns_device *my_dev, uint64_t address, + void *buffer, uint32_t size) +{ + zns_info *info = (zns_info *)my_dev->_private; + unsigned long long page_addr = address / info->page_size; + while (size) { + uint32_t index = get_block_index(page_addr, info->zone_num_pages); + uint32_t offset = get_data_offset(page_addr, info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; + uint32_t curr_block_read_size = (info->zone_num_pages - offset) * + info->page_size; + if (curr_block_read_size > size) + curr_block_read_size = size; + if (!read_bitmap(block->bitmap, offset, + curr_block_read_size / info->page_size)) + return -1; + pthread_mutex_lock(&block->lock); + if (block->data_zone) { + uint32_t curr_read_size = block->data_zone->write_ptr * + info->page_size; + if (curr_read_size > curr_block_read_size) + curr_read_size = curr_block_read_size; + read_from_zns(info, block->data_zone->saddr + offset, + buffer, curr_read_size, user_read); + } + page_map *curr = block->page_maps ? block->page_maps : + block->old_page_maps; + while (curr && curr->page_addr < page_addr) + curr = curr->next; + unsigned long long max_page_addr = page_addr + curr_block_read_size / + info->page_size - 1ULL; + if (curr && curr->page_addr <= max_page_addr) { + page_map *prev = curr; + page_map *start = curr; + curr = curr->next; + while (curr) { + if (curr->page_addr > max_page_addr) + break; + if (curr->page_addr - prev->page_addr != 1ULL || + curr->physical_addr - prev->physical_addr != 1ULL) { + unsigned long long buff_offset = (start->page_addr - + page_addr) * + info->page_size; + uint32_t curr_read_size = (prev->page_addr - + start->page_addr + 1ULL) * + info->page_size; + read_from_zns(info, start->physical_addr, + (uint8_t *)buffer + buff_offset, + curr_read_size, user_read); + start = curr; + } + prev = curr; + curr = curr->next; + } + unsigned long long buff_offset = (start->page_addr - page_addr) * + info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + + 1ULL) * info->page_size; + read_from_zns(info, start->physical_addr, + (uint8_t *)buffer + buff_offset, curr_read_size, + user_read); + } + pthread_mutex_unlock(&block->lock); + page_addr += curr_block_read_size / info->page_size; + buffer = (uint8_t *)buffer + curr_block_read_size; + size -= curr_block_read_size; + } + pthread_mutex_lock(&info->size_limit_lock); + info->used_status &= ~user_read; + pthread_mutex_unlock(&info->size_limit_lock); + return errno; +} + +int zns_udevice_write(user_zns_device *my_dev, uint64_t address, + void *buffer, uint32_t size) +{ + zns_info *info = (zns_info *)my_dev->_private; + while (size) { + uint32_t index = get_block_index(address / info->page_size, + info->zone_num_pages); + uint32_t offset = get_data_offset(address / info->page_size, + info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; + uint32_t curr_append_size = 0U; + pthread_mutex_lock(&block->lock); + // if can write to data zone directly + if (!block->old_page_maps && + block->data_zone && block->data_zone->write_ptr <= offset) { + if (block->data_zone->write_ptr < offset) { + // append null data until arrive offset + uint32_t null_size = (offset - block->data_zone->write_ptr) * + info->page_size; + uint8_t null_buffer[null_size]; + memset(null_buffer, 0, null_size); + int ret = append_to_data_zone(info, block->data_zone, + null_buffer, null_size, + user_write); + if (ret) { + pthread_mutex_unlock(&block->lock); + return ret; + } + } + curr_append_size = (info->zone_num_pages - offset) * + info->page_size; + if (curr_append_size > size) + curr_append_size = size; + int ret = append_to_data_zone(info, block->data_zone, + buffer, curr_append_size, user_write); + if (ret) { + pthread_mutex_unlock(&block->lock); + return ret; + } + pthread_mutex_unlock(&block->lock); + } else { + curr_append_size = size; + if (block->data_zone) { + uint32_t diff_size = (block->data_zone->write_ptr - offset) * + info->page_size; + if (curr_append_size > diff_size) + curr_append_size = diff_size; + } + pthread_mutex_unlock(&block->lock); + int ret = append_to_log_zone(info, address / info->page_size, + buffer, curr_append_size); + if (ret) + return ret; + } + write_bitmap(block->bitmap, offset, curr_append_size / info->page_size); + address += curr_append_size; + buffer = (uint8_t *)buffer + curr_append_size; + size -= curr_append_size; + } + pthread_mutex_lock(&info->size_limit_lock); + info->used_status &= ~user_write; + pthread_mutex_unlock(&info->size_limit_lock); + return errno; +} + +int deinit_ss_zns_device(user_zns_device *my_dev) +{ + zns_info *info = (zns_info *)my_dev->_private; + // Kill gc + if (info->curr_log_zone->write_ptr) { + pthread_mutex_lock(&info->zones_lock); + if (info->used_log_zones) + info->used_log_zones_tail->next = info->curr_log_zone; + else + info->used_log_zones = info->curr_log_zone; + info->used_log_zones_tail = info->curr_log_zone; + info->curr_log_zone = NULL; + ++info->num_used_log_zones; + pthread_mutex_unlock(&info->zones_lock); + } else { + pthread_mutex_lock(&info->zones_lock); + if (info->free_zones) + info->free_zones_tail->next = info->curr_log_zone; + else + info->free_zones = info->curr_log_zone; + info->free_zones_tail = info->curr_log_zone; + info->curr_log_zone = NULL; + ++info->num_free_zones; + pthread_mutex_unlock(&info->zones_lock); + } + info->run_gc = false; + pthread_join(info->gc_thread, NULL); + uint64_t block_info_size = info->bitmap_size + sizeof(uint8_t) + + sizeof(unsigned long long) + sizeof(uint32_t); + uint64_t append_size = ((info->num_data_zones * block_info_size - 1UL) / + info->page_size + 1UL) * info->page_size; + uint8_t *blocks_info = (uint8_t *)calloc(1UL, append_size); + uint8_t *ptr = blocks_info; + logical_block *blocks = info->logical_blocks; + // free hashmap + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + memcpy(ptr, blocks[i].bitmap, info->bitmap_size); + free(blocks[i].bitmap); + ptr += info->bitmap_size; + // Clear all log heads for a logical block + if (blocks[i].data_zone) { + pthread_mutex_destroy(&blocks[i].data_zone->num_valid_pages_lock); + pthread_mutex_destroy(&blocks[i].data_zone->write_ptr_lock); + memset(ptr, 1, sizeof(uint8_t)); + ptr += sizeof(uint8_t); + memcpy(ptr, &blocks[i].data_zone->saddr, + sizeof(unsigned long long)); + ptr += sizeof(unsigned long long); + memcpy(ptr, &blocks[i].data_zone->write_ptr, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + free(blocks[i].data_zone); + } else { + ptr += sizeof(uint8_t) + sizeof(unsigned long long) + + sizeof(uint32_t); + } + pthread_mutex_destroy(&blocks[i].lock); + } + free(blocks); + append_to_data_zone(info, info->free_zones, + blocks_info, append_size, dev_write); + free(blocks_info); + info->used_status &= ~dev_write; + while (info->free_zones) { + zone_info *tmp = info->free_zones; + info->free_zones = info->free_zones->next; + pthread_mutex_destroy(&tmp->num_valid_pages_lock); + pthread_mutex_destroy(&tmp->write_ptr_lock); + free(tmp); + } + pthread_mutex_destroy(&info->size_limit_lock); + pthread_mutex_destroy(&info->zones_lock); + free(info); + free(my_dev); + return 0; +} + +static inline void increase_num_valid_page(zone_info *zone, uint32_t num_pages) +{ + pthread_mutex_lock(&zone->num_valid_pages_lock); + zone->num_valid_pages += num_pages; + pthread_mutex_unlock(&zone->num_valid_pages_lock); +} + +static inline void decrease_num_valid_page(zone_info *zone, uint32_t num_pages) +{ + pthread_mutex_lock(&zone->num_valid_pages_lock); + zone->num_valid_pages -= num_pages; + pthread_mutex_unlock(&zone->num_valid_pages_lock); +} + +static inline void increase_write_ptr(zone_info *zone, uint32_t num_pages) +{ + pthread_mutex_lock(&zone->write_ptr_lock); + zone->write_ptr += num_pages; + pthread_mutex_unlock(&zone->write_ptr_lock); +} + +static inline void decrease_write_ptr(zone_info *zone, uint32_t num_pages) +{ + pthread_mutex_lock(&zone->write_ptr_lock); + zone->write_ptr -= num_pages; + pthread_mutex_unlock(&zone->write_ptr_lock); +} + +static inline uint32_t get_block_index(unsigned long long page_addr, + uint32_t zone_num_pages) +{ + return page_addr / zone_num_pages; +} + +static inline uint32_t get_data_offset(unsigned long long page_addr, + uint32_t zone_num_pages) +{ + return page_addr % zone_num_pages; +} + +static bool read_bitmap(const uint8_t bitmap[], + uint32_t offset, uint32_t num_pages) +{ + while (num_pages--) { + if (!(bitmap[offset >> 3U] & 1U << (offset & 0x7U))) + return false; + ++offset; + } + return true; +} + +static void write_bitmap(uint8_t bitmap[], uint32_t offset, uint32_t num_pages) +{ + while (num_pages--) { + bitmap[offset >> 3U] |= 1U << (offset & 0x7U); + ++offset; + } +} + +static void change_log_zone(zns_info *info) +{ + pthread_mutex_lock(&info->zones_lock); + if (info->used_log_zones) + info->used_log_zones_tail->next = info->curr_log_zone; + else + info->used_log_zones = info->curr_log_zone; + info->used_log_zones_tail = info->curr_log_zone; + info->curr_log_zone = NULL; + ++info->num_used_log_zones; + pthread_mutex_unlock(&info->zones_lock); + while (info->num_used_log_zones == info->num_log_zones); + //Dequeue from free_zone to curr_log_zone; + while (!info->curr_log_zone) { + pthread_mutex_lock(&info->zones_lock); + if (info->num_free_zones) { + info->curr_log_zone = info->free_zones; + info->free_zones = info->free_zones->next; + info->curr_log_zone->next = NULL; + --info->num_free_zones; + } + pthread_mutex_unlock(&info->zones_lock); + } } -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ - return -ENOSYS; +static void update_page_map(zns_info *info, unsigned long long page_addr, + unsigned long long physical_addr, + uint32_t num_pages) +{ + for (; num_pages--; ++page_addr, ++physical_addr) { + uint32_t index = get_block_index(page_addr, info->zone_num_pages); + logical_block *block = &info->logical_blocks[index]; + //Lock for updating page map + pthread_mutex_lock(&block->lock); + if (!block->page_maps) { + block->page_maps = (page_map *)calloc(1, sizeof(page_map)); + block->page_maps_tail = block->page_maps; + block->page_maps->page_addr = page_addr; + block->page_maps->physical_addr = physical_addr; + block->page_maps->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + continue; + } + if (block->page_maps->page_addr == page_addr) { + //Update log counter + decrease_num_valid_page(block->page_maps->zone, 1U); + block->page_maps->physical_addr = physical_addr; + block->page_maps->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + continue; + } + if (block->page_maps->page_addr > page_addr) { + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + tmp->next = block->page_maps; + block->page_maps = tmp; + tmp->page_addr = page_addr; + tmp->physical_addr = physical_addr; + tmp->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + continue; + } + for (page_map *ptr = block->page_maps; ; ptr = ptr->next) { + if (!ptr->next) { + ptr->next = (page_map *)calloc(1, sizeof(page_map)); + block->page_maps_tail = ptr->next; + ptr->next->page_addr = page_addr; + ptr->next->physical_addr = physical_addr; + ptr->next->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + break; + } else if (ptr->next->page_addr == page_addr) { + //Update log counter + decrease_num_valid_page(ptr->next->zone, 1U); + ptr->next->physical_addr = physical_addr; + ptr->next->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + break; + } else if (ptr->next->page_addr > page_addr) { + page_map *tmp = (page_map *)calloc(1, sizeof(page_map)); + tmp->next = ptr->next; + ptr->next = tmp; + tmp->page_addr = page_addr; + tmp->physical_addr = physical_addr; + tmp->zone = info->curr_log_zone; + pthread_mutex_unlock(&block->lock); + break; + } + } + } } -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size){ - return -ENOSYS; + +static unsigned request_transfer_size(zns_info *info, uint8_t type) +{ + if (type & sb_read) { + uint32_t max_transfer_size = info->mdts; + for (;;) { + if (info->free_transfer_size) { + pthread_mutex_lock(&info->size_limit_lock); + break; + } + } + if (info->used_status & sb_write) + max_transfer_size -= info->zasl; + if (info->used_status & (sb_read & ~type)) + max_transfer_size >>= 1; + if (info->free_transfer_size < max_transfer_size) + max_transfer_size = info->free_transfer_size; + info->free_transfer_size -= max_transfer_size; + info->used_status |= type; + pthread_mutex_unlock(&info->size_limit_lock); + return max_transfer_size; + } else { + uint32_t max_transfer_size = info->zasl; + for (;;) { + if (info->free_transfer_size && info->free_append_size) { + pthread_mutex_lock(&info->size_limit_lock); + break; + } + } + if (info->used_status & sb_write) + max_transfer_size >>= 1; + if (info->free_append_size < max_transfer_size) + max_transfer_size = info->free_append_size; + if (info->free_transfer_size < max_transfer_size) + max_transfer_size = info->free_transfer_size; + info->free_transfer_size -= max_transfer_size; + info->free_append_size -= max_transfer_size; + info->used_status |= type; + pthread_mutex_unlock(&info->size_limit_lock); + return max_transfer_size; + } +} + +static void release_transfer_size(zns_info *info, uint8_t type, unsigned size) +{ + pthread_mutex_lock(&info->size_limit_lock); + if (type & sb_write) + info->free_append_size += size; + info->free_transfer_size += size; + pthread_mutex_unlock(&info->size_limit_lock); } -int deinit_ss_zns_device(struct user_zns_device *my_dev){ - return -ENOSYS; +static int read_from_zns(zns_info *info, unsigned long long physical_addr, + void *buffer, uint64_t size, uint8_t type) +{ + while (size) { + unsigned curr_transfer_size = request_transfer_size(info, type); + unsigned curr_read_size = size < curr_transfer_size ? + size : curr_transfer_size; + unsigned short num_pages = curr_read_size / info->page_size; + nvme_read(info->fd, info->nsid, physical_addr, num_pages - 1, + 0U, 0U, 0U, 0U, 0U, curr_read_size, buffer, 0U, NULL); + release_transfer_size(info, type, curr_transfer_size); + physical_addr += num_pages; + buffer = (uint8_t *)buffer + curr_read_size; + size -= curr_read_size; + } + return errno; } + +static int append_to_data_zone(zns_info *info, zone_info *zone, + void *buffer, uint64_t size, uint8_t type) +{ + increase_write_ptr(zone, size / info->page_size); + while (size) { + unsigned long long physical_addr = 0ULL; + unsigned curr_transfer_size = request_transfer_size(info, type); + unsigned curr_append_size = curr_transfer_size; + if (curr_append_size > size) + curr_append_size = size; + unsigned short num_curr_append_pages = curr_append_size / + info->page_size; + nvme_zns_append(info->fd, info->nsid, zone->saddr, + num_curr_append_pages - 1, 0U, 0U, 0U, 0U, + curr_append_size, buffer, 0U, NULL, &physical_addr); + release_transfer_size(info, type, curr_transfer_size); + if (errno) + return errno; + buffer = (uint8_t *)buffer + curr_append_size; + size -= curr_append_size; + } + return errno; +} + +static int append_to_log_zone(zns_info *info, unsigned long long page_addr, + void *buffer, uint32_t size) +{ + while (size) { + bool change = true; + unsigned curr_transfer_size = request_transfer_size(info, user_write); + unsigned curr_append_size = (info->zone_num_pages - + info->curr_log_zone->write_ptr) * + info->page_size; + if (curr_append_size > curr_transfer_size) { + curr_append_size = curr_transfer_size; + change = false; + } + if (curr_append_size > size) { + curr_append_size = size; + change = false; + } + unsigned long long physical_addr = 0ULL; + unsigned short num_curr_append_pages = curr_append_size / + info->page_size; + nvme_zns_append(info->fd, info->nsid, info->curr_log_zone->saddr, + num_curr_append_pages - 1, 0U, 0U, 0U, 0U, + curr_append_size, buffer, 0U, NULL, &physical_addr); + release_transfer_size(info, user_write, curr_transfer_size); + if (errno) + return errno; + increase_num_valid_page(info->curr_log_zone, num_curr_append_pages); + increase_write_ptr(info->curr_log_zone, num_curr_append_pages); + update_page_map(info, page_addr, physical_addr, num_curr_append_pages); + if (change) + change_log_zone(info); + page_addr += num_curr_append_pages; + physical_addr += num_curr_append_pages; + buffer = (uint8_t *)buffer + curr_append_size; + size -= curr_append_size; + } + return errno; +} + +static int read_logical_block(zns_info *info, logical_block *block, + void *buffer) +{ + //FIXME: Proision for contiguos block read, but not written + if (block->data_zone) + read_from_zns(info, block->data_zone->saddr, + buffer, block->data_zone->write_ptr * info->page_size, + gc_read); + page_map *prev = block->old_page_maps; + page_map *start = block->old_page_maps; + page_map *curr = block->old_page_maps->next; + decrease_num_valid_page(prev->zone, 1U); + while (curr) { + if (curr->page_addr - prev->page_addr != 1ULL || + curr->physical_addr - prev->physical_addr != 1ULL) { + unsigned long long buff_offset = (start->page_addr - + block->s_page_addr) * + info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + + 1ULL) * info->page_size; + read_from_zns(info, start->physical_addr, + (uint8_t *)buffer + buff_offset, curr_read_size, + gc_read); + start = curr; + } + decrease_num_valid_page(curr->zone, 1U); + prev = curr; + curr = curr->next; + } + unsigned long long buff_offset = (start->page_addr - block->s_page_addr) * + info->page_size; + uint32_t curr_read_size = (prev->page_addr - start->page_addr + 1ULL) * + info->page_size; + read_from_zns(info, start->physical_addr, + (uint8_t *)buffer + buff_offset, curr_read_size, gc_read); + return errno; +} + +static void merge(zns_info *info, logical_block *block) +{ + pthread_mutex_lock(&block->lock); + block->old_page_maps = block->page_maps; + block->page_maps = NULL; + pthread_mutex_unlock(&block->lock); + uint32_t size = get_data_offset(block->page_maps_tail->page_addr, + info->zone_num_pages) + 1U; + if (block->data_zone && block->data_zone->write_ptr > size) + size = block->data_zone->write_ptr; + size *= info->page_size; + uint8_t buffer[size]; + read_logical_block(info, block, buffer); + pthread_mutex_lock(&info->size_limit_lock); + info->used_status &= ~gc_read; + pthread_mutex_unlock(&info->size_limit_lock); + pthread_mutex_lock(&block->lock); + // Append old data zone to free zones list + if (block->data_zone) { + decrease_write_ptr(block->data_zone, block->data_zone->write_ptr); + nvme_zns_mgmt_send(info->fd, info->nsid, block->data_zone->saddr, + false, NVME_ZNS_ZSA_RESET, 0U, NULL); + pthread_mutex_lock(&info->zones_lock); + if (info->free_zones) + info->free_zones_tail->next = block->data_zone; + else + info->free_zones = block->data_zone; + info->free_zones_tail = block->data_zone; + ++info->num_free_zones; + pthread_mutex_unlock(&info->zones_lock); + } + pthread_mutex_lock(&info->zones_lock); + // Get free zone and nullify the next + block->data_zone = info->free_zones; + info->free_zones = info->free_zones->next; + if (!info->free_zones) + info->free_zones_tail = NULL; + block->data_zone->next = NULL; + --info->num_free_zones; + pthread_mutex_unlock(&info->zones_lock); + append_to_data_zone(info, block->data_zone, buffer, size, gc_write); + pthread_mutex_lock(&info->size_limit_lock); + info->used_status &= ~gc_write; + pthread_mutex_unlock(&info->size_limit_lock); + while (block->old_page_maps) { + page_map *tmp = block->old_page_maps; + block->old_page_maps = block->old_page_maps->next; + free(tmp); + } + pthread_mutex_unlock(&block->lock); +} + +static void *garbage_collection(void *info_ptr) +{ + zns_info *info = (zns_info *)info_ptr; + uint32_t index = 0U; + for (;;) { + while (info->run_gc && + info->num_log_zones - info->num_used_log_zones > info->gc_wmark); + if (!info->num_used_log_zones) + break; + logical_block *block = &info->logical_blocks[index]; + while(!block->page_maps) { + index = (index + 1U) % info->num_data_zones; + block = &info->logical_blocks[index]; + } + // Merge logical block to data zone + merge(info, block); + // Check used log zone valid counter + // if zero reset and add to free zone list + // Remove zone from used_log_zones + // if valid_page is zero and add that zone to free zones list + zone_info *prev = NULL; + zone_info *curr = info->used_log_zones; + while (curr) { + if (!curr->num_valid_pages) { + // reset + decrease_write_ptr(curr, curr->write_ptr); + nvme_zns_mgmt_send(info->fd, info->nsid, curr->saddr, + false, NVME_ZNS_ZSA_RESET, 0U, NULL); + pthread_mutex_lock(&info->zones_lock); + // Remove from used_log_zones and add to free_zones + if (info->free_zones) + info->free_zones_tail->next = curr; + else + info->free_zones = curr; + info->free_zones_tail = curr; + ++info->num_free_zones; + curr = curr->next; + info->free_zones_tail->next = NULL; + if (prev) { + prev->next = curr; + if (info->free_zones_tail == info->used_log_zones_tail) + info->used_log_zones_tail = prev; + } else { + info->used_log_zones = curr; + if (!info->used_log_zones) + info->used_log_zones_tail = NULL; + } + --info->num_used_log_zones; + pthread_mutex_unlock(&info->zones_lock); + } else { + prev = curr; + curr = curr->next; + } + } + index = (index + 1U) % info->num_data_zones; + } + // check the first zone is free zone or not + if (!info->free_zones->saddr) + return NULL; + for (zone_info *prev = info->free_zones, *zone = prev->next; + zone; prev = zone, zone = zone->next) { + if (!zone->saddr) { + prev->next = zone->next; + zone->next = info->free_zones; + info->free_zones = zone; + return NULL; + } + } + // find which logical block has the first zone + logical_block *block = NULL; + for (uint32_t i = 0U; i < info->num_data_zones; ++i) { + if (info->logical_blocks[i].data_zone && + !info->logical_blocks[i].data_zone->saddr) { + block = &info->logical_blocks[i]; + break; + } + } + // clean the first zone + uint64_t size = block->data_zone->write_ptr * info->page_size; + uint8_t *buffer = (uint8_t *)calloc(size, sizeof(uint8_t)); + // read data from the first zone + read_from_zns(info, block->data_zone->saddr, buffer, size, gc_read); + info->used_status &= ~gc_read; + zone_info *old_data_zone = block->data_zone; + // reset the first zone + old_data_zone->write_ptr = 0U; + nvme_zns_mgmt_send(info->fd, info->nsid, old_data_zone->saddr, false, + NVME_ZNS_ZSA_RESET, 0U, NULL); + // swap info->free_zones and block->data_zone + block->data_zone = info->free_zones; + old_data_zone->next = info->free_zones->next; + if (info->num_free_zones == 1U) + info->free_zones_tail = old_data_zone; + info->free_zones = old_data_zone; + block->data_zone->next = NULL; + // append data to new data zone + append_to_data_zone(info, block->data_zone, buffer, size, gc_write); + free(buffer); + info->used_status &= ~gc_write; + return NULL; +} + } diff --git a/src/m23-ftl/zns_device.h b/src/m23-ftl/zns_device.h index 1d0eb50..aa8bfdc 100644 --- a/src/m23-ftl/zns_device.h +++ b/src/m23-ftl/zns_device.h @@ -25,7 +25,8 @@ SOFTWARE. #include -extern "C"{ +extern "C" { + //https://github.com/mplulu/google-breakpad/issues/481 - taken from here #define typeof __typeof__ #define container_of(ptr, type, member) ({ \ @@ -33,35 +34,36 @@ extern "C"{ (type *)( (char *)__mptr - offsetof(type,member) );}) /* after a successful initialization of a device, you must set these ZNS device parameters for testing */ -struct zns_device_testing_params { +typedef struct zns_device_testing_params { // LBA size at the ZNS device uint32_t zns_lba_size; // Zone size at the ZNS device uint32_t zns_zone_capacity; // total number of zones uint32_t zns_num_zones; -}; +} zns_device_testing_params; -struct user_zns_device { +typedef struct user_zns_device { /* these are user visible properties */ uint32_t lba_size_bytes; // the user device LBA size - should be some multiple of the ZNS device page size, you can keep it as it is uint64_t capacity_bytes; // total user device capacity - struct zns_device_testing_params tparams; // report back some ZNS device-level properties to the user (for testing only, this is not needed for functions + zns_device_testing_params tparams; // report back some ZNS device-level properties to the user (for testing only, this is not needed for functions // your own private data - void *_private; -}; + void *_private; //Points to zns_info +} user_zns_device; -struct zdev_init_params{ +typedef struct zdev_init_params { char *name; int log_zones; int gc_wmark; bool force_reset; -}; +} zdev_init_params; + +int init_ss_zns_device(zdev_init_params *params, user_zns_device **my_dev); +int zns_udevice_read(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); +int zns_udevice_write(user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); +int deinit_ss_zns_device(user_zns_device *my_dev); -int init_ss_zns_device(struct zdev_init_params *params, struct user_zns_device **my_dev); -int zns_udevice_read(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); -int zns_udevice_write(struct user_zns_device *my_dev, uint64_t address, void *buffer, uint32_t size); -int deinit_ss_zns_device(struct user_zns_device *my_dev); }; #endif //STOSYS_PROJECT_ZNS_DEVICE_H diff --git a/src/m45-rocksdb/DummyFSForward.cc b/src/m45-rocksdb/DummyFSForward.cc index a858e3c..0346ca9 100644 --- a/src/m45-rocksdb/DummyFSForward.cc +++ b/src/m45-rocksdb/DummyFSForward.cc @@ -42,6 +42,7 @@ namespace ROCKSDB_NAMESPACE { this->_name = this->_name.append(this->_private_fs->Name()); this->_ss.str(""); this->_ss.clear(); + } const char *DummyFSForward::Name() const { @@ -52,10 +53,23 @@ namespace ROCKSDB_NAMESPACE { this->_ss.str(""); this->_ss.clear(); this->_ss << " call_seq: " << this->_seq_id++ << " tid: " << std::hash{}(std::this_thread::get_id()) << " "; - return this->_ss.str(); + return this->_ss.str(); } +/* + MYFS_SequentialFile::MYFS_SequentialFile(const std::string& fname, int fd){} + MYFS_SequentialFile::~MYFS_SequentialFile(){} + + IOStatus MYFS_SequentialFile::Read(size_t n, const IOOptions& opts, Slice* result, + char* scratch, IODebugContext* dbg){std::cout<<"MYSEQ Read"< *result, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->NewSequentialFile(fname, file_opts, result, dbg); + result->reset(); + //int fid; //open(""); + //result->reset(new MYFS_SequentialFile(fname, 100)); + //IOStatus stat; + //return stat; + std::cout << "New seq file : "<_private_fs->NewSequentialFile(fname, file_opts, result, dbg); } // Create a brand new random access read-only file with the @@ -82,7 +102,8 @@ namespace ROCKSDB_NAMESPACE { std::unique_ptr *result, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->NewRandomAccessFile(fname, file_opts, result, dbg); + std::cout << "Random access file : "<_private_fs->NewRandomAccessFile(fname, file_opts, result, dbg); } // Create an object that writes to a new file with the specified @@ -97,7 +118,8 @@ namespace ROCKSDB_NAMESPACE { std::unique_ptr *result, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->NewWritableFile(fname, file_opts, result, dbg); + std::cout << "Writable file : "<_private_fs->NewWritableFile(fname, file_opts, result, dbg); } // Create an object that writes to a new file with the specified @@ -159,7 +181,8 @@ namespace ROCKSDB_NAMESPACE { std::unique_ptr *result, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->NewDirectory(name, io_opts, result, dbg); + std::cout << "XXXXXXXXXXXXXXXXXXXXXXXXXXXX : "<_private_fs->NewDirectory(name, io_opts, result, dbg); } // Returns OK if the named file exists. @@ -171,7 +194,10 @@ namespace ROCKSDB_NAMESPACE { const IOOptions &options, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->FileExists(fname, options, dbg); + std::cout << "Check if file exist : " << fname; + IOStatus stat = this->_private_fs->FileExists(fname, options, dbg); + std::cout << std::endl; + return stat; } // Store in *result the names of the children of the specified directory. @@ -234,7 +260,8 @@ namespace ROCKSDB_NAMESPACE { const IOOptions &options, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->CreateDirIfMissing(dirname, options, dbg); + std::cout << "Create dir path : "<_private_fs->CreateDirIfMissing(dirname, options, dbg); } // Delete the specified directory. @@ -249,7 +276,8 @@ namespace ROCKSDB_NAMESPACE { const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->GetFileSize(fname, options, file_size, dbg); + std::cout << "File size : "<< fname << std::endl; + return this->_private_fs->GetFileSize(fname, options, file_size, dbg); } // Store the last modification time of fname in *file_mtime. @@ -266,7 +294,8 @@ namespace ROCKSDB_NAMESPACE { const IOOptions &options, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->RenameFile(src, target, options, dbg); + std::cout << "Rename file : "<_private_fs->RenameFile(src, target, options, dbg); } // Hard Link file src to target. @@ -310,7 +339,10 @@ namespace ROCKSDB_NAMESPACE { IOStatus DummyFSForward::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->LockFile(fname, options, lock, dbg); + std::cout << "Lock the file : "<_private_fs->LockFile(fname, options, lock, dbg); + IOStatus stat; + return stat; } // Release the lock acquired by a previous successful call to LockFile. @@ -319,7 +351,10 @@ namespace ROCKSDB_NAMESPACE { IOStatus DummyFSForward::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->UnlockFile(lock, options, dbg); + //std::cout << "unlock the file : "<_private_fs->UnlockFile(lock, options, dbg); + IOStatus stat; + return stat; } // *path is set to a temporary directory that can be used for testing. It may @@ -348,7 +383,11 @@ namespace ROCKSDB_NAMESPACE { std::string *output_path, IODebugContext *dbg) { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; - return this->_private_fs->GetAbsolutePath(db_path,options, output_path, dbg); + IOStatus stat; + //stat = this->_private_fs->GetAbsolutePath(db_path,options, output_path, dbg); + *output_path = db_path.substr(0,db_path.size()-1); + std::cout << "Abs Path : " << db_path <<" "<<*output_path << std::endl; + return stat; } // Get the amount of free disk space @@ -366,4 +405,4 @@ namespace ROCKSDB_NAMESPACE { std::cout << get_seq_id() << " func: " << __FUNCTION__ << " line: " << __LINE__ << " " << std::endl; return this->_private_fs->IsDirectory(path, options, is_dir, dgb); } -} \ No newline at end of file +} diff --git a/src/m45-rocksdb/DummyFSForward.h b/src/m45-rocksdb/DummyFSForward.h index 7f4282e..7f1b30e 100644 --- a/src/m45-rocksdb/DummyFSForward.h +++ b/src/m45-rocksdb/DummyFSForward.h @@ -27,8 +27,50 @@ SOFTWARE. #include "rocksdb/io_status.h" #include "rocksdb/file_system.h" #include "rocksdb/status.h" +#include +/* +class MYFS_File : class FSSequentialFile { + public: + MYFS_File(); + ~MYFS_File(); + IOStatus Read(); + IOStatus Write(); + IOStatus Close(); + private: + int fd; + int inode; + char *file; +}; +*/ namespace ROCKSDB_NAMESPACE { + /* + + class MYFS_SequentialFile : public FSSequentialFile{ + private: + std::string filename_; + FILE* file_; + int fd_; + bool use_direct_io_; + size_t logical_sector_size_; + + public: + MYFS_SequentialFile(const std::string& fname, int fd); + virtual ~MYFS_SequentialFile(); + + virtual IOStatus Read(size_t n, const IOOptions& opts, Slice* result, + char* scratch, IODebugContext* dbg) override; + virtual IOStatus PositionedRead(uint64_t offset, size_t n, + const IOOptions& opts, Slice* result, + char* scratch, IODebugContext* dbg) override; + virtual IOStatus Skip(uint64_t n) override; + //virtual IOStatus InvalidateCache(size_t offset, size_t length) override; + virtual bool use_direct_io() const override { return use_direct_io_; } + virtual size_t GetRequiredBufferAlignment() const override { + return logical_sector_size_; + } + }; +*/ class DummyFSForward : public FileSystem { public: // No copying allowed @@ -123,7 +165,8 @@ namespace ROCKSDB_NAMESPACE { IOStatus ReuseWritableFile(const std::string &fname, const std::string &old_fname, const FileOptions &file_opts, std::unique_ptr *result, IODebugContext *dbg); private: - std::string get_seq_id(); + struct user_zns_device *_zns_dev; + std::string get_seq_id(); std::shared_ptr _private_fs; std::atomic _seq_id{}; std::string _name; diff --git a/src/m45-rocksdb/S2FileSystem.cc b/src/m45-rocksdb/S2FileSystem.cc index 623aa5d..a6e64dc 100644 --- a/src/m45-rocksdb/S2FileSystem.cc +++ b/src/m45-rocksdb/S2FileSystem.cc @@ -28,8 +28,462 @@ SOFTWARE. #include #include -namespace ROCKSDB_NAMESPACE { - S2FileSystem::S2FileSystem(std::string uri_db_path, bool debug) { +namespace ROCKSDB_NAMESPACE +{ + int LookupMap_HashFunction(std::string id) + { + unsigned hashindex; + char *ptr = const_cast(id.c_str()); + for (hashindex = 0; *ptr != '\0'; ptr++) + hashindex = *ptr + STRINGENCODE * hashindex; + return hashindex % LOOKUP_MAP_SIZE; + } + + int LookupMap_Insert(MYFS *FSObj, std::string id, Inode *ptr) + { + int index = LookupMap_HashFunction(id); + + mapEntries *map = (mapEntries *)calloc(1, sizeof(mapEntries)); + strcpy(map->id,id.c_str()); + map->ptr = ptr; + map->chain = NULL; + + if (FSObj->LookupCache[index] == NULL) + FSObj->LookupCache[index] = map; + else + { + struct mapEntries *head; + head = FSObj->LookupCache[index]; + while (head->chain != NULL) + head = head->chain; + head->chain = map; + } + + return 0; + } + + int LookupMap_Delete(MYFS *FSObj, std::string id) + { + int index = LookupMap_HashFunction(id); + struct mapEntries *head, *tmp = NULL; + head = FSObj->LookupCache[index]; + + while (head != NULL) + { + if (!strcmp(head->id,id.c_str())) + { + if (tmp == NULL) + FSObj->LookupCache[index] = head->chain; + else + tmp->chain = head->chain; + free(head); + break; + } + tmp = head; + head = head->chain; + } + + return 0; + } + + int LookupMap_Lookup(MYFS *FSObj, std::string id, Inode **ptr) + { + int index = LookupMap_HashFunction(id); + struct mapEntries *head; + head = FSObj->LookupCache[index]; + + while (head != NULL) + { + if (!strcmp(head->id,id.c_str())) + break; + head = head->chain; + } + + if (head == NULL) + return -1; + + *ptr = head->ptr; + return 0; + } + + int Load_From_NVM(MYFS *FSObj, uint64_t addr, void *buffer, uint64_t size) + { + // Check the size if quantization of LBA + int err = zns_udevice_read(FSObj->zns, addr, buffer, size); + std::cout<<"Load from NVM : "<zns, addr, buffer, size); + return 0; + } + + uint32_t get_FreeInode(MYFS *FSObj) + { + uint32_t ptr = (FSObj->InodePtr + 1) % MAX_INODE_COUNT; + while (ptr != FSObj->InodePtr) + { + if (!FSObj->InodeBitMap[ptr]) + { + FSObj->InodePtr = ptr; + FSObj->InodeBitMap[ptr] = true; + return ptr; + } + ptr = (ptr + 1) % MAX_INODE_COUNT; + } + return 0; + } + + uint64_t get_FreeDataBlock(MYFS *FSObj) + { + uint64_t ptr = (FSObj->DataBlockPtr + 1) % FSObj->DataBlockCount; + while (ptr != FSObj->DataBlockPtr) + { + if (!FSObj->DataBitMap[ptr]) + { + FSObj->DataBlockPtr = ptr; + FSObj->DataBitMap[ptr] = true; + return (ptr + DATA_BLOCKS_OFFSET) * FSObj->LogicalBlockSize; + } + ptr = (ptr + 1) % FSObj->DataBlockCount; + } + return 0; + } + + /* + void free_DataBlock(MYFS *FSObj, uint64_t addr) + { + int index = (addr / FSObj->LogicalBlockSize) - DATA_BLOCKS_OFFSET; + FSObj->DataBitMap[index] = false; + } + */ + + // Trim till /../path in /../path/name + void Get_ParentPath(std::string path, std::string &parent) + { + int index; + for (int i = path.size() - 1; i >= 0; i--) + { + if (path[i] == '/') + { + index = i; + break; + } + } + // Trim if additional slash is present + if (path[index - 1] == '/') + index--; + + parent = path.substr(0, index); + } + + // Trim /../path/name to name + void Get_EntityName(std::string path, std::string &entityName) + { + int index; + for (int i = path.size() - 1; i >= 0; i--) + { + if (path[i] == '/') + { + index = i; + break; + } + } + entityName = path.substr(index + 1, path.size()); + } + + void Clean_Path(std::string path, std::string &newPath) + { + std::string entity; + Get_EntityName(path, entity); + Get_ParentPath(path, newPath); + newPath.append("/"); + newPath.append(entity); + } + + // Load_Childrent function reads DIR's data, either store children names in vector or return inode of asked child depending on bool + // return value will be 0 if asked child is not present + uint32_t Load_Children(MYFS *FSObj, Inode *ptr, std::string entityName, std::vector *children, bool loadChildren, std::string targetName = "") + { + + // Check no of children and load it + uint64_t children_count = ptr->FileSize; + + MYFS_Dir *dir_ptr = (MYFS_Dir *)calloc(1, sizeof(MYFS_Dir)); + for (int i = 0; i < children_count / 16; i++) + { + Load_From_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); + for (int j = 0; j < 16; j++) + { + if (loadChildren) { + if(strcmp(dir_ptr->Entities[i].EntityName,"")) + children->push_back(dir_ptr->Entities[i].EntityName); + } + else + { + if (!strcmp(dir_ptr->Entities[j].EntityName, entityName.c_str())) + { + if (targetName == "") + { + uint32_t ret = dir_ptr->Entities[j].InodeNum; + free(dir_ptr); + return ret; + } + else + { + strcpy(dir_ptr->Entities[j].EntityName, targetName.c_str()); + Store_To_NVM(FSObj, ptr->Direct_data_lbas[i], dir_ptr, 4096); + free(dir_ptr); + return 0; + } + } + } + } + } + + Load_From_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); + for (int i = 0; i < children_count % 16; i++) + { + if (loadChildren) { + if(strcmp(dir_ptr->Entities[i].EntityName,"")) + children->push_back(dir_ptr->Entities[i].EntityName); + } + else + { + if (!strcmp(dir_ptr->Entities[i].EntityName, entityName.c_str())) + { + if (targetName == "") + { + uint32_t ret = dir_ptr->Entities[i].InodeNum; + free(dir_ptr); + return ret; + } + else + { + strcpy(dir_ptr->Entities[i].EntityName, targetName.c_str()); + Store_To_NVM(FSObj, ptr->Direct_data_lbas[children_count / 16], dir_ptr, 4096); + free(dir_ptr); + return 0; + } + } + } + } + free(dir_ptr); + return 0; + } + + // A recursive call to load inode of the given path to lookupmap + // Stores the inode ptr as well, returns 0 in success + int Get_Path_Inode(MYFS *FSObj, std::string path, Inode **ptr) + { + + if (path == "/tmp") + { + *ptr = FSObj->rootEntry; + return 0; + } + + // Check if path in lookupMap cache + int isPresent = LookupMap_Lookup(FSObj, path, ptr); + if (!isPresent) + return 0; + + // if not : Get_Path_Inode for parent dir + std::string parent; + Inode *parentInode; + Get_ParentPath(path, parent); + isPresent = Get_Path_Inode(FSObj, parent, &parentInode); + if (isPresent) + return -1; + // Read parent dir and get asked inode number + if (parentInode->FileSize == 0) + return -1; + // Get Entity to search for + std::string entityName; + Get_EntityName(path, entityName); + uint32_t index = Load_Children(FSObj, parentInode, entityName, NULL, false); + if (!index) + return -1; + + // Load the children index inode from disk and store in lookupMap; + uint64_t address = SUPER_BLOCK_SIZE + index * INODE_SIZE; + Inode *iptr = (Inode *)calloc(1, sizeof(Inode)); + + Load_From_NVM(FSObj, address, iptr, INODE_SIZE); + std::cout<<"Load File : "<EntityName<<" "<Inode_no<FileSize) / 16; + uint64_t addr = ptr->Direct_data_lbas[index]; + + if (!addr) + { + addr = get_FreeDataBlock(FSObj); + ptr->Direct_data_lbas[index] = addr; + } + else + { + index = Load_From_NVM(FSObj, addr, dirPtr, 4096); + if (index) + return -1; + } + + index = (ptr->FileSize) % 16; + std::cout<<"FS : "<FileSize<<" "<Entities[index] = dirDataptr; + Store_To_NVM(FSObj, addr, dirPtr, 4096); + ptr->FileSize++; + free(dirPtr); + + return 0; + } + + void MYFS_DeletePath(MYFS *FSObj, std::string path) + { + Inode *ptr, *parentInode; + int notPresent = Get_Path_Inode(FSObj, path, &ptr); + if (notPresent) + return; + + //Update parent + std::string entityName, ppath; + Get_EntityName(path, entityName); + Get_ParentPath(path, ppath); + Rename_Child_In_Parent(FSObj, ppath, entityName, ""); + //Get_Path_Inode(FSObj, ppath, &parentInode); + //parentInode->FileSize -=1; + //Change lookupmap + LookupMap_Delete(FSObj, path); + FSObj->InodeBitMap[ptr->Inode_no] = false; + //Free Data zones + free(ptr); + } + + int MYFS_CreateFile(MYFS *FSObj, std::string path) + { + uint32_t inode_no = get_FreeInode(FSObj); + Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); + // Fill the ptr + std::string entityName; + Get_EntityName(path, entityName); + strcpy(ptr->EntityName, entityName.c_str()); + ptr->Inode_no = inode_no; + + // Update parent + std::string parent; + Get_ParentPath(path, parent); + int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); + if (parentUpdated) + return -1; + + // Load to lookupmap + LookupMap_Insert(FSObj, path, ptr); + + return 0; + } + + int MYFS_CreateDir(MYFS *FSObj, std::string path) + { + uint32_t inode_no = get_FreeInode(FSObj); + Inode *ptr = (Inode *)calloc(1, sizeof(Inode)); + // Fill the ptr + std::string entityName; + Get_EntityName(path, entityName); + strcpy(ptr->EntityName, entityName.c_str()); + ptr->IsDir = true; + ptr->Inode_no = inode_no; + + // Update parent + std::string parent; + Get_ParentPath(path, parent); + int parentUpdated = Update_Parent(FSObj, parent, entityName, inode_no); + if (parentUpdated) + return -1; + + // Load to lookupmap + LookupMap_Insert(FSObj, path, ptr); + + return 0; + } + + int initFS(MYFS *FSObj, user_zns_device *zns) + { + FSObj->zns = zns; + FSObj->FileSystemCapacity = zns->capacity_bytes; + FSObj->LogicalBlockSize = zns->lba_size_bytes; + // We reserve a single block as super block and MAX_INODE_COUNT as + FSObj->DataBlockCount = (FSObj->FileSystemCapacity / FSObj->LogicalBlockSize - (MAX_INODE_COUNT + 1)); + + FSObj->rootEntry = (Inode *)calloc(1, sizeof(Inode)); + FSObj->DataBitMap = (bool *)calloc(FSObj->DataBlockCount, sizeof(bool)); + + // this->FileSystemObj->LookupCache = (mapEntries *) calloc(LOOKUP_MAP_SIZE, sizeof(mapEntries)); + void *ptr = (void *) calloc(1, SUPER_BLOCK_SIZE); + Load_From_NVM(FSObj, 0, ptr, SUPER_BLOCK_SIZE); + struct SuperBlock *sb = (SuperBlock *) ptr; + //memcpy(sb, ptr, sizeof(SuperBlock)); + + if(!sb->persistent) { + //Not stored in disk + FSObj->DataBlockPtr = 0; // Reserved for Root Node + FSObj->InodePtr = 0; + FSObj->InodeBitMap[0] = true; + *(FSObj->DataBitMap) = true; + + //Do the following only if already not present + strcpy(FSObj->rootEntry->EntityName, "tmp"); + FSObj->rootEntry->IsDir = true; + FSObj->rootEntry->Inode_no = 0; + FSObj->rootEntry->FileSize = 0; + FSObj->rootEntry->Direct_data_lbas[0] = DATA_BLOCKS_OFFSET * FSObj->LogicalBlockSize; + } else { + //Load root inode; Stored in disk + Load_From_NVM(FSObj, SUPER_BLOCK_SIZE, FSObj->rootEntry, INODE_SIZE); + FSObj->DataBlockPtr = sb->dataBlockPtr; + FSObj->InodePtr = sb->inodeBlockPtr; + memcpy(FSObj->InodeBitMap, ptr+sizeof(SuperBlock), sizeof(FSObj->InodeBitMap)); + memcpy(FSObj->DataBitMap, ptr+sizeof(SuperBlock)+sizeof(FSObj->InodeBitMap), FSObj->DataBlockCount); + } + free(ptr); + //free(sb); + return 0; + } + + S2FileSystem::S2FileSystem(std::string uri_db_path, bool debug) + { FileSystem::Default(); std::string sdelimiter = ":"; std::string edelimiter = "://"; @@ -42,8 +496,9 @@ namespace ROCKSDB_NAMESPACE { params.name = strdup(device.c_str()); params.log_zones = 3; params.gc_wmark = 1; - params.force_reset = true; + params.force_reset = false; int ret = init_ss_zns_device(¶ms, &this->_zns_dev); + free(params.name); if(ret != 0){ std::cout << "Error: " << uri_db_path << " failed to open the device " << device.c_str() << "\n"; std::cout << "Error: ret " << ret << "\n"; @@ -53,9 +508,45 @@ namespace ROCKSDB_NAMESPACE { assert(this->_zns_dev->capacity_bytes != 0); ss_dprintf(DBG_FS_1, "device %s is opened and initialized, reported LBA size is %u and capacity %lu \n", device.c_str(), this->_zns_dev->lba_size_bytes, this->_zns_dev->capacity_bytes); + + // INIT File System + this->FileSystemObj = (MYFS *)calloc(1, sizeof(MYFS)); + initFS(this->FileSystemObj, this->_zns_dev); } - S2FileSystem::~S2FileSystem() { + S2FileSystem::~S2FileSystem() + { + Store_To_NVM(this->FileSystemObj, SUPER_BLOCK_SIZE, this->FileSystemObj->rootEntry, INODE_SIZE); + free(this->FileSystemObj->rootEntry); + + //Store all inodes from lookup cache to disk + for(int i=0;iFileSystemObj->LookupCache[i], *tmp; + while(head!=NULL) { + tmp = head; + head = head->chain; + Store_To_NVM(this->FileSystemObj, (tmp->ptr->Inode_no * INODE_SIZE) + SUPER_BLOCK_SIZE, tmp->ptr, INODE_SIZE); + std::cout<<"File : "<ptr->EntityName<<" "<ptr->Inode_no<<" @ "<<(tmp->ptr->Inode_no * INODE_SIZE) + SUPER_BLOCK_SIZE<ptr); + free(tmp); + } + } + + void *superBlockWBitMap = (void *) calloc(1,SUPER_BLOCK_SIZE); + struct SuperBlock *sb = (SuperBlock *) superBlockWBitMap;//calloc(1, sizeof(SuperBlock)); + sb->dataBlockPtr = this->FileSystemObj->DataBlockPtr; + sb->inodeBlockPtr = this->FileSystemObj->InodePtr; + sb->persistent = true; + std::cout<<"Inode count : "<FileSystemObj->DataBlockCount<FileSystemObj->InodeBitMap, MAX_INODE_COUNT); + memcpy(superBlockWBitMap+sizeof(SuperBlock)+MAX_INODE_COUNT, this->FileSystemObj->DataBitMap, this->FileSystemObj->DataBlockCount); + Store_To_NVM(this->FileSystemObj, 0, superBlockWBitMap, SUPER_BLOCK_SIZE); + free(superBlockWBitMap); + //free(sb); + free(this->FileSystemObj->DataBitMap); + deinit_ss_zns_device(this->FileSystemObj->zns); + free(this->FileSystemObj); } // Create a brand new sequentially-readable file with the specified name. @@ -65,11 +556,22 @@ namespace ROCKSDB_NAMESPACE { // // The returned file will only be accessed by one thread at a time. IOStatus S2FileSystem::NewSequentialFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + std::unique_ptr *result, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int notPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (notPresent) + return IOStatus::IOError(__FUNCTION__); + + result->reset(); + result->reset(new MYFS_SequentialFile(cpath, this->FileSystemObj)); + return IOStatus::OK(); } - IOStatus S2FileSystem::IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) { + IOStatus S2FileSystem::IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } @@ -81,11 +583,22 @@ namespace ROCKSDB_NAMESPACE { // // The returned file may be concurrently accessed by multiple threads. IOStatus S2FileSystem::NewRandomAccessFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + std::unique_ptr *result, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + + result->reset(); + result->reset(new MYFS_RandomAccessFile(cpath, this->FileSystemObj)); + return IOStatus::OK(); } - const char *S2FileSystem::Name() const { + const char *S2FileSystem::Name() const + { return "S2FileSytem"; } @@ -97,21 +610,45 @@ namespace ROCKSDB_NAMESPACE { // // The returned file will only be accessed by one thread at a time. IOStatus S2FileSystem::NewWritableFile(const std::string &fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + std::unique_ptr *result, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + MYFS_CreateFile(this->FileSystemObj, cpath); + else + ptr->FileSize = 0; + + result->reset(); + result->reset(new MYFS_WritableFile(cpath, this->FileSystemObj)); + return IOStatus::OK(); } - IOStatus S2FileSystem::ReopenWritableFile(const std::string &, const FileOptions &, std::unique_ptr *, - IODebugContext *) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::ReopenWritableFile(const std::string &fname, const FileOptions &, std::unique_ptr *result, + IODebugContext *) + { + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::IOError(); + + result->reset(); + result->reset(new MYFS_WritableFile(cpath, this->FileSystemObj)); + return IOStatus::OK(); } IOStatus S2FileSystem::NewRandomRWFile(const std::string &, const FileOptions &, std::unique_ptr *, - IODebugContext *) { + IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::NewMemoryMappedFileBuffer(const std::string &, std::unique_ptr *) { + IOStatus S2FileSystem::NewMemoryMappedFileBuffer(const std::string &, std::unique_ptr *) + { return IOStatus::IOError(__FUNCTION__); } @@ -124,66 +661,111 @@ namespace ROCKSDB_NAMESPACE { // returns non-OK. IOStatus S2FileSystem::NewDirectory(const std::string &name, const IOOptions &io_opts, std::unique_ptr *result, - IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IODebugContext *dbg) + { + + result->reset(); + result->reset(new MYFS_Directory(this->FileSystemObj)); + return IOStatus::OK(); } - IOStatus S2FileSystem::GetFreeSpace(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) { + IOStatus S2FileSystem::GetFreeSpace(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::Truncate(const std::string &, size_t, const IOOptions &, IODebugContext *) { + IOStatus S2FileSystem::Truncate(const std::string &, size_t, const IOOptions &, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } // Create the specified directory. Returns error if directory exists. - IOStatus S2FileSystem::CreateDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::CreateDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(dirname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + isPresent = MYFS_CreateDir(this->FileSystemObj, cpath); + else + return IOStatus::IOError(__FUNCTION__); + + return IOStatus::OK(); } // Creates directory if missing. Return Ok if it exists, or successful in // Creating. - IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::CreateDirIfMissing(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) + { + std::string cpath; + Clean_Path(dirname, cpath); + Inode *ptr; + std::string dir = cpath.substr(0, cpath.size() - 1); + int isPresent = Get_Path_Inode(this->FileSystemObj, dir, &ptr); + if (isPresent) + isPresent = MYFS_CreateDir(this->FileSystemObj, dir); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } IOStatus - S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + S2FileSystem::GetFileSize(const std::string &fname, const IOOptions &options, uint64_t *file_size, IODebugContext *dbg) + { + + std::string cpath; + Clean_Path(fname, cpath); + Inode *ptr; + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + else + *file_size = ptr->FileSize; + return IOStatus::OK(); } - IOStatus S2FileSystem::DeleteDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) { + IOStatus S2FileSystem::DeleteDir(const std::string &dirname, const IOOptions &options, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetFileModificationTime(const std::string &fname, const IOOptions &options, uint64_t *file_mtime, - IODebugContext *dbg) { + IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, - IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IODebugContext *dbg) + { + //*output_path = db_path; + return IOStatus::OK(); } - IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::DeleteFile(const std::string &fname, const IOOptions &options, IODebugContext *dbg) + { + // MYFS_DeletePath(this->FileSystemObj, fname); + return IOStatus::OK(); } IOStatus S2FileSystem::NewLogger(const std::string &fname, const IOOptions &io_opts, std::shared_ptr *result, - IODebugContext *dbg) { + IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::GetTestDirectory(const IOOptions &options, std::string *path, IODebugContext *dbg) { + IOStatus S2FileSystem::GetTestDirectory(const IOOptions &options, std::string *path, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } // Release the lock acquired by a previous successful call to LockFile. // REQUIRES: lock was returned by a successful LockFile() call // REQUIRES: lock has not already been unlocked. - IOStatus S2FileSystem::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::UnlockFile(FileLock *lock, const IOOptions &options, IODebugContext *dbg) + { + return IOStatus::OK(); } // Lock the specified file. Used to prevent concurrent access to @@ -200,30 +782,63 @@ namespace ROCKSDB_NAMESPACE { // to go away. // // May create the named file if it does not already exist. - IOStatus S2FileSystem::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::LockFile(const std::string &fname, const IOOptions &options, FileLock **lock, IODebugContext *dbg) + { + return IOStatus::OK(); } IOStatus - S2FileSystem::AreFilesSame(const std::string &, const std::string &, const IOOptions &, bool *, IODebugContext *) { + S2FileSystem::AreFilesSame(const std::string &, const std::string &, const IOOptions &, bool *, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::NumFileLinks(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) { + IOStatus S2FileSystem::NumFileLinks(const std::string &, const IOOptions &, uint64_t *, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } - IOStatus S2FileSystem::LinkFile(const std::string &, const std::string &, const IOOptions &, IODebugContext *) { + IOStatus S2FileSystem::LinkFile(const std::string &, const std::string &, const IOOptions &, IODebugContext *) + { return IOStatus::IOError(__FUNCTION__); } IOStatus S2FileSystem::RenameFile(const std::string &src, const std::string &target, const IOOptions &options, - IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IODebugContext *dbg) + { + std::string cpath_target, cpath_src; + Clean_Path(src, cpath_src); + Clean_Path(target, cpath_target); + Inode *targetptr, *sourceptr; + + + // verify if target exists + int notPresent = Get_Path_Inode(this->FileSystemObj, cpath_target, &targetptr); + if (!notPresent) + //If present + MYFS_DeletePath(this->FileSystemObj, cpath_target); + + // if it is not present + // rename the inode + std::string entityName; + Get_EntityName(cpath_src, entityName); + Get_Path_Inode(this->FileSystemObj, cpath_src, &sourceptr); + LookupMap_Insert(this->FileSystemObj, cpath_target, sourceptr); + LookupMap_Delete(this->FileSystemObj, cpath_src); + std::string targetEntityName; + Get_EntityName(cpath_target, targetEntityName); + strcpy(sourceptr->EntityName, targetEntityName.c_str()); + std::string parentPath; + Get_ParentPath(cpath_target, parentPath); + int parentUpdated = Rename_Child_In_Parent(this->FileSystemObj, parentPath, entityName, targetEntityName); + if (parentUpdated) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } IOStatus S2FileSystem::GetChildrenFileAttributes(const std::string &dir, const IOOptions &options, - std::vector *result, IODebugContext *dbg) { + std::vector *result, IODebugContext *dbg) + { return FileSystem::GetChildrenFileAttributes(dir, options, result, dbg); } @@ -235,8 +850,19 @@ namespace ROCKSDB_NAMESPACE { // permission to access "dir", or if "dir" is invalid. // IOError if an IO Error was encountered IOStatus S2FileSystem::GetChildren(const std::string &dir, const IOOptions &options, std::vector *result, - IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IODebugContext *dbg) + { + std::string cpath; + Get_ParentPath(dir, cpath); + Inode *ptr; + + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::IOError(__FUNCTION__); + uint32_t err = Load_Children(this->FileSystemObj, ptr, "", result, true); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); } // Returns OK if the named file exists. @@ -244,13 +870,317 @@ namespace ROCKSDB_NAMESPACE { // the calling process does not have permission to determine // whether this file exists, or if the path is invalid. // IOError if an IO Error was encountered - IOStatus S2FileSystem::FileExists(const std::string &fname, const IOOptions &options, IODebugContext *dbg) { - return IOStatus::IOError(__FUNCTION__); + IOStatus S2FileSystem::FileExists(const std::string &fname, const IOOptions &options, IODebugContext *dbg) + { + Inode *ptr; + std::string cpath; + Clean_Path(fname, cpath); + int isPresent = Get_Path_Inode(this->FileSystemObj, cpath, &ptr); + if (isPresent) + return IOStatus::NotFound(); + return IOStatus::OK(); } IOStatus S2FileSystem::ReuseWritableFile(const std::string &fname, const std::string &old_fname, const FileOptions &file_opts, - std::unique_ptr *result, IODebugContext *dbg) { + std::unique_ptr *result, IODebugContext *dbg) + { return IOStatus::IOError(__FUNCTION__); } -} \ No newline at end of file + + int load_nth_indirect_block(MYFS *FSObj, uint32_t n, uint64_t indirect_lba, Indirect_ptr **ptr) + { + for (int i = 0; i < n; i++) + Load_From_NVM(FSObj, (*ptr)->Indirect_ptr_lbas, *ptr, 4096); + } + + int get_blocks_addr(MYFS *FSObj, Inode *ptr, uint64_t offset, uint64_t size, std::vector *addresses, bool forWrite) + { + uint64_t *data_block_lba_ptr, next_indirect_block_addr; + Indirect_ptr *iptr = NULL; + uint32_t curr = offset / 4096, end = (offset+size) / 4096; + uint32_t no_of_data_block_ptrs; + + // Load the direct ptr + if (curr < 480) + { + // In Inode block itself + data_block_lba_ptr = ptr->Direct_data_lbas; + no_of_data_block_ptrs = 480; + next_indirect_block_addr = ptr->Indirect_ptr_lbas; + } + else + { + curr -= 480; + int nth_indirect = curr / 510; + //What if ptr->Indirect_ptr_lba + iptr = (Indirect_ptr *)calloc(1, 4096); + if(ptr->Indirect_ptr_lbas == 0) { + ptr->Indirect_ptr_lbas = get_FreeDataBlock(FSObj); + } + + Load_From_NVM(FSObj, ptr->Indirect_ptr_lbas, iptr, 4096); + for (int i = 0; i < nth_indirect; i++) + Load_From_NVM(FSObj, iptr->Indirect_ptr_lbas, iptr, 4096); + + data_block_lba_ptr = iptr->Direct_data_lbas; + next_indirect_block_addr = iptr->Indirect_ptr_lbas; + no_of_data_block_ptrs = 510; + curr = curr % 510; + } + + uint64_t addr; + for (int i = 0; i <= end; i++) + { + addr = *(data_block_lba_ptr + curr); + if (!addr) + { + addr = get_FreeDataBlock(FSObj); + *(data_block_lba_ptr+curr) = addr; + } + addresses->push_back(addr); + curr++; + + if (curr == no_of_data_block_ptrs) + { + if (!next_indirect_block_addr) + { + // If no indirect block ptr, create one and store to mem + next_indirect_block_addr = get_FreeDataBlock(FSObj); + if (iptr == NULL) + { + ptr->Indirect_ptr_lbas = next_indirect_block_addr; + Store_To_NVM(FSObj, SUPER_BLOCK_SIZE + (ptr->Inode_no * INODE_SIZE), ptr, 4096); + + } + else + { + iptr->Indirect_ptr_lbas = next_indirect_block_addr; + Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); + free(iptr); + } + iptr = (Indirect_ptr *)calloc(1, 4096); + iptr->Current_addr = next_indirect_block_addr; + } + else + { + if (iptr == NULL) + iptr = (Indirect_ptr *)calloc(1, 4096); + + Load_From_NVM(FSObj, next_indirect_block_addr, iptr, 4096); + } + next_indirect_block_addr = iptr->Indirect_ptr_lbas; + no_of_data_block_ptrs = 510; + data_block_lba_ptr = iptr->Direct_data_lbas; + curr = 0; + } + } + + // Store dirty block to NVM + if (iptr == NULL) + Store_To_NVM(FSObj, SUPER_BLOCK_SIZE + (ptr->Inode_no * INODE_SIZE), ptr, 4096); + else + Store_To_NVM(FSObj, iptr->Current_addr, iptr, 4096); + + free(iptr); + return 0; + } + + // MYFS_File definition + MYFS_File::MYFS_File(std::string filePath, MYFS *FSObj) + { + this->FSObj = FSObj; + Get_Path_Inode(FSObj, filePath, &(this->ptr)); + this->curr_read_offset = 0; + } + + int MYFS_File::PRead(uint64_t offset, uint64_t size, char *data) + { + + if (ptr->FileSize < offset + size) { + if(offset >= ptr->FileSize) + return 0; + size = ptr->FileSize - offset; + } + std::cout<<"Read : "<ptr->EntityName<<" "< addresses_to_read; + int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); + if (err) + return 0; + + char *readD = (char *)calloc(addresses_to_read.size(), 4096); + for (int i = 0; i < addresses_to_read.size(); i++) + Load_From_NVM(this->FSObj, addresses_to_read.at(i), readD + (i * 4096), 4096); + + int smargin = offset % 4096; + memcpy(data, readD + smargin, size); + free(readD); + return size; + } + + int MYFS_File::Read(uint64_t size, char *data) + { + // Check with file size + int sizeW = this->PRead(this->curr_read_offset, size, data); + this->curr_read_offset += sizeW; + return sizeW; + } + + int MYFS_File::Seek(uint64_t offset) + { + if (ptr->FileSize < this->curr_read_offset + offset) + return -1; + this->curr_read_offset += offset; + return 0; + } + + int MYFS_File::Truncate(uint64_t size) + { + // TODO: Free Data Block + this->ptr->FileSize = size; + return 0; + } + + int MYFS_File::PAppend(uint64_t offset, uint64_t size, char *data) + { + std::vector addresses_to_read; + int err = get_blocks_addr(this->FSObj, this->ptr, offset, size, &addresses_to_read, false); + if (err) + return -1; + + // Do read-modify-update cycle if smargin is present on 1st address. + int smargin = offset % 4096; + char *buffer = (char *)calloc(addresses_to_read.size(), 4096); + if (smargin) + Load_From_NVM(this->FSObj, addresses_to_read.at(0), buffer, 4096); + + memcpy(buffer + smargin, data, size); + for (int i = 0; i < addresses_to_read.size(); i++) + Store_To_NVM(this->FSObj, addresses_to_read.at(i), buffer + (i * 4096), 4096); + + // Update file size + this->ptr->FileSize = offset + size; + free(buffer); + return 0; + } + + int MYFS_File::Append(uint64_t size, char *data) + { + return this->PAppend(this->ptr->FileSize, size, data); + } + + uint64_t MYFS_File::GetFileSize() + { + return this->ptr->FileSize; + } + + int MYFS_File::Close() + { + // Flush Inode changes to Disk + } + + // Def of MYFS_SequentialFile + MYFS_SequentialFile::MYFS_SequentialFile(std::string fpath, MYFS *FSObj) + { + this->fp = new MYFS_File(fpath, FSObj); + } + + IOStatus MYFS_SequentialFile::Read(size_t n, const IOOptions &opts, Slice *result, char *scratch, IODebugContext *dbg) + { + + int sizeW = this->fp->Read(n, scratch); + *result = Slice(scratch, sizeW); + return IOStatus::OK(); + } + + // IOStatus MYFS_SequentialFile::PositionedRead(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, + // char *scratch, IODebugContext *dbg) + // { + // int err = this->fp->PRead(offset, n, scratch); + // if (err) + // return IOStatus::IOError(__FUNCTION__); + // *result = Slice(scratch, n); + // return IOStatus::OK(); + // } + + IOStatus MYFS_SequentialFile::Skip(uint64_t n) + { + int err = this->fp->Seek(n); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); + } + + // Def MYFS_RandomAccessFile + MYFS_RandomAccessFile::MYFS_RandomAccessFile(std::string fname, MYFS *FSObj) + { + this->fp = new MYFS_File(fname, FSObj); + } + + IOStatus MYFS_RandomAccessFile::Read(uint64_t offset, size_t n, const IOOptions &opts, Slice *result, char *scratch, + IODebugContext *dbg) const + { + int sizeW = this->fp->PRead(offset, n, scratch); + *result = Slice(scratch, sizeW); + return IOStatus::OK(); + } + + // Def MYFS_WritableFile + MYFS_WritableFile::MYFS_WritableFile(std::string fname, MYFS *FSObj) + { + this->fp = new MYFS_File(fname, FSObj); + this->cache = false; + this->cacheSize = 0; + } + + IOStatus MYFS_WritableFile::Truncate(uint64_t size, const IOOptions &opts, IODebugContext *dbg) + { + int err = this->fp->Truncate(size); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); + } + + IOStatus MYFS_WritableFile::ClearCache() { + if(!this->cache) + return IOStatus::OK(); + int err = this->fp->Append(this->cacheSize, this->cacheData); + if (err) + return IOStatus::IOError(__FUNCTION__); + free(this->cacheData); + this->cache = false; + this->cacheSize = 0; + return IOStatus::OK(); + } + + IOStatus MYFS_WritableFile::Append(const Slice &data, const IOOptions &opts, IODebugContext *dbg) + { + + char *block = (char *)data.data(); + uint64_t size = data.size(); + if(this->cache) { + //Append to cache + char *tmp = (char *)calloc(1, this->cacheSize+size); + memcpy(tmp, this->cacheData, this->cacheSize); + memcpy(tmp+this->cacheSize, block, size); + free(this->cacheData); + this->cacheData = tmp; + this->cacheSize += size; + //If size > 4096 clear cache + if(this->cacheSize >= 4096) + this->ClearCache(); + return IOStatus::OK(); + } else if(size < 4096) { + //Append to cache + this->cache = true; + this->cacheData = (char *)calloc(1, size); + memcpy(this->cacheData, block, size); + this->cacheSize = size; + return IOStatus::OK(); + } + int err = this->fp->Append(size, block); + if (err) + return IOStatus::IOError(__FUNCTION__); + return IOStatus::OK(); + } +} diff --git a/src/m45-rocksdb/S2FileSystem.h b/src/m45-rocksdb/S2FileSystem.h index a7ab2d0..dc2073a 100644 --- a/src/m45-rocksdb/S2FileSystem.h +++ b/src/m45-rocksdb/S2FileSystem.h @@ -31,13 +31,219 @@ SOFTWARE. #include #include -namespace ROCKSDB_NAMESPACE { +#define LOOKUP_MAP_SIZE 1000 +#define MAX_INODE_COUNT 255 +#define INODE_SIZE 4096 +#define SUPER_BLOCK_SIZE 4096*2 +#define STRINGENCODE 31 +#define DATA_BLOCKS_OFFSET 256 +namespace ROCKSDB_NAMESPACE +{ + struct SuperBlock + { + bool persistent; + uint64_t inodeBlockPtr; + uint64_t dataBlockPtr; + }; + + struct Inode + { + uint32_t Inode_no; + char EntityName[235]; + bool IsDir; + uint64_t FileSize; + uint64_t Indirect_ptr_lbas; + uint64_t Direct_data_lbas[480]; + }; + + struct mapEntries + { + char id[1000]; + Inode *ptr; + mapEntries *chain; + }; + + struct Indirect_ptr + { + uint64_t Current_addr; + uint64_t Direct_data_lbas[510]; + uint64_t Indirect_ptr_lbas; + }; + + struct MYFS_DirData + { + char EntityName[252]; + uint32_t InodeNum; + }; + + struct MYFS_Dir + { + MYFS_DirData Entities[16]; + }; + + struct MYFS + { + mapEntries *LookupCache[LOOKUP_MAP_SIZE]; // Map type to void ptrs; + bool InodeBitMap[MAX_INODE_COUNT]; + bool *DataBitMap; + uint32_t InodePtr; + + uint64_t DataBlockPtr; + uint64_t DataBlockMax; + + uint64_t DataBlockCount; + uint64_t FileSystemCapacity; + uint32_t LogicalBlockSize; + Inode *rootEntry; + user_zns_device *zns; + }; + + /* + int Load_From_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); + int Store_To_NVM(MYFS *FSObj, uint64_t address, void *ptr, uint64_t size); + void Get_ParentPath(std::string path, std::string &parent); + void Get_EntityName(std::string path, std::string &entityName); + //void Load_Childrens(Inode *ptr, std::string entityName, std::vector *children, bool loadChildren); + // int Get_Path_Inode(MYFS *FSObj, std::string path, Inode *ptr); + int LookupMap_HashFunction(void *data); + */ + + + class MYFS_File + { + private: + struct Inode *ptr; + MYFS *FSObj; + uint64_t curr_read_offset; + void *current_ptr; - class S2FileSystem : public FileSystem { + public: + MYFS_File(std::string filePath, MYFS *FSObj); + virtual ~MYFS_File() = default; + int Read(uint64_t size, char *data); + int PRead(uint64_t offset, uint64_t size, char *data); + int Seek(uint64_t offset); + int Truncate(uint64_t size); + int Append(uint64_t size, char *data); + int PAppend(uint64_t offset, uint64_t size, char *data); + uint64_t GetFileSize(); + int Close(); + }; + + /* + *Creates read only MYFS_File object + */ + class MYFS_SequentialFile : public FSSequentialFile + { + private: + MYFS_File *fp; + + public: + MYFS_SequentialFile(std::string filePath, MYFS *FSObj); + virtual ~MYFS_SequentialFile(){delete this->fp;} + virtual IOStatus Read(size_t n, const IOOptions &opts, Slice *result, + char *scratch, IODebugContext *dbg)override; + + virtual IOStatus Skip(uint64_t n) override; + // virtual IOStatus PositionedRead(uint64_t offset, size_t n, + // const IOOptions &opts, Slice *result, + // char *scratch, IODebugContext *dbg) override; + // virtual IOStatus InvalidateCache(size_t offset, size_t length) override + // { + // return IOStatus::OK(); + // }; + // virtual bool use_direct_io() const override { return true; } + // virtual size_t GetRequiredBufferAlignment() const override { return 4096; } + }; + + class MYFS_RandomAccessFile : public FSRandomAccessFile + { + private: + MYFS_File *fp; + + public: + MYFS_RandomAccessFile(std::string fname, MYFS *FSObj); + virtual ~MYFS_RandomAccessFile(){delete this->fp;} + virtual IOStatus Read(uint64_t offset, size_t n, const IOOptions &opts, + Slice *result, char *scratch, IODebugContext *dbg) const override; + /* + virtual IOStatus MultiRead(FSReadRequest *reqs, size_t num_reqs, + const IOOptions &options, + IODebugContext *dbg) {std::cout<<"MULTIREAD"<ClearCache();delete this->fp;} + virtual IOStatus Truncate(uint64_t size, const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus Close(const IOOptions &opts, IODebugContext *dbg) {return IOStatus::OK();}; + virtual IOStatus Append(const Slice &data, const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus Flush(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + virtual IOStatus Sync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + /* + virtual IOStatus Append(const Slice &data, const IOOptions &opts, + const DataVerificationInfo & /* verification_info , + IODebugContext *dbg) override + { + return Append(data, opts, dbg); + } + virtual IOStatus PositionedAppend(const Slice &data, uint64_t offset, + const IOOptions &opts, + IODebugContext *dbg) override; + virtual IOStatus PositionedAppend(const Slice &data, uint64_t offset, + const IOOptions &opts, const DataVerificationInfo & /* verification_info, + IODebugContext *dbg) override + { + return PositionedAppend(data, offset, opts, dbg); + } + + virtual IOStatus Fsync(const IOOptions &opts, IODebugContext *dbg) override { return IOStatus::OK(); } + virtual bool IsSyncThreadSafe() const { return false; } + virtual bool use_direct_io() const override { return true; } + virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {} + virtual uint64_t GetFileSize(const IOOptions &opts, + IODebugContext *dbg) override {std::cout<<"Calling this module"<fp->GetFileSize();} + virtual IOStatus InvalidateCache(size_t offset, size_t length) override { return IOStatus::OK(); } + virtual size_t GetRequiredBufferAlignment() const override { return 4096; } + */ + }; + + class MYFS_Directory : public FSDirectory + { + private: + MYFS *fp; + public: + MYFS_Directory(MYFS *FSObj){} + virtual ~MYFS_Directory(){} + virtual IOStatus Fsync(const IOOptions& opts, IODebugContext* dbg) override { + return IOStatus::OK(); + } + }; + + class S2FileSystem : public FileSystem + { public: // No copying allowed S2FileSystem(std::string uri, bool debug); - S2FileSystem(const S2FileSystem&) = delete; + S2FileSystem(const S2FileSystem &) = delete; virtual ~S2FileSystem(); IOStatus IsDirectory(const std::string &, const IOOptions &options, bool *is_dir, IODebugContext *) override; @@ -91,9 +297,9 @@ namespace ROCKSDB_NAMESPACE { GetAbsolutePath(const std::string &db_path, const IOOptions &options, std::string *output_path, IODebugContext *dbg); - IOStatus DeleteFile(const std::string& fname, - const IOOptions& options, - IODebugContext* dbg); + IOStatus DeleteFile(const std::string &fname, + const IOOptions &options, + IODebugContext *dbg); IOStatus NewLogger(const std::string &fname, const IOOptions &io_opts, std::shared_ptr *result, @@ -131,6 +337,7 @@ namespace ROCKSDB_NAMESPACE { struct user_zns_device *_zns_dev; std::string _uri; const std::string _fs_delimiter = "/"; + struct MYFS *FileSystemObj; }; } diff --git a/src/m45-rocksdb/rocks_s2fs.cc b/src/m45-rocksdb/rocks_s2fs.cc index 1ec8443..b4bf276 100644 --- a/src/m45-rocksdb/rocks_s2fs.cc +++ b/src/m45-rocksdb/rocks_s2fs.cc @@ -39,7 +39,7 @@ namespace ROCKSDB_NAMESPACE { std::string *errmsg) { cout<<"Initialization uri is " << uri << " and errmsg: " << (*errmsg) << endl; // we have two setup - one - s2fs-rocksdb which is just forwarding, then the other that we can use to debug - if(false){ + if(true){ S2FileSystem *z = new S2FileSystem(uri, true); ret_fs->reset(z); } else { @@ -50,4 +50,3 @@ namespace ROCKSDB_NAMESPACE { return ret_fs->get(); }); } -