diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6151054..ad05812 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -3,11 +3,36 @@ project(cubes CXX) # default to release build because speed maters. if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE "Release") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMAKE_BUILD_TYPE: Release, Debug or RelWithDebInfo" FORCE) endif() +if(NOT BUILD_CUBES_MAX_N) + set(BUILD_CUBES_MAX_N 20 CACHE STRING "Limit of maximum N Polycubes to be computed") +endif() + +if(NOT CUBES_PACK_CUBE_XYZ_ADDR) + set(CUBES_PACK_CUBE_XYZ_ADDR ON CACHE BOOL "Pack Cube struct XYZ memory address into 56-bit field.") +endif() + +# Try extract current HEAD commit-id in git +find_package(Git) +if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-list -n1 HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RESULT + OUTPUT_VARIABLE CONFIG_GIT_VERSION) + message(STATUS "Set ${CONFIG_GIT_VERSION} to build version info") +endif() + +# generate config.hpp header in build directory. +set(CONFIG_IS_READONLY "Warning: this file is overwritten during build. 
Do not edit.") +configure_file("config.hpp.in" "config.hpp") + include_directories("include") include_directories("libraries") +include_directories("${PROJECT_BINARY_DIR}") macro(ConfigureTarget Target) # Enable C++17 @@ -38,18 +63,22 @@ macro(ConfigureTarget Target) ) endmacro() +add_library(mapped_file STATIC "libraries/mapped_file.cpp") +ConfigureTarget(mapped_file) + # Source files add_library(CubeObjs OBJECT "src/cubes.cpp" - "src/cache.cpp" "src/rotations.cpp" "src/newCache.cpp" + "src/cubeSwapSet.cpp" ) ConfigureTarget(CubeObjs) # Build main program add_executable(${PROJECT_NAME} "program.cpp" $) target_link_libraries(${PROJECT_NAME} pthread) +target_link_libraries(${PROJECT_NAME} mapped_file) ConfigureTarget(${PROJECT_NAME}) # Optionally build tests diff --git a/cpp/Readme.md b/cpp/Readme.md index ce7de8c..7e34fba 100644 --- a/cpp/Readme.md +++ b/cpp/Readme.md @@ -1,27 +1,71 @@ # C++ implementation of opencubes - uses list representation of coordinates with ones - hashfunction for coordinate is simple concatination of bytes -- can split problem into threads, but performance can be improoved +- can split problem into threads, but performance can be improved ## usage: ```bash ./cubes -n N ``` -options: +### options: ``` +-n --cube_size +the size of polycube to generate up to +This parameter is required. + -t --threads the number of threads to use while generating This parameter is optional. The default value is '1'. -c --use_cache -whether to load cache files +whether to load cache files. +The last N-1 run must have used -w parameter and that process +must have completed without errors. The cache file +must be present under the cache folder. (-f parameter) This parameter is optional. The default value is '0'. -w --write_cache -wheather to save cache files +whether to save cache files +This parameter is optional. The default value is '0'. + +-s --split_cache +whether to save separated cache files per output shape. +requires -w parameter to take affect. 
+No combined cache file is saved when -s is present. This parameter is optional. The default value is '0'. + +-u --use_split_cache +whether to load separated cache files per output shape. +The last N-1 run must have used -s parameter and that process +must have completed without errors. The split cache file(s) +must be present under the cache folder. (-f parameter) +This parameter is optional. The default value is '0'. + +-f --cache_file_folder +where to store cache files. +This parameter is optional. The default value is './cache/'. ``` +### split cache usage: +Starting with N=9 and beyond it makes sense to use the disk cache system. +To generate starting cache run: +```bash +./cubes -n 9 -w -s +``` + +Above saves the results into the cache folder (specified with -f parameter) +as split cache files. Next N=10 run can continue processing from where the last N=9 process stopped: +```bash +./cubes -n 10 -w -s -u +``` +The split cache file mode attempts to minimize memory usage. +All following runs can use above command by incrementing the N by one each time. + +If required you can merge the split cache files +back into single file at last run by dropping the `-s` parameter. +Merging the split cache this way however uses vastly more memory. 
+(Tool should be developed to export/merge the split cache files as standard cube format file) + ## building (cmake) To build a release version (with optimisations , default) ```bash diff --git a/cpp/config.hpp.in b/cpp/config.hpp.in new file mode 100644 index 0000000..695addc --- /dev/null +++ b/cpp/config.hpp.in @@ -0,0 +1,18 @@ +#pragma once +#ifndef OPENCUBES_CONFIG_HPP +#define OPENCUBES_CONFIG_HPP + +// @CONFIG_IS_READONLY@ + +// Version info embedded into the build +#define CONFIG_VERSION "@CONFIG_GIT_VERSION@" +#define CONFIG_BUILDTYPE "@CMAKE_BUILD_TYPE@" +#define CONFIG_COMPILERID "@CMAKE_CXX_COMPILER_ID@ @CMAKE_CXX_COMPILER_VERSION@" + +// Enable Cube struct pointer compaction +#cmakedefine01 CUBES_PACK_CUBE_XYZ_ADDR + +// Maximum Polycubes N that may be computed +#define CUBES_MAX_N @BUILD_CUBES_MAX_N@ + +#endif diff --git a/cpp/include/cache.hpp b/cpp/include/cache.hpp deleted file mode 100644 index 6c3480d..0000000 --- a/cpp/include/cache.hpp +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#ifndef OPENCUBES_CACHE_HPP -#define OPENCUBES_CACHE_HPP -#include - -#include "hashes.hpp" -#include "utils.hpp" - -struct Cache { - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; - - static void save(std::string path, Hashy& hashes, uint8_t n); - static Hashy load(std::string path, uint32_t extractShape = ALL_SHAPES); - - int filedesc; - void* mmap_ptr; -}; - -#endif 
diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index 83feaa7..5abf4c7 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -3,11 +3,13 @@ #define OPENCUBES_CUBE_HPP #include +#include #include #include #include #include +#include "config.hpp" #include "utils.hpp" struct XYZ { @@ -45,20 +47,69 @@ using XYZSet = std::unordered_set>; struct Cube { private: - struct { + // cube memory is stored two ways: + // normal, new'd buffer: is_shared == false + // shared, external memory: is_shared == true +#if CUBES_PACK_CUBE_XYZ_ADDR == 1 + struct bits_t { + uint64_t is_shared : 1; + uint64_t size : 7; // MAX 127 + uint64_t addr : 56; // low 56-bits of memory address. + }; + static_assert(sizeof(bits_t) == sizeof(void *)); +#else + struct bits_t { + uint64_t addr; uint8_t is_shared : 1; uint8_t size : 7; // MAX 127 - } bits; - XYZ *array = nullptr; + }; +#endif + // fields + bits_t fields; + // extract the pointer from bits_t + static XYZ *get(bits_t key) { + // pointer bit-hacking: + uint64_t addr = key.addr; +#if CUBES_PACK_CUBE_XYZ_ADDR == 1 +// todo: on x86-64 depending if 5-level-paging is enabled +// either 47-bit or 56-bit should be replicated to the high +// part of the address. Don't know how to do this check yet, +// so the high 8-bits is left zeroed. +// If we get segfaults dereferencing get(fields) +// then CUBES_PACK_CUBE_XYZ_ADDR must be disabled. +#endif + return reinterpret_cast(addr); + } - static_assert(sizeof(bits) == sizeof(uint8_t)); + static bits_t put(bool is_shared, int size, XYZ *addr) { +#if CUBES_PACK_CUBE_XYZ_ADDR == 1 + // pack the memory address into 56-bits + // on x86-64 it is not used by the hardware (yet). + // This hack actually saves 8 bytes because previously + // the uint8_t caused padding to 16 bytes. 
+ uint64_t tmp = reinterpret_cast((void *)addr); + assert((tmp & ~0xffffffffffffff) == 0 && "BUG: CUBES_PACK_CUBE_XYZ_ADDR should be disabled"); + tmp &= 0xffffffffffffff; + bits_t bits; + bits.addr = tmp; + bits.is_shared = is_shared; + bits.size = size; + return bits; +#else + bits_t bits; + bits.addr = reinterpret_cast((void *)addr); + bits.is_shared = is_shared; + bits.size = size; + return bits; +#endif + } public: // Empty cube - Cube() : bits{0, 0} {} + Cube() : fields{put(0, 0, nullptr)} {} // Cube with N capacity - explicit Cube(uint8_t N) : bits{0, N}, array(new XYZ[bits.size]) {} + explicit Cube(uint8_t N) : fields{put(0, N, new XYZ[N])} {} // Construct from pieces Cube(std::initializer_list il) : Cube(il.size()) { std::copy(il.begin(), il.end(), begin()); } @@ -69,20 +120,23 @@ struct Cube { // Construct from external source. // Cube shares this the memory until modified. // Caller guarantees the memory given will live longer than *this - Cube(XYZ *start, uint8_t n) : bits{1, n}, array(start) {} + Cube(const XYZ *start, uint8_t n) : fields{put(1, n, const_cast(start))} {} // Copy ctor. Cube(const Cube ©) : Cube(copy.size()) { std::copy(copy.begin(), copy.end(), begin()); } ~Cube() { + bits_t bits = fields; if (!bits.is_shared) { - delete[] array; + delete[] get(bits); } } friend void swap(Cube &a, Cube &b) { using std::swap; - swap(a.array, b.array); - swap(a.bits, b.bits); + bits_t abits = a.fields; + bits_t bbits = b.fields; + a.fields = bbits; + b.fields = abits; } Cube(Cube &&mv) : Cube() { swap(*this, mv); } @@ -98,19 +152,11 @@ struct Cube { return *this; } - size_t size() const { return bits.size; } + size_t size() const { return fields.size; } - XYZ *data() { - if (bits.is_shared) { - // lift to RAM: this should never happen really. 
- Cube tmp(array, bits.size); - swap(*this, tmp); - std::printf("Bad use of Cube\n"); - } - return array; - } + XYZ *data() { return get(fields); } - const XYZ *data() const { return array; } + const XYZ *data() const { return get(fields); } XYZ *begin() { return data(); } @@ -138,8 +184,19 @@ struct Cube { void print() const { for (auto &p : *this) std::printf(" (%2d %2d %2d)\n\r", p.x(), p.y(), p.z()); } + + /** + * Copy cube data into destination buffer. + */ + void copyout(int num, XYZ *dest) const { + assert(num <= (signed)size()); + std::copy_n(begin(), num, dest); + } }; +#if CUBES_PACK_CUBE_XYZ_ADDR == 1 +static_assert(sizeof(Cube) == 8, "Unexpected sizeof(Cube) for Cube"); +#endif static_assert(std::is_move_assignable_v, "Cube must be moveable"); static_assert(std::is_swappable_v, "Cube must swappable"); diff --git a/cpp/include/cubeSwapSet.hpp b/cpp/include/cubeSwapSet.hpp new file mode 100644 index 0000000..b3395aa --- /dev/null +++ b/cpp/include/cubeSwapSet.hpp @@ -0,0 +1,231 @@ +#pragma once +#ifndef OPENCUBES_CUBE_DISKSWAP_SET_HPP +#define OPENCUBES_CUBE_DISKSWAP_SET_HPP + +#include +#include +#include +#include +#include + +#include "cube.hpp" +#include "mapped_file.hpp" + +/** + * CubeSwapSet: Implement std::unordered_set<> that offloads XYZ data into a file: + * + * Cubes stored in the set have reduced cost of memory: + * Only the std::unordered_set<> itself and the internal nodes are stored in RAM. + * The element *data* (i.e. XYZ data) is stored in the file. + * The performance cost is that each time the set element is accessed + * the data is read back from the file. + * (Iterating the entire CubeSwapSet involves reading the entire file) + * + * Features: + * - XYZ data is recorded sequentially into the file and + * the Cube size is not saved in the storage file. + * - Cube XYZ data length is constant in CubeStorage instance. + * - Clearing the CubeSwapSet does not release the file managed by CubeStorage. 
+ * (CubePtr(s) cannot be erased from CubeStorage) + * - CubeStorage::read(const CubePtr&) caches up to 1024 Cubes for each thread. + * This read-cache is maintained by any thread that calls CubePtr::get(). + * CubeStorage::discard() is used to begin writing the XYZ data at new file instance. + * - CacheWriter utilizes the file instance from CubeStorage: + * the CubeSwapSet is not iterated through at all by CacheWriter + * and instead CubeStorage::getFile() is assigned into a copy job and then + * copied into the cache-file with mapped::file::copyAt(). + * The source storage file is deleted once the copy is completed. + * This provides wait-free saving of the cache-file and uses + * minimal amount of system memory. + */ +class CubeStorage; + +/** + * CubePtr: "File Pointer to Cube" that reads the cube data from file. + * CubePtr needs CubeStorage instance to be able to access + * its contents with CubePtr::get(). + * The associated CubeStorage should always be available + * in context where CubePtr(s) data is accessed. + */ +class CubePtr { + protected: + mapped::seekoff_t m_seek = 0; + + public: + explicit CubePtr(mapped::seekoff_t offset) : m_seek(offset) {} + CubePtr(const CubePtr& c) : m_seek(c.m_seek) {} + + /** + * Get the Cube pointed by this instance. + * @note The Cube is cached in the thread-local read-cache. + * @warn + * The Cube object is local to calling thread and shall + * not be passed into other threads. + */ + const Cube& get(const CubeStorage& storage) const; + + /** + * Raw data copy. By-passes the thread-local cache. 
+ */ + void copyout(const CubeStorage& storage, size_t n, XYZ* out) const; + + template + void copyout(const CubeStorage& storage, size_t n, Itr out) const { + std::vector buff(n); + copyout(storage, n, buff.data()); + std::copy_n(buff.begin(), n, out); + } + + mapped::seekoff_t seek() const { return m_seek; } +}; + +/** + * Stateful comparator for Cubeptr + */ +class CubePtrEqual { + protected: + const CubeStorage* m_storage = nullptr; + + public: + // C++20 feature: + using is_transparent = void; + + CubePtrEqual(const CubeStorage* ctx) : m_storage(ctx) {} + CubePtrEqual(const CubePtrEqual& ctx) : m_storage(ctx.m_storage) {} + + bool operator()(const CubePtr& a, const CubePtr& b) const { + // todo: there is possibility that + // a.get() returned cube is *deleted* from the cache by b.get() + // The read-cache size must be at least 3 to avoid this. + return a.get(*m_storage) == b.get(*m_storage); + } +}; + +class CubePtrHash { + protected: + const CubeStorage* m_storage = nullptr; + + public: + // C++20 feature: + using is_transparent = void; + using transparent_key_equal = CubePtrEqual; + + CubePtrHash(const CubeStorage* ctx) : m_storage(ctx) {} + CubePtrHash(const CubePtrHash& ctx) : m_storage(ctx.m_storage) {} + + size_t operator()(const CubePtr& x) const { + auto& cube = x.get(*m_storage); + std::size_t seed = cube.size(); + for (auto& p : cube) { + auto x = HashXYZ()(p); + seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } +}; + +class CubeStorage { + protected: + mutable std::mutex m_mtx; + std::filesystem::path m_fpath; + std::shared_ptr m_file; + + static std::atomic m_init_num; + int m_storage_version = 0; + const size_t m_cube_size; + + mapped::seekoff_t m_reserved_end; + // End of committed data. + mapped::seekoff_t m_alloc_seek; + + // m_file_head: 2 MiB memory mapped area at end of the file. 
+ std::unique_ptr m_file_head; + + public: + /** + * Initialize Cube file storage + * @param path directory where to write the storage file. + * @param n The storage is written in n sized chunks of XYZ structs. + * This should be equal to Cube::size() that are passed into local() + * Different sized Cubes in same CubeStorage instance will not work. + * @note the file creation is delayed until commit() is called first time. + */ + CubeStorage(std::filesystem::path path, size_t n); + ~CubeStorage(); + + // not copyable + CubeStorage(const CubeStorage&) = delete; + CubeStorage& operator=(const CubeStorage&) = delete; + // move constructible: but only if no allocations exists in mv + CubeStorage(CubeStorage&& mv); + CubeStorage& operator=(CubeStorage&& mv) = delete; + + size_t cubeSize() const { return m_cube_size; } + + /** + * Make thread local CubePtr instance. + * @note + * Other thread(s) cannot access the returned CubePtr until commit() is called. + * This requires that external lock is held for the data structure + * if CubePtr is made visible to other thread(s) until this thread calls commit() + */ + CubePtr local(const Cube& cube) const; + + /** + * Publish the last local() returned CubePtr. + * commit() writes this the data into the file storage + * making it visible to all threads. + */ + void commit(); + + /** + * Discard the last local() returned CubePtr. + */ + void drop() const; + + /** + * Retrieve the cube data from the backing file + * and cache the result for the caller thread. + */ + const Cube& read(const CubePtr& x) const; + + /** + * Copy the cube data from the storage into destination buffer. + */ + void copydata(const CubePtr& x, size_t n, XYZ* destination) const; + + /** + * Explicitly clear the calling thread's read-cache. + * @note this will *initialize* callers read-cache instance + * if the thread has not used the read-cache yet. + * Only call this from thread that has used to read() previously. 
+ */ + void resetReadCache() const; + + /** + * Get the file name CubeStorage is using. + */ + std::filesystem::path fileName() const { return m_fpath; } + + /** + * Get the mapped::file instance. + * @note this can be null if nothing has been written to the storage yet. + */ + std::shared_ptr getFile() const { return m_file; } + + /** + * Drop all stored data. + */ + void discard(); +}; + +/** + * CubeStorage enabled std::unordered_set<> + * + * The CubeSwapSet must be constructed with already initialized + * stateful instances of CubePtrEqual and CubePtrHash functors + * that resolve the CubePtr(s) using the CubeStorage instance. + */ +using CubeSwapSet = std::unordered_set; + +#endif \ No newline at end of file diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index 7999d5c..cc838ab 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -3,12 +3,15 @@ #define OPENCUBES_HASHES_HPP #include #include +#include +#include #include #include #include #include #include "cube.hpp" +#include "cubeSwapSet.hpp" #include "utils.hpp" struct HashCube { @@ -23,54 +26,108 @@ struct HashCube { } }; -using CubeSet = std::unordered_set>; +// using CubeSet = std::unordered_set>; -struct Hashy { - struct Subsubhashy { - CubeSet set; - std::shared_mutex set_mutex; +class Subsubhashy { + protected: + CubeStorage set_storage; + CubeSwapSet set; + mutable std::shared_mutex set_mutex; - template - void insert(CubeT &&c) { - std::lock_guard lock(set_mutex); - set.emplace(std::forward(c)); - } + public: + explicit Subsubhashy(std::filesystem::path path, size_t n) : set_storage(path, n), set(1, CubePtrHash(&set_storage), CubePtrEqual(&set_storage)) {} - bool contains(const Cube &c) { - std::shared_lock lock(set_mutex); - return set.count(c); + template + void insert(CubeT &&c) { + std::unique_lock lock(set_mutex); + auto cptr = set_storage.local(std::forward(c)); + auto [itr, isnew] = set.emplace(cptr); + if (isnew) { + set_storage.commit(); + } else { + 
lock.unlock(); + set_storage.drop(); } + } - auto size() { - std::shared_lock lock(set_mutex); - return set.size(); - } - }; - template - struct Subhashy { - std::array byhash; - - template - void insert(CubeT &&c) { - HashCube hash; - auto idx = hash(c) % NUM; - auto &set = byhash[idx]; - if (!set.contains(c)) set.insert(std::forward(c)); - // printf("new size %ld\n\r", byshape[shape].size()); + bool contains(const Cube &c) const { + std::shared_lock lock(set_mutex); + auto cptr = set_storage.local(c); + auto itr = set.find(cptr); + set_storage.drop(); + return itr != set.end(); + } + + auto size() const { + std::shared_lock lock(set_mutex); + return set.size(); + } + + void clear() { + std::lock_guard lock(set_mutex); + set.clear(); + set.reserve(1); + set_storage.discard(); + } + + // Get CubeStorage instance. + const CubeStorage &storage() const { return set_storage; } + + auto begin() const { return set.begin(); } + auto end() const { return set.end(); } + auto begin() { return set.begin(); } + auto end() { return set.end(); } +}; + +class Subhashy { + protected: + std::deque byhash; + + public: + Subhashy(int NUM, size_t N, std::filesystem::path path) { + for (int i = 0; i < NUM; ++i) { + byhash.emplace_back(path, N); } + } + + template + void insert(CubeT &&c) { + HashCube hash; + auto idx = hash(c) % byhash.size(); + auto &set = byhash[idx]; - auto size() { - size_t sum = 0; - for (auto &set : byhash) { - auto part = set.size(); - sum += part; - } - return sum; + if (set.contains(c)) return; + set.insert(std::forward(c)); + // printf("new size %ld\n\r", byshape[shape].size()); + } + + void clear() { + for (auto &set : byhash) set.clear(); + } + + auto size() const { + size_t sum = 0; + for (auto &set : byhash) { + auto part = set.size(); + sum += part; } - }; + return sum; + } - std::map> byshape; + auto begin() const { return byhash.begin(); } + auto end() const { return byhash.end(); } + auto begin() { return byhash.begin(); } + auto end() { return 
byhash.end(); } +}; + +class Hashy { + protected: + std::map byshape; + std::filesystem::path base_path; + int N; + mutable std::shared_mutex set_mutex; + public: static std::vector generateShapes(int n) { std::vector out; for (int x = 0; x < n; ++x) @@ -83,27 +140,68 @@ struct Hashy { return out; } + explicit Hashy(std::string path = ".") : base_path(path) {} + void init(int n) { // create all subhashy which will be needed for N - for (auto s : generateShapes(n)) byshape[s].size(); + N = n; + for (auto s : generateShapes(n)) { + initSubHashy(n, s); + } std::printf("%ld sets by shape for N=%d\n\r", byshape.size(), n); } + Subhashy &initSubHashy(int n, XYZ s) { + assert(N == n); + + auto itr = byshape.find(s); + if (itr == byshape.end()) { + auto [itr, isnew] = byshape.emplace(s, Subhashy(32, n, base_path)); + assert(isnew); + itr->second.size(); + return itr->second; + } else { + return itr->second; + } + } + + Subhashy &at(XYZ shape) { + std::shared_lock lock(set_mutex); + auto itr = byshape.find(shape); + if (itr != byshape.end()) { + return itr->second; + } + // should never get here... 
+ std::printf("BUG: missing shape [%2d %2d %2d]:\n\r", shape.x(), shape.y(), shape.z()); + std::abort(); + return *((Subhashy *)0); + } + template void insert(CubeT &&c, XYZ shape) { - auto &set = byshape[shape]; - set.insert(std::forward(c)); + at(shape).insert(std::forward(c)); } - auto size() { + auto size() const { + std::shared_lock lock(set_mutex); size_t sum = 0; - DEBUG_PRINTF("%ld maps by shape\n\r", byshape.size()); + DEBUG1_PRINTF("%ld maps by shape\n\r", byshape.size()); for (auto &set : byshape) { auto part = set.second.size(); - DEBUG_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part); + DEBUG1_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part); sum += part; } return sum; } + + int numShapes() const { + std::shared_lock lock(set_mutex); + return byshape.size(); + } + + auto begin() const { return byshape.begin(); } + auto end() const { return byshape.end(); } + auto begin() { return byshape.begin(); } + auto end() { return byshape.end(); } }; #endif diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 29e622e..20fd660 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -1,15 +1,81 @@ #pragma once #ifndef OPENCUBES_NEWCACHE_HPP #define OPENCUBES_NEWCACHE_HPP +#include #include +#include +#include +#include #include +#include +#include #include "cube.hpp" #include "hashes.hpp" +#include "mapped_file.hpp" -class Workset; +namespace cacheformat { +static constexpr uint32_t MAGIC = 0x42554350; +static constexpr uint32_t XYZ_SIZE = 3; +static constexpr uint32_t ALL_SHAPES = -1; -class CubeIterator { +struct Header { + uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 + uint32_t n; // we will never need 32bit but it is nicely aligned + uint32_t numShapes; // defines length of the shapeTable + uint64_t numPolycubes; // total number of polycubes +}; +struct ShapeEntry { + uint8_t dim0; // offset by -1 + uint8_t dim1; // offset by -1 + uint8_t 
dim2; // offset by -1 + uint8_t reserved; // for alignment + uint64_t offset; // from beginning of file + uint64_t size; // in bytes should be multiple of XYZ_SIZE +}; +}; // namespace cacheformat + +/** + * newCache.hpp: provide two versions of the cache: + * + * - FlatCache implements "memory-only" cache and is constructed from Hashy. + * It is needed for boot-strapping the cache files and computing + * cubes without writing any data into disk. + * FlatCache::getCubesByShape() return ShapeRange that points into the Cube data in memory. + * ShapeRange then provides the Cube range as CubeIterator(s). + * + * - CacheReader implements the actual cache file system. + * CacheReader::getCubesByShape() return FileShapeRange that + * defines subset shape range from the cache file. + * FileShapeRange then provides the Cube range as CubeFileIterator(s). + */ +class ICubeIterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = Cube; + using pointer = Cube*; // or also value_type* + using reference = Cube&; // or also value_type& + + virtual ~ICubeIterator(){}; + + virtual std::unique_ptr clone() const = 0; + + virtual const value_type operator*() const = 0; + virtual uint64_t seek() const = 0; + virtual ICubeIterator& operator++() = 0; + virtual ICubeIterator& operator+=(int incr) = 0; + + friend bool operator==(const ICubeIterator& a, const ICubeIterator& b) { return a.seek() == b.seek(); }; + friend bool operator<(const ICubeIterator& a, const ICubeIterator& b) { return a.seek() < b.seek(); }; + friend bool operator>(const ICubeIterator& a, const ICubeIterator& b) { return a.seek() > b.seek(); }; + friend bool operator!=(const ICubeIterator& a, const ICubeIterator& b) { return a.seek() != b.seek(); }; +}; + +/** + * Iterator for Cubes stored in some memory area. 
+ */ +class CubeIterator : public ICubeIterator { public: using iterator_category = std::forward_iterator_tag; using difference_type = std::ptrdiff_t; @@ -18,22 +84,27 @@ class CubeIterator { using reference = Cube&; // or also value_type& // constructor - CubeIterator(uint32_t _n, XYZ* ptr) : n(_n), m_ptr(ptr) {} + CubeIterator(uint32_t _n, const XYZ* ptr) : n(_n), m_ptr(ptr) {} // invalid iterator (can't deference) explicit CubeIterator() : n(0), m_ptr(nullptr) {} + std::unique_ptr clone() const override { return std::make_unique(*this); } + // derefecence - const value_type operator*() const { return Cube(m_ptr, n); } + const value_type operator*() const override { return Cube(m_ptr, n); } + // pointer operator->() { return (pointer)m_ptr; } + uint64_t seek() const override { return (uint64_t)m_ptr; } + // Prefix increment - CubeIterator& operator++() { + ICubeIterator& operator++() override { m_ptr += n; return *this; } - CubeIterator& operator+=(int incr) { + ICubeIterator& operator+=(int incr) override { m_ptr += n * incr; return *this; } @@ -49,26 +120,168 @@ class CubeIterator { friend bool operator<(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr < b.m_ptr; }; friend bool operator>(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr > b.m_ptr; }; friend bool operator!=(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr != b.m_ptr; }; - friend class Workset; private: uint32_t n; - XYZ* m_ptr; + const XYZ* m_ptr; }; -class ShapeRange { +class CubeReadIterator : public ICubeIterator { public: - ShapeRange(XYZ* start, XYZ* stop, uint64_t _cubeLen, XYZ _shape) - : b(_cubeLen, start), e(_cubeLen, stop), size_(((uint64_t)stop - (uint64_t)start) / (_cubeLen * sizeof(XYZ))), shape_(_shape) {} + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = Cube; + using pointer = Cube*; // or also value_type* + using reference = Cube&; // or also value_type& + + // 
constructor + CubeReadIterator(std::shared_ptr file, uint32_t _n, mapped::seekoff_t offset) : n(_n), m_seek(offset), m_file(file) {} - CubeIterator begin() { return b; } - CubeIterator end() { return e; } + // invalid iterator (can't deference) + explicit CubeReadIterator() : n(0), m_seek(-1) {} + + std::unique_ptr clone() const override { return std::make_unique(*this); } + + // derefecence + const value_type operator*() const override { return read(); } - XYZ& shape() { return shape_; } - auto size() const { return size_; } + // pointer operator->() { return (pointer)m_seek; } + + uint64_t seek() const override { return (uint64_t)m_seek; } + + // Prefix increment + ICubeIterator& operator++() override { + m_seek += n * sizeof(XYZ); + return *this; + } + + ICubeIterator& operator+=(int incr) override { + m_seek += n * incr * sizeof(XYZ); + return *this; + } + + // Postfix increment + CubeReadIterator operator++(int) { + CubeReadIterator tmp = *this; + ++(*this); + return tmp; + } + + friend bool operator==(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek == b.m_seek; }; + friend bool operator<(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek < b.m_seek; }; + friend bool operator>(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek > b.m_seek; }; + friend bool operator!=(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek != b.m_seek; }; private: - CubeIterator b, e; + uint32_t n; + mapped::seekoff_t m_seek; + std::shared_ptr m_file; + + // de-reference is implemented by read() + Cube read() const; +}; + +/** + * To avoid complicating the use of the ICubeIterator + * CacheIterator provides type-erased wrapper that can be copied. 
+ */ +class CacheIterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = Cube; + using pointer = Cube*; // or also value_type* + using reference = Cube&; // or also value_type& + + CacheIterator() {} + + template + explicit CacheIterator(Itr&& init) : proxy(std::make_unique>(std::forward(init))) {} + + CacheIterator(const CacheIterator& copy) { + if (copy.proxy) { + proxy = copy.proxy->clone(); + } + } + CacheIterator& operator=(const CacheIterator& x) { + CacheIterator tmp(x); + std::swap(proxy, tmp.proxy); + return *this; + } + CacheIterator(CacheIterator&& copy) =default; + CacheIterator& operator=(CacheIterator&& x) =default; + + const value_type operator*() const { return **proxy; } + + uint64_t seek() const { return proxy->seek(); } + + CacheIterator& operator++() { + ++(*proxy); + return *this; + } + CacheIterator& operator+=(int incr) { + (*proxy) += incr; + return *this; + } + + CacheIterator operator++(int) { + CacheIterator tmp = *this; + ++(*this); + return tmp; + } + + friend bool operator==(const CacheIterator& a, const CacheIterator& b) { return a.seek() == b.seek(); }; + friend bool operator<(const CacheIterator& a, const CacheIterator& b) { return a.seek() < b.seek(); }; + friend bool operator>(const CacheIterator& a, const CacheIterator& b) { return a.seek() > b.seek(); }; + friend bool operator!=(const CacheIterator& a, const CacheIterator& b) { return a.seek() != b.seek(); }; + + private: + std::unique_ptr proxy; +}; + +class IShapeRange { + public: + IShapeRange(){}; + virtual ~IShapeRange() {} + + virtual CacheIterator begin() const = 0; + virtual CacheIterator end() const = 0; + virtual XYZ& shape() = 0; + virtual size_t size() const = 0; +}; + +class ShapeRange : public IShapeRange { + public: + ShapeRange(const XYZ* start, const XYZ* stop, uint64_t _cubeLen, XYZ _shape) + : b(CubeIterator(_cubeLen, start)), e(CubeIterator(_cubeLen, stop)), 
size_(std::distance(start, stop) / _cubeLen), shape_(_shape) {} + + CacheIterator begin() const override { return b; } + CacheIterator end() const override { return e; } + + XYZ& shape() override { return shape_; } + size_t size() const override { return size_; } + + private: + CacheIterator b, e; + uint64_t size_; + XYZ shape_; +}; + +class FileShapeRange : public IShapeRange { + public: + FileShapeRange(std::shared_ptr file, mapped::seekoff_t start, mapped::seekoff_t stop, uint64_t _cubeLen, XYZ _shape) + : b(CubeReadIterator(file, _cubeLen, start)), + e(CubeReadIterator(file, _cubeLen, stop)), + size_((stop - start) / _cubeLen), shape_(_shape) {} + + CacheIterator begin() const override { return b; } + CacheIterator end() const override { return e; } + + XYZ& shape() override { return shape_; } + size_t size() const override { return size_; } + + private: + CacheIterator b, e; uint64_t size_; XYZ shape_; }; @@ -76,7 +289,7 @@ class ShapeRange { class ICache { public: virtual ~ICache(){}; - virtual ShapeRange getCubesByShape(uint32_t i) = 0; + virtual IShapeRange& getCubesByShape(uint32_t i) = 0; virtual uint32_t numShapes() = 0; virtual size_t size() = 0; }; @@ -98,46 +311,21 @@ class CacheReader : public ICache { uint32_t numShapes() override { return header->numShapes; }; operator bool() { return fileLoaded_; } - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; 
- - CubeIterator begin() { - uint8_t* start = filePointer + shapes[0].offset; - return CubeIterator(header->n, (XYZ*)start); - } + // get shapes at index [0, numShapes()[ + IShapeRange& getCubesByShape(uint32_t i) override; - CubeIterator end() { - uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; - return CubeIterator(header->n, (XYZ*)stop); - } + private: + std::shared_ptr file_; + std::unique_ptr> header_; + std::unique_ptr> shapes_; - ShapeRange getCubesByShape(uint32_t i) override; + std::vector shapeRanges; - private: - uint8_t* filePointer; std::string path_; - int fileDescriptor_; - uint64_t fileSize_; bool fileLoaded_; - Header dummyHeader; - Header* header; - ShapeEntry* shapes; + const cacheformat::Header dummyHeader; + const cacheformat::Header* header; + const cacheformat::ShapeEntry* shapes; }; class FlatCache : public ICache { @@ -149,26 +337,63 @@ class FlatCache : public ICache { FlatCache() {} FlatCache(Hashy& hashes, uint8_t n) : n(n) { allXYZs.reserve(hashes.size() * n); - shapes.reserve(hashes.byshape.size()); + shapes.reserve(hashes.numShapes()); // std::printf("Flatcache %d %p %p\n", n, (void*)allXYZs.data(), (void*)shapes.data()); - for (auto& [shape, set] : hashes.byshape) { + for (auto& [shape, set] : hashes) { auto begin = allXYZs.data() + allXYZs.size(); - for (auto& subset : set.byhash) { - for (auto& cube : subset.set) - // allXYZs.emplace_back(allXYZs.end(), subset.set.begin(), subset.set.end()); - std::copy(cube.begin(), cube.end(), std::back_inserter(allXYZs)); + for (auto& subset : set) { + for (auto& cubeptr : subset) cubeptr.copyout(subset.storage(), n, std::back_inserter(allXYZs)); } auto end = allXYZs.data() + allXYZs.size(); // std::printf(" SR %p %p\n", (void*)begin, (void*)end); shapes.emplace_back(begin, end, n, shape); } + + // Add dummy shape range at back: + shapes.emplace_back(nullptr, nullptr, n, XYZ(0, 0, 0)); } - ShapeRange getCubesByShape(uint32_t i) override { - if (i >= 
shapes.size()) return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)}; + IShapeRange& getCubesByShape(uint32_t i) override { + if (i >= shapes.size() - 1) return shapes.back(); return shapes[i]; }; uint32_t numShapes() override { return shapes.size(); }; size_t size() override { return allXYZs.size() / n / sizeof(XYZ); } }; +class CacheWriter { + protected: + std::mutex m_mtx; + std::condition_variable m_run; + std::condition_variable m_wait; + bool m_active = true; + + // Jobs that flush and finalize the written file. + size_t m_num_flushes = 0; + std::deque> m_flushes; + + // Temporary copy jobs into the memory mapped file. + size_t m_num_copys = 0; + std::deque> m_copy; + + // thread pool executing the jobs. + std::deque m_flushers; + + void run(); + + public: + CacheWriter(int num_threads = 8); + ~CacheWriter(); + + /** + * Capture snapshot of the Hashy and write cache file. + * The data may not be entirely flushed before save() returns. + */ + void save(std::string path, Hashy& hashes, uint8_t n); + + /** + * Complete all flushes immediately. + */ + void flush(); +}; + #endif diff --git a/cpp/include/utils.hpp b/cpp/include/utils.hpp index 4cd23e3..f895877 100644 --- a/cpp/include/utils.hpp +++ b/cpp/include/utils.hpp @@ -3,12 +3,39 @@ #define OPENCUBES_UTILS_HPP #include + +// Debug print level: all prints enabled +// below DEBUG_LEVEL. +// DEBUG_LEVEL -> 0 all prints disabled. +// DEBUG_LEVEL -> 1 enable DEBUG_PRINTF() statements +// DEBUG_LEVEL -> 2 enable DEBUG1_PRINTF() statements and earlier +// DEBUG_LEVEL -> 3 all prints enabled. +#define DEBUG_LEVEL 1 + #ifdef DEBUG + +#if DEBUG_LEVEL >= 1 #define DEBUG_PRINTF(...) std::printf(__VA_ARGS__) -#else -#define DEBUG_PRINTF(...) \ - do { \ - } while (0) #endif +#if DEBUG_LEVEL >= 2 +#define DEBUG1_PRINTF(...) std::printf(__VA_ARGS__) +#endif + +#if DEBUG_LEVEL >= 3 +#define DEBUG2_PRINTF(...) std::printf(__VA_ARGS__) +#endif + +#endif + +#ifndef DEBUG_PRINTF +#define DEBUG_PRINTF(...) 
do {} while (0) #endif +#ifndef DEBUG1_PRINTF +#define DEBUG1_PRINTF(...) do {} while (0) +#endif +#ifndef DEBUG2_PRINTF +#define DEBUG2_PRINTF(...) do {} while (0) +#endif + +#endif \ No newline at end of file diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp new file mode 100644 index 0000000..f0e4b0f --- /dev/null +++ b/cpp/libraries/mapped_file.cpp @@ -0,0 +1,489 @@ +/** + * Copyright 2023 Jarmo A Tiitto + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the “Software”), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "mapped_file.hpp" + +#include +#include +#include +#include +#include + +// POSIX/Linux APIs +#include +#include +#include +#include + +#include +#include + +#ifndef MAP_HUGE_2MB +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#endif + +namespace mapped { + +/** + * Mapped file POSIX/Linux compatible implementation + */ +file::file() : fd(-1), fd_size(0) {} + +file::~file() { close(); } + +void file::close() { + if (fd >= 0) { + ::fsync(fd); + ::close(fd); + fd = -1; + fd_size = 0; + } +} + +int file::open(const char* fname) { + close(); + + fd = ::open64(fname, O_RDONLY); + if (fd == -1) { + // std::fprintf(stderr, "Error opening file for reading\n"); + return -1; + } + + struct stat64 finfo; + if (fstat64(fd, &finfo)) { + std::fprintf(stderr, "Error getting file size: %s\n", std::strerror(errno)); + return -1; + } + fd_size = finfo.st_size; + fd_rw = false; + return 0; +} + +int file::openrw(const char* fname, size_t maxsize, int flags) { + // create new files with "normal" permissions: "-rw-r--r--" + const mode_t fperms = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH; + + close(); + + maxsize = roundUp(maxsize); + + if (!flags) { + fd = ::open64(fname, O_RDWR | O_CLOEXEC); + if (fd == -1) { + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + return -1; + } + + fd_rw = true; + + struct stat64 finfo; + if (fstat64(fd, &finfo)) { + std::fprintf(stderr, "Error getting file size:%s\n", std::strerror(errno)); + return -1; + } + return truncate(finfo.st_size); + + } else if ((flags & (CREATE | RESIZE)) == (CREATE | RESIZE)) { + fd = ::open64(fname, O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC, fperms); + if (fd == -1) { + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + return -1; + } + fd_rw = true; + + if(flags & FSTUNE) { + int flags = 0; + ioctl(fd, FS_IOC_GETFLAGS, &flags); + flags |= FS_NOATIME_FL | FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &flags); + } + 
return truncate(maxsize); + + } else if ((flags & RESIZE) != 0) { + fd = ::open64(fname, O_RDWR | O_CLOEXEC, fperms); + if (fd == -1) { + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + return -1; + } + fd_rw = true; + return truncate(maxsize); + } else { + std::fprintf(stderr, "Invalid open flags:%s\n", std::strerror(errno)); + return -1; + } +} + +bool file::is_rw() const { return fd_rw; } + +seekoff_t file::size() const { return fd_size; } + +int file::truncate(seekoff_t newsize) { + // resize the backing file + if (newsize != fd_size && ftruncate64(fd, newsize)) { + std::fprintf(stderr, "Error resizing backing file:%s\n", std::strerror(errno)); + return -1; + } + fd_size = newsize; + return 0; +} + +int file::readAt(seekoff_t fpos, len_t size, void* dataout) const +{ + ssize_t rd = pread(fd, dataout, size, fpos); + if (rd != (signed)size) { + std::fprintf(stderr, "Error reading data from file:%s\n", std::strerror(errno)); + return -1; + } + return 0; +} + +int file::writeAt(seekoff_t fpos, len_t size, const void* datain) +{ + std::lock_guard lock(mut); + + ssize_t rd = pwrite(fd, datain, size, fpos); + if (rd != (signed)size) { + std::fprintf(stderr, "Error writing data into file:%s\n", std::strerror(errno)); + return -1; + } + + fd_size = std::max(fd_size, fpos+size); + return 0; +} + +int file::copyAt(std::shared_ptr other, seekoff_t other_fpos, len_t size, seekoff_t dest_fpos) +{ + off64_t srcp = other_fpos; + off64_t dstp = dest_fpos; + ssize_t cpy = ::copy_file_range(other->fd, &srcp, fd, &dstp, size, 0); + if (cpy != (signed)size) { + std::fprintf(stderr, "Error copying file data:%s\n", std::strerror(errno)); + return -1; + } + + std::lock_guard lock(mut); + fd_size = std::max(fd_size, dest_fpos+size); + return 0; +} + + +/** + * Mapped region POSIX/Linux compatible implementation. 
+ */ + +region::region(std::shared_ptr src, seekoff_t fpos, len_t size, len_t window) : mfile(src) { + std::lock_guard lock(mtx); + remap(fpos, size, window); +} + +region::region(std::shared_ptr src) : mfile(src) { + std::lock_guard lock(mtx); + auto sz = mfile->size(); + remap(0, sz, sz); +} + +region::~region() { + // destructor is not thread-safe. + std::lock_guard lock(mtx); + map_fseek = 0; + remap(0, 0, 0); +} + +/** + * This is the core implementation of mapped_file: + * remap(0,0) releases the mapping. + * remap(0, n) mmap roundUp(n) bytes at offset 0 + * remap(0, k) mremap roundUp(n) bytes at offset 0 (grows the existing mapping) + * remap(n, j) munmap old region, mmap new at offset roundDown(n) + * + * In read-write mode the backing file is grown to fit the mapping. + * + * @warn this->mtx must be held when this function is called. + */ +void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { + if (fpos == usr_fseek && size == usr_size) return; // No-op + // check if [fpos, fpos+size] fits into the existing + // mmap() window and only adjust the user region. + if (size && map_ptr && (map_fseek <= fpos && fpos + size <= map_fseek + map_size)) { + usr_fseek = fpos; + usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); + usr_size = size; + return; + } + + // if size == 0 or the usr_fseek != fpos, + // we have to unmap the old region first, if any. + if (!!map_ptr && (size == 0 || usr_fseek != fpos)) { + if (::munmap(map_ptr, map_size) == -1) { + std::fprintf(stderr, "Error mapping file memory\n"); + return; + } + map_ptr = nullptr; + map_size = 0; + usr_ptr = nullptr; + usr_size = 0; + if (size == 0) return; + } + // keep what user tried to ask: + usr_fseek = fpos; + usr_size = size; + + if (map_ptr && map_fseek == fpos) { + // this mapping exists already at same map_fseek + // remap it to grow the region. 
+ auto newsize = roundUp(std::max(size, window)); + void* newptr = mremap(map_ptr, map_size, newsize, MREMAP_MAYMOVE); + if (newptr == MAP_FAILED) { + std::fprintf(stderr, "Error resizing memory-map of file:%s\n", std::strerror(errno)); + std::abort(); + return; + } + map_ptr = newptr; + map_size = newsize; + return; + } + + // create new mapping + if (mfile->is_rw()) { + // RW mapping + auto newsize = roundUp(std::max(size, window)); + + // take file lock so size() check --> truncate is atomic. + std::unique_lock trunclock(mfile->mut); + if (mfile->size() < fpos + newsize && mfile->truncate(fpos + newsize)) { + // failed. Disk full? + std::abort(); + return; + } + trunclock.unlock(); + + // mmap requires fpos && size to be multiple of PAGE_SIZE + map_fseek = roundDown(fpos); + if (map_fseek < fpos) { + // adjust size to cover. + newsize += PAGE_SIZE; + } + map_size = newsize; + map_ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, mfile->fd, map_fseek); + if (map_ptr == MAP_FAILED) { + // If this gets triggered we are in deep trouble + std::fprintf(stderr, "Error memory-mapping file:%s %lu %d %lu\n", std::strerror(errno), size, mfile->fd, fpos); + std::fprintf(stderr, "Dumping /proc/self/maps:\n"); + // for debugging information try print /proc/self/mmaps contents + // as this explains why we hit some limit of the system. + std::ifstream fmaps("/proc/self/maps"); + std::string buf; + int count = 0; + while(std::getline(fmaps, buf)) { + std::fprintf(stderr, "%s\n", buf.c_str()); + ++count; + } + std::fprintf(stderr, "counted %d memory-maps in process.\n", count); + return; + } + } else { + // RO mapping + if (mfile->size() <= fpos) { + // can't: the backing file is too small. + std::fprintf(stderr, "Error seeking past end of file.\n"); + std::abort(); + return; + } + map_size = roundUp(std::max(size, window)); + map_fseek = roundDown(fpos); + // Map the region. 
(use huge pages, don't reserve backing store) + map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE, mfile->fd, map_fseek); + + if (!map_ptr || map_ptr == MAP_FAILED) { + std::fprintf(stderr, "Error mapping file\n"); + std::abort(); + return; + } + } + + // hint that this memory is accessed in random order. + if(madvise(map_ptr, map_size, MADV_RANDOM)) { + std::fprintf(stderr, "warn: madvice(MADV_RANDOM) failed: %s\n", std::strerror(errno)); + } + // adjust the usr_ptr to fix + // any page misalignment. + usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); +} + +void region::window(len_t window) { + std::lock_guard lock(mtx); + auto usize = usr_size; + // note: remap() does nothing if window == usr_size + remap(usr_fseek, window, window); + usr_size = usize; +} + +void region::jump(seekoff_t fpos) { + std::lock_guard lock(mtx); + remap(fpos, usr_size, map_size); + is_dirty = false; +} + +void region::flushJump(seekoff_t fpos) { + flush(); + std::lock_guard lock(mtx); + remap(fpos, usr_size, map_size); +} + +void region::flush() { + // only flush if dirty and RW mapped. + std::lock_guard lock(mtx); + if (is_dirty && mfile->is_rw()) { + is_dirty = false; + auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); + auto flush_len = roundUp(usr_size); + if (flush_begin < usr_ptr) flush_len += PAGE_SIZE; + if (msync(flush_begin, flush_len, MS_ASYNC)) { + std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); + } + } +} + +void region::sync() { + // only flush if dirty and RW mapped. 
+ std::lock_guard lock(mtx); + if (is_dirty && mfile->is_rw()) { + is_dirty = false; + auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); + auto flush_len = roundUp(usr_size); + if (flush_begin < usr_ptr) flush_len += PAGE_SIZE; + if (msync(flush_begin, flush_len, MS_SYNC)) { + std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); + } + } +} + +void region::writeAt(seekoff_t fpos, len_t datasize, const void* data) { + auto srcmem = (const char*)data; + + // take file lock so that file size check --> truncate is atomic. + std::unique_lock trunclock(mfile->mut); + if(mfile->size() < fpos+datasize && mfile->truncate(fpos+datasize)) { + return; + } + trunclock.unlock(); + + // does write fall out the mapped area begin? + if (fpos < map_fseek) { + // max size that can be written before map_fseek + ssize_t wr = std::min(map_fseek - fpos, datasize); + if (pwrite(mfile->fd, srcmem, wr, fpos) != wr) { + std::fprintf(stderr, "Error writing file:%s\n", std::strerror(errno)); + } + srcmem += wr; + fpos += wr; + datasize -= wr; + } + + if (fpos >= map_fseek && fpos < map_fseek + map_size && datasize) { + // max size that can be copied into this mapping: + ssize_t wr = std::min(map_size - (fpos - map_fseek), datasize); + std::memcpy((char*)map_ptr + (fpos - map_fseek), srcmem, wr); + srcmem += wr; + fpos += wr; + datasize -= wr; + } + + // does write fall out the mapped area end? + if (datasize) { + // write into backing file after the mapped area: + if (pwrite(mfile->fd, srcmem, datasize, fpos) != ssize_t(datasize)) { + std::fprintf(stderr, "Error writing file:%s\n", std::strerror(errno)); + } + } +} + +void region::readAt(seekoff_t fpos, len_t datasize, void* data) const { + auto dstmem = (char*)data; + + // does read fall out the mapped area begin? 
+ if (fpos < map_fseek) { + // max size that can be written before map_fseek + ssize_t rd = std::min(map_fseek - fpos, datasize); + if (pread(mfile->fd, dstmem, rd, fpos) != rd) { + std::fprintf(stderr, "Error reading file:%s\n", std::strerror(errno)); + } + dstmem += rd; + fpos += rd; + datasize -= rd; + } + + if (fpos >= map_fseek && fpos < map_fseek + map_size && datasize) { + // max size that can be copied from this mapping: + ssize_t rd = std::min(map_size - (fpos - map_fseek), datasize); + std::memcpy(dstmem, (char*)map_ptr + (fpos - map_fseek), rd); + dstmem += rd; + fpos += rd; + datasize -= rd; + } + + // does read fall out the mapped area end? + if (datasize) { + // read from backing file after the mapped area: + if (pread(mfile->fd, dstmem, datasize, fpos) != ssize_t(datasize)) { + std::fprintf(stderr, "Error reading file:%s\n", std::strerror(errno)); + } + } +} + + +void region::resident(bool resident) { + std::lock_guard lock(mtx); + if(madvise(map_ptr, map_size, resident ? MADV_WILLNEED : MADV_DONTNEED)) { + std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); + } +} + + +void region::discard(seekoff_t fpos, len_t datasize) { + + auto cur = usr_fseek + fpos; + + if (cur < map_fseek + map_size) { + // max size that can discarded from this mapping: + ssize_t dm = std::min(map_size - (cur - map_fseek), datasize); + + // Have to be careful here: if we delete too much + // caller will not have an good time. + // align size down to page size. 
+ dm = roundDown(dm); + // align file offset up + auto _first = roundUp(cur - map_fseek); + if(_first > cur - map_fseek) + dm -= PAGE_SIZE; + + if(dm >= (signed)PAGE_SIZE) { + if(madvise((char*)map_ptr + _first, dm, MADV_REMOVE)) { + std::fprintf(stderr,"Error discarding memory-map region:%s\n",std::strerror(errno)); + } + } + } +} + +}; // namespace mapped diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp new file mode 100644 index 0000000..ce3c2e3 --- /dev/null +++ b/cpp/libraries/mapped_file.hpp @@ -0,0 +1,575 @@ +/** + * Copyright 2023 Jarmo A Tiitto + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the “Software”), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef MAPPEDFILE_HPP_INCLUDED +#define MAPPEDFILE_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include +#include + +/** + * Memory mapped file I/O utilities + * - mapped::file class for opening an file + * - mapped::region class for RW/RO memory mapping part the file instance. 
+ * - mapped::struct_region template for RW/RO accessing part the file as specified type. + * - mapped::array_region template for RW/RO accessing part of the file as array of T elements. + * + * @note + * When doing read-only mapping the region instance + * should be const qualified as this restricts + * the region class API to read-only operations and prevents + * accidental modification of the file. + * Use std::make_unique() in this case. + * + * @note + * When using the read-write features the backing file is resized + * in multiple PAGE_SIZE blocks even if the actually mapped size is + * something else. + * openrw(...,size,RESIZE) always truncates the file to roundUp(size). + * You should do file->truncate(< sizeInBytes>) to make the file + * size exactly what you want before the file is closed. + * + * Modified regions should flush() or sync() before they are destroyed + * or the modified data may not end up in the file. + * + * TODO: + * - Two region instances should not overlap, + * i.e same portion of the file should not be mapped twice. + * (Not sure if this is actually broken now, but you have been warned) + * - Multi-threading support not tested/written. + * Currently the same mapped region can be used by multiple threads, + * but cannot it be modified. + * - Better error handling. (exceptions?, error codes?) + * Currently critical errors are printed and std::abort() is called. + * How do we handle system errors that happen in constructors? 
+ */ +namespace mapped { + +const size_t PAGE_SIZE = 4096; + +static inline size_t roundToPage(ptrdiff_t x) { return (std::max(0, x - 1) & ~(PAGE_SIZE - 1)) + PAGE_SIZE; } + +constexpr inline size_t roundUp(uintptr_t x) { return (x + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); } + +constexpr inline size_t roundDown(uintptr_t x) { return (x & ~(PAGE_SIZE - 1)); } + +/** + * seekoff_t: Position of the file cursor + */ +using seekoff_t = uint64_t; +/** + * len_t: length of file data + */ +using len_t = size_t; + +class file; + +/** + * Memory-mapped region + * @brief + * the base class memory-maps an raw range of bytes from the backing file. + */ +class region { + protected: + std::mutex mtx; + // actually mapped region: + void* map_ptr = nullptr; + size_t map_size = 0; + seekoff_t map_fseek = 0; + // what constructor asked: + void* usr_ptr = nullptr; + size_t usr_size = 0; + seekoff_t usr_fseek = 0; + // todo: maybe use std::weak_ptr? + // that would allow file to be released and + // any any existing region(s) would still work. + // (but only if remap() is not called) + std::shared_ptr mfile; + // non-const data access sets is_dirty. + bool is_dirty = false; + + void remap(const seekoff_t fpos, const len_t size, const len_t window); + + region() {} + + public: + /** + * Open memory mapped region into a file. + * @brief + * Seeks at fpos in file and map size bytes + * starting from that position in file. + * @param window + * over-extend mapping up to max(size,window) bytes. + * Setting window bigger than size allows more efficient operation: + * [fpos, fpos + window] area is memory mapped + * but region will only operate on the + * [roundDown(fpos), roundup(fpos+size)] + * sub-portion of the memory. + * @note + * - Seeking past the EOF in file that is read-only will fail. + * The mapped size may extend past EOF but accessing past EOF + * either returns undefined data or program is terminated by OS. 
+ * (EOF is at file->size()) + * - Seeking past the EOF that is read-write + * grows the backing file to fit the mapping. + * The backing file is always extended in multiple of PAGE_SIZE bytes. + * @note + * If size and/or fpos are not aligned to multiple of PAGE_SIZE + * they are forcibly aligned internally. This results in + * regionSize() and regionSeek() that may differ compared to + * size() and getSeek(). + * Side-effect is that backing file may grow more than expected. + */ + region(std::shared_ptr src, seekoff_t fpos, len_t size, len_t window = 0); + + /** + * Open memory mapped region into the file + * @brief + * same as region(myfile, 0, myfile.size()) + * and memory maps the entire file. + */ + explicit region(std::shared_ptr src); + + /** + * Note: even if region was modified, + * destructor will not flush()/sync() before tearing down the mapping. + */ + virtual ~region(); + + // region is not copyable + region(const region&) =delete; + region& operator=(const region&) =delete; + + // region is moveable + friend void swap(region& a, region& b) { + using std::swap; + //std::lock(a.mtx,b.mtx); + //std::lock_guard l0(a.mtx, std::adopt_lock); + //std::lock_guard l1(b.mtx, std::adopt_lock); + swap(a.map_ptr,b.map_ptr); + swap(a.map_size,b.map_size); + swap(a.map_fseek,b.map_fseek); + swap(a.usr_ptr,b.usr_ptr); + swap(a.usr_size,b.usr_size); + swap(a.usr_fseek,b.usr_fseek); + swap(a.mfile,b.mfile); + swap(a.is_dirty,b.is_dirty); + } + region(region&& mv) : region() { + swap(*this, mv); + } + region& operator=(region&& mv) { + swap(*this, mv); + return *this; + } + + /** + * Get data pointer. + */ + const void* data() const { return usr_ptr; } + void* data() { + is_dirty = true; + return usr_ptr; + } + + std::shared_ptr getFile() { return mfile; } + + /** + * Get the seek used to init this region. + */ + seekoff_t getSeek() const { return usr_fseek; } + /** + * Get the size used to init this region. 
+ */ + len_t size() const { return usr_size; } + + /** + * Get page aligned seek <= getSeek() + */ + seekoff_t regionSeek() const { return map_fseek; } + /** + * Get page aligned size >= size() + */ + len_t regionSize() const { return map_size; } + + /** + * Resize the mapped region. + * @note the mapped memory address may move, + * but current contents are preserved. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void resize(len_t newsize); + + /** + * @brief over-extend mapping up to max(size(),window) bytes. + * Setting window bigger than size() allows more efficient operation: + * [regionSeek(), regionSeek() + window] area is memory mapped + * but region will only operate on the + * [roundDown(getSeek()), roundUp(getSeek()+size())] + * sub-portion of the memory. + */ + void window(len_t window = 0); + + /** + * Flush mapped memory region into the file. + * @brief this is an hint to operating system that + * memory region shall be synchronized to disk. + * It may not wait for this to have completed before returning. + * @note only the page aligned region + * [roundDown(data()), roundUp(data()+size())] + * is flushed. + * @note Use sync() instead if you must guarantee the data has + * reached persistent storage. + */ + void flush(); + + /** + * Synchronize modified memory region onto disk. + */ + void sync(); + + /** + * Write data into the backing file. + * @brief + * writeAt() stores range of bytes into the backing file. + * @note + * The region doesn't need to have this area to be memory-mapped: + * The data that falls into the memory-mapped + * [regionSeek(), regionSeek()+regionSize()] area is simply memcpy'ed. + * Any data that falls out this window is written directly + * into the backing file. + * The backing file is grown to fit the data when needed. + */ + void writeAt(seekoff_t fpos, len_t datasize, const void* data); + + /** + * Read data from the backing file. 
+ * @brief + * readAt() reads [fpos, fpos+datasize] range of bytes from the backing file + * @note + * The region doesn't need to have this area memory-mapped + * The read out area that falls into the memory-mapped + * [regionSeek(), regionSeek()+regionSize()] area is simply memcpy'ed. + * Any data that falls out this window is read directly + * from the backing file. + */ + void readAt(seekoff_t fpos, len_t datasize, void* data) const; + + /** + * Set memory region to resident/or released. + * @brief setting memory range to non-resident state + * causes system to drop the data from system memory. + * Reading non-resident memory region again causes system to + * fetch data from the disk again. + * @warn if memory region is not flushed before setting + * resident(false) any writes may be discarded to backing file. + */ + void resident(bool state); + + /** + * Discard memory region. + * @brief discarding memory range causes system + * to reclaim the memory *and* the on-disk area. + * This means the data is lost in the mapped memory region, + * and any data within will not be written onto disk by sync() + * Subsequent reads after discard() return zero filled data. + * @note + * The discarded area shall be within the mapped area. + * @param fpos + * file offset from begin of this mapping. (getSeek() + fpos) + * @param datasize + * length of the data area to discard. + */ + void discard(seekoff_t fpos, len_t datasize); + + /** + * Seek in the file to fpos position and + * remap the memory region there. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void jump(seekoff_t fpos); + + /** + * Flush the current region and + * Seek in the file to fpos position and + * remap the memory region there. + * @warn all pointers or references into + * the mapping are invalidated. 
+ */ + void flushJump(seekoff_t fpos); +}; + +static_assert(std::is_move_constructible_v); +static_assert(std::is_move_assignable_v); +static_assert(std::is_swappable_v); + +/** + * Typed region. + * struct_region allows directly accessing an on-disk structure. + * The region size is implicit from the type. + */ +template +class struct_region : protected region { + public: + using type = typename std::decay::type; + static_assert(std::is_standard_layout_v, "T must be plain-old-data type"); + + /** + * Memory map struct_region at fpos in file. + */ + struct_region(std::shared_ptr f, seekoff_t fpos, len_t window = 0) : region(f, fpos, sizeof(type), window) {} + + type* get() { return static_cast(data()); } + const type* get() const { return static_cast(data()); } + + type* operator->() { return get(); } + const type* operator->() const { return get(); } + + type& operator*() { return *get(); } + const type& operator*() const { return *get(); } + + using region::flush; + using region::getFile; + using region::getSeek; + using region::readAt; + using region::sync; + using region::writeAt; + using region::resident; + using region::window; + using region::discard; + + // note: size means the sizeof(T) + using region::size; + + /** + * Get the file seek position just after *this. + */ + seekoff_t getEndSeek() const { return getSeek() + sizeof(T); } + + /** + * Seek to fpos in file and remap the region. + * @return the pointer into the new position + */ + type* jump(seekoff_t fpos) { + region::jump(fpos); + return get(); + } + + type* flushJump(seekoff_t fpos) { + region::flushJump(fpos); + return get(); + } +}; + +static_assert(std::is_move_constructible_v>); +static_assert(std::is_move_assignable_v>); +static_assert(std::is_swappable_v>); + +/** + * Typed array region. + * @brief + * array_region allows directly accessing an on-disk array of structures + * The element size is implicit from the type and length of the array + * is provided by the constructor. 
+ * @provides resize(), operator[], begin(), end() + */ +template +class array_region : protected region { + protected: + size_t num_elements = 0; + + public: + using type = typename std::decay::type; + static_assert(std::is_standard_layout_v, "T must be plain-old-data type"); + + /** + * Memory map array_region at fpos in file and map array_size elements. + */ + array_region(std::shared_ptr f, seekoff_t fpos, size_t array_size) : region(f, fpos, sizeof(type) * array_size), num_elements(array_size) {} + + /** + * Get pointer to first mapped element. + */ + type* get() { return static_cast(data()); } + const type* get() const { return static_cast(data()); } + + using region::flush; + using region::getFile; + using region::getSeek; + using region::readAt; + using region::sync; + using region::writeAt; + + /** + * Resize the mapped array region. + */ + void resize(size_t elements) { + region::resize(sizeof(T) * elements); + num_elements = elements; + } + + /** + * Get number of mapped *elements* + */ + size_t size() const { return num_elements; } + + /** + * Access the array elements + */ + T& operator[](size_t index) { + assert(index < num_elements); + return get()[index]; + } + const T& operator[](size_t index) const { + assert(index < num_elements); + return get()[index]; + } + /** + * Iterators + */ + T* begin() { return get(); } + T* end() { return get() + num_elements; } + const T* begin() const { return get(); } + const T* end() const { return get() + num_elements; } + + /** + * Get the file seek position just after *this. + */ + seekoff_t getEndSeek() const { return getSeek() + sizeof(T) * num_elements; } + + /** + * Seek to fpos in file and remap the region. + * @return the pointer into the first element in the array + */ + type* jump(seekoff_t fpos) { + region::jump(fpos); + return get(); + } + + type* flushJump(seekoff_t fpos) { + region::flushJump(fpos); + return get(); + } +}; + +/** + * Memory-mapped file I/O class. 
+ * @note + * file should be created with std::make_shared() + * as mapped region(s) take shared ownership of the file. + */ +class file : public std::enable_shared_from_this { + private: + std::mutex mut; + int fd; + seekoff_t fd_size; + bool fd_rw; + // the file and region classes are inherently coupled, + // and we don't want to expose the internals. + friend class region; + + public: + enum : int { + CREATE = 0x1, //!< Create new file, if doesn't exist. + RESIZE = 0x2, //!< Resize file. + FSTUNE = 0x4 //!< When creating new file attempt to set + //!< file system attributes to improve performance. + }; + + file(); + ~file(); + + /** + * Open file in read-only mode. + * @return non-zero if error occurred. + */ + int open(const char* file); + + /** + * Create/Open file in read-write mode. + * @param flags + * - CREATE|RESIZE creates or replaces existing file + * that is truncated to maxsize. + * - RESIZE opens existing file and truncates it to + * maxsize. The file must exist already. + * - flags == 0 ignores the maxsize argument and opens + * existing file. + * @warn default open mode discards any previous file contents! + * @return non-zero if error occurred. + */ + int openrw(const char* file, len_t maxsize, int flags = CREATE | RESIZE); + + /** + * Check if file open R/W or RO + */ + bool is_rw() const; + + /** + * Resize the open file to newsize bytes. + * (file must be open in R/W mode) + * @return non-zero if error occurred. + */ + int truncate(seekoff_t newsize); + + /** + * Read @size bytes starting at file offset @fpos + * @note copies [fpos, fpos+size] into [dataout, dataout+size] + * @return non-zero if error occurred. + */ + int readAt(seekoff_t fpos, len_t size, void* dataout) const; + + /** + * Write @size bytes starting at file offset @fpos + * @note copies [datain, datain+size] into [fpos, fpos+size] + * @note the file size after writeAt() is std::max(size(), fpos+size) + * @return non-zero if error occurred. 
+ */ + int writeAt(seekoff_t fpos, len_t size, const void* datain); + + /** + * Copy @size bytes starting at file offset @other_fpos + * from @other file copying the data at @dest_fpos in this file. + * @note copies from other:[other_fpos, other_fpos+size] into this:[dest_fpos, dest_fpos+size] + * @note if other is same as *this the destination range cannot overlap with the source range. + */ + int copyAt(std::shared_ptr other, seekoff_t other_fpos, len_t size, seekoff_t dest_fpos); + + /** + * Current length of the file + * The file EOF (end-of-file) is at this position. + */ + seekoff_t size() const; + + // Close the file. + void close(); +}; + +}; // namespace mapped +#endif diff --git a/cpp/program.cpp b/cpp/program.cpp index d9f0169..5c37ab0 100644 --- a/cpp/program.cpp +++ b/cpp/program.cpp @@ -1,14 +1,16 @@ #include #include "cmdparser.hpp" +#include "config.hpp" #include "cubes.hpp" void configure_arguments(cli::Parser& parser) { - parser.set_required("n", "cube_size", "the size of polycube to generate up to"); + parser.set_optional("n", "cube_size", 1, "the size of polycube to generate up to"); parser.set_optional("t", "threads", 1, "the number of threads to use while generating"); parser.set_optional("c", "use_cache", false, "whether to load cache files"); parser.set_optional("w", "write_cache", false, "wheather to save cache files"); parser.set_optional("s", "split_cache", false, "wheather to save in sparate cache files per output shape"); + parser.set_optional("v", "version", false, "print build version info"); parser.set_optional("u", "use_split_cache", false, "use separate cachefile by input shape"); parser.set_optional("f", "cache_file_folder", "./cache/", "where to store cache files"); } @@ -17,6 +19,9 @@ int main(int argc, char** argv) { cli::Parser parser(argc, argv); configure_arguments(parser); parser.run_and_exit_if_error(); + if (parser.get("v")) { + std::printf("Built from %s, %s, %s\n", CONFIG_VERSION, CONFIG_BUILDTYPE, CONFIG_COMPILERID); + 
} gen(parser.get("n"), parser.get("t"), parser.get("c"), parser.get("w"), parser.get("s"), parser.get("u"), parser.get("f")); return 0; } diff --git a/cpp/src/cache.cpp b/cpp/src/cache.cpp deleted file mode 100644 index 071ff0f..0000000 --- a/cpp/src/cache.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "cache.hpp" - -#include -#include -#include -#include -#include - -#include "utils.hpp" - -/* -==================== -cache file header -==================== - -uint32_t magic = "PCUB" -uint32_t n = cache file for n cubes in a polycube -uint32_t numShapes = number of different shapes in cachefile -------- - -==================== -shapetable: -==================== -shapeEntry { - uint8_t dim0 // offset by -1 - uint8_t dim1 // offset by -1 - uint8_t dim2 // offset by -1 - uint8_t reserved - uint64_t offset in file -} -shapeEntry[numShapes] - - -==================== -XYZ data -==================== - -*/ - -void Cache::save(std::string path, Hashy &hashes, uint8_t n) { - if (hashes.size() == 0) return; - std::ofstream ofs(path, std::ios::binary); - Header header; - header.magic = MAGIC; - header.n = n; - header.numShapes = hashes.byshape.size(); - header.numPolycubes = hashes.size(); - ofs.write((const char *)&header, sizeof(header)); - - std::vector keys; - keys.reserve(header.numShapes); - for (auto &pair : hashes.byshape) keys.push_back(pair.first); - std::sort(keys.begin(), keys.end()); - uint64_t offset = sizeof(Header) + header.numShapes * sizeof(ShapeEntry); - for (auto &key : keys) { - ShapeEntry se; - se.dim0 = key.x(); - se.dim1 = key.y(); - se.dim2 = key.z(); - se.reserved = 0; - se.offset = offset; - se.size = hashes.byshape[key].size() * XYZ_SIZE * n; - offset += se.size; - ofs.write((const char *)&se, sizeof(ShapeEntry)); - } - // put XYZs - for (auto &key : keys) { - for (auto &subset : hashes.byshape[key].byhash) - for (const auto &c : subset.set) { - if constexpr (sizeof(XYZ) == XYZ_SIZE) { - ofs.write((const char *)c.data(), sizeof(XYZ) * c.size()); - } 
else { - for (const auto &p : c) { - ofs.write((const char *)p.data, XYZ_SIZE); - } - } - } - } - - std::printf("saved %s\n\r", path.c_str()); -} - -Hashy Cache::load(std::string path, uint32_t extractShape) { - Hashy cubes; - auto ifs = std::ifstream(path, std::ios::binary); - if (!ifs.is_open()) return cubes; - Header header; - if (!ifs.read((char *)&header, sizeof(header))) { - return cubes; - } - // check magic - if (header.magic != MAGIC) { - return cubes; - } -#ifdef CACHE_LOAD_HEADER_ONLY - std::printf("loading cache file \"%s\" for N = %u", path.c_str(), header.n); - std::printf(", %u shapes, %lu XYZs\n\r", header.numShapes, header.numPolycubes); -#endif - auto cubeSize = XYZ_SIZE * header.n; - DEBUG_PRINTF("cubeSize: %u\n\r", cubeSize); - - for (uint32_t i = 0; i < header.numShapes; ++i) { - ShapeEntry shapeEntry; - if (!ifs.read((char *)&shapeEntry, sizeof(shapeEntry))) { - std::printf("ERROR reading ShapeEntry %u\n\r", i); - exit(-1); - } - if (ALL_SHAPES != extractShape && i != extractShape) continue; -#ifdef CACHE_PRINT_SHAPEENTRIES - std::printf("ShapeEntry %3u: [%2d %2d %2d] offset: 0x%08lx size: 0x%08lx (%ld polycubes)\n\r", i, shapeEntry.dim0, shapeEntry.dim1, shapeEntry.dim2, - shapeEntry.offset, shapeEntry.size, shapeEntry.size / cubeSize); -#endif - if (shapeEntry.size % cubeSize != 0) { - std::printf("ERROR shape block is not divisible by cubeSize!\n\r"); - exit(-1); - } -#ifndef CACHE_LOAD_HEADER_ONLY - // remember pos in file - auto pos = ifs.tellg(); - - // read XYZ contents - ifs.seekg(shapeEntry.offset); - const uint32_t CHUNK_SIZE = 512 * XYZ_SIZE; - uint8_t buf[CHUNK_SIZE] = {0}; - uint64_t buf_offset = 0; - uint32_t numCubes = shapeEntry.size / cubeSize; - XYZ shape(shapeEntry.dim0, shapeEntry.dim1, shapeEntry.dim2); - uint64_t readsize = shapeEntry.size - buf_offset; - if (readsize > CHUNK_SIZE) readsize = CHUNK_SIZE; - if (!ifs.read((char *)&buf, readsize)) { - std::printf("ERROR reading XYZs for Shape %u\n\r", i); - exit(-1); - } - 
for (uint32_t j = 0; j < numCubes; ++j) { - Cube next(header.n); - for (uint32_t k = 0; k < header.n; ++k) { - // check if buf contains next XYZ - uint64_t curr_offset = j * cubeSize + k * XYZ_SIZE; - if (curr_offset >= buf_offset + CHUNK_SIZE) { - // std::printf("reload buffer\n\r"); - buf_offset += CHUNK_SIZE; - readsize = shapeEntry.size - buf_offset; - if (readsize > CHUNK_SIZE) readsize = CHUNK_SIZE; - if (!ifs.read((char *)&buf, readsize)) { - std::printf("ERROR reading XYZs for Shape %u\n\r", i); - exit(-1); - } - } - - next.data()[k].data[0] = buf[curr_offset - buf_offset + 0]; - next.data()[k].data[1] = buf[curr_offset - buf_offset + 1]; - next.data()[k].data[2] = buf[curr_offset - buf_offset + 2]; - } - cubes.insert(next, shape); - } - - // restore pos - ifs.seekg(pos); -#endif - } - std::printf(" loaded %lu cubes\n\r", cubes.size()); - return cubes; -} diff --git a/cpp/src/cubeSwapSet.cpp b/cpp/src/cubeSwapSet.cpp new file mode 100644 index 0000000..979e7ab --- /dev/null +++ b/cpp/src/cubeSwapSet.cpp @@ -0,0 +1,212 @@ +#include "cubeSwapSet.hpp" + +#include +#include +#include + +/** + * thread-local read-cache for Cube(s) + */ +class ThreadCache { + public: + static ThreadCache& get(); + + struct entry { + // read-cache "key" + const CubeStorage* storage; + mapped::seekoff_t seek; + int version; + + friend bool operator==(const entry& a, const entry& b) { return std::tie(a.storage, a.seek, a.version) == std::tie(b.storage, b.seek, b.version); } + }; + + struct state { + // cached data. + Cube cube; + std::list::iterator lru; + }; + + struct entry_hash { + size_t operator()(const entry& x) const { + size_t seed = uintptr_t(x.storage); + seed ^= x.seek + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= x.version + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + }; + }; + + // Least-recently-used, LRU eviction policy list. 
+ std::list lru; + // trick: make map with reference_wrapper + // as key so we don't need to duplicate the data from the lru list. + // surprisingly C++17 cache.find(entry) works. + std::unordered_map, state, entry_hash, std::equal_to> cache; + + bool local_enabled = false; + mapped::seekoff_t local_seek = -1; + Cube local; +}; + +ThreadCache& ThreadCache::get() { + static thread_local ThreadCache instance; + return instance; +} + +std::atomic CubeStorage::m_init_num(0); + +CubeStorage::CubeStorage(std::filesystem::path path, size_t n) : m_cube_size(n) { + // Generate file name: + m_fpath = path / ("storage_" + std::to_string(m_init_num.fetch_add(1)) + ".bin"); +} + +CubeStorage::~CubeStorage() { discard(); } + +CubeStorage::CubeStorage(CubeStorage&& mv) + : m_fpath(std::move(mv.m_fpath)), m_file(std::move(mv.m_file)), m_cube_size(mv.m_cube_size), m_alloc_seek(mv.m_alloc_seek) { + // no allocations can exist in the moved from object: + assert(m_alloc_seek == 0); +} + +CubePtr CubeStorage::local(const Cube& cube) const { + auto& ctx = ThreadCache::get(); + ctx.local = cube; + ctx.local_seek = m_alloc_seek; + ctx.local_enabled = true; + return CubePtr(ctx.local_seek); +} + +void CubeStorage::commit() { + std::unique_lock lock(m_mtx); + + if (!m_file) { + using namespace mapped; + // file not open yet. + m_file = std::make_shared(); + if (m_file->openrw(m_fpath.c_str(), 0, file::CREATE | file::RESIZE | file::FSTUNE)) { + std::printf("CubeStorage::allocate() ERROR: Failed to create file: %s\n", m_fpath.c_str()); + std::abort(); + } + + // memory map 2 MiB chunk for writing. + // This also works as "pre-read-cache" for read(): + // Any CubePtr(s) in this window even if they + // are not yet in thread's read-cache have fast readAt(). 
+ m_file_head = std::make_unique(m_file, 0, 2 * 1024 * 1024); + } + auto datasize = m_cube_size * sizeof(XYZ); + auto write_fpos = m_alloc_seek; + + if(m_reserved_end < m_alloc_seek + datasize) { + // advance the backing file m_file_head to next 2 MiB chunk. + m_reserved_end += 2 * 1024 * 1024; + m_file_head->flushJump(m_reserved_end); + } + // advance write offset: + m_alloc_seek = write_fpos + datasize; + // allow parallel m_file_head->writeAt() calls: + lock.unlock(); + + auto& ctx = ThreadCache::get(); + assert(ctx.local_enabled); + assert(ctx.local_seek == write_fpos); + ctx.local_enabled = false; + + m_file_head->writeAt(write_fpos, datasize, ctx.local.data()); +} + +void CubeStorage::drop() const { + auto& ctx = ThreadCache::get(); + assert(ctx.local_enabled); + ctx.local_enabled = false; + ctx.local_seek = -1; +} + +const Cube& CubeStorage::read(const CubePtr& x) const { + // Get thread's cache instance: + auto& ctx = ThreadCache::get(); + + // Check if x is actually the object returned by local(): + if (ctx.local_enabled && x.seek() == ctx.local_seek) { + assert(ctx.local.size() == m_cube_size); + return ctx.local; + } + + ThreadCache::entry key{this, x.seek(), m_storage_version}; + auto itr = ctx.cache.find(key); + if (itr != ctx.cache.end()) { + // cache-hit. + // LRU policy simply moves the element at back of the list: + if (std::next(itr->second.lru) != ctx.lru.end()) { + ctx.lru.splice(itr->second.lru, ctx.lru, ctx.lru.end()); + } + return itr->second.cube; + } else { + // cache-miss. 
+ // Evict entry at front if read-cache is full: + if (ctx.cache.size() >= 1024) { + auto rm = ctx.cache.find(ctx.lru.front()); + ctx.cache.erase(rm); + ctx.lru.pop_front(); + } + + // Read Cube data + Cube tmp(m_cube_size); + m_file_head->readAt(x.seek(), m_cube_size * sizeof(XYZ), tmp.data()); + + // Move it into an new read-cache entry: + auto nitr = ctx.lru.insert(ctx.lru.end(), key); + auto [itr, ok] = ctx.cache.emplace(std::ref(*nitr), ThreadCache::state{std::move(tmp), nitr}); + assert(ok); + return itr->second.cube; + } +} + +void CubeStorage::resetReadCache() const { + auto& ctx = ThreadCache::get(); + ctx.cache.clear(); + ctx.lru.clear(); +} + +void CubeStorage::copydata(const CubePtr& x, size_t n, XYZ* destination) const { + // copydata() doesn't use thread's read-cache + // so local() cannot be active: + assert(!ThreadCache::get().local_enabled); + m_file_head->readAt(x.seek(), n * sizeof(XYZ), destination); +} + +void CubeStorage::discard() { + std::lock_guard lock(m_mtx); + + if (m_file) { + // The backing file is kept intact + // so that CacheWriter can process it. + m_file_head->flush(); + m_file_head.reset(); + m_file.reset(); + m_alloc_seek = 0; + m_reserved_end = 0; + // Thread read-cache problem: + // discard() must cause eviction of all read-cache + // entries for each thread's read cache that point into this. + // This done by incrementing m_storage_version: + // the entries can't simply be found as they are + // made with m_storage_version - 1 value. + // The entries are eventually evicted by + // the read-cache this way. + ++m_storage_version; + } +} + +const Cube& CubePtr::get(const CubeStorage& storage) const { + // CubePtr::get() is really just an convenience function... + // However this cannot be implemented in the header file because + // CubeStorage definition is not known. 
+ return storage.read(*this); +} + +void CubePtr::copyout(const CubeStorage& storage, size_t n, XYZ* out) const { + // CubePtr::copyout() is really just an convenience function... + // However this cannot be implemented in the header file because + // CubeStorage definition is not known. + storage.copydata(*this, n, out); +} \ No newline at end of file diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index cdcc5b4..ec34936 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -2,13 +2,14 @@ #include #include +#include #include +#include #include #include #include #include -#include "cache.hpp" #include "cube.hpp" #include "hashes.hpp" #include "newCache.hpp" @@ -19,28 +20,29 @@ const int PERF_STEP = 500; struct Workset { std::mutex mu; - CubeIterator _begin_total; - CubeIterator _begin; - CubeIterator _end; + + CacheReader cr; + CacheIterator _begin_total; + CacheIterator _begin; + CacheIterator _end; Hashy &hashes; XYZ targetShape, shape, expandDim; bool notSameShape; - Workset(ShapeRange &data, Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape) - : _begin_total(data.begin()) - , _begin(data.begin()) - , _end(data.end()) - , hashes(hashes) - , targetShape(targetShape) - , shape(shape) - , expandDim(expandDim) - , notSameShape(notSameShape) {} + Workset(Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape) + : hashes(hashes), targetShape(targetShape), shape(shape), expandDim(expandDim), notSameShape(notSameShape) {} + + void setRange(IShapeRange &data) { + _begin_total = data.begin(); + _begin = data.begin(); + _end = data.end(); + } struct Subset { - CubeIterator _begin, _end; + CacheIterator _begin, _end; bool valid; float percent; - auto begin() { return _begin; } - auto end() { return _end; } + CacheIterator begin() { return _begin; } + CacheIterator end() { return _end; } }; Subset getPart() { @@ -48,7 +50,7 @@ struct Workset { auto a = _begin; _begin += 500; if (_begin > _end) _begin = _end; - return {a, 
_begin, a < _end, 100 * (float)((uint64_t)a.m_ptr - (uint64_t)_begin_total.m_ptr) / ((uint64_t)_end.m_ptr - (uint64_t)_begin_total.m_ptr)}; + return {a, _begin, a < _end, 100 * float(a.seek() - _begin_total.seek() + 1) / (_end.seek() - _begin_total.seek() + 1)}; } void expand(const Cube &c) { @@ -87,14 +89,14 @@ struct Workset { std::set_difference(candidates.begin(), end, c.begin(), c.end(), std::back_inserter(tmp)); candidates = std::move(tmp); - DEBUG_PRINTF("candidates: %lu\n\r", candidates.size()); + DEBUG1_PRINTF("candidates: %lu\n\r", candidates.size()); Cube newCube(c.size() + 1); Cube lowestHashCube(newCube.size()); Cube rotatedCube(newCube.size()); for (const auto &p : candidates) { - DEBUG_PRINTF("(%2d %2d %2d)\n\r", p.x(), p.y(), p.z()); + DEBUG2_PRINTF("(%2d %2d %2d)\n\r", p.x(), p.y(), p.z()); int ax = (p.x() < 0) ? 1 : 0; int ay = (p.y() < 0) ? 1 : 0; int az = (p.z() < 0) ? 1 : 0; @@ -131,26 +133,67 @@ struct Workset { }; struct Worker { - Workset &ws; + std::shared_ptr ws; int id; - Worker(Workset &ws_, int id_) : ws(ws_), id(id_) {} + int state = 3; // 1 == completed/waiting for job, 2 == processing, 3 == job assigned. 
+ std::mutex mtx; + std::condition_variable cond; + std::condition_variable cond2; + std::thread thr; + + Worker(int id_) : id(id_), thr(&Worker::run, this) {} + ~Worker() { + std::unique_lock lock(mtx); + state = 0; + cond.notify_one(); + lock.unlock(); + thr.join(); + } + + void launch(std::shared_ptr ws_) { + std::unique_lock lock(mtx); + while (state != 1) { + cond2.wait(lock); + } + ws = ws_; + state = 3; + cond.notify_one(); + } + + void sync() { + std::unique_lock lock(mtx); + while (state != 1) { + cond2.wait(lock); + } + ws.reset(); + } + void run() { - // std::printf("start %d\n", id); - auto subset = ws.getPart(); - while (subset.valid) { - if (id == 0) { - std::printf(" %5.2f%%\r", subset.percent); - std::flush(std::cout); - } - // std::cout << id << " next subset " << &*subset.begin() << " to " << &*subset.end() << "\n"; - for (auto &c : subset) { - // std::printf("%p\n", (void *)&c); - // c.print(); - ws.expand(c); + std::unique_lock lock(mtx); + std::printf("thread nro. %d started.\n", id); + while (state) { + state = 1; + cond2.notify_one(); + while (state == 1) cond.wait(lock); + if (!state) return; + state = 2; + // std::printf("start %d\n", id); + auto subset = ws->getPart(); + while (subset.valid) { + if (id == 0) { + std::printf(" %5.2f%%\r", subset.percent); + std::flush(std::cout); + } + // std::cout << id << " next subset " << &*subset.begin() << " to " << &*subset.end() << "\n"; + for (auto &c : subset) { + // std::printf("%p\n", (void *)&c); + // c.print(); + ws->expand(c); + } + subset = ws->getPart(); } - subset = ws.getPart(); + // std::printf("finished %d\n", id); } - // std::printf("finished %d\n", id); } }; @@ -158,7 +201,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c if (!std::filesystem::is_directory(base_path)) { std::filesystem::create_directory(base_path); } - Hashy hashes; + Hashy hashes(base_path); if (n < 1) return {}; else if (n == 1) { @@ -166,7 +209,8 @@ FlatCache gen(int n, int 
threads, bool use_cache, bool write_cache, bool split_c hashes.insert(Cube{{XYZ(0, 0, 0)}}, XYZ(0, 0, 0)); std::printf("%ld elements for %d\n\r", hashes.size(), n); if (write_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); + CacheWriter cw(1); + cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } return FlatCache(hashes, n); } @@ -185,15 +229,26 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c } std::printf("N = %d || generating new cubes from %lu base cubes.\n\r", n, base->size()); hashes.init(n); + + // Start worker threads. + std::deque workers; + for (int i = 0; i < threads; ++i) { + workers.emplace_back(i); + } + + CacheWriter cw(threads); + uint64_t totalSum = 0; auto start = std::chrono::steady_clock::now(); - uint32_t totalOutputShapes = hashes.byshape.size(); + uint32_t totalOutputShapes = hashes.numShapes(); uint32_t outShapeCount = 0; auto prevShapes = Hashy::generateShapes(n - 1); - for (auto &tup : hashes.byshape) { + + for (const auto &tup : hashes) { outShapeCount++; XYZ targetShape = tup.first; std::printf("process output shape %3d/%d [%2d %2d %2d]\n\r", outShapeCount, totalOutputShapes, targetShape.x(), targetShape.y(), targetShape.z()); + for (uint32_t sid = 0; sid < prevShapes.size(); ++sid) { auto &shape = prevShapes[sid]; int diffx = targetShape.x() - shape.x(); @@ -210,51 +265,62 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c if (diffy == 1) if (shape.y() == shape.x()) diffx = 1; - std::printf(" shape %d %d %d\n\r", shape.x(), shape.y(), shape.z()); + auto ws = std::make_shared(hashes, targetShape, shape, XYZ(diffx, diffy, diffz), abssum); if (use_split_cache) { // load cache file only for this shape std::string cachefile = base_path + "cubes_" + std::to_string(n - 1) + "_" + std::to_string(prevShapes[sid].x()) + "-" + std::to_string(prevShapes[sid].y()) + "-" + std::to_string(prevShapes[sid].z()) + ".bin"; - 
cr.loadFile(cachefile); + ws->cr.loadFile(cachefile); + base = &ws->cr; // cr.printHeader(); } - auto s = base->getCubesByShape(sid); + auto& s = base->getCubesByShape(sid); if (shape != s.shape()) { std::printf("ERROR caches shape does not match expected shape!\n"); exit(-1); } - // std::printf("starting %d threads\n\r", threads); - std::vector ts; - Workset ws(s, hashes, targetShape, shape, XYZ(diffx, diffy, diffz), abssum); - std::vector workers; - ts.reserve(threads); - workers.reserve(threads); - for (int i = 0; i < threads; ++i) { - workers.emplace_back(ws, i); - ts.emplace_back(&Worker::run, std::ref(workers[i])); + + ws->setRange(s); + + // Wait for jobs to complete. + for (auto &thr : workers) { + thr.sync(); } - for (int i = 0; i < threads; ++i) { - ts[i].join(); + std::printf(" shape %d %d %d\n\r", shape.x(), shape.y(), shape.z()); + // launch the new jobs. + // Because the workset is held by shared_ptr + // main thread can do above preparation work in parallel + // while the jobs are running. + for (auto &thr : workers) { + thr.launch(ws); } } - std::printf(" num: %lu\n\r", hashes.byshape[targetShape].size()); - totalSum += hashes.byshape[targetShape].size(); + // Wait for jobs to complete. 
+ for (auto &thr : workers) { + thr.sync(); + } + std::printf(" num: %lu\n\r", hashes.at(targetShape).size()); + totalSum += hashes.at(targetShape).size(); if (write_cache && split_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + - std::to_string(targetShape.z()) + ".bin", - hashes, n); + cw.save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + + std::to_string(targetShape.z()) + ".bin", + hashes, n); } if (split_cache) { - for (auto &subset : hashes.byshape[targetShape].byhash) { - subset.set.clear(); - subset.set.reserve(1); - } + hashes.at(targetShape).clear(); } } + + // Stop the workers. + workers.clear(); + if (write_cache && !split_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); + cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } + + cw.flush(); + auto end = std::chrono::steady_clock::now(); auto dt_ms = std::chrono::duration_cast(end - start).count(); std::printf("took %.2f s\033[0K\n\r", dt_ms / 1000.f); diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index ef925dc..9e0c54e 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -1,13 +1,10 @@ -#include "../include/newCache.hpp" - -#include -#include -#include +#include "newCache.hpp" #include -CacheReader::CacheReader() - : filePointer(nullptr), path_(""), fileDescriptor_(-1), fileSize_(0), fileLoaded_(false), dummyHeader{0, 0, 0, 0}, header(&dummyHeader), shapes(nullptr) {} +#include "cubeSwapSet.hpp" + +CacheReader::CacheReader() : path_(""), fileLoaded_(false), dummyHeader{0, 0, 0, 0}, header(&dummyHeader), shapes(nullptr) {} void CacheReader::printHeader() { if (fileLoaded_) { @@ -33,59 +30,272 @@ int CacheReader::printShapes(void) { int CacheReader::loadFile(const std::string path) { unload(); path_ = path; - fileDescriptor_ = 
open(path.c_str(), O_RDONLY); - if (fileDescriptor_ == -1) { + // open read-only backing file: + file_ = std::make_shared(); + if (file_->open(path.c_str())) { std::printf("error opening file\n"); return 1; } - // get filesize - fileSize_ = lseek(fileDescriptor_, 0, SEEK_END); - lseek(fileDescriptor_, 0, SEEK_SET); + // map the header struct + header_ = std::make_unique>(file_, 0); + header = header_->get(); + + if (header->magic != cacheformat::MAGIC) { + std::printf("error opening file: file not recognized\n"); + return 1; + } + + // map the ShapeEntry array: + shapes_ = std::make_unique>(file_, header_->getEndSeek(), (*header_)->numShapes); + shapes = shapes_->get(); - // memory map file - filePointer = (uint8_t*)mmap(NULL, fileSize_, PROT_READ, MAP_SHARED, fileDescriptor_, 0); - if (filePointer == MAP_FAILED) { - // error handling - std::printf("errorm mapping file memory"); - close(fileDescriptor_); - return 2; + // Initialize ShapeRanges + size_t datasize = 0; + for (unsigned int i = 0; i < header->numShapes; ++i) { + datasize += shapes[i].size; } - header = (Header*)(filePointer); - shapes = (ShapeEntry*)(filePointer + sizeof(Header)); + if (file_->size() != shapes_->getEndSeek() + datasize) { + std::printf("warn: file size does not match expected value\n"); + } + + // Initialize shapeRanges array: + for (unsigned int i = 0; i < header->numShapes; ++i) { + if (shapes[i].size) { + auto start = shapes[i].offset; + auto end = start + shapes[i].size; + shapeRanges.emplace_back(file_, start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + } else { + // table entry has no data. + // shapes[i].offset may have bogus value. 
+ shapeRanges.emplace_back(file_, -1, -1, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + } + } + + // Add dummy entry at back: + shapeRanges.emplace_back(file_, -1, -1, header->n, XYZ(0, 0, 0)); fileLoaded_ = true; return 0; } -ShapeRange CacheReader::getCubesByShape(uint32_t i) { +Cube CubeReadIterator::read() const { + Cube tmp(n); + m_file->readAt(m_seek, n * sizeof(XYZ), tmp.data()); + return tmp; +} + + +IShapeRange &CacheReader::getCubesByShape(uint32_t i) { if (i >= header->numShapes) { - return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)}; + return shapeRanges.back(); } - XYZ* start = reinterpret_cast(filePointer + shapes[i].offset); - XYZ* end = reinterpret_cast(filePointer + shapes[i].offset + shapes[i].size); - return ShapeRange(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + + return shapeRanges[i]; } void CacheReader::unload() { - // unmap file from memory + // unload file from memory if (fileLoaded_) { - if (munmap(filePointer, fileSize_) == -1) { - // error handling - std::printf("error unmapping file\n"); - } - - // close file descriptor - close(fileDescriptor_); + shapeRanges.clear(); + shapes_.reset(); + header_.reset(); + file_.reset(); fileLoaded_ = false; } - fileDescriptor_ = -1; - filePointer = nullptr; header = &dummyHeader; shapes = nullptr; } CacheReader::~CacheReader() { unload(); } + +CacheWriter::CacheWriter(int num_threads) { + for (int i = 0; i < num_threads; ++i) { + m_flushers.emplace_back(&CacheWriter::run, this); + } +} + +CacheWriter::CacheWriter::~CacheWriter() { + flush(); + // stop the threads. 
+ std::unique_lock lock(m_mtx); + m_active = false; + m_run.notify_all(); + lock.unlock(); + for (auto &thr : m_flushers) thr.join(); +} + +void CacheWriter::run() { + std::unique_lock lock(m_mtx); + while (m_active) { + // do copy jobs: + if (!m_copy.empty()) { + auto task = std::move(m_copy.front()); + m_copy.pop_front(); + lock.unlock(); + + task(); + + lock.lock(); + --m_num_copys; + continue; + } + // file flushes: + if (!m_flushes.empty()) { + auto task = std::move(m_flushes.front()); + m_flushes.pop_front(); + lock.unlock(); + + task(); + + lock.lock(); + --m_num_flushes; + continue; + } + // notify that we are done here. + m_wait.notify_one(); + // wait for jobs. + m_run.wait(lock); + } + m_wait.notify_one(); +} + +void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { + if (hashes.size() == 0) return; + + using namespace mapped; + using namespace cacheformat; + + auto file_ = std::make_shared(); + if (file_->openrw(path.c_str(), 0)) { + std::printf("error opening file\n"); + return; + } + + // Write header: + auto header = std::make_shared>(file_, 0); + (*header)->magic = cacheformat::MAGIC; + (*header)->n = n; + (*header)->numShapes = hashes.numShapes(); + (*header)->numPolycubes = hashes.size(); + header->flush(); + + std::vector keys; + keys.reserve((*header)->numShapes); + for (auto &pair : hashes) keys.push_back(pair.first); + std::sort(keys.begin(), keys.end()); + + // Write shape table: + auto shapeEntry = std::make_shared>(file_, header->getEndSeek(), (*header)->numShapes); + header.reset(); + + static_assert(XYZ_SIZE == sizeof(XYZ), "XYZ_SIZE differs from sizeof(XYZ)"); + + uint64_t offset = shapeEntry->getEndSeek(); + size_t num_cubes = 0; + int i = 0; + for (auto &key : keys) { + auto &se = (*shapeEntry)[i++]; + se.dim0 = key.x(); + se.dim1 = key.y(); + se.dim2 = key.z(); + se.reserved = 0; + se.offset = offset; + auto count = hashes.at(key).size(); + num_cubes += count; + se.size = count * XYZ_SIZE * n; + offset += se.size; + } + 
shapeEntry->flush(); + + // put XYZs + // Schedule merging of the cache file. + // CubeSwapSet enables massive optimizations in how + // CacheWriter can merge the SubsubHashy's data into the final cache file: + // - copystorage lambda takes the source file and it's file name from the + // SubsubHashy::storage() returned CubeStorage. + // - mapped::file::copyAt() is used to efficiently copy the source file contents into this cache file + // - Finally the copystorage lambda *deletes* the source storage file + // The main program does not need to wait for this process to complete. + + // copystorage takes shared ownership of the file_ + auto copystorage = [n, file = file_](std::shared_ptr src, std::filesystem::path rmname, size_t num, mapped::seekoff_t dest) -> void { + file->copyAt(src, 0, num * n * sizeof(XYZ), dest); + src.reset(); + + // Try remove the source storage file. + std::error_code ec; + auto stat = std::filesystem::status(rmname, ec); + if (!ec && std::filesystem::is_regular_file(stat)) { + if (!std::filesystem::remove(rmname, ec)) { + std::printf("WARN: failed to remove file: %s", rmname.c_str()); + } + } else { + std::printf("WARN: failed to get file status: %s", rmname.c_str()); + } + }; + + mapped::seekoff_t fileEnd = shapeEntry->getEndSeek(); + auto time_start = std::chrono::steady_clock::now(); + for (size_t i = 0; i < keys.size(); ++i) { + auto put = (*shapeEntry)[i].offset; + for (auto &subset : hashes.at(keys[i])) { + ptrdiff_t num = subset.size(); + if (num) { + // By pass iterating the Subsubhashy entirely + // and copy the data from CubeStorage file *directly* into this file. + // the Cube data does end up in different order than when copying one-by-one. + // But we don't care as the order is random already. + // the copy job also deletes the CubeStorage::fileName() file from the disk + // once the data copy completes. 
+ std::unique_lock lock(m_mtx); + m_copy.emplace_back(std::bind(copystorage, subset.storage().getFile(), subset.storage().fileName(), num, put)); + ++m_num_copys; + m_run.notify_all(); + std::printf("scheduled copy jobs: %*d ... \r", 3, (int)m_num_copys); + std::flush(std::cout); + } + put += num * n * XYZ_SIZE; + } + fileEnd = std::max(fileEnd, put); + } + shapeEntry.reset(); + + // sync up a bit. + // don't allow the copy job queue to grow indefinitely + // if the disk can't keep up. + std::unique_lock lock(m_mtx); + while (m_num_copys > m_flushers.size()) { + std::printf("waiting for %*d copy jobs to complete ... \r", 3, (int)m_num_copys); + std::flush(std::cout); + m_wait.wait(lock); + } + + // move the file into flush job. + m_flushes.emplace_back(std::bind( + [fileEnd](auto &&file) -> void { + file->truncate(fileEnd); + file->close(); + file.reset(); + }, + std::move(file_))); + ++m_num_flushes; + m_run.notify_all(); + + auto time_end = std::chrono::steady_clock::now(); + auto dt_ms = std::chrono::duration_cast(time_end - time_start).count(); + + std::printf("saved %s, took %.2f s\n\r", path.c_str(), dt_ms / 1000.f); +} + +void CacheWriter::flush() { + std::unique_lock lock(m_mtx); + while (m_num_flushes) { + std::printf("%*d copy jobs total remaining on %*d files ... 
\r", 3, (int)m_num_copys, 2, (int)m_num_flushes); + std::flush(std::cout); + m_wait.wait(lock); + } +} diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b30d160..42e0014 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -19,4 +19,5 @@ add_executable(${PROJECT_NAME} $ ${TESTS}) target_link_libraries(GTest::GTest INTERFACE gtest_main) target_link_libraries(${PROJECT_NAME} pthread GTest::GTest) +target_link_libraries(${PROJECT_NAME} mapped_file) ConfigureTarget(${PROJECT_NAME}) diff --git a/cpp/tests/src/test_cache.cpp b/cpp/tests/src/test_cache.cpp deleted file mode 100644 index ae10cbf..0000000 --- a/cpp/tests/src/test_cache.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include - -#include "cache.hpp" - -TEST(CacheTests, TestCacheLoadDoesNotThrow) { EXPECT_NO_THROW(Cache::load("./test_data.bin")); } - -TEST(CacheTests, TestCacheSaveDoesNotThrow) { - auto data = Cache::load("./test_data.bin"); - EXPECT_NO_THROW(Cache::save("./temp.bin", data, 255)); -} \ No newline at end of file diff --git a/rust/src/cli/cli.rs b/rust/src/cli/cli.rs index b298e5d..d50654b 100644 --- a/rust/src/cli/cli.rs +++ b/rust/src/cli/cli.rs @@ -1,16 +1,16 @@ use std::{ collections::{BTreeMap, HashSet}, path::PathBuf, - time::{Duration, Instant}, + time::Duration, }; use clap::{Args, Parser, Subcommand, ValueEnum}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use opencubes::{naive_polycube::NaivePolyCube, pcube::PCubeFile}; -use rayon::prelude::{IntoParallelIterator, ParallelIterator}; mod enumerate; use enumerate::enumerate; +use rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; fn finish_bar(bar: &ProgressBar, duration: Duration, expansions: usize, n: usize) { let time = duration.as_micros(); @@ -37,7 +37,17 @@ fn finish_bar(bar: &ProgressBar, duration: Duration, expansions: usize, n: usize } fn unknown_bar() -> ProgressBar { - let style = ProgressStyle::with_template("[{elapsed_precise}] 
[{spinner:10.cyan/blue}] {msg}") + unknown_bar_with_pos(false) +} + +fn unknown_bar_with_pos(with_pos: bool) -> ProgressBar { + let template = if with_pos { + "[{elapsed_precise}] [{spinner:10.cyan/blue}] {pos} {msg}" + } else { + "[{elapsed_precise}] [{spinner:10.cyan/blue}] {msg}" + }; + + let style = ProgressStyle::with_template(template) .unwrap() .tick_strings(&[ ">---------", @@ -204,33 +214,36 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> { let in_memory = !opts.no_in_memory; let n = opts.n; - println!("Validating {}", path); + let file = PCubeFile::new_file(path)?; + let canonical = file.canonical(); + let len = file.len(); + + let bar = if let Some(len) = len { + make_bar(len as u64) + } else { + unknown_bar_with_pos(true) + }; + + bar.set_message("cubes validated"); + + bar.println(format!("Validating {}", path)); let mut uniqueness = match (in_memory, uniqueness) { (true, true) => { - eprintln!("Verifying uniqueness."); + bar.println("Verifying uniqueness."); Some(HashSet::new()) } (false, true) => { + bar.abandon(); println!("Cannot verify uniqueness without placing all entries in memory. Re-run with `--no-uniqueness` enabled to run."); std::process::exit(1); } (_, false) => { - eprintln!("Not verifying uniqueness"); + bar.println("Not verifying uniqueness"); None } }; - let file = PCubeFile::new_file(path)?; - let canonical = file.canonical(); - let len = file.len(); - - let bar = if let Some(len) = len { - make_bar(len as u64) - } else { - unknown_bar() - }; - let exit = |msg: &str| { bar.abandon(); println!("{msg}"); @@ -238,21 +251,18 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> { }; match (canonical, validate_canonical) { - (true, true) => eprintln!("Verifying entry canonicality. File indicates that entries are canonical."), - (false, true) => eprintln!("Not verifying entry canonicality. File header does not indicate that entries are canonical"), - (true, false) => eprintln!("Not verifying entry canonicality. 
File header indicates that they are, but check is disabled."), - (false, false) => eprintln!("Not verifying canonicality. File header does not indicate that entries are canonical, and check is disabled.") + (true, true) => bar.println("Verifying entry canonicality. File indicates that entries are canonical."), + (false, true) => bar.println("Not verifying entry canonicality. File header does not indicate that entries are canonical"), + (true, false) => bar.println("Not verifying entry canonicality. File header indicates that they are, but check is disabled."), + (false, false) => bar.println("Not verifying canonicality. File header does not indicate that entries are canonical, and check is disabled.") } if let Some(n) = n { - eprintln!("Verifying that all entries are N = {n}"); + bar.println(format!("Verifying that all entries are N = {n}")); } let mut total_read = 0; - let mut last_tick = Instant::now(); - bar.tick(); - for cube in file { let cube = match cube { Ok(c) => NaivePolyCube::from(c), @@ -264,14 +274,7 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> { total_read += 1; - if len.is_some() { - bar.inc(1); - } else if last_tick.elapsed() >= Duration::from_millis(66) { - last_tick = Instant::now(); - bar.set_message(format!("{total_read}")); - bar.inc(1); - bar.tick(); - } + bar.inc(1); let mut form: Option = None; let canonical_form = || cube.pcube_canonical_form(); @@ -297,10 +300,10 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> { exit("Found non-unique polycubes."); } } - - bar.finish(); } + bar.finish(); + println!("Success: {path}, containing {total_read} cubes, is valid"); Ok(()) @@ -318,7 +321,7 @@ pub fn convert(opts: &ConvertArgs) { // that the longest files are yielded last. 
let files: BTreeMap<_, _> = opts .path - .iter() + .par_iter() .map(|path| { let input_file = match PCubeFile::new_file(&path) { Ok(f) => f, @@ -327,6 +330,7 @@ pub fn convert(opts: &ConvertArgs) { std::process::exit(1); } }; + (input_file.len(), (input_file, path.to_string())) }) .collect(); @@ -334,36 +338,43 @@ pub fn convert(opts: &ConvertArgs) { // Iterate over the files and do some printing, in-order let files: Vec<_> = files .into_iter() - .map(|(_, (input_file, path))| { + .map(|(len, (input_file, path))| { let output_path = opts.output_path.clone().unwrap_or(path.clone()); - println!("Converting file {}", path); - println!("Final output path: {output_path}"); + multi_bar + .println(format!("Converting file {}", path)) + .unwrap(); + multi_bar + .println(format!("Final output path: {output_path}")) + .unwrap(); + if opts.canonicalize { - println!("Canonicalizing output"); + multi_bar.println("Canonicalizing output").unwrap(); } - println!("Input compression: {:?}", input_file.compression()); - println!("Output compression: {:?}", opts.compression); - - let len = input_file.len(); + multi_bar + .println(format!("Input compression: {:?}", input_file.compression())) + .unwrap(); + multi_bar + .println(format!("Output compression: {:?}", opts.compression)) + .unwrap(); let bar = if let Some(len) = len { make_bar(len as u64) } else { - unknown_bar() + unknown_bar_with_pos(true) }; let bar = multi_bar.add(bar); - (input_file, path, output_path, len, bar) + (input_file, path, output_path, bar) }) .collect(); // Convert, in parallel files .into_par_iter() - .for_each(|(input_file, path, output_path, len, bar)| { - bar.set_message(path.to_string()); + .for_each(|(input_file, path, output_path, bar)| { + bar.set_message(format!("cubes converted for {path}")); let canonical = input_file.canonical(); let mut output_path_temp = PathBuf::from(&output_path); @@ -373,12 +384,7 @@ pub fn convert(opts: &ConvertArgs) { output_path_temp.pop(); output_path_temp.push(filename); 
- let mut total_read = 0; - let mut last_tick = Instant::now(); - let input = input_file.filter_map(|v| { - total_read += 1; - let cube = match v { Ok(v) => Some(v), Err(e) => { @@ -388,14 +394,7 @@ pub fn convert(opts: &ConvertArgs) { } }?; - if len.is_some() { - bar.inc(1); - } else if last_tick.elapsed() >= Duration::from_millis(66) { - last_tick = Instant::now(); - bar.set_message(format!("{total_read}")); - bar.inc(1); - bar.tick(); - } + bar.inc(1); if opts.canonicalize { Some(NaivePolyCube::from(cube).canonical_form().into()) @@ -421,7 +420,7 @@ pub fn convert(opts: &ConvertArgs) { if !bar.is_finished() { match std::fs::rename(output_path_temp, output_path) { - Ok(_) => bar.finish_with_message(format!("{path} Done!")), + Ok(_) => bar.finish(), Err(e) => { bar.abandon_with_message(format!("{path} Failed to write final file: {e}")); return; diff --git a/rust/src/pcube/compression.rs b/rust/src/pcube/compression.rs index 61ea73b..8e09bdc 100644 --- a/rust/src/pcube/compression.rs +++ b/rust/src/pcube/compression.rs @@ -1,7 +1,9 @@ -use std::io::{BufReader, Read, Write}; +use std::io::{BufReader, BufWriter, Read, Write}; use flate2::{read::GzDecoder, write::GzEncoder}; +const BUF_SIZE: usize = 1024 * 16384; + /// Compression types supported for `.pcube` files. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Compression { @@ -45,7 +47,7 @@ where { pub fn new(compression: Compression, reader: T) -> Self { match compression { - Compression::None => Self::Uncompressed(BufReader::new(reader)), + Compression::None => Self::Uncompressed(BufReader::with_capacity(BUF_SIZE, reader)), Compression::Gzip => Self::Gzip(GzDecoder::new(reader)), } } @@ -74,7 +76,7 @@ pub enum Writer where T: Write, { - Uncompressed(T), + Uncompressed(BufWriter), Gzip(GzEncoder), } @@ -84,7 +86,7 @@ where { pub fn new(compression: Compression, writer: T) -> Self { match compression { - Compression::None => Self::Uncompressed(writer), + Compression::None => Self::Uncompressed(BufWriter::with_capacity(BUF_SIZE, writer)), Compression::Gzip => Self::Gzip(GzEncoder::new(writer, flate2::Compression::default())), } } diff --git a/rust/src/pcube/mod.rs b/rust/src/pcube/mod.rs index 8dc6175..c9885d2 100644 --- a/rust/src/pcube/mod.rs +++ b/rust/src/pcube/mod.rs @@ -2,7 +2,7 @@ use std::{ fs::File, - io::{ErrorKind, Read, Seek, Write}, + io::{ErrorKind, Read, Write}, iter::Peekable, path::Path, }; @@ -93,25 +93,7 @@ where let [orientation, compression] = header; let canonicalized = orientation != 0; - let mut cube_count: u64 = 0; - let mut shift = 0; - loop { - let mut next_byte = [0u8; 1]; - input.read_exact(&mut next_byte)?; - - let [next_byte] = next_byte; - - cube_count |= ((next_byte & 0x7F) as u64) << shift; - - shift += 7; - if shift > 64 { - panic!("Cannot load possibly more than u64 cubes..."); - } - - if next_byte & 0x80 == 0 { - break; - } - } + let cube_count = PCubeFile::read_leb128(&mut input)?; let len = if cube_count == 0 { None @@ -177,54 +159,102 @@ impl PCubeFile { Self::new(file) } - /// Write implementation - fn write_impl( - write_magic: bool, - mut cubes: I, - is_canonical: bool, - compression: Compression, - mut write: W, - ) -> std::io::Result<()> - where - I: Iterator, - W: Write, - { - if write_magic { - write.write_all(&MAGIC)?; - 
} + fn read_leb128(mut reader: impl Read) -> std::io::Result { + let mut cube_count: u64 = 0; + let mut shift = 0; + loop { + let mut next_byte = [0u8; 1]; + reader.read_exact(&mut next_byte)?; - let compression_val = compression.into(); - let orientation_val = if is_canonical { 1 } else { 0 }; + let [next_byte] = next_byte; - write.write_all(&[orientation_val, compression_val])?; + let is_last_byte = (next_byte & 0x80) == 0x00; + let value = (next_byte & 0x7F) as u64; - let mut cube_count = 0; - let (_, max) = cubes.size_hint(); + if shift > 63 && value != 0 || shift > 56 && value > 1 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Cannot load more than u64 cubes", + )); + } + + cube_count |= value.overflowing_shl(shift).0; + shift += 7; - if let Some(max) = max { - cube_count = max; + if is_last_byte { + break; + } } + return Ok(cube_count); + } + + /// Write a leb128 value + /// + /// If `prefill` is `true`, this function will always + /// write 10 bytes of data describing `number`. + fn write_leb128(mut number: u64, mut writer: impl Write, prefill: bool) -> std::io::Result<()> { let mut ran_once = false; - while cube_count > 0 || !ran_once { + let mut bytes_written = 0; + while number > 0 || !ran_once || (prefill && bytes_written < 10) { ran_once = true; - let mut next_byte = (cube_count as u8) & 0x7F; - cube_count >>= 7; + let mut next_byte = (number as u8) & 0x7F; + number >>= 7; - if cube_count > 0 { + if number > 0 || (prefill && bytes_written != 9) { next_byte |= 0x80; } - write.write_all(&[next_byte])?; + writer.write_all(&[next_byte])?; + bytes_written += 1; } + Ok(()) + } + + /// Write the header + /// + /// If `prefill_len` is `true`, the length is _always_ written + /// as 10 bytes. This way, rewriting the header in-place is possible. 
+ fn write_header( + mut write: impl Write, + magic: [u8; 4], + is_canonical: bool, + compression: Compression, + cube_count: Option, + prefill_len: bool, + ) -> std::io::Result<()> { + let compression_val = compression.into(); + let orientation_val = if is_canonical { 1 } else { 0 }; + + let cube_count = cube_count.unwrap_or(0); + + write.write_all(&magic)?; + write.write_all(&[orientation_val, compression_val])?; + Self::write_leb128(cube_count, &mut write, prefill_len)?; + + Ok(()) + } + + /// Write implementation + fn write_impl(cubes: I, compression: Compression, write: W) -> std::io::Result + where + I: Iterator, + W: Write, + { let mut writer = Writer::new(compression, write); - if let Some(e) = cubes.find_map(|v| v.pack(&mut writer).err()) { + let mut cube_count = 0; + if let Some(e) = cubes + .inspect(|_| cube_count += 1) + .find_map(|v| v.pack(&mut writer).err()) + { return Err(e); } - Ok(()) + writer.flush()?; + + Ok(cube_count) } /// Write the [`RawPCube`]s produced by `I` into `W`. 
@@ -235,13 +265,43 @@ impl PCubeFile { is_canonical: bool, compression: Compression, cubes: I, - write: W, + mut write: W, + ) -> std::io::Result + where + I: Iterator, + W: std::io::Write, + { + let len = cubes.size_hint().1.map(|v| v as u64); + + Self::write_header(&mut write, MAGIC, is_canonical, compression, len, false)?; + + Self::write_impl(cubes, compression, write) + } + + pub fn write_seekable( + mut seekable: S, + is_canonical: bool, + compression: Compression, + cubes: I, ) -> std::io::Result<()> where + S: std::io::Seek + std::io::Write, I: Iterator, - W: Write, { - Self::write_impl(true, cubes, is_canonical, compression, write) + let len = cubes.size_hint().1.map(|v| v as u64); + let magic = [0, 0, 0, 0]; + Self::write_header(&mut seekable, magic, is_canonical, compression, len, true)?; + + let len = Self::write_impl(cubes, compression, &mut seekable)?; + let len = Some(len as u64); + + // Write magic and cube length at the end + seekable.rewind()?; + Self::write_header(&mut seekable, MAGIC, is_canonical, compression, len, true)?; + + seekable.flush()?; + + Ok(()) } /// Write the [`RawPCube`]s produced by `I` to the file at `path`. 
@@ -264,19 +324,10 @@ impl PCubeFile { where I: Iterator, { - let mut file = std::fs::File::create(path.as_ref())?; - + let file = std::fs::File::create(path.as_ref())?; file.set_len(0)?; - file.seek(std::io::SeekFrom::Start(0))?; - file.write_all(&[0, 0, 0, 0])?; - - Self::write_impl(false, cubes, is_canonical, compression, &mut file)?; - // Write magic last - file.seek(std::io::SeekFrom::Start(0))?; - file.write_all(&MAGIC)?; - - Ok(()) + Self::write_seekable(file, is_canonical, compression, cubes) } } @@ -407,3 +458,34 @@ where } impl AllUniquePolycubeIterator for AllUnique where T: Read {} + +#[test] +pub fn leb128_len() { + let values = [0, 1, 24, 150283, 0x7FFFF_FFFF, u64::MAX - 1, u64::MAX]; + + for value in values { + let mut data = Vec::new(); + PCubeFile::write_leb128(value, &mut data, true).unwrap(); + + assert_eq!(value, PCubeFile::read_leb128(&data[..]).unwrap()); + } + + let mut many_zeros = [0x80; 20]; + many_zeros[19] = 0x00; + + assert!(PCubeFile::read_leb128(&many_zeros[..]).is_ok()); +} + +#[test] +pub fn leb128_unparseable() { + let unparseable_values = [ + &[0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02][..], + &[ + 0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, + ][..], + ]; + + for unparseable in unparseable_values { + assert!(PCubeFile::read_leb128(unparseable).is_err()); + } +}