Review notes (text ahead of the first "diff --git" header is skipped by git apply / patch):
- This patch was recovered from a copy whose line breaks and indentation were collapsed and whose
  <...> payloads were stripped. Line breaks, indentation and blank context lines below are
  reconstructed; hunk header counts are unchanged. Verify against the repository before applying.
- CMakeLists.txt: CXX_EXTENSIONS is not a variable CMake reads; CMAKE_CXX_EXTENSIONS is.
- tests/CMakeLists.txt: per-config property suffixes are upper-case (EXCLUDE_FROM_DEFAULT_BUILD_DEBUG);
  DISABLED "$" is restored to "$<CONFIG:Debug>" (presumed original, TODO confirm).
- tests/bench.cpp: the sar benchmarks now run on the signed data sets they were generated for;
  accumulators follow the operand width instead of a fixed uint32_t; std::vector element types are
  restored from the variable names (the element type of the shift vectors is an assumption).
- NOTE(review): the "#include" context line lost its target in extraction and is left as found.
- NOTE(review): the new test uses std::mt19937_64, std::vector, "..."s and 150ms but the patch adds no
  includes; confirm <random>, <vector>, <string> and <chrono> are reachable from lines 1-11.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4dc208d..c97e0d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.16)
 
 project(bits VERSION 0.1.0 LANGUAGES CXX)
 
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_EXTENSIONS OFF) # initialises CXX_EXTENSIONS on targets created after this point
+
 add_library(bits INTERFACE)
 add_library(bits::bits ALIAS bits)
 add_library(org::ttldtor::bits::bits ALIAS bits)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 768afa2..b69fa8b 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -3,21 +3,26 @@
 include(FetchContent)
 
 FetchContent_Declare(
-        doctest
-        GIT_REPOSITORY https://github.com/doctest/doctest.git
-        GIT_TAG v2.4.12)
+    doctest
+    GIT_REPOSITORY https://github.com/doctest/doctest.git
+    GIT_TAG v2.4.12)
 FetchContent_MakeAvailable(doctest)
 
 FetchContent_Declare(
-        nanobench
-        GIT_REPOSITORY https://github.com/martinus/nanobench.git
-        GIT_TAG v4.1.0
-        GIT_SHALLOW TRUE)
+    nanobench
+    GIT_REPOSITORY https://github.com/martinus/nanobench.git
+    GIT_TAG v4.1.0
+    GIT_SHALLOW TRUE)
 FetchContent_MakeAvailable(nanobench)
 
 add_executable(bits_tests
-        main.cpp
-        bits.cpp
+    main.cpp
+    bits.cpp
+)
+
+set_target_properties(bits_tests PROPERTIES
+    CXX_STANDARD 20
+    CXX_EXTENSIONS OFF
 )
 
 target_link_libraries(bits_tests PRIVATE bits::bits)
@@ -30,7 +35,22 @@ add_executable(bits_bench
     bench.cpp
 )
 
+set_target_properties(bits_bench PROPERTIES
+    CXX_STANDARD 20
+    CXX_EXTENSIONS OFF
+)
+
 target_link_libraries(bits_bench PRIVATE bits::bits nanobench)
 target_include_directories(bits_bench PRIVATE ${doctest_SOURCE_DIR})
 
-add_test(NAME bits_bench COMMAND bits_bench)
\ No newline at end of file
+if (CMAKE_CONFIGURATION_TYPES)
+    set_property(TARGET bits_bench PROPERTY EXCLUDE_FROM_DEFAULT_BUILD_DEBUG TRUE) # <CONFIG> suffix is upper-case
+    add_test(NAME bits_bench COMMAND bits_bench)
+    set_tests_properties(bits_bench PROPERTIES DISABLED "$<CONFIG:Debug>") # benchmark is not built in Debug
+else ()
+    if (CMAKE_BUILD_TYPE STREQUAL "Release")
+        add_test(NAME bits_bench COMMAND bits_bench)
+    else ()
+        set_property(TARGET bits_bench PROPERTY EXCLUDE_FROM_ALL TRUE)
+    endif ()
+endif ()
diff --git a/tests/bench.cpp b/tests/bench.cpp
index 8961b1a..ac73606 100644
--- a/tests/bench.cpp
+++ b/tests/bench.cpp
@@ -12,12 +12,107 @@
 #include
 
 using namespace org::ttldtor::bits;
+using namespace std::literals;
 
 
 // NOLINTNEXTLINE
-TEST_CASE("tutorial_fast_v2") {
-    uint64_t x = 1;
-    ankerl::nanobench::Bench().run("++x", [&]() {
-        ankerl::nanobench::doNotOptimizeAway(x += 1);
-    });
+TEST_CASE("bench_sal_sar_vs_builtin") {
+    constexpr size_t N = 1u << 15;
+
+    std::mt19937_64 rng{0xB00B00};
+    // For comparison with builtin << we use unsigned types and non-negative shifts
+    std::uniform_int_distribution u32dist{0u, 0x7FFF'FFFFu};
+    std::uniform_int_distribution u64dist{0ull, (1ull << 50)};
+    std::uniform_int_distribution shldist{0u, 63u};
+
+    // For comparison with builtin >> we use signed types and non-negative shifts
+    std::uniform_int_distribution s32dist{-(1 << 29), (1 << 29)};
+    std::uniform_int_distribution s64dist{-(1ll << 50), (1ll << 50)};
+    std::uniform_int_distribution shrdist{0u, 63u};
+
+    std::vector<uint32_t> u32(N);
+    std::vector<uint32_t> shl32(N);
+    std::vector<uint64_t> u64(N);
+    std::vector<uint32_t> shl64(N);
+
+    std::vector<int32_t> s32(N);
+    std::vector<uint32_t> shr32(N);
+    std::vector<int64_t> s64(N);
+    std::vector<uint32_t> shr64(N);
+
+    for (size_t i = 0; i < N; ++i) {
+        u32[i] = u32dist(rng);
+        shl32[i] = shldist(rng) % 32;
+        u64[i] = u64dist(rng);
+        shl64[i] = shldist(rng) % 64;
+
+        s32[i] = s32dist(rng);
+        shr32[i] = shrdist(rng) % 32;
+        s64[i] = s64dist(rng);
+        shr64[i] = shrdist(rng) % 64;
+    }
+
+    auto createBench = [&](auto title) {
+        ankerl::nanobench::Bench bench;
+
+        bench.title(title)
+            .unit("op")
+            .batch(N)
+            .warmup(100)
+            .minEpochTime(150ms)
+            .minEpochIterations(60000)
+            .relative(true)
+            .performanceCounters(true);
+
+        return bench;
+    };
+
+    auto runSalBench = [&](auto& bench, const auto& typeName, const auto& values, const auto& shifts) {
+        bench.run("builtin << ("s + typeName + ")", [&] {
+            decltype(values[0] << shifts[0]) acc{}; // operand-width accumulator: uint32_t would truncate 64-bit results
+            for (size_t i = 0; i < N; ++i) {
+                acc ^= values[i] << shifts[i];
+            }
+            ankerl::nanobench::doNotOptimizeAway(acc);
+        });
+
+        bench.run("sal<"s + typeName + ">", [&] {
+            decltype(values[0] << shifts[0]) acc{}; // same accumulator type as the builtin run, for a fair comparison
+            for (size_t i = 0; i < N; ++i) {
+                acc ^= sal(values[i], shifts[i]);
+            }
+            ankerl::nanobench::doNotOptimizeAway(acc);
+        });
+    };
+
+
+    auto runSarBench = [&](auto& bench, const auto& typeName, const auto& values, const auto& shifts) {
+        bench.run("builtin >> ("s + typeName + ")", [&] {
+            decltype(values[0] >> shifts[0]) acc{}; // operand-width accumulator: uint32_t would truncate 64-bit results
+            for (size_t i = 0; i < N; ++i) {
+                acc ^= values[i] >> shifts[i];
+            }
+            ankerl::nanobench::doNotOptimizeAway(acc);
+        });
+
+        bench.run("sar<"s + typeName + ">", [&] {
+            decltype(values[0] >> shifts[0]) acc{}; // same accumulator type as the builtin run, for a fair comparison
+            for (size_t i = 0; i < N; ++i) {
+                acc ^= sar(values[i], shifts[i]);
+            }
+            ankerl::nanobench::doNotOptimizeAway(acc);
+        });
+    };
+
+    auto salBench1 = createBench("Compare sal vs builtin << (uint32_t)");
+    runSalBench(salBench1, "uint32_t", u32, shl32);
+
+    auto salBench2 = createBench("Compare sal vs builtin << (uint64_t)");
+    runSalBench(salBench2, "uint64_t", u64, shl64);
+
+    auto sarBench1 = createBench("Compare sar vs builtin >> (int32_t)");
+    runSarBench(sarBench1, "int32_t", s32, shr32); // signed data: this is the arithmetic-shift case
+
+    auto sarBench2 = createBench("Compare sar vs builtin >> (int64_t)");
+    runSarBench(sarBench2, "int64_t", s64, shr64); // signed data: this is the arithmetic-shift case
 }
\ No newline at end of file