From d773bde2ac861bbfd44553a8797d58e92e67a43c Mon Sep 17 00:00:00 2001 From: ttldtor Date: Mon, 8 Sep 2025 21:54:22 +0300 Subject: [PATCH 1/8] Add sal\sar benchmarks --- tests/bench.cpp | 117 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 4 deletions(-) diff --git a/tests/bench.cpp b/tests/bench.cpp index 8961b1a..00bd163 100644 --- a/tests/bench.cpp +++ b/tests/bench.cpp @@ -12,12 +12,121 @@ #include using namespace org::ttldtor::bits; +using namespace std::literals; // NOLINTNEXTLINE -TEST_CASE("tutorial_fast_v2") { - uint64_t x = 1; - ankerl::nanobench::Bench().run("++x", [&]() { - ankerl::nanobench::doNotOptimizeAway(x += 1); +TEST_CASE("bench_sal_sar_vs_builtin") { + constexpr size_t N = 1u << 15; + + std::mt19937_64 rng{0xB00B00}; + // Для сравнения с builtin << используем беззнаковые типы и неотрицательные сдвиги + std::uniform_int_distribution u32dist{0u, 0x7FFF'FFFFu}; + std::uniform_int_distribution u64dist{0ull, (1ull << 50)}; + std::uniform_int_distribution shldist{0u, 63u}; + + // Для сравнения с builtin >> используем знаковые типы и неотрицательные сдвиги + std::uniform_int_distribution s32dist{-(1 << 29), (1 << 29)}; + std::uniform_int_distribution s64dist{-(1ll << 50), (1ll << 50)}; + std::uniform_int_distribution shrdist{0u, 63u}; + + std::vector u32(N); + std::vector shl32(N); + std::vector u64(N); + std::vector shl64(N); + + std::vector s32(N); + std::vector shr32(N); + std::vector s64(N); + std::vector shr64(N); + + for (size_t i = 0; i < N; ++i) { + u32[i] = u32dist(rng); + shl32[i]= shldist(rng) % 32; + u64[i] = u64dist(rng); + shl64[i]= shldist(rng) % 64; + + s32[i] = s32dist(rng); + shr32[i]= shrdist(rng) % 32; + s64[i] = s64dist(rng); + shr64[i]= shrdist(rng) % 64; + } + + ankerl::nanobench::Bench bench; + bench.title("Compare sal/sar vs builtin shifts") + .unit("op") + .batch(N) + .warmup(10) +#if defined(WIN32) + .minEpochTime(150ms) +#endif + .relative(false) + .performanceCounters(true); + + // sal vs builtin << (беззнаковые типы, чтобы избежать UB для <<) + bench.run("sal", [&] { + uint32_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= sal(u32[i], static_cast(shl32[i])); + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + + bench.run("builtin << (uint32_t)", [&] { + uint32_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= static_cast(u32[i] << shl32[i]); + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + + bench.run("sal", [&] { + uint64_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= sal(u64[i], static_cast(shl64[i])); + } + ankerl::nanobench::doNotOptimizeAway(acc); }); + + bench.run("builtin << (uint64_t)", [&] { + uint64_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= static_cast(u64[i] << shl64[i]); + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + + // sar vs builtin >> (знаковые типы, неотрицательные сдвиги) + bench.run("sar", [&] { + int32_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= sar(s32[i], static_cast(shr32[i])); + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + + bench.run("builtin >> (int32_t)", [&] { + int32_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= static_cast(s32[i] >> shr32[i]); + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + + bench.run("sar", [&] { + int64_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= sar(s64[i], static_cast(shr64[i])); + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + + bench.run("builtin >> (int64_t)", [&] { + int64_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= static_cast(s64[i] >> shr64[i]); + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + } \ No newline at end of file From 983d34f1c3dc8bd69eff7e3eece678281a45ff1d Mon Sep 17 00:00:00 2001 From: ttldtor Date: Mon, 8 Sep 2025 22:03:18 +0300 Subject: [PATCH 2/8] Add sal\sar benchmarks --- tests/CMakeLists.txt | 2 ++ tests/bench.cpp | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 768afa2..7f3f732 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -20,6 +20,7 @@ add_executable(bits_tests bits.cpp ) +target_compile_features(bits_tests PRIVATE cxx_std_20) target_link_libraries(bits_tests PRIVATE bits::bits) target_include_directories(bits_tests PRIVATE ${doctest_SOURCE_DIR}) @@ -30,6 +31,7 @@ add_executable(bits_bench bench.cpp ) +target_compile_features(bits_bench PRIVATE cxx_std_20) target_link_libraries(bits_bench PRIVATE bits::bits nanobench) target_include_directories(bits_bench PRIVATE ${doctest_SOURCE_DIR}) diff --git a/tests/bench.cpp b/tests/bench.cpp index 00bd163..c25b5b6 100644 --- a/tests/bench.cpp +++ b/tests/bench.cpp @@ -57,9 +57,7 @@ TEST_CASE("bench_sal_sar_vs_builtin") { .unit("op") .batch(N) .warmup(10) -#if defined(WIN32) .minEpochTime(150ms) -#endif .relative(false) .performanceCounters(true); From 0882598ada4d7a635dcd06bd0a1cced1920cdb25 Mon Sep 17 00:00:00 2001 From: ttldtor Date: Mon, 8 Sep 2025 22:08:53 +0300 Subject: [PATCH 3/8] Add sal\sar benchmarks --- CMakeLists.txt | 3 +++ tests/CMakeLists.txt | 12 ++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dc208d..c97e0d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.16) project(bits VERSION 0.1.0 LANGUAGES CXX) +set(CMAKE_CXX_STANDARD 20) +set(CXX_EXTENSIONS OFF) + add_library(bits INTERFACE) add_library(bits::bits ALIAS bits) add_library(org::ttldtor::bits::bits ALIAS bits) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7f3f732..086cdbb 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -20,7 +20,11 @@ add_executable(bits_tests bits.cpp ) -target_compile_features(bits_tests PRIVATE cxx_std_20) +set_target_properties(bits_tests PROPERTIES + CXX_STANDARD 20 + CXX_EXTENSIONS OFF +) + target_link_libraries(bits_tests PRIVATE bits::bits) target_include_directories(bits_tests PRIVATE ${doctest_SOURCE_DIR}) @@ -31,7 +35,11 @@ add_executable(bits_bench bench.cpp ) -target_compile_features(bits_bench PRIVATE cxx_std_20) +set_target_properties(bits_bench PROPERTIES + CXX_STANDARD 20 + CXX_EXTENSIONS OFF +) + target_link_libraries(bits_bench PRIVATE bits::bits nanobench) target_include_directories(bits_bench PRIVATE ${doctest_SOURCE_DIR}) From 9ca8579371934ef49e227f9a3a70d7fae633d6a5 Mon Sep 17 00:00:00 2001 From: ttldtor Date: Mon, 8 Sep 2025 22:12:25 +0300 Subject: [PATCH 4/8] Add sal\sar benchmarks --- tests/bench.cpp | 54 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/bench.cpp b/tests/bench.cpp index c25b5b6..d55789b 100644 --- a/tests/bench.cpp +++ b/tests/bench.cpp @@ -23,43 +23,44 @@ TEST_CASE("bench_sal_sar_vs_builtin") { // Для сравнения с builtin << используем беззнаковые типы и неотрицательные сдвиги std::uniform_int_distribution u32dist{0u, 0x7FFF'FFFFu}; std::uniform_int_distribution u64dist{0ull, (1ull << 50)}; - std::uniform_int_distribution shldist{0u, 63u}; + std::uniform_int_distribution shldist{0u, 63u}; // Для сравнения с builtin >> используем знаковые типы и неотрицательные сдвиги - std::uniform_int_distribution s32dist{-(1 << 29), (1 << 29)}; - std::uniform_int_distribution s64dist{-(1ll << 50), (1ll << 50)}; + std::uniform_int_distribution s32dist{-(1 << 29), (1 << 29)}; + std::uniform_int_distribution s64dist{-(1ll << 50), (1ll << 50)}; std::uniform_int_distribution shrdist{0u, 63u}; - std::vector u32(N); - std::vector shl32(N); - std::vector u64(N); - std::vector shl64(N); + std::vector u32(N); + std::vector shl32(N); + std::vector u64(N); + std::vector shl64(N); - std::vector s32(N); - std::vector shr32(N); - std::vector s64(N); - std::vector shr64(N); + std::vector s32(N); + std::vector shr32(N); + std::vector s64(N); + std::vector shr64(N); for (size_t i = 0; i < N; ++i) { - u32[i] = u32dist(rng); - shl32[i]= shldist(rng) % 32; - u64[i] = u64dist(rng); - shl64[i]= shldist(rng) % 64; - - s32[i] = s32dist(rng); - shr32[i]= shrdist(rng) % 32; - s64[i] = s64dist(rng); - shr64[i]= shrdist(rng) % 64; + u32[i] = u32dist(rng); + shl32[i] = shldist(rng) % 32; + u64[i] = u64dist(rng); + shl64[i] = shldist(rng) % 64; + + s32[i] = s32dist(rng); + shr32[i] = shrdist(rng) % 32; + s64[i] = s64dist(rng); + shr64[i] = shrdist(rng) % 64; } ankerl::nanobench::Bench bench; bench.title("Compare sal/sar vs builtin shifts") - .unit("op") - .batch(N) - .warmup(10) - .minEpochTime(150ms) - .relative(false) - .performanceCounters(true); + .unit("op") + .batch(N) + .warmup(10) + .minEpochTime(150ms) + .minEpochIterations(500'000) + .relative(false) + .performanceCounters(true); // sal vs builtin << (беззнаковые типы, чтобы избежать UB для <<) bench.run("sal", [&] { @@ -126,5 +127,4 @@ TEST_CASE("bench_sal_sar_vs_builtin") { } ankerl::nanobench::doNotOptimizeAway(acc); }); - } \ No newline at end of file From 66ccc32937348000c4c0de612473b96db14d6639 Mon Sep 17 00:00:00 2001 From: ttldtor Date: Mon, 8 Sep 2025 22:16:37 +0300 Subject: [PATCH 5/8] Add sal\sar benchmarks --- tests/bench.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/bench.cpp b/tests/bench.cpp index d55789b..f705425 100644 --- a/tests/bench.cpp +++ b/tests/bench.cpp @@ -58,7 +58,6 @@ TEST_CASE("bench_sal_sar_vs_builtin") { .batch(N) .warmup(10) .minEpochTime(150ms) - .minEpochIterations(500'000) .relative(false) .performanceCounters(true); From ed109a957b476f7f8d3ee380b3a8b69e54f8b471 Mon Sep 17 00:00:00 2001 From: ttldtor Date: Mon, 8 Sep 2025 23:24:40 +0300 Subject: [PATCH 6/8] Add sal\sar benchmarks --- tests/bench.cpp | 137 ++++++++++++++++++++++-------------------------- 1 file changed, 63 insertions(+), 74 deletions(-) diff --git a/tests/bench.cpp b/tests/bench.cpp index f705425..159adb7 100644 --- a/tests/bench.cpp +++ b/tests/bench.cpp @@ -52,78 +52,67 @@ TEST_CASE("bench_sal_sar_vs_builtin") { shr64[i] = shrdist(rng) % 64; } - ankerl::nanobench::Bench bench; - bench.title("Compare sal/sar vs builtin shifts") - .unit("op") - .batch(N) - .warmup(10) - .minEpochTime(150ms) - .relative(false) - .performanceCounters(true); - - // sal vs builtin << (беззнаковые типы, чтобы избежать UB для <<) - bench.run("sal", [&] { - uint32_t acc = 0; - for (size_t i = 0; i < N; ++i) { - acc ^= sal(u32[i], static_cast(shl32[i])); - } - ankerl::nanobench::doNotOptimizeAway(acc); - }); - - bench.run("builtin << (uint32_t)", [&] { - uint32_t acc = 0; - for (size_t i = 0; i < N; ++i) { - acc ^= static_cast(u32[i] << shl32[i]); - } - ankerl::nanobench::doNotOptimizeAway(acc); - }); - - bench.run("sal", [&] { - uint64_t acc = 0; - for (size_t i = 0; i < N; ++i) { - acc ^= sal(u64[i], static_cast(shl64[i])); - } - ankerl::nanobench::doNotOptimizeAway(acc); - }); - - bench.run("builtin << (uint64_t)", [&] { - uint64_t acc = 0; - for (size_t i = 0; i < N; ++i) { - acc ^= static_cast(u64[i] << shl64[i]); - } - ankerl::nanobench::doNotOptimizeAway(acc); - }); - - // sar vs builtin >> (знаковые типы, неотрицательные сдвиги) - bench.run("sar", [&] { - int32_t acc = 0; - for (size_t i = 0; i < N; ++i) { - acc ^= sar(s32[i], static_cast(shr32[i])); - } - ankerl::nanobench::doNotOptimizeAway(acc); - }); - - bench.run("builtin >> (int32_t)", [&] { - int32_t acc = 0; - for (size_t i = 0; i < N; ++i) { - acc ^= static_cast(s32[i] >> shr32[i]); - } - ankerl::nanobench::doNotOptimizeAway(acc); - }); - - bench.run("sar", [&] { - int64_t acc = 0; - for (size_t i = 0; i < N; ++i) { - acc ^= sar(s64[i], static_cast(shr64[i])); - } - ankerl::nanobench::doNotOptimizeAway(acc); - }); - - bench.run("builtin >> (int64_t)", [&] { - int64_t acc = 0; - for (size_t i = 0; i < N; ++i) { - acc ^= static_cast(s64[i] >> shr64[i]); - } - ankerl::nanobench::doNotOptimizeAway(acc); - }); + auto createBench = [&](auto title) { + ankerl::nanobench::Bench bench; + + bench.title(title) + .unit("op") + .batch(N) + .warmup(100) + .minEpochTime(150ms) + .minEpochIterations(60000) + .relative(true) + .performanceCounters(true); + + return bench; + }; + + auto runSalBench = [&](auto& bench, const auto& typeName, const auto& values, const auto& shifts) { + bench.run("builtin << ("s + typeName + ")", [&] { + uint32_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= values[i] << shifts[i]; + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + + bench.run("sal<"s + typeName + ">", [&] { + uint32_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= sal(values[i], shifts[i]); + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + }; + + + auto runSarBench = [&](auto& bench, const auto& typeName, const auto& values, const auto& shifts) { + bench.run("builtin >> ("s + typeName + ")", [&] { + uint32_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= values[i] >> shifts[i]; + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + + bench.run("sar<"s + typeName + ">", [&] { + uint32_t acc = 0; + for (size_t i = 0; i < N; ++i) { + acc ^= sar(values[i], shifts[i]); + } + ankerl::nanobench::doNotOptimizeAway(acc); + }); + }; + + auto salBench1 = createBench("Compare sal vs builtin << (uint32_t)"); + runSalBench(salBench1, "uint32_t", u32, shl32); + + auto salBench2 = createBench("Compare sal vs builtin << (uint64_t)"); + runSalBench(salBench2, "uint64_t", u64, shl64); + + auto sarBench1 = createBench("Compare sar vs builtin >> (uint32_t)"); + runSarBench(sarBench1, "uint32_t", u32, shl32); + + auto sarBench2 = createBench("Compare sar vs builtin >> (uint64_t)"); + runSarBench(sarBench2, "uint64_t", u64, shl64); } \ No newline at end of file From f835e6967b40a58962603f700731b218a8f5f24d Mon Sep 17 00:00:00 2001 From: ttldtor Date: Mon, 8 Sep 2025 23:45:21 +0300 Subject: [PATCH 7/8] Add sal\sar benchmarks --- tests/CMakeLists.txt | 31 +++++++++++++++++++++---------- tests/bench.cpp | 4 ++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 086cdbb..1eb617a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,21 +3,21 @@ include(FetchContent) FetchContent_Declare( - doctest - GIT_REPOSITORY https://github.com/doctest/doctest.git - GIT_TAG v2.4.12) + doctest + GIT_REPOSITORY https://github.com/doctest/doctest.git + GIT_TAG v2.4.12) FetchContent_MakeAvailable(doctest) FetchContent_Declare( - nanobench - GIT_REPOSITORY https://github.com/martinus/nanobench.git - GIT_TAG v4.1.0 - GIT_SHALLOW TRUE) + nanobench + GIT_REPOSITORY https://github.com/martinus/nanobench.git + GIT_TAG v4.1.0 + GIT_SHALLOW TRUE) FetchContent_MakeAvailable(nanobench) add_executable(bits_tests - main.cpp - bits.cpp + main.cpp + bits.cpp ) set_target_properties(bits_tests PROPERTIES @@ -43,4 +43,15 @@ set_target_properties(bits_bench PROPERTIES target_link_libraries(bits_bench PRIVATE bits::bits nanobench) target_include_directories(bits_bench PRIVATE ${doctest_SOURCE_DIR}) -add_test(NAME bits_bench COMMAND bits_bench) \ No newline at end of file +if (CMAKE_CONFIGURATION_TYPES) + set_property(TARGET bits_bench PROPERTY EXCLUDE_FROM_DEFAULT_BUILD_Debug TRUE) + set_property(TARGET bits_bench PROPERTY EXCLUDE_FROM_ALL "$") + add_test(NAME bits_bench COMMAND bits_bench) + set_tests_properties(bits_bench PROPERTIES DISABLED "$") +else () + if (CMAKE_BUILD_TYPE STREQUAL "Release") + add_test(NAME bits_bench COMMAND bits_bench) + else () + set_property(TARGET bits_bench PROPERTY EXCLUDE_FROM_ALL TRUE) + endif () +endif () diff --git a/tests/bench.cpp b/tests/bench.cpp index 159adb7..ac73606 100644 --- a/tests/bench.cpp +++ b/tests/bench.cpp @@ -20,12 +20,12 @@ TEST_CASE("bench_sal_sar_vs_builtin") { constexpr size_t N = 1u << 15; std::mt19937_64 rng{0xB00B00}; - // Для сравнения с builtin << используем беззнаковые типы и неотрицательные сдвиги + // For comparison with builtin << we use unsigned types and non-negative shifts std::uniform_int_distribution u32dist{0u, 0x7FFF'FFFFu}; std::uniform_int_distribution u64dist{0ull, (1ull << 50)}; std::uniform_int_distribution shldist{0u, 63u}; - // Для сравнения с builtin >> используем знаковые типы и неотрицательные сдвиги + // For comparison with builtin >> we use signed types and non-negative shifts std::uniform_int_distribution s32dist{-(1 << 29), (1 << 29)}; std::uniform_int_distribution s64dist{-(1ll << 50), (1ll << 50)}; std::uniform_int_distribution shrdist{0u, 63u}; From 68450a5e5ffe8289dece8adc53ee706bf04ac07c Mon Sep 17 00:00:00 2001 From: ttldtor Date: Mon, 8 Sep 2025 23:51:26 +0300 Subject: [PATCH 8/8] Add sal\sar benchmarks --- tests/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1eb617a..b69fa8b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -45,7 +45,6 @@ target_include_directories(bits_bench PRIVATE ${doctest_SOURCE_DIR}) if (CMAKE_CONFIGURATION_TYPES) set_property(TARGET bits_bench PROPERTY EXCLUDE_FROM_DEFAULT_BUILD_Debug TRUE) - set_property(TARGET bits_bench PROPERTY EXCLUDE_FROM_ALL "$") add_test(NAME bits_bench COMMAND bits_bench) set_tests_properties(bits_bench PROPERTIES DISABLED "$") else ()