// Patch overview (review commentary placed ahead of the first `diff --git` header; it is not part of any hunk).
//
// .github/workflows/bench.yml (new file)
//   Adds a "bench" workflow that is triggered only by workflow_dispatch. Its single job runs on
//   ubuntu-latest, checks out the repository with actions/checkout@v4 and runs `make bench`.
//   Runs are grouped by workflow name + ref with cancel-in-progress enabled.
//
// .github/workflows/doc.yml
//   Adds '.devcontainer/**', 'benchmarks/**', 'tests/**' and 'Makefile' to paths-ignore, both for the
//   trigger filtered on branch master (its name is above the hunk and not visible here) and for pull_request.
//
// Makefile (target `bench`)
//   Drops the two `sudo cpupower frequency-set` calls (governor performance before the run, powersave
//   after it), replaces `rm -rf` + `mkdir` with `mkdir -p` so an existing build directory is reused,
//   and runs the benchmark binary with --benchmark_repetitions=10 and
//   --benchmark_report_aggregates_only=true.
//
// benchmarks/channel_benchmark.cpp
//   Removes channel_with_queue_storage, channel_with_vector_storage and channel_with_array_storage, which
//   wrote to and read from the channel on the same thread, and adds the templates bench_dynamic_storage
//   and bench_static_storage. Per benchmark iteration each template constructs a channel, starts a
//   producer std::thread that writes number_of_inputs (100000) copies of `input` and then closes the
//   channel, drains the channel with a range-for on the benchmark thread, and joins the producer.
//   bench_dynamic_storage constructs the channel with channel_capacity (1024); bench_static_storage
//   default-constructs it.
//   The BENCH macro wraps BENCHMARK_TEMPLATE and registers an extra "max" statistic computed with
//   std::max_element. The results table in the header comment is replaced by aggregate rows
//   (mean / median / stddev / cv / max) and wrapped in `clang-format off` / `clang-format on`.
//
// NOTE(review): line breaks and indentation are collapsed in this copy, and angle-bracket tokens look
//   stripped (bare `#include`, `template struct string_input`, `std::array data{}`,
//   `msd::channel channel{...}`, `const std::vector& v`) -- confirm against the original commit before applying.
// NOTE(review): `volatile auto* do_not_optimize = &value;` declares a pointer to volatile data, not a
//   volatile pointer, and the pointer is never dereferenced, so it probably does not act as an
//   optimization barrier. benchmark::DoNotOptimize (which the removed code used) looks like the
//   intended tool -- confirm.
// NOTE(review): the header comment still says "CPU scaling disabled" while `make bench` no longer
//   touches the governor -- confirm how the published numbers were collected.
// NOTE(review): the producer thread is created and joined inside the timed loop, so thread start-up
//   and teardown are included in every measurement -- presumably intentional, verify.
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml new file mode 100644 index 0000000..5834c3d --- /dev/null +++ b/.github/workflows/bench.yml @@ -0,0 +1,18 @@ +name: bench + +on: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + bench: + name: Benchmarks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run Benchmarks + run: make bench diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index 3b202f9..2463b5f 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -5,12 +5,20 @@ on: branches: - master paths-ignore: + - '.devcontainer/**' - '.vscode/**' + - 'benchmarks/**' + - 'tests/**' - 'LICENSE' + - 'Makefile' pull_request: paths-ignore: + - '.devcontainer/**' - '.vscode/**' + - 'benchmarks/**' + - 'tests/**' - 'LICENSE' + - 'Makefile' workflow_dispatch: concurrency: diff --git a/Makefile b/Makefile index 7fcebd6..92d13b0 100644 --- a/Makefile +++ b/Makefile @@ -7,14 +7,12 @@ DOCS_DIR = docs all: bench: - - sudo cpupower frequency-set --governor performance - - rm -rf $(BENCH_DIR) && mkdir $(BENCH_DIR) && cd $(BENCH_DIR) \ + mkdir -p $(BENCH_DIR) && cd $(BENCH_DIR) \ && cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCPP_CHANNEL_BUILD_BENCHMARKS=ON \ && cmake --build . 
--config Release --target channel_benchmark -j \ - && ./benchmarks/channel_benchmark - - - sudo cpupower frequency-set --governor powersave + && ./benchmarks/channel_benchmark \ + --benchmark_repetitions=10 \ + --benchmark_report_aggregates_only=true coverage: rm -rf $(COV_DIR) && mkdir $(COV_DIR) && cd $(COV_DIR) \ diff --git a/benchmarks/channel_benchmark.cpp b/benchmarks/channel_benchmark.cpp index e5c6422..d503455 100644 --- a/benchmarks/channel_benchmark.cpp +++ b/benchmarks/channel_benchmark.cpp @@ -2,75 +2,155 @@ #include +#include +#include #include +#include +// clang-format off /** Results on release build with CPU scaling disabled c++ (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 - 2025-06-13T00:17:30+03:00 - Running ./tests/channel_benchmark - Run on (8 X 3999.91 MHz CPU s) + 2025-06-17T19:55:02+03:00 + Running ./benchmarks/channel_benchmark + Run on (8 X 4000.08 MHz CPU s) CPU Caches: L1 Data 32 KiB (x4) L1 Instruction 32 KiB (x4) L2 Unified 256 KiB (x4) L3 Unified 8192 KiB (x1) - Load Average: 2.65, 1.61, 1.50 - ------------------------------------------------------------------------------ - Benchmark Time CPU Iterations - ------------------------------------------------------------------------------ - channel_with_queue_storage 42602 ns 42598 ns 16407 - channel_with_vector_storage 42724 ns 42723 ns 16288 - channel_with_vector_storage 51332 ns 51328 ns 11776 + Load Average: 1.38, 1.22, 1.06 + ------------------------------------------------------------------------------------------------------------------------------------------------------------ + Benchmark Time CPU Iterations + ------------------------------------------------------------------------------------------------------------------------------------------------------------ + bench_dynamic_storage, string_input<100000>>_mean 652607002 ns 226690848 ns 10 + bench_dynamic_storage, string_input<100000>>_median 651695229 ns 225379690 ns 10 + bench_dynamic_storage, string_input<100000>>_stddev 12253781 ns 
15462972 ns 10 + bench_dynamic_storage, string_input<100000>>_cv 1.88 % 6.82 % 10 + bench_dynamic_storage, string_input<100000>>_max 672915805 ns 255534858 ns 10 + bench_dynamic_storage, string_input<100000>>_mean 974087950 ns 514260828 ns 10 + bench_dynamic_storage, string_input<100000>>_median 977160289 ns 516344216 ns 10 + bench_dynamic_storage, string_input<100000>>_stddev 18312948 ns 28280400 ns 10 + bench_dynamic_storage, string_input<100000>>_cv 1.88 % 5.50 % 10 + bench_dynamic_storage, string_input<100000>>_max 1003003285 ns 558790265 ns 10 + bench_static_storage, string_input<100000>>_mean 628774895 ns 213404616 ns 10 + bench_static_storage, string_input<100000>>_median 629143659 ns 215630841 ns 10 + bench_static_storage, string_input<100000>>_stddev 8790540 ns 8340659 ns 10 + bench_static_storage, string_input<100000>>_cv 1.40 % 3.91 % 10 + bench_static_storage, string_input<100000>>_max 640584436 ns 224198673 ns 10 + bench_dynamic_storage, string_input<1000>>_mean 43353148 ns 33321779 ns 10 + bench_dynamic_storage, string_input<1000>>_median 43035735 ns 33114531 ns 10 + bench_dynamic_storage, string_input<1000>>_stddev 626857 ns 516438 ns 10 + bench_dynamic_storage, string_input<1000>>_cv 1.45 % 1.55 % 10 + bench_dynamic_storage, string_input<1000>>_max 44420815 ns 34055142 ns 10 + bench_dynamic_storage, string_input<1000>>_mean 143175350 ns 134608661 ns 10 + bench_dynamic_storage, string_input<1000>>_median 143349862 ns 135104870 ns 10 + bench_dynamic_storage, string_input<1000>>_stddev 9874397 ns 9112605 ns 10 + bench_dynamic_storage, string_input<1000>>_cv 6.90 % 6.77 % 10 + bench_dynamic_storage, string_input<1000>>_max 160931701 ns 149620486 ns 10 + bench_static_storage, string_input<1000>>_mean 37482750 ns 36598866 ns 10 + bench_static_storage, string_input<1000>>_median 37678000 ns 36697213 ns 10 + bench_static_storage, string_input<1000>>_stddev 972055 ns 739164 ns 10 + bench_static_storage, string_input<1000>>_cv 2.59 % 2.02 % 10 + 
bench_static_storage, string_input<1000>>_max 38740257 ns 37767023 ns 10 + bench_dynamic_storage, struct_input>_mean 56195102 ns 37959789 ns 10 + bench_dynamic_storage, struct_input>_median 56222959 ns 37916027 ns 10 + bench_dynamic_storage, struct_input>_stddev 239106 ns 192415 ns 10 + bench_dynamic_storage, struct_input>_cv 0.43 % 0.51 % 10 + bench_dynamic_storage, struct_input>_max 56524553 ns 38392052 ns 10 + bench_dynamic_storage, struct_input>_mean 318745363 ns 299820882 ns 10 + bench_dynamic_storage, struct_input>_median 333031832 ns 312967363 ns 10 + bench_dynamic_storage, struct_input>_stddev 30118977 ns 28236407 ns 10 + bench_dynamic_storage, struct_input>_cv 9.45 % 9.42 % 10 + bench_dynamic_storage, struct_input>_max 343551976 ns 323198986 ns 10 + bench_static_storage, struct_input>_mean 39037187 ns 32142886 ns 10 + bench_static_storage, struct_input>_median 39015373 ns 32017939 ns 10 + bench_static_storage, struct_input>_stddev 557539 ns 701550 ns 10 + bench_static_storage, struct_input>_cv 1.43 % 2.18 % 10 + bench_static_storage, struct_input>_max 40336146 ns 33191282 ns 10 */ +// clang-format on -static void channel_with_queue_storage(benchmark::State& state) -{ - msd::channel> channel{10}; +static constexpr std::size_t channel_capacity = 1024; +static constexpr std::size_t number_of_inputs = 100000; - std::string input(1000000, 'x'); - std::string out{}; - out.resize(input.size()); +template +struct string_input { + static std::string make() { return std::string(Size, 'c'); } +}; - for (auto _ : state) { - benchmark::DoNotOptimize(channel << input); - benchmark::DoNotOptimize(channel >> out); - } -} +struct data { + std::array data{}; +}; -BENCHMARK(channel_with_queue_storage); +struct struct_input { + static data make() { return data{}; } +}; -static void channel_with_vector_storage(benchmark::State& state) +template +static void bench_dynamic_storage(benchmark::State& state) { - msd::channel> channel{10}; - - std::string input(1000000, 'x'); - 
std::string out{}; - out.resize(input.size()); + const auto input = Input::make(); for (auto _ : state) { - benchmark::DoNotOptimize(channel << input); - benchmark::DoNotOptimize(channel >> out); + msd::channel channel{channel_capacity}; + + std::thread producer([&] { + for (std::size_t i = 0; i < number_of_inputs; ++i) { + channel << input; + } + channel.close(); + }); + + for (auto& value : channel) { + volatile auto* do_not_optimize = &value; + (void)do_not_optimize; + } + + producer.join(); } } -BENCHMARK(channel_with_vector_storage); - -static void channel_with_array_storage(benchmark::State& state) +template +static void bench_static_storage(benchmark::State& state) { - msd::channel> channel{}; - - std::string input(1000000, 'x'); - std::string out{}; - out.resize(input.size()); + const auto input = Input::make(); for (auto _ : state) { - benchmark::DoNotOptimize(channel << input); - benchmark::DoNotOptimize(channel >> out); + msd::channel channel{}; + + std::thread producer([&] { + for (std::size_t i = 0; i < number_of_inputs; ++i) { + channel << input; + } + channel.close(); + }); + + for (auto& value : channel) { + volatile auto* do_not_optimize = &value; + (void)do_not_optimize; + } + + producer.join(); } } -BENCHMARK(channel_with_array_storage); +#define BENCH(...) 
\ + BENCHMARK_TEMPLATE(__VA_ARGS__)->ComputeStatistics("max", [](const std::vector& v) { \ + return *std::max_element(v.begin(), v.end()); \ + }) + +BENCH(bench_dynamic_storage, std::string, msd::queue_storage, string_input<100000>); +BENCH(bench_dynamic_storage, std::string, msd::vector_storage, string_input<100000>); +BENCH(bench_static_storage, std::string, msd::array_storage, string_input<100000>); + +BENCH(bench_dynamic_storage, std::string, msd::queue_storage, string_input<1000>); +BENCH(bench_dynamic_storage, std::string, msd::vector_storage, string_input<1000>); +BENCH(bench_static_storage, std::string, msd::array_storage, string_input<1000>); + +BENCH(bench_dynamic_storage, data, msd::queue_storage, struct_input); +BENCH(bench_dynamic_storage, data, msd::vector_storage, struct_input); +BENCH(bench_static_storage, data, msd::array_storage, struct_input); BENCHMARK_MAIN();