diff --git a/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java b/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java index 7bfc5808..d96dfc93 100644 --- a/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java +++ b/benchmarks/src/main/java/com/demcha/compose/CurrentSpeedBenchmark.java @@ -461,7 +461,12 @@ private BenchmarkConfig resolveConfig(BenchmarkProfile profile) { parseThreadCounts(System.getProperty("graphcompose.benchmark.threads", defaultThreadCounts))); } - private PerformanceGateResult evaluatePerformanceGate(BenchmarkProfile profile, List latencyRows) { + // Package-private and static (uses no instance state) so + // CurrentSpeedBenchmarkPerfGateTest can drive it with synthetic LatencyRow + // values instead of real, non-deterministic measurements. LatencyRow, + // PerformanceGateResult and BenchmarkProfile are package-private for the + // same reason. + static PerformanceGateResult evaluatePerformanceGate(BenchmarkProfile profile, List latencyRows) { if (profile != BenchmarkProfile.SMOKE) { return new PerformanceGateResult(true, "Performance gate skipped for profile " + profile.id()); } @@ -721,7 +726,7 @@ private record ThroughputResult(String scenarioName, long totalBytes) { } - private record LatencyRow(String scenario, + record LatencyRow(String scenario, String description, double avgMillis, double p50Millis, @@ -762,10 +767,10 @@ private record BenchmarkConfig(int warmupIterations, private record SmokeThreshold(double maxAvgMillis, double maxPeakHeapMb) { } - private record PerformanceGateResult(boolean passed, String message) { + record PerformanceGateResult(boolean passed, String message) { } - private enum BenchmarkProfile { + enum BenchmarkProfile { FULL("full", true, Map.of()), SMOKE("smoke", false, Map.of( // Thresholds calibrated against the post-warmup smoke profile diff --git a/benchmarks/src/test/java/com/demcha/compose/CurrentSpeedBenchmarkPerfGateTest.java 
b/benchmarks/src/test/java/com/demcha/compose/CurrentSpeedBenchmarkPerfGateTest.java
new file mode 100644
index 00000000..cae8d91f
--- /dev/null
+++ b/benchmarks/src/test/java/com/demcha/compose/CurrentSpeedBenchmarkPerfGateTest.java
@@ -0,0 +1,179 @@
+package com.demcha.compose;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Unit tests for {@link CurrentSpeedBenchmark#evaluatePerformanceGate} — the
+ * absolute smoke-profile performance gate that actually fails a benchmark run.
+ *
+ * <p>The gate is driven with synthetic {@code LatencyRow} values so the
+ * pass/fail decision is deterministic and independent of real measurement
+ * (which varies by machine and would make these tests flaky). The
+ * {@code engine-simple} smoke thresholds are avg {@code 8.0} ms / peak heap
+ * {@code 96.0} MB.</p>
+ *
+ * <p>The gate is expected to honor per-scenario threshold overrides supplied as
+ * JVM system properties (see {@code honorsSystemPropertyThresholdOverride}),
+ * so each test starts with the {@code engine-simple} overrides removed and any
+ * values the launching JVM supplied are put back afterwards.</p>
+ */
+class CurrentSpeedBenchmarkPerfGateTest {
+
+    private static final String ENGINE_SIMPLE = "engine-simple";
+    private static final String GATE_AVG =
+            "graphcompose.benchmark.gate." + ENGINE_SIMPLE + ".maxAvgMillis";
+    // NOTE(review): the heap override is isolated below but no test in this class sets it.
+    private static final String GATE_HEAP =
+            "graphcompose.benchmark.gate." + ENGINE_SIMPLE + ".maxPeakHeapMb";
+
+    // Override values captured before the current test; null when the property was unset.
+    private String previousGateAvg;
+    private String previousGateHeap;
+
+    /**
+     * Removes any externally supplied overrides (for example {@code -D} flags on
+     * the test JVM) so the default-threshold tests really see the defaults.
+     */
+    @BeforeEach
+    void snapshotGateOverrides() {
+        // System.clearProperty returns the value it removed, or null if there was none.
+        previousGateAvg = System.clearProperty(GATE_AVG);
+        previousGateHeap = System.clearProperty(GATE_HEAP);
+    }
+
+    /**
+     * Drops whatever a test set and reinstates the pre-test values instead of
+     * unconditionally wiping them for the rest of the JVM.
+     */
+    @AfterEach
+    void clearGateOverrides() {
+        restoreProperty(GATE_AVG, previousGateAvg);
+        restoreProperty(GATE_HEAP, previousGateHeap);
+    }
+
+    @Test
+    void passesWhenEveryScenarioIsWithinSmokeThresholds() {
+        CurrentSpeedBenchmark.PerformanceGateResult result =
+                CurrentSpeedBenchmark.evaluatePerformanceGate(
+                        CurrentSpeedBenchmark.BenchmarkProfile.SMOKE,
+                        List.of(latency(ENGINE_SIMPLE, 1.0, 24.0)));
+
+        assertThat(result.passed()).isTrue();
+        assertThat(result.message()).contains("passed");
+    }
+
+    @Test
+    void failsWhenAverageLatencyExceedsThreshold() {
+        CurrentSpeedBenchmark.PerformanceGateResult result =
+                CurrentSpeedBenchmark.evaluatePerformanceGate(
+                        CurrentSpeedBenchmark.BenchmarkProfile.SMOKE,
+                        List.of(latency(ENGINE_SIMPLE, 50.0, 24.0))); // 50 > 8
+
+        assertThat(result.passed()).isFalse();
+        assertThat(result.message()).contains(ENGINE_SIMPLE + " avg");
+    }
+
+    @Test
+    void failsWhenPeakHeapExceedsThreshold() {
+        CurrentSpeedBenchmark.PerformanceGateResult result =
+                CurrentSpeedBenchmark.evaluatePerformanceGate(
+                        CurrentSpeedBenchmark.BenchmarkProfile.SMOKE,
+                        List.of(latency(ENGINE_SIMPLE, 1.0, 999.0))); // 999 > 96
+
+        assertThat(result.passed()).isFalse();
+        assertThat(result.message()).contains("peak heap");
+    }
+
+    @Test
+    void reportsEveryFailingScenarioWhenMultipleBreach() {
+        CurrentSpeedBenchmark.PerformanceGateResult result =
+                CurrentSpeedBenchmark.evaluatePerformanceGate(
+                        CurrentSpeedBenchmark.BenchmarkProfile.SMOKE,
+                        List.of(
+                                latency(ENGINE_SIMPLE, 50.0, 24.0),        // avg breach
+                                latency("cv-template", 1.0, 24.0),         // ok (avg 25 / heap 192)
+                                latency("invoice-template", 1.0, 999.0))); // heap breach (heap 384)
+
+        assertThat(result.passed()).isFalse();
+        assertThat(result.message())
+                .contains(ENGINE_SIMPLE + " avg")
+                .contains("invoice-template peak heap")
+                .doesNotContain("cv-template");
+    }
+
+    @Test
+    void skipsGateForNonSmokeProfiles() {
+        CurrentSpeedBenchmark.PerformanceGateResult result =
+                CurrentSpeedBenchmark.evaluatePerformanceGate(
+                        CurrentSpeedBenchmark.BenchmarkProfile.FULL,
+                        List.of(latency(ENGINE_SIMPLE, 9999.0, 9999.0)));
+
+        assertThat(result.passed()).isTrue();
+        assertThat(result.message()).contains("skipped");
+    }
+
+    @Test
+    void ignoresScenariosWithoutAConfiguredThreshold() {
+        CurrentSpeedBenchmark.PerformanceGateResult result =
+                CurrentSpeedBenchmark.evaluatePerformanceGate(
+                        CurrentSpeedBenchmark.BenchmarkProfile.SMOKE,
+                        List.of(latency("scenario-without-threshold", 9999.0, 9999.0)));
+
+        assertThat(result.passed()).isTrue();
+        assertThat(result.message()).contains("passed");
+    }
+
+    @Test
+    void honorsSystemPropertyThresholdOverride() {
+        // Tighten engine-simple to 2.0 ms: a 5.0 ms row passes the default
+        // 8.0 ms threshold but must fail under the override.
+        System.setProperty(GATE_AVG, "2.0");
+
+        CurrentSpeedBenchmark.PerformanceGateResult result =
+                CurrentSpeedBenchmark.evaluatePerformanceGate(
+                        CurrentSpeedBenchmark.BenchmarkProfile.SMOKE,
+                        List.of(latency(ENGINE_SIMPLE, 5.0, 24.0)));
+
+        assertThat(result.passed()).isFalse();
+        assertThat(result.message()).contains(ENGINE_SIMPLE + " avg");
+    }
+
+    /**
+     * Builds a latency row where only {@code scenario}, {@code avgMillis} and
+     * {@code peakHeapMb} matter to the gate; the rest are filler.
+     */
+    private static CurrentSpeedBenchmark.LatencyRow latency(String scenario, double avgMillis, double peakHeapMb) {
+        return new CurrentSpeedBenchmark.LatencyRow(
+                scenario,
+                "test row",
+                avgMillis,  // avgMillis
+                0.0,        // p50Millis
+                0.0,        // p95Millis
+                0.0,        // maxMillis
+                0.0,        // docsPerSecond
+                0.0,        // avgKilobytes
+                peakHeapMb  // peakHeapMb
+        );
+    }
+
+    /**
+     * Puts a system property back to its pre-test state.
+     *
+     * @param key           property name
+     * @param previousValue value captured before the test, or {@code null} if the
+     *                      property was not set
+     */
+    private static void restoreProperty(String key, String previousValue) {
+        if (previousValue == null) {
+            System.clearProperty(key);
+        } else {
+            System.setProperty(key, previousValue);
+        }
+    }
+}