ordo-one · MahdiBM · Jul 14, 2025 · Jul 14, 2025 · Jul 15, 2025 · Jul 15, 2025
diff --git a/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift b/Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift
@@ -41,8 +41,12 @@ import Glibc
         let quietRunning = argumentExtractor.extractFlag(named: "quiet")
         let noProgress = argumentExtractor.extractFlag(named: "no-progress")
         let checkAbsoluteThresholdsPath = argumentExtractor.extractOption(named: "check-absolute-path")
+        let skipLoadingBenchmarks = argumentExtractor.extractFlag(named: "skip-loading-benchmark-targets")
         let checkAbsoluteThresholds =
             checkAbsoluteThresholdsPath.count > 0 ? 1 : argumentExtractor.extractFlag(named: "check-absolute")
+        let runCount = argumentExtractor.extractOption(named: "run-count")
+        let relative = argumentExtractor.extractFlag(named: "relative")
+        let range = argumentExtractor.extractFlag(named: "range")
         let groupingToUse = argumentExtractor.extractOption(named: "grouping")
         let metricsToUse = argumentExtractor.extractOption(named: "metric")
         let timeUnits = argumentExtractor.extractOption(named: "time-units")
@@ -233,6 +237,8 @@ import Glibc
             throw MyError.invalidArgument
         }
 
+        var totalRunCount = 1
+        var skipLoadingBenchmarksFlagIsValid = skipLoadingBenchmarks == 0
         if commandToPerform == .thresholds {
             guard positionalArguments.count > 0,
                 let thresholdsOperation = ThresholdsOperation(rawValue: positionalArguments.removeFirst())
@@ -262,11 +268,30 @@ import Glibc
                     )
                     throw MyError.invalidArgument
                 }
-                if positionalArguments.count > 0 {
+                let usesExistingBaseline = positionalArguments.count > 0
+                if usesExistingBaseline {
                     shouldBuildTargets = false
                 }
-                break
+                let requestedRunCount = runCount.first.flatMap { Int($0) } ?? 1
+                /// The relative/range flags raise the run count to 5 when fewer than 2 runs were requested
+                /// (including when --run-count is missing or not an integer) and `usesExistingBaseline` is false.
+                /// A single run gives nothing to compare against, so the benchmarks need to be run multiple
+                /// times in order to calculate relative/range thresholds which satisfy all benchmark runs.
+                if relative > 0 {
+                    args.append("--wants-relative-thresholds")
+                    if !usesExistingBaseline {
+                        totalRunCount = requestedRunCount < 2 ? 5 : requestedRunCount
+                    }
+                }
+                if range > 0 {
+                    args.append("--wants-range-thresholds")
+                    if !usesExistingBaseline {
+                        totalRunCount = requestedRunCount < 2 ? 5 : requestedRunCount
+                    }
+                }
             case .check:
+                skipLoadingBenchmarksFlagIsValid = true
+                shouldBuildTargets = skipLoadingBenchmarks == 0
                 let validRange = 0...1
                 guard validRange.contains(positionalArguments.count) else {
                     print(
@@ -281,6 +306,19 @@ import Glibc
             }
         }
 
+        if !skipLoadingBenchmarksFlagIsValid {
+            print("")
+            print(
+                "Flag --skip-loading-benchmark-targets is only valid for 'thresholds check' operations."
+            )
+            print("")
+            print(help)
+            print("")
+            print("Please visit https://github.com/ordo-one/package-benchmark for more in-depth documentation")
+            print("")
+            throw MyError.invalidArgument
+        }
+
         if commandToPerform == .baseline {
             guard positionalArguments.count > 0,
                 let baselineOperation = BaselineOperation(rawValue: positionalArguments.removeFirst())
@@ -463,53 +501,108 @@ import Glibc
 
         var failedBenchmarkCount = 0
 
-        try withCStrings(args) { cArgs in
-            if debug > 0 {
-                print("To debug, start \(benchmarkToolName) in LLDB using:")
-                print("lldb \(benchmarkTool.string)")
-                print("")
-                print("Then launch \(benchmarkToolName) with:")
-                print("run \(args.dropFirst().joined(separator: " "))")
-                print("")
-                return
-            }
+        var allFailureCount = 0
+        let results: [Result<Void, Error>] = (0..<max(totalRunCount, 1))
+            .map { runIdx in
+                // If we're running multiple times, we need to add the run count to the arguments
+                var args = args
+                if totalRunCount > 1 {
+                    args += ["--run-number", "\(runIdx + 1)"]
+                    if quietRunning == 0 {
+                        print(
+                            """
 
-            var pid: pid_t = 0
-            var status = posix_spawn(&pid, benchmarkTool.string, nil, nil, cArgs, environ)
-
-            if status == 0 {
-                if waitpid(pid, &status, 0) != -1 {
-                    // Ok, this sucks, but there is no way to get a C support target for plugins and
-                    // the way the status is extracted portably is with macros - so we just need to
-                    // reimplement the logic here in Swift according to the waitpid man page to
-                    // get some nicer feedback on failure reason.
-                    guard let waitStatus = ExitCode(rawValue: (status & 0xFF00) >> 8) else {
-                        print("One or more benchmarks returned an unexpected return code \(status)")
-                        throw MyError.benchmarkUnexpectedReturnCode
+                            Running the command multiple times, round \(runIdx + 1) of \(totalRunCount)...
+                            """
+                        )
                     }
-                    switch waitStatus {
-                    case .success:
-                        break
-                    case .baselineNotFound:
-                        throw MyError.baselineNotFound
-                    case .genericFailure:
-                        print("One or more benchmark suites crashed during runtime.")
-                        throw MyError.benchmarkCrashed
-                    case .thresholdRegression:
-                        throw MyError.benchmarkThresholdRegression
-                    case .thresholdImprovement:
-                        throw MyError.benchmarkThresholdImprovement
-                    case .benchmarkJobFailed:
-                        failedBenchmarkCount += 1
-                    case .noPermissions:
-                        throw MyError.noPermissions
+                }
+
+                return Result<Void, Error> {
+                    try withCStrings(args) { cArgs in
+                        /// We'll decrement this in the success path
+                        allFailureCount += 1
+
+                        if debug > 0 {
+                            print("To debug, start \(benchmarkToolName) in LLDB using:")
+                            print("lldb \(benchmarkTool.string)")
+                            print("")
+                            print("Then launch \(benchmarkToolName) with:")
+                            print("run \(args.dropFirst().joined(separator: " "))")
+                            print("")
+                            return
+                        }
+
+                        var pid: pid_t = 0
+                        var status = posix_spawn(&pid, benchmarkTool.string, nil, nil, cArgs, environ)
+
+                        if status == 0 {
+                            if waitpid(pid, &status, 0) != -1 {
+                                // Ok, this sucks, but there is no way to get a C support target for plugins and
+                                // the way the status is extracted portably is with macros - so we just need to
+                                // reimplement the logic here in Swift according to the waitpid man page to
+                                // get some nicer feedback on failure reason.
+                                guard let waitStatus = ExitCode(rawValue: (status & 0xFF00) >> 8) else {
+                                    print("One or more benchmarks returned an unexpected return code \(status)")
+                                    throw MyError.benchmarkUnexpectedReturnCode
+                                }
+                                switch waitStatus {
+                                case .success:
+                                    allFailureCount -= 1
+                                case .baselineNotFound:
+                                    throw MyError.baselineNotFound
+                                case .genericFailure:
+                                    print("One or more benchmark suites crashed during runtime.")
+                                    throw MyError.benchmarkCrashed
+                                case .thresholdRegression:
+                                    throw MyError.benchmarkThresholdRegression
+                                case .thresholdImprovement:
+                                    throw MyError.benchmarkThresholdImprovement
+                                case .benchmarkJobFailed:
+                                    failedBenchmarkCount += 1
+                                case .noPermissions:
+                                    throw MyError.noPermissions
+                                }
+                            } else {
+                                print(
+                                    "waitpid() for pid \(pid) returned a non-zero exit code \(status), errno = \(errno)"
+                                )
+                                exit(errno)
+                            }
+                        } else {
+                            print("Failed to run BenchmarkTool, posix_spawn() returned [\(status)]")
+                        }
                     }
-                } else {
-                    print("waitpid() for pid \(pid) returned a non-zero exit code \(status), errno = \(errno)")
-                    exit(errno)
                 }
-            } else {
-                print("Failed to run BenchmarkTool, posix_spawn() returned [\(status)]")
+            }
+
+        switch results.count {
+        case ...0:
+            throw MyError.unknownFailure
+        case 1:
+            try results[0].get()
+        default:
+            if allFailureCount > 0 {
+                print(
+                    """
+                    Ran BenchmarkTool \(results.count) times, but it failed \(allFailureCount) times.
+                    Will exit with the first failure.
+
+                    """
+                )
+                guard
+                    let failure = results.first(where: { result in
+                        switch result {
+                        case .failure:
+                            return true
+                        case .success:
+                            return false
+                        }
+                    })
+                else {
+                    throw MyError.unknownFailure
+                }
+                try failure.get()
             }
         }
 
@@ -529,5 +622,6 @@ import Glibc
         case noPermissions = 6
         case invalidArgument = 101
         case buildFailed = 102
+        case unknownFailure = 103
     }
 }
diff --git a/Plugins/BenchmarkHelpGenerator/BenchmarkHelpGenerator.swift b/Plugins/BenchmarkHelpGenerator/BenchmarkHelpGenerator.swift
@@ -153,6 +153,42 @@ struct Benchmark: AsyncParsableCommand {
     )
     var checkAbsolute = false
 
+    @Flag(
+        name: .customLong("skip-loading-benchmark-targets"),  // Must match the flag name the command plugin extracts.
+        help: """
+            Specifies that thresholds check command should skip loading benchmark targets.
+            Use this flag to skip unnecessary building of benchmark targets and loading of benchmark results, to save time.
+            This flag is especially useful when combined with static threshold files that contain the newly supported relative or range thresholds.
+            With such a setup, you'll save the time needed to build the benchmark targets and the thresholds check operation
+            will only read the threshold tolerance values from the static files.
+            """
+    )
+    var skipLoadingBenchmarks = false
+
+    @Option(
+        name: .long,
+        help: """
+            The number of times to run each benchmark in thresholds update operation.
+            This is only valid when --relative or --range is also specified.
+            When combined with --relative or --range flags, this option will run the benchmarks multiple times to calculate
+            relative or range thresholds, and each time it'll widen the threshold tolerances according to the new result.
+            Defaults to 5 when --relative or --range is specified; values below 2 are also treated as 5.
+            """
+    )
+    var runCount: Int?  // nil when --run-count is not passed.
+
+    @Flag(
+        name: .long,
+        help: "Specifies that thresholds update command should output relative thresholds to the static files."
+    )
+    var relative = false  // Off by default; `.long` exposes this as --relative.
+
+    @Flag(
+        name: .long,
+        help: "Specifies that thresholds update command should output min-max range thresholds to the static files."
+    )
+    var range = false  // Off by default; `.long` exposes this as --range.
+
     @Option(
         name: .long,
         help:

diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift b/Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift
@@ -449,8 +449,7 @@ extension BenchmarkBaseline: Equatable {
                             benchmarks,
                             name: lhsBenchmarkIdentifier.name,
                             target: lhsBenchmarkIdentifier.target,
-                            metric: lhsBenchmarkResult.metric,
-                            defaultThresholds: lhsBenchmarkResult.thresholds ?? BenchmarkThresholds.default
+                            metric: lhsBenchmarkResult.metric
                         )
 
                         let deviationResults = lhsBenchmarkResult.deviationsComparedWith(
@@ -483,7 +482,7 @@ extension BenchmarkBaseline: Equatable {
     public func failsAbsoluteThresholdChecks(
         benchmarks: [Benchmark],
         p90Thresholds: [BenchmarkIdentifier:
-            [BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold]]
+            [BenchmarkMetric: BenchmarkThreshold]]
     ) -> BenchmarkResult.ThresholdDeviations {
         var allDeviationResults = BenchmarkResult.ThresholdDeviations()
 

diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Export+InfluxCSVFormatter.swift b/Plugins/BenchmarkTool/BenchmarkTool+Export+InfluxCSVFormatter.swift
@@ -54,7 +54,8 @@ class InfluxCSVFormatter {
         let memory = machine.memory
 
         if header {
-            let dataTypeHeader = "#datatype tag,tag,tag,tag,tag,tag,tag,tag,tag,double,double,double,long,long,dateTime\n"
+            let dataTypeHeader =
+                "#datatype tag,tag,tag,tag,tag,tag,tag,tag,tag,double,double,double,long,long,dateTime\n"
             finalFileFormat.append(dataTypeHeader)
             let headers =
                 "measurement,hostName,processoryType,processors,memory,kernelVersion,metric,unit,test,percentile,value,test_average,iterations,warmup_iterations,time\n"

diff --git a/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift b/Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift
@@ -34,23 +34,23 @@ extension JMHPrimaryMetric {
         let factor = result.metric.countable == false ? 1_000 : 1
 
         for p in percentiles {
-            percentileValues[String(p)] = Statistics.roundToDecimalplaces(
+            percentileValues[String(p)] = Statistics.roundToDecimalPlaces(
                 Double(histogram.valueAtPercentile(p)) / Double(factor),
                 3
             )
         }
 
         for value in histogram.recordedValues() {
             for _ in 0..<value.count {
-                recordedValues.append(Statistics.roundToDecimalplaces(Double(value.value) / Double(factor), 3))
+                recordedValues.append(Statistics.roundToDecimalPlaces(Double(value.value) / Double(factor), 3))
             }
         }
 
-        self.score = Statistics.roundToDecimalplaces(score / Double(factor), 3)
-        scoreError = Statistics.roundToDecimalplaces(error / Double(factor), 3)
+        self.score = Statistics.roundToDecimalPlaces(score / Double(factor), 3)
+        scoreError = Statistics.roundToDecimalPlaces(error / Double(factor), 3)
         scoreConfidence = [
-            Statistics.roundToDecimalplaces(score - error) / Double(factor),
-            Statistics.roundToDecimalplaces(score + error) / Double(factor),
+            Statistics.roundToDecimalPlaces(score - error) / Double(factor),
+            Statistics.roundToDecimalPlaces(score + error) / Double(factor),
         ]
         scorePercentiles = percentileValues
         if result.metric.countable {