Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 139 additions & 45 deletions Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,12 @@ import Glibc
let quietRunning = argumentExtractor.extractFlag(named: "quiet")
let noProgress = argumentExtractor.extractFlag(named: "no-progress")
let checkAbsoluteThresholdsPath = argumentExtractor.extractOption(named: "check-absolute-path")
let skipLoadingBenchmarks = argumentExtractor.extractFlag(named: "skip-loading-benchmark-targets")
let checkAbsoluteThresholds =
checkAbsoluteThresholdsPath.count > 0 ? 1 : argumentExtractor.extractFlag(named: "check-absolute")
let runCount = argumentExtractor.extractOption(named: "run-count")
let relative = argumentExtractor.extractFlag(named: "relative")
let range = argumentExtractor.extractFlag(named: "range")
let groupingToUse = argumentExtractor.extractOption(named: "grouping")
let metricsToUse = argumentExtractor.extractOption(named: "metric")
let timeUnits = argumentExtractor.extractOption(named: "time-units")
Expand Down Expand Up @@ -233,6 +237,8 @@ import Glibc
throw MyError.invalidArgument
}

var totalRunCount = 1
var skipLoadingBenchmarksFlagIsValid = skipLoadingBenchmarks == 0
if commandToPerform == .thresholds {
guard positionalArguments.count > 0,
let thresholdsOperation = ThresholdsOperation(rawValue: positionalArguments.removeFirst())
Expand Down Expand Up @@ -262,11 +268,30 @@ import Glibc
)
throw MyError.invalidArgument
}
if positionalArguments.count > 0 {
let usesExistingBaseline = positionalArguments.count > 0
if usesExistingBaseline {
shouldBuildTargets = false
}
break
let requestedRunCount = runCount.first.flatMap { Int($0) } ?? 1
/// These update the run count to 5 by default if it's set to 1.
/// Using relative/range flags doesn't mean anything if we're not running multiple times.
/// The benchmarks will need to be run multiple times in order to be able to calculate a
/// relative/range of thresholds which satisfy all benchmark runs.
if relative > 0 {
args.append("--wants-relative-thresholds")
if !usesExistingBaseline {
totalRunCount = requestedRunCount < 2 ? 5 : requestedRunCount
}
}
if range > 0 {
args.append("--wants-range-thresholds")
if !usesExistingBaseline {
totalRunCount = requestedRunCount < 2 ? 5 : requestedRunCount
}
}
case .check:
skipLoadingBenchmarksFlagIsValid = true
shouldBuildTargets = skipLoadingBenchmarks == 0
let validRange = 0...1
guard validRange.contains(positionalArguments.count) else {
print(
Expand All @@ -281,6 +306,19 @@ import Glibc
}
}

if !skipLoadingBenchmarksFlagIsValid {
print("")
print(
"Flag --skip-loading-benchmark-targets is only valid for 'thresholds check' operations."
)
print("")
print(help)
print("")
print("Please visit https://github.com/ordo-one/package-benchmark for more in-depth documentation")
print("")
throw MyError.invalidArgument
}

if commandToPerform == .baseline {
guard positionalArguments.count > 0,
let baselineOperation = BaselineOperation(rawValue: positionalArguments.removeFirst())
Expand Down Expand Up @@ -463,53 +501,108 @@ import Glibc

var failedBenchmarkCount = 0

try withCStrings(args) { cArgs in
if debug > 0 {
print("To debug, start \(benchmarkToolName) in LLDB using:")
print("lldb \(benchmarkTool.string)")
print("")
print("Then launch \(benchmarkToolName) with:")
print("run \(args.dropFirst().joined(separator: " "))")
print("")
return
}
var allFailureCount = 0
let results: [Result<Void, Error>] = (0..<max(totalRunCount, 1))
.map { runIdx in
// If we're running multiple times, we need to add the run count to the arguments
var args = args
if totalRunCount > 1 {
args += ["--run-number", "\(runIdx + 1)"]
if quietRunning == 0 {
print(
"""

var pid: pid_t = 0
var status = posix_spawn(&pid, benchmarkTool.string, nil, nil, cArgs, environ)

if status == 0 {
if waitpid(pid, &status, 0) != -1 {
// Ok, this sucks, but there is no way to get a C support target for plugins and
// the way the status is extracted portably is with macros - so we just need to
// reimplement the logic here in Swift according to the waitpid man page to
// get some nicer feedback on failure reason.
guard let waitStatus = ExitCode(rawValue: (status & 0xFF00) >> 8) else {
print("One or more benchmarks returned an unexpected return code \(status)")
throw MyError.benchmarkUnexpectedReturnCode
Running the command multiple times, round \(runIdx + 1) of \(totalRunCount)...
"""
)
}
switch waitStatus {
case .success:
break
case .baselineNotFound:
throw MyError.baselineNotFound
case .genericFailure:
print("One or more benchmark suites crashed during runtime.")
throw MyError.benchmarkCrashed
case .thresholdRegression:
throw MyError.benchmarkThresholdRegression
case .thresholdImprovement:
throw MyError.benchmarkThresholdImprovement
case .benchmarkJobFailed:
failedBenchmarkCount += 1
case .noPermissions:
throw MyError.noPermissions
}

return Result<Void, Error> {
try withCStrings(args) { cArgs in
/// We'll decrement this in the success path
allFailureCount += 1

if debug > 0 {
print("To debug, start \(benchmarkToolName) in LLDB using:")
print("lldb \(benchmarkTool.string)")
print("")
print("Then launch \(benchmarkToolName) with:")
print("run \(args.dropFirst().joined(separator: " "))")
print("")
return
}

var pid: pid_t = 0
var status = posix_spawn(&pid, benchmarkTool.string, nil, nil, cArgs, environ)

if status == 0 {
if waitpid(pid, &status, 0) != -1 {
// Ok, this sucks, but there is no way to get a C support target for plugins and
// the way the status is extracted portably is with macros - so we just need to
// reimplement the logic here in Swift according to the waitpid man page to
// get some nicer feedback on failure reason.
guard let waitStatus = ExitCode(rawValue: (status & 0xFF00) >> 8) else {
print("One or more benchmarks returned an unexpected return code \(status)")
throw MyError.benchmarkUnexpectedReturnCode
}
switch waitStatus {
case .success:
allFailureCount -= 1
case .baselineNotFound:
throw MyError.baselineNotFound
case .genericFailure:
print("One or more benchmark suites crashed during runtime.")
throw MyError.benchmarkCrashed
case .thresholdRegression:
throw MyError.benchmarkThresholdRegression
case .thresholdImprovement:
throw MyError.benchmarkThresholdImprovement
case .benchmarkJobFailed:
failedBenchmarkCount += 1
case .noPermissions:
throw MyError.noPermissions
}
} else {
print(
"waitpid() for pid \(pid) returned a non-zero exit code \(status), errno = \(errno)"
)
exit(errno)
}
} else {
print("Failed to run BenchmarkTool, posix_spawn() returned [\(status)]")
}
}
} else {
print("waitpid() for pid \(pid) returned a non-zero exit code \(status), errno = \(errno)")
exit(errno)
}
} else {
print("Failed to run BenchmarkTool, posix_spawn() returned [\(status)]")
}

switch results.count {
case ...0:
throw MyError.unknownFailure
case 1:
try results[0].get()
default:
if allFailureCount > 0 {
print(
"""
Ran BenchmarkTool \(results.count) times, but it failed \(allFailureCount) times.
Will exit with the first failure.

"""
)
guard
let failure = results.first(where: { result in
switch result {
case .failure:
return true
case .success:
return false
}
})
else {
throw MyError.unknownFailure
}
try failure.get()
}
}

Expand All @@ -529,5 +622,6 @@ import Glibc
case noPermissions = 6
case invalidArgument = 101
case buildFailed = 102
case unknownFailure = 103
}
}
36 changes: 36 additions & 0 deletions Plugins/BenchmarkHelpGenerator/BenchmarkHelpGenerator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,42 @@ struct Benchmark: AsyncParsableCommand {
)
var checkAbsolute = false

@Flag(
name: .long,
help: """
Specifies that thresholds check command should skip loading benchmark targets.
Use this flag to skip unnecessary building of benchmark targets and loading of benchmark results, to save time.
This flag is specially useful when combined with static threshold files that contain the newly supported relative or range thresholds.
With such a set up, you'll save the time needed to build the benchmark targets and the thresholds check operation
will only read the threshold tolerance values from the static files.
"""
)
var skipLoadingBenchmarks = false

@Option(
name: .long,
help: """
The number of times to run each benchmark in thresholds update operation.
This is only valid when --relative or --range are also specified.
When combined with --relative or --range flags, this option will run the benchmarks multiple times to calculate
relative or range thresholds, and each time it'll widen the threshold tolerances according to the new result.
Defaults to 1.
"""
)
var runCount: Int?

@Flag(
name: .long,
help: "Specifies that thresholds update command should output relative thresholds to the static files."
)
var relative = false

@Flag(
name: .long,
help: "Specifies that thresholds update command should output min-max range thresholds to the static files."
)
var range = false

@Option(
name: .long,
help:
Expand Down
5 changes: 2 additions & 3 deletions Plugins/BenchmarkTool/BenchmarkTool+Baselines.swift
Original file line number Diff line number Diff line change
Expand Up @@ -449,8 +449,7 @@ extension BenchmarkBaseline: Equatable {
benchmarks,
name: lhsBenchmarkIdentifier.name,
target: lhsBenchmarkIdentifier.target,
metric: lhsBenchmarkResult.metric,
defaultThresholds: lhsBenchmarkResult.thresholds ?? BenchmarkThresholds.default
metric: lhsBenchmarkResult.metric
)

let deviationResults = lhsBenchmarkResult.deviationsComparedWith(
Expand Down Expand Up @@ -483,7 +482,7 @@ extension BenchmarkBaseline: Equatable {
public func failsAbsoluteThresholdChecks(
benchmarks: [Benchmark],
p90Thresholds: [BenchmarkIdentifier:
[BenchmarkMetric: BenchmarkThresholds.AbsoluteThreshold]]
[BenchmarkMetric: BenchmarkThreshold]]
) -> BenchmarkResult.ThresholdDeviations {
var allDeviationResults = BenchmarkResult.ThresholdDeviations()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ class InfluxCSVFormatter {
let memory = machine.memory

if header {
let dataTypeHeader = "#datatype tag,tag,tag,tag,tag,tag,tag,tag,tag,double,double,double,long,long,dateTime\n"
let dataTypeHeader =
"#datatype tag,tag,tag,tag,tag,tag,tag,tag,tag,double,double,double,long,long,dateTime\n"
finalFileFormat.append(dataTypeHeader)
let headers =
"measurement,hostName,processoryType,processors,memory,kernelVersion,metric,unit,test,percentile,value,test_average,iterations,warmup_iterations,time\n"
Expand Down
12 changes: 6 additions & 6 deletions Plugins/BenchmarkTool/BenchmarkTool+Export+JMHFormatter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,23 @@ extension JMHPrimaryMetric {
let factor = result.metric.countable == false ? 1_000 : 1

for p in percentiles {
percentileValues[String(p)] = Statistics.roundToDecimalplaces(
percentileValues[String(p)] = Statistics.roundToDecimalPlaces(
Double(histogram.valueAtPercentile(p)) / Double(factor),
3
)
}

for value in histogram.recordedValues() {
for _ in 0..<value.count {
recordedValues.append(Statistics.roundToDecimalplaces(Double(value.value) / Double(factor), 3))
recordedValues.append(Statistics.roundToDecimalPlaces(Double(value.value) / Double(factor), 3))
}
}

self.score = Statistics.roundToDecimalplaces(score / Double(factor), 3)
scoreError = Statistics.roundToDecimalplaces(error / Double(factor), 3)
self.score = Statistics.roundToDecimalPlaces(score / Double(factor), 3)
scoreError = Statistics.roundToDecimalPlaces(error / Double(factor), 3)
scoreConfidence = [
Statistics.roundToDecimalplaces(score - error) / Double(factor),
Statistics.roundToDecimalplaces(score + error) / Double(factor),
Statistics.roundToDecimalPlaces(score - error) / Double(factor),
Statistics.roundToDecimalPlaces(score + error) / Double(factor),
]
scorePercentiles = percentileValues
if result.metric.countable {
Expand Down
Loading
Loading