diff --git a/examples/burn.go b/examples/burn.go new file mode 100644 index 000000000..7d72c5c20 --- /dev/null +++ b/examples/burn.go @@ -0,0 +1,197 @@ +// This program burns as much CPU as it can, while measuring runtime lag. +// +// CPU burn is achieved by computing random collatz trajectories. +// +// Runtime lag is measured by consuming time.Ticker ticks at an expected 100ms interval, +// and then measuring the actual observed elapsed time between ticks. +// Doing so can measure various effects such as OS scheduling or pressure on the go runtime itself. +// +// We then report boxplot stats over a buffer of recent such measurements. +// The buffer is sized so that it can contain samples from at least the last 10 seconds +// under normal rate (10hz from a 100ms ticker). + +package main + +import ( + "log" + "math/rand" + "runtime" + "sort" + "sync" + "time" +) + +func makeLagMonitor(every, keepLast time.Duration) *lagMonitor { + bufferCap := keepLast / every + return &lagMonitor{ + every: every, + buffer: make([]time.Time, bufferCap*2), + } +} + +type lagMonitor struct { + every time.Duration + ticker *time.Ticker + + bufferLock sync.Mutex + buffer []time.Time + cur int + full bool +} + +func (mon *lagMonitor) collect(t0, t1 time.Time) { + mon.buffer[mon.cur] = t0 + mon.cur += 1 + + mon.buffer[mon.cur] = t1 + mon.cur += 1 + + if mon.cur >= len(mon.buffer) { + mon.cur = mon.cur % len(mon.buffer) + mon.full = true + } +} + +func (mon *lagMonitor) monitor() { + ticker := time.NewTicker(mon.every) + defer ticker.Stop() + mon.ticker = ticker + + last := time.Now() + for now := range ticker.C { + mon.collect(last, now) + last = now + } +} + +func (mon *lagMonitor) Start() { + if mon.ticker == nil { + go mon.monitor() + } +} + +func (mon *lagMonitor) Stop() { + if mon.ticker != nil { + mon.ticker.Stop() + } +} + +type lagData struct { + Start time.Time + End time.Time + Actual time.Duration + Expected time.Duration +} + +func (mon *lagMonitor) Data() []lagData { + data 
:= make([]lagData, 0, len(mon.buffer)/2) + max := mon.cur + if mon.full { + max = len(mon.buffer) + } + + for i := 0; i < max; i += 2 { + j := i + if mon.full { + j = (mon.cur + i) % len(mon.buffer) + } + start := mon.buffer[j] + end := mon.buffer[j+1] + actual := end.Sub(start) + data = append(data, lagData{start, end, actual, mon.every}) + } + + return data +} + +func burnForever() { + max := 0 + var last []int + + for true { + + var path []int + n := rand.Intn(1_000_000_000) + for n > 1 { + path = append(path, n) + if n%2 == 0 { + n /= 2 + } else { + n = 3*n + 1 + } + } + + last = path + if m := len(last); m > max { + max = m + } + } +} + +func main() { + + mon := makeLagMonitor( + 100*time.Millisecond, + 10*time.Second, + ) + mon.Start() + defer mon.Stop() + + numProcs := runtime.GOMAXPROCS(0) + for i := 0; i < numProcs; i += 1 { + go burnForever() + } + log.Printf("started %v burners", numProcs) + + threshold := 2 * time.Millisecond + + for range time.Tick(time.Second) { + sample := mon.Data() + sort.Slice(sample, func(i, j int) bool { + return sample[i].Actual < sample[j].Actual + }) + + min := sample[0].Actual + max := sample[len(sample)-1].Actual + if max-min < threshold { + log.Printf("[lag report] min:%v max:%v", min, max) + continue + } + + q25 := sample[len(sample)/4].Actual + q50 := sample[len(sample)/2].Actual + q75 := sample[len(sample)*3/4].Actual + iqr := q75 - q25 + + add := 3 * iqr + if add < threshold { + add = threshold + } + hi := q50 + add + + numOutliers := 0 + for _, s := range sample { + if s.Actual >= hi { + numOutliers += 1 + } + } + + if numOutliers == 0 { + log.Printf("[lag report] min:%v max:%v box:[ %v %v %v ] no outliers within threshold:%v", min, max, q25, q50, q75, threshold) + continue + } + + log.Printf( + "[lag report] min:%v max:%v box:[ %v %v %v ] hi:%v hiOutliers:%v %.1f%%", + min, max, + q25, q50, q75, hi, + numOutliers, + float64(numOutliers)/float64(len(sample))*100, + ) + for _, s := range sample { + if s.Actual >= hi { 
+ log.Printf("%+v", s) + } + } + } +} diff --git a/examples/burn.js b/examples/burn.js new file mode 100644 index 000000000..c6a59edc8 --- /dev/null +++ b/examples/burn.js @@ -0,0 +1,171 @@ +/* This program burns as much CPU time as it can, while measuring runtime lag. + * + * CPU burn is achieved by computing random collatz trajectories. + * + * Runtime lag is measured by setTimeout() for a fixed interval of time (100ms), + * and then measuring actual elapsed time when the timer finishes. + * Doing so can measure various effects such as OS scheduling or event loop saturation / queue backloggin. + * + * We then report boxplot stats over a buffer of recent such timer measurements. + * The buffer is sized so that it can contain samples from at least the last 10 seconds, + * but when running into heavy delay, will cover a wider span of actual time. + */ + +// @ts-check + +/** @param {number} msec */ +function sleep(msec) { + return new Promise(resolve => setTimeout(resolve, msec)); +} + +function schedYield() { + // @ts-ignore + return new Promise(resolve => setImmediate(resolve)); +} + +/** + * @param {object} config + * @param {number} config.every -- how often to sample in msec + * @param {number} config.keepLast -- timespan over which to keep data in msec + */ +function makeLagMonitor(config) { + const { every, keepLast } = config; + + const bufferCap = Math.ceil(keepLast / every); + const buffer = new Int32Array(bufferCap * 2); + let bufferCur = 0; + let bufferFull = false; + + /** + * @param {number} t0 + * @param {number} t1 + */ + function collect(t0, t1) { + buffer[bufferCur++] = t0; + buffer[bufferCur++] = t1; + if (bufferCur >= buffer.length) { + bufferCur = bufferCur % buffer.length; + bufferFull = true; + } + } + + let running = false; + + async function monitor() { + let last = performance.now() + for (running = true; running;) { + await sleep(every); + const now = performance.now() + collect(last, now); + last = now; + } + } + + return { + start() { + 
if (!running) monitor(); + }, + + stop() { + running = false; + }, + + *data() { + const max = bufferFull ? buffer.length : bufferCur; + for (let i = 0; i < max; i += 2) { + let j = bufferFull ? (bufferCur + i) % buffer.length : i; + const start = buffer[j]; + const end = buffer[j + 1]; + const actual = end - start; + yield { start, end, actual, expected: every }; + } + }, + + }; +} + +function* burn() { + // collatz recurrence goes brrr + let n = Math.round(Math.random() * 1_000_000_000); + while (n > 1) { + yield n; + n = n % 2 == 0 ? n / 2 : 3 * n + 1; + } +} + +async function burnForever() { + let max = 0; + /** @type {Array} */ + let last = []; + + for (; ;) { + + // burn at least 2ms of elapsed time here before yielding back + for ( + const start = performance.now(); + performance.now() - start < 2; + ) { + last = Array.from(burn()); + max = Math.max(max, last.length); + } + + await schedYield(); + } +} + +async function main() { + const lagMonitor = makeLagMonitor({ + every: 100, // msec + keepLast: 10_000, // msec + }); + lagMonitor.start(); + + burnForever(); + + // Below we use boxplot stats to report on observed sleep time outliers + // + // See https://www.itl.nist.gov/div898/handbook/prc/section1/prc16.htm for maths refresher + + const threshold = 2; // minimum msec threshold to care about + + for (; ;) { + await sleep(1_000); + + /** @type {Array<{start: number, actual: number, expected: number}>} */ + const sample = []; + for (const { start, actual, expected } of lagMonitor.data()) { + sample.push({ start, actual, expected }); + } + sample.sort(({ actual: a }, { actual: b }) => a - b); + + const min = sample[0].actual; + const max = sample[sample.length - 1].actual; + if (max - min < threshold) { + console.log(`[lag report] min:${min} max:${max}`); + continue; + } + + /** @param {number} q */ + const sampleQuantile = q => sample[Math.floor(q * sample.length)]; + + const q25 = sampleQuantile(0.25).actual; + const q50 = sampleQuantile(0.50).actual; + 
const q75 = sampleQuantile(0.75).actual; + const iqr = q75 - q25; + + const hi = q50 + Math.max(threshold, 3 * iqr); // "extreme" outliers only, mnot just "mile" ones at 1.5* + const hiOutliers = sample.filter(({ actual }) => actual >= hi); + + if (hiOutliers.length == 0) { + console.log(`[lag report] min:${min} max:${max} box:[ ${q25} ${q50} ${q75} ] no hi-outliers within threshold:${threshold}`); + continue; + } + + console.group(`[lag report] min:${min} max:${max} box:[ ${q25} ${q50} ${q75} ] hi:${hi} hiOutliers:${hiOutliers.length} ${Math.round(hiOutliers.length / sample.length * 1000) / 100}%`); + console.table(hiOutliers); + console.groupEnd(); + } + +} + +main(); diff --git a/examples/burn.ts b/examples/burn.ts new file mode 100644 index 000000000..36ca327f4 --- /dev/null +++ b/examples/burn.ts @@ -0,0 +1,130 @@ +#!/usr/bin/env auraescript +/* -------------------------------------------------------------------------- *\ + * Apache 2.0 License Copyright © 2022-2023 The Aurae Authors * + * * + * +--------------------------------------------+ * + * | █████╗ ██╗ ██╗██████╗ █████╗ ███████╗ | * + * | ██╔══██╗██║ ██║██╔══██╗██╔══██╗██╔════╝ | * + * | ███████║██║ ██║██████╔╝███████║█████╗ | * + * | ██╔══██║██║ ██║██╔══██╗██╔══██║██╔══╝ | * + * | ██║ ██║╚██████╔╝██║ ██║██║ ██║███████╗ | * + * | ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝ | * + * +--------------------------------------------+ * + * * + * Distributed Systems Runtime * + * * + * -------------------------------------------------------------------------- * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. 
* + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + * * +\* -------------------------------------------------------------------------- */ + +import * as helpers from "../auraescript/gen/helpers.ts"; +import * as runtime from "../auraescript/gen/cells.ts"; + +// TODO can't sleep, no setTimeout and no equivalent in Deno.core as far as I can tell +// function sleep(n: number) { +// return new Promise(resolve => setTimeout(resolve, n)); +// } + +const cells = new runtime.CellServiceClient(); + +const cellName = "cpu-burn-room"; + +let allocated = await cells.allocate({ + cell: runtime.Cell.fromPartial({ + name: cellName, + cpu: runtime.CpuController.fromPartial({ + weight: 100, // Percentage of CPUs + + // 40% -- 2/5 of one core + max: 400_000, // usec + + // // 200% -- 2 cpu cores + // max: 2_000_000, // usec + + // period: 1_000_000 // usec, hardcoded default + }), + }) +}); +helpers.print('Allocated:', allocated) + +// TODO if we had a fs api and injected script name, this could be based on those... 
+const devRoot = '/home/jcorbin/aurae'; + +const nodeBurn = runtime.Executable.fromPartial({ + command: `/usr/bin/bash -c 'node ${devRoot}/examples/burn.js >${devRoot}/node-burn.out 2>&1'`, + description: "Burn CPU in Node.JS while monitoring runtime lag", + name: "node-burn" +}); + +// // NOTE: build exe with `go build -o go-burn examples/burn.go` +// const goBurn = runtime.Executable.fromPartial({ +// command: `/usr/bin/bash -c 'GOMAXPROCS=4 ${devRoot}/go-burn >${devRoot}/go-burn.out 2>&1'`, +// description: "Burn CPU in Go while monitoring runtime lag", +// name: "go-burn" +// }); + +let started = await cells.start({ + cellName, + executable: nodeBurn, + // executable: goBurn, +}); + +helpers.print('Started:', started); + +// NOTE: to run this, it's best to start a fresh auraed like: +// +// $ sudo -E auraed -v +// +// Then run this auraescript: +// +// $ auraescript examples/burn.ts +// +// Confirm that you've got a successfully started pid above, maybe take a moment to look at it in (h)top. Then run: +// +// $ watch -n 0.1 cat /sys/fs/cgroup/cpu-burn-room/cpu.stat +// +// And confirm that you see nr_throttled going up. +// +// Then take a look at the redirected log file: +// +// $ less node-burn.out +// +// You should see either a constant rate of outlier times (for the node +// program), or an entirely skewed box (75%-ile) value in the case of a heavily +// over-subscribed Go program. +// +// Once done, simply stop the auraed, or its cpu-burn-room cell within it if you prefer. +// +// For the Go version, you'll need to swap commented-out code around above to +// use the goBurn executable, build the executable as noted above, and maybe +// also change the allocated max CPU value to be at least 2 cores. 
Output can then be seen using: +// +// $ less go-burn.out + +// TODO if we had the ability to sleep, we could sit here and poll +// /sys/fs/cgroup/${cellName}/cpu.stat + +// TODO harvest stdout/err log streams; instead we use bash to redirect into a file above for now + +// await sleep(20_000); // let it run for 20s + +// TODO once we can do the above, we can then stop and free +// await cells.stop({ +// cellName, +// executableName: "node-burn", +// }); +// await cells.free({ +// cellName +// });