diff --git a/catalog/redis-bench/README.md b/catalog/redis-bench/README.md new file mode 100644 index 0000000..86e0bb2 --- /dev/null +++ b/catalog/redis-bench/README.md @@ -0,0 +1,164 @@ +# redis-bench — Redis Server/Client Benchmark Workload + +Multi-VM database benchmarking workload with a persistent Redis server and +`redis-benchmark` client(s). The server stores data on a 10Gi persistent volume +with AOF durability; clients generate configurable load and report throughput and +latency percentiles. + +## Architecture + +``` + ┌──────────────────────────────┐ + │ K8s Service │ + │ virtwork-redis-bench:6379 │ + └──────────┬───────────────────┘ + │ + ┌───────▼────────┐ + │ Server (1 VM) │ + │ redis-server │ + │ :6379 │ + │ /var/lib/redis │◄── 10Gi persistent volume + └───────▲────────┘ + │ DNS + ┌─────────────┼─────────────┐ + │ │ │ + ┌────┴───┐ ┌────┴───┐ ┌────┴───┐ + │Client 0│ │Client 1│ │Client N│ + │ bench │ │ bench │ │ bench │ + └────────┘ └────────┘ └────────┘ + (scale with --vm-count) +``` + +## Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `num-clients` | int | `10` | Concurrent clients per benchmark run (`-c`) | +| `num-requests` | int | `100000` | Total requests per iteration (`-n`) | +| `pipeline-size` | int | `1` | Pipelining factor (`-P`); `1` = no pipelining | +| `test-mode` | string | `set,get` | Redis operations to benchmark (`-t`) | +| `data-size` | int | `256` | Value size in bytes (`-d`) | +| `loop-delay` | int | `30` | Seconds between benchmark iterations | +| `maxmemory` | string | `256mb` | Redis server memory limit (`--maxmemory`) | + +### Parameter details + +**num-clients** — Number of parallel connections per `redis-benchmark` invocation. +Higher values increase throughput but also increase latency. Typical range: 1–100. + +**num-requests** — Total operations per iteration. More requests produce more stable +latency percentiles. Use 100000+ for reliable p99 numbers. 
+ +**pipeline-size** — Batches multiple requests per round-trip. Setting this to 16+ +dramatically increases throughput by reducing network overhead. Start at 1 for +latency-focused testing. + +**test-mode** — Comma-separated list of Redis commands to benchmark. Options include +`set`, `get`, `incr`, `lpush`, `lpop`, `sadd`, `spop`, `hset`, `hget`, `zadd`, +`zrange`. Common combinations: + +- `set,get` — key-value read/write (default, most representative) +- `get` — read-only (cache simulation) +- `set` — write-only (ingest simulation) +- `lpush,lpop` — list operations (queue simulation) + +**data-size** — Value payload in bytes for SET/GET. Larger values stress network I/O +and memory. Typical range: 64–4096. + +**loop-delay** — Pause between benchmark runs. Allows distinct measurement windows for +metrics scrapers. + +**maxmemory** — Server-side memory cap. When reached, Redis evicts keys using the +`allkeys-lru` policy. Set relative to VM memory allocation. + +## Storage + +| Field | Value | +|-------|-------| +| Volume name | `redis-data` | +| Size | `10Gi` | +| Serial | `vw-redis` | +| Device path | `/dev/disk/by-id/virtio-vw-redis` | +| Mount point | `/var/lib/redis` | +| Filesystem | XFS (auto-formatted) | + +The disk setup script (injected automatically by the catalog system) waits for the +device, formats it with XFS if needed, mounts it, and adds an fstab entry. Redis +AOF files are written to this volume for durability. 
+ +## Usage + +```bash +# Default: 10 clients, SET+GET, 100K requests per iteration +virtwork run --from-catalog redis-bench --catalog-dir ./catalog --dry-run + +# High-throughput: 32 clients, pipelining, 1M requests +virtwork run --from-catalog redis-bench --catalog-dir ./catalog \ + --params redis-bench.num-clients=32,redis-bench.pipeline-size=16,redis-bench.num-requests=1000000 + +# Read-heavy: GET-only, 64 clients, large values +virtwork run --from-catalog redis-bench --catalog-dir ./catalog \ + --params redis-bench.test-mode=get,redis-bench.num-clients=64,redis-bench.data-size=4096 + +# Write-heavy: SET-only, small values +virtwork run --from-catalog redis-bench --catalog-dir ./catalog \ + --params redis-bench.test-mode=set,redis-bench.num-clients=50,redis-bench.data-size=64 + +# Queue workload: list operations +virtwork run --from-catalog redis-bench --catalog-dir ./catalog \ + --params redis-bench.test-mode=lpush,redis-bench.num-clients=20 + +# Scale to 3 client VMs +virtwork run --from-catalog redis-bench --catalog-dir ./catalog \ + --vm-count 3 --params redis-bench.num-clients=16,redis-bench.pipeline-size=8 +``` + +## Monitoring + +### Server VM + +```bash +# Check Redis is running +systemctl status server.service +journalctl -u server.service -f + +# Verify storage mount +df -h /var/lib/redis +ls -la /dev/disk/by-id/ | grep vw-redis + +# Redis stats +redis-cli info server +redis-cli info memory +redis-cli info stats +``` + +### Client VM + +```bash +# Check benchmark is running +systemctl status client.service +journalctl -u client.service -f +``` + +### Key metrics + +- **Throughput** (requests/sec) — typical range: 10K–100K ops/sec +- **Latency** (avg/p50/p95/p99 in ms) — sub-millisecond for in-cluster traffic +- **Memory usage** — server memory grows with data-size and working set +- **Disk I/O** — AOF writes visible in storage metrics + +## Troubleshooting + +**Server fails to start** — Check that the storage volume mounted correctly. 
+Inspect `/var/log/cloud-init-output.log` for disk setup errors. Verify the device
+exists: `ls /dev/disk/by-id/virtio-vw-redis`.
+
+**Client can't connect** — Verify the K8s Service exists (`oc get svc | grep redis`)
+and DNS resolves (`nslookup virtwork-redis-bench` from the client VM). Check that
+Redis is listening: `redis-cli -h virtwork-redis-bench ping`.
+
+**Low throughput** — Increase `pipeline-size` to reduce round-trip overhead. Check
+for network contention with `oc top`. Ensure the server VM has enough CPU cores.
+
+**Redis OOM / evictions** — Increase `maxmemory` or reduce `data-size`. Monitor with
+`redis-cli info memory`. The `allkeys-lru` eviction policy is configured by default.
diff --git a/catalog/redis-bench/client.service b/catalog/redis-bench/client.service
new file mode 100644
index 0000000..f5e5ec1
--- /dev/null
+++ b/catalog/redis-bench/client.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Virtwork Redis benchmark client
+After=network-online.target
+
+[Service]
+Type=simple
+# Sleep after every run, including failed ones, so that an unreachable server
+# does not turn this loop into a busy retry that never pauses.
+ExecStart=/bin/bash -c 'while true; do redis-benchmark -h virtwork-redis-bench -p 6379 -c {{num-clients}} -n {{num-requests}} -d {{data-size}} -P {{pipeline-size}} -t {{test-mode}}; sleep {{loop-delay}}; done'
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target
diff --git a/catalog/redis-bench/server.service b/catalog/redis-bench/server.service
new file mode 100644
index 0000000..b58fde1
--- /dev/null
+++ b/catalog/redis-bench/server.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Virtwork Redis server with persistent storage
+After=network-online.target local-fs.target
+Requires=local-fs.target
+
+[Service]
+Type=simple
+ExecStartPre=/bin/bash -c 'mkdir -p /var/lib/redis && chown redis:redis /var/lib/redis'
+ExecStart=/usr/bin/redis-server --bind 0.0.0.0 --port 6379 --dir /var/lib/redis --appendonly yes --appendfilename appendonly.aof --maxmemory {{maxmemory}} --maxmemory-policy allkeys-lru
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target
diff 
--git a/catalog/redis-bench/workload.yaml b/catalog/redis-bench/workload.yaml new file mode 100644 index 0000000..eb660da --- /dev/null +++ b/catalog/redis-bench/workload.yaml @@ -0,0 +1,53 @@ +description: "Redis server/client benchmark with persistent storage — multi-VM workload" + +packages: + - redis + +params: + - key: num-clients + type: int + default: "10" + desc: "Number of concurrent redis-benchmark clients (-c)" + - key: num-requests + type: int + default: "100000" + desc: "Total requests per benchmark iteration (-n)" + - key: pipeline-size + type: int + default: "1" + desc: "Pipelining factor — requests per batch (-P)" + - key: test-mode + type: string + default: "set,get" + desc: "Benchmark commands to run (-t), e.g. set,get,incr,lpush" + - key: data-size + type: int + default: "256" + desc: "Value size in bytes (-d)" + - key: loop-delay + type: int + default: "30" + desc: "Delay in seconds between benchmark iterations" + - key: maxmemory + type: string + default: "256mb" + desc: "Redis server maxmemory limit (e.g. 128mb, 1gb)" + +roles: + - name: server + vm-count: 1 + - name: client + vm-count: 1 + +storage: + - name: redis-data + size: 10Gi + serial: vw-redis + mount: /var/lib/redis + +service: + ports: + - name: redis + port: 6379 + protocol: TCP + selector-role: server diff --git a/catalog/sysbench/README.md b/catalog/sysbench/README.md new file mode 100644 index 0000000..93484eb --- /dev/null +++ b/catalog/sysbench/README.md @@ -0,0 +1,79 @@ +# sysbench — CPU Benchmarking Workload + +Single-VM continuous CPU stress test using [sysbench](https://github.com/akopytov/sysbench). +Runs repeated benchmark iterations with configurable threads, CPU methods, and timing to +generate sustained CPU load and throughput metrics. 
+ +## Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `threads` | int | `4` | Number of worker threads (`--threads`) | +| `cpu-method` | string | `sum` | CPU stressor method: `sum`, `sqrt`, or `pi` | +| `test-duration` | int | `60` | Seconds per test iteration (`--time`) | +| `loop-delay` | int | `10` | Seconds to pause between iterations | + +### Parameter details + +**threads** — Each thread performs independent CPU operations. Set this relative to VM +CPU cores; values higher than the core count are valid and test contention behavior. + +**cpu-method** — Controls which mathematical operation each thread performs: + +- `sum` — integer addition (fastest, lowest per-op latency) +- `sqrt` — square root calculations (moderate load) +- `pi` — pi calculation via series (highest complexity per operation) + +**test-duration** — Duration of each benchmark pass. Use `0` for infinite (runs until +stopped). Longer durations produce more stable throughput numbers. + +**loop-delay** — Pause between iterations. Allows metrics scraping systems to observe +distinct test windows. Set to `0` for back-to-back runs. 
+ +## Usage + +```bash +# Default: 4 threads, sum method, 60s tests, 10s delay +virtwork run --from-catalog sysbench --catalog-dir ./catalog --dry-run + +# High-intensity: 8 threads, pi method, 5-minute tests +virtwork run --from-catalog sysbench --catalog-dir ./catalog \ + --params sysbench.threads=8,sysbench.cpu-method=pi,sysbench.test-duration=300 + +# Rapid iteration: 2 threads, 10s tests, 5s delay +virtwork run --from-catalog sysbench --catalog-dir ./catalog \ + --params sysbench.threads=2,sysbench.test-duration=10,sysbench.loop-delay=5 + +# Cluster-wide stress: 4 VMs +virtwork run --from-catalog sysbench --catalog-dir ./catalog \ + --vm-count 4 --params sysbench.threads=4 +``` + +## What it measures + +- **Throughput** (events/sec) — CPU operations completed per second +- **Latency** (avg/p95/p99 in ms) — per-event timing distribution +- **CPU utilization** — should approach 100% per thread under sustained load + +## Monitoring + +SSH into the VM and inspect the service: + +```bash +systemctl status workload.service +journalctl -u workload.service -f +``` + +sysbench prints a summary after each iteration with events/sec, latency +percentiles, and total events processed. + +## Troubleshooting + +**Service fails immediately** — sysbench package not installed. Check +`/var/log/cloud-init-output.log` for dnf errors. + +**Lower-than-expected throughput** — VM may be oversubscribed. Reduce `threads` +or deploy on dedicated nodes. + +**CPU% below expected** — `test-duration` too short relative to `loop-delay`. +Increase duration or decrease delay. 
diff --git a/catalog/sysbench/workload.service b/catalog/sysbench/workload.service
new file mode 100644
index 0000000..466a0f3
--- /dev/null
+++ b/catalog/sysbench/workload.service
@@ -0,0 +1,13 @@
+[Unit]
+Description=Virtwork sysbench CPU benchmark
+After=network-online.target
+
+[Service]
+Type=simple
+# NOTE(review): upstream sysbench documents only --cpu-max-prime for the cpu test; confirm the packaged sysbench accepts --cpu-method.
+ExecStart=/bin/bash -c 'while true; do sysbench cpu --threads={{threads}} --time={{test-duration}} --cpu-method={{cpu-method}} run; sleep {{loop-delay}}; done'
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target
diff --git a/catalog/sysbench/workload.yaml b/catalog/sysbench/workload.yaml
new file mode 100644
index 0000000..de31fef
--- /dev/null
+++ b/catalog/sysbench/workload.yaml
@@ -0,0 +1,22 @@
+description: "Continuous CPU benchmarking with sysbench — configurable threads, methods, and duration"
+
+packages:
+  - sysbench
+
+params:
+  - key: threads
+    type: int
+    default: "4"
+    desc: "Number of sysbench worker threads (--threads)"
+  - key: cpu-method
+    type: string
+    default: "sum"
+    desc: "CPU test method: sum, sqrt, or pi (--cpu-method)"
+  - key: test-duration
+    type: int
+    default: "60"
+    desc: "Duration per test iteration in seconds (--time)"
+  - key: loop-delay
+    type: int
+    default: "10"
+    desc: "Delay in seconds between test iterations"
diff --git a/internal/workloads/catalog_entries_test.go b/internal/workloads/catalog_entries_test.go
new file mode 100644
index 0000000..fb8318f
--- /dev/null
+++ b/internal/workloads/catalog_entries_test.go
@@ -0,0 +1,122 @@
+// Copyright 2026 Red Hat
+// SPDX-License-Identifier: Apache-2.0
+
+package workloads_test
+
+import (
+	"errors"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"runtime"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"github.com/opdev/virtwork/internal/config"
+	"github.com/opdev/virtwork/internal/workloads"
+)
+
+// projectRoot returns the repository root, computed as two directories above
+// the directory containing this test file.
+//
+// It panics when runtime.Caller cannot report the file name: with an empty
+// name the result would silently become relative to the working directory.
+func projectRoot() string {
+	_, file, _, ok := runtime.Caller(0)
+	if !ok {
+		panic("projectRoot: runtime.Caller could not determine the test file path")
+	}
+	return filepath.Join(filepath.Dir(file), "..", "..")
+}
+
+// Each directory under <projectRoot>/catalog is treated as one catalog entry.
+// The specs are skipped, not failed, when that directory is missing or holds
+// no entry directories.
+var _ = Describe("Shipped catalog entries", func() {
+	catalogDir := filepath.Join(projectRoot(), "catalog")
+
+	var entries []string
+
+	BeforeEach(func() {
+		dirEntries, err := os.ReadDir(catalogDir)
+		if errors.Is(err, fs.ErrNotExist) {
+			Skip("catalog directory not found at " + catalogDir)
+		}
+		Expect(err).NotTo(HaveOccurred())
+
+		// Rebuild the list before every spec; only directories count as entries.
+		entries = nil
+		for _, d := range dirEntries {
+			if d.IsDir() {
+				entries = append(entries, d.Name())
+			}
+		}
+		if len(entries) == 0 {
+			Skip("no catalog entries found in " + catalogDir)
+		}
+	})
+
+	It("should discover at least one entry", func() {
+		Expect(entries).NotTo(BeEmpty())
+	})
+
+	It("should load, validate, and produce working workloads for every entry", func() {
+		for _, name := range entries {
+			By("validating entry: " + name)
+
+			entry, err := workloads.LoadCatalogEntry(catalogDir, name)
+			Expect(err).NotTo(HaveOccurred(), "LoadCatalogEntry failed for %s", name)
+
+			schema := entry.Schema()
+			Expect(schema).NotTo(BeNil(), "Schema() returned nil for %s", name)
+
+			factory := entry.Factory()
+			Expect(factory).NotTo(BeNil(), "Factory() returned nil for %s", name)
+
+			cfg := config.WorkloadConfig{VMCount: 1, CPUCores: 2, Memory: "2Gi"}
+			opts := &workloads.RegistryOpts{
+				Namespace:   "test",
+				SSHUser:     "virtwork",
+				SSHPassword: "test",
+			}
+			wl := factory(cfg, opts)
+			Expect(wl).NotTo(BeNil(), "factory produced nil workload for %s", name)
+			Expect(wl.Name()).To(Equal(name), "Name() mismatch for %s", name)
+
+			userdata, err := wl.CloudInitUserdata()
+			Expect(err).NotTo(HaveOccurred(), "CloudInitUserdata failed for %s", name)
+			Expect(userdata).To(HavePrefix("#cloud-config"), "userdata missing #cloud-config prefix for %s", name)
+
+			// Multi-role entries must additionally render userdata for each role.
+			if entry.IsMultiRole() {
+				multi, ok := wl.(workloads.MultiVMWorkload)
+				Expect(ok).To(BeTrue(), "multi-role entry %s did not produce MultiVMWorkload", name)
+
+				roles := multi.RoleDistribution()
+				Expect(roles).NotTo(BeEmpty(), "RoleDistribution empty for multi-role entry %s", name)
+
+				for _, rs := range roles {
+					roleUserdata, roleErr := multi.UserdataForRole(rs.Role, "test")
+					Expect(roleErr).NotTo(HaveOccurred(), "UserdataForRole(%s) failed for %s", rs.Role, name)
+					Expect(roleUserdata).To(
+						HavePrefix("#cloud-config"),
+						"role %s userdata missing #cloud-config for %s", rs.Role, name)
+				}
+			}
+
+			// Expect one DataVolumeTemplate per storage item in the manifest.
+			if len(entry.Manifest.Storage) > 0 {
+				dvts, dvtErr := wl.DataVolumeTemplates()
+				Expect(dvtErr).NotTo(HaveOccurred(), "DataVolumeTemplates failed for %s", name)
+				Expect(dvts).To(HaveLen(len(entry.Manifest.Storage)),
+					"DataVolumeTemplates count mismatch for %s: expected %d", name, len(entry.Manifest.Storage))
+			}
+
+			// A manifest with a service section must report RequiresService.
+			if entry.Manifest.Service != nil {
+				Expect(wl.RequiresService()).To(BeTrue(), "RequiresService should be true for %s", name)
+			}
+		}
+	})
+})