diff --git a/.github/workflows/quality-bench-monthly.yml b/.github/workflows/quality-bench-monthly.yml
new file mode 100644
index 0000000..ccdba06
--- /dev/null
+++ b/.github/workflows/quality-bench-monthly.yml
@@ -0,0 +1,77 @@
+name: "⚡ Quality: Benchmarks (monthly)"
+run-name: >-
+ ${{
+ github.event_name == 'schedule' && '⚡ Quality: Benchmarks — Monthly schedule' ||
+ format('⚡ Quality: Benchmarks — Monthly, manual by {0}', github.actor)
+ }}
+
+# Monthly performance-regression signal. Runs every Go benchmark in the tree
+# (auth middleware, Argon2id verify, client-IP extraction, the PHC / image-ref /
+# label / trusted-proxy parsers, and the MCP dispatch path) five times so the
+# numbers are stable enough to diff month over month. Deep fuzzing lives in
+# `quality-fuzz-monthly.yml`; this workflow answers the separate question of
+# whether the hot paths got slower or started allocating more. Mirrors
+# sockguard's monthly benchmark tier; same rationale.
+#
+# Results upload as an artifact (90-day retention) and the top lines are echoed
+# into the run summary so a regression is visible without downloading anything.
+
+on:
+ workflow_dispatch:
+ schedule:
+ - cron: '45 7 1 * *' # Monthly on day 1 at 07:45 UTC (between mutation 06:30 and deep fuzz 08:30)
+
+permissions:
+ contents: read
+
+concurrency:
+ group: quality-bench-monthly-${{ github.workflow }}
+ cancel-in-progress: true
+
+jobs:
+ benchmarks:
+ name: "⚡ Go benchmarks (hot paths)"
+ runs-on: ubuntu-latest
+ timeout-minutes: 30
+ steps:
+ - name: Harden Runner
+ uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
+ with:
+ egress-policy: audit
+
+ - name: Checkout
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ with:
+ persist-credentials: false
+
+ - name: Setup Go
+ uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
+ with:
+ go-version: "1.26"
+
+      - name: Run benchmarks
+        run: |
+          # With no explicit `shell:`, Actions runs this script as `bash -e`
+          # (no pipefail), so the pipeline below would report tee's exit
+          # status. Enable pipefail so a failing or timed-out `go test` fails
+          # the step instead of being masked by `| tee`.
+          set -o pipefail
+          # -run='^$' skips the unit tests (CI already gates those) so the job
+          # spends its time only on benchmarks. -count=5 gives benchstat-ready
+          # samples; -benchmem tracks allocations alongside ns/op.
+          go test -run='^$' -bench=. -benchmem -count=5 -timeout=20m ./... \
+            | tee benchmark-results.txt
+
+ - name: Upload benchmark results
+ if: always()
+ uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+ with:
+ name: benchmark-results-${{ github.run_id }}
+ path: benchmark-results.txt
+ retention-days: 90
+
+      - name: Summarize
+        if: always()
+        run: |
+          # Collect the matching lines first. The exit status of `grep | head`
+          # is head's, so an `|| echo` fallback attached to that pipeline never
+          # fires; testing the captured text is what actually detects "no
+          # results". stderr is dropped so a missing results file (benchmark
+          # step never ran) does not leak a grep error into the log, and
+          # `|| true` keeps `bash -e` from aborting the summary.
+          results="$(grep -E '^Benchmark' benchmark-results.txt 2>/dev/null | head -60 || true)"
+          {
+            echo "### Go benchmarks (monthly)"
+            echo ""
+            echo '```'
+            if [ -n "${results}" ]; then
+              printf '%s\n' "${results}"
+            else
+              echo "No benchmark results captured."
+            fi
+            echo '```'
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/quality-fuzz-monthly.yml b/.github/workflows/quality-fuzz-monthly.yml
new file mode 100644
index 0000000..5b7d979
--- /dev/null
+++ b/.github/workflows/quality-fuzz-monthly.yml
@@ -0,0 +1,172 @@
+name: "🔀 Quality: Deep Fuzz (monthly)"
+run-name: >-
+ ${{
+ github.event_name == 'schedule' && '🔀 Quality: Deep Fuzz — Monthly schedule' ||
+ format('🔀 Quality: Deep Fuzz — Monthly, manual by {0}', github.actor)
+ }}
+
+# Tier 3 of the fuzz strategy. Tier 1 lives in `ci.yml go-fuzz` (60-second
+# smoke per PR/push), Tier 2 in `quality-fuzz-nightly.yml` (5 minutes per
+# fuzzer daily). This workflow gives every fuzzer a 1-hour coverage budget on
+# the first day of each month so we get one very deep pass that can reach paths
+# the shorter tiers miss, plus on-demand longer runs via `workflow_dispatch`
+# before a release. Mirrors sockguard's monthly tier; same rationale.
+#
+# 1 hour per fuzzer × 5 fuzzers × matrix-parallel = ~1 hour wall time. Crashes
+# upload with 180-day retention so a failing input from an older monthly run
+# stays recoverable long enough to turn into a committed regression test.
+
+on:
+ workflow_dispatch:
+ inputs:
+ fuzztime:
+ description: "Per-fuzzer coverage budget (Go duration, e.g. 1h, 3h). Max ≈5h45m before the job timeout trims it."
+ required: false
+ default: "1h"
+ schedule:
+ - cron: '30 8 1 * *' # Monthly on day 1 at 08:30 UTC (after mutation at 06:30)
+
+permissions:
+ contents: read
+
+concurrency:
+ # Don't cancel an in-flight monthly when a manual dispatch fires — they
+ # answer different questions and either can confirm health.
+ group: quality-fuzz-monthly-${{ github.workflow }}-${{ github.event.inputs.fuzztime || 'scheduled' }}
+ cancel-in-progress: false
+
+jobs:
+ monthly-fuzz:
+ name: "🔀 Fuzz ${{ matrix.fuzzer.name }} (monthly)"
+ runs-on: ubuntu-latest
+ # 6-hour ceiling is the hard cap regardless of the fuzztime input.
+ # Anything longer should run on a dedicated box, not a shared runner.
+ timeout-minutes: 360
+
+ strategy:
+ fail-fast: false
+ matrix:
+ fuzzer:
+ - { name: FuzzParsePHC, pkg: ./internal/server/ }
+ - { name: FuzzParseTrustedProxies, pkg: ./internal/server/ }
+ - { name: FuzzParseImageRef, pkg: ./internal/adapter/ }
+ - { name: FuzzParseLabels, pkg: ./internal/adapter/drydock/ }
+ - { name: FuzzMCPHandler, pkg: ./internal/mcp/ }
+
+ steps:
+ - name: Harden Runner
+ uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
+ with:
+ egress-policy: audit
+
+ - name: Checkout
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ with:
+ persist-credentials: false
+
+ - name: Setup Go
+ uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
+ with:
+ go-version: "1.26"
+
+      - name: Resolve fuzz budget
+        id: budget
+        env:
+          INPUT_FUZZTIME: ${{ github.event.inputs.fuzztime }}
+        run: |
+          # Scheduled runs see an empty input and fall back to 1h.
+          FUZZTIME="${INPUT_FUZZTIME:-1h}"
+          echo "fuzztime=${FUZZTIME}" >> "$GITHUB_OUTPUT"
+          # Convert the budget to seconds, then add a 120-second cushion so the
+          # go test -timeout always outlasts -fuzztime. Any combination of
+          # whole-number h/m/s components in that order is accepted (1h, 90m,
+          # 1h30m, 5h45m): a compound value must not silently fall back to the
+          # 1h default, or go test would be killed long before the requested
+          # budget is spent.
+          if [[ "${FUZZTIME}" =~ ^([0-9]+h)?([0-9]+m)?([0-9]+s)?$ ]]; then
+            hours="${BASH_REMATCH[1]%h}"
+            minutes="${BASH_REMATCH[2]%m}"
+            seconds="${BASH_REMATCH[3]%s}"
+            # 10# forces base 10 so a zero-padded component such as 08m is not
+            # rejected by the shell as an invalid octal literal.
+            budget_s=$(( 10#${hours:-0} * 3600 + 10#${minutes:-0} * 60 + 10#${seconds:-0} ))
+          else
+            # Anything else (fractional durations, iteration counts such as
+            # 1000x, other units) keeps the previous 1h assumption, but loudly.
+            echo "::warning::Could not parse fuzztime '${FUZZTIME}' as whole h/m/s components; assuming a 1h budget for the go test -timeout."
+            budget_s=3600
+          fi
+          echo "test_timeout=$(( budget_s + 120 ))s" >> "$GITHUB_OUTPUT"
+
+      - name: Fuzz ${{ matrix.fuzzer.name }}
+        id: fuzz
+        env:
+          FUZZER: ${{ matrix.fuzzer.name }}
+          PKG: ${{ matrix.fuzzer.pkg }}
+          FUZZTIME: ${{ steps.budget.outputs.fuzztime }}
+          TEST_TIMEOUT: ${{ steps.budget.outputs.test_timeout }}
+        run: |
+          # Runs one fuzz target for up to two attempts and classifies the
+          # outcome into the `kind` step output (pass | crash | error | flake),
+          # which the Summarize step reads back.
+          LOG="${RUNNER_TEMP}/fuzz-${FUZZER}.log"
+
+          # Run the fuzzer, mirroring combined stdout/stderr into ${LOG}. The
+          # return value is go test's own exit status (PIPESTATUS[0]), not
+          # tee's, so failures survive the pipe.
+          run_fuzz() {
+            go test -run='^$' \
+              -fuzz="^${FUZZER}\$" \
+              -fuzztime="${FUZZTIME}" \
+              -timeout="${TEST_TIMEOUT}" \
+              "${PKG}" 2>&1 | tee "${LOG}"
+            return "${PIPESTATUS[0]}"
+          }
+
+          # Record the outcome classification as the step's `kind` output.
+          emit() { echo "kind=$1" >> "$GITHUB_OUTPUT"; }
+
+          for attempt in 1 2; do
+            rc=0
+            run_fuzz || rc=$?
+
+            # Clean exit: the budget was spent without a failure.
+            if [ "${rc}" -eq 0 ]; then
+              emit pass
+              exit 0
+            fi
+
+            # The log says a failing input was written: a real finding, never
+            # retried.
+            if grep -q "Failing input written to testdata" "${LOG}"; then
+              emit crash
+              echo "::error::${FUZZER} found a crashing input — commit it to the seed corpus and fix the regression."
+              exit "${rc}"
+            fi
+
+            # Any failure that does not mention "context deadline exceeded" is
+            # treated as a genuine error and fails immediately.
+            if ! grep -q "context deadline exceeded" "${LOG}"; then
+              emit error
+              echo "::error::${FUZZER} failed for a non-flake reason (exit ${rc})."
+              exit "${rc}"
+            fi
+
+            # Only the deadline-exceeded case reaches here; warn and let the
+            # loop make its second attempt.
+            echo "::warning::${FUZZER}: known -fuzztime boundary flake on attempt ${attempt}/2."
+          done
+
+          # Both attempts ended in the boundary flake.
+          emit flake
+          echo "::error::${FUZZER} hit the boundary flake on both attempts."
+          exit 1
+
+ - name: Upload fuzz corpus on failure or cancel
+ if: failure() || cancelled()
+ uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+ with:
+ name: fuzz-corpus-${{ matrix.fuzzer.name }}-${{ github.run_id }}
+ path: "**/testdata/fuzz/${{ matrix.fuzzer.name }}/"
+ retention-days: 180
+ if-no-files-found: ignore
+
+      - name: Summarize
+        if: always()
+        env:
+          FUZZER: ${{ matrix.fuzzer.name }}
+          PKG: ${{ matrix.fuzzer.pkg }}
+          FUZZTIME: ${{ steps.budget.outputs.fuzztime }}
+          STATUS: ${{ job.status }}
+          KIND: ${{ steps.fuzz.outputs.kind }}
+          RUN_ID: ${{ github.run_id }}
+        run: |
+          # Append this fuzzer's section to the run summary. Recovery
+          # instructions are added only when the fuzz step classified the
+          # failure as a crash.
+          {
+            printf '### %s (monthly deep)\n\n' "${FUZZER}"
+            printf -- '- Package: `%s`\n' "${PKG}"
+            printf -- '- Budget: %s\n' "${FUZZTIME}"
+            printf -- '- Result: %s\n' "${STATUS}"
+            if [ "${KIND}" = "crash" ]; then
+              printf '\n'
+              printf 'A crashing input was saved to `testdata/fuzz/%s/`.\n' "${FUZZER}"
+              printf 'Download artifact `fuzz-corpus-%s-%s`, commit the\n' "${FUZZER}" "${RUN_ID}"
+              printf 'minimized input to the seed corpus, fix the bug, and push.\n'
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/quality-soak-weekly.yml b/.github/workflows/quality-soak-weekly.yml
new file mode 100644
index 0000000..ec90bf6
--- /dev/null
+++ b/.github/workflows/quality-soak-weekly.yml
@@ -0,0 +1,136 @@
+name: "⏱️ Quality: Soak"
+run-name: >-
+ ${{
+ github.event_name == 'schedule' && '⏱️ Quality: Soak — Weekly' ||
+ format('⏱️ Quality: Soak — Manual by {0}', github.actor)
+ }}
+
+# RSS + thread-drift soak. Runs the Portwing agent (generic adapter) in front of
+# a mock Docker daemon under a sustained loadgen mix — cached-inventory reads,
+# version/info, a raw Docker proxy read, and a stream of SSE subscribers that
+# connect/hold/disconnect (the leak-prone path) — and asserts the agent's
+# working-set growth stays inside a configured threshold. This is the
+# long-lived-agent leak signal the unit/integration/fuzz tiers can't give.
+#
+# GitHub-hosted runners cap a single job at 6h, so the scheduled run soaks for
+# 4h — enough wall time for a per-request allocation/goroutine leak to
+# accumulate into RSS growth well above the 64 MiB threshold. The 24h target is
+# deferred until a self-hosted runner is wired up; the manual dispatch inputs
+# let a maintainer override the duration for that, or shorten it for a one-off
+# check.
+
+on:
+ workflow_dispatch:
+ inputs:
+ duration:
+ description: "Soak duration (Go time.Duration; e.g. 30m, 4h)"
+ required: false
+ default: "4h"
+ concurrency:
+ description: "Concurrent loadgen workers for the inventory scenario"
+ required: false
+ default: "20"
+ rss_growth_threshold_bytes:
+ description: "Fail if VmRSS grows by more than this many bytes from the post-warmup baseline"
+ required: false
+ default: "67108864"
+ schedule:
+ - cron: '15 6 * * 0' # Sundays 06:15 UTC
+
+permissions:
+ contents: read
+
+concurrency:
+ group: quality-soak-${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ soak:
+ name: "⏱️ Soak (portwing RSS + thread drift)"
+ runs-on: ubuntu-latest
+ # Allow 30 minutes for build + warmup + post-run reporting on top of the
+ # 4-hour soak; well under the 6-hour github-hosted ceiling.
+ timeout-minutes: 270
+
+ steps:
+ - name: Harden Runner
+ uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
+ with:
+ egress-policy: audit
+
+ - name: Checkout
+ uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+ with:
+ persist-credentials: false
+
+ - name: Setup Go
+ uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
+ with:
+ go-version: "1.26"
+
+      - name: Resolve soak parameters
+        id: params
+        env:
+          INPUT_DURATION: ${{ github.event.inputs.duration }}
+          INPUT_CONCURRENCY: ${{ github.event.inputs.concurrency }}
+          INPUT_RSS_THRESHOLD: ${{ github.event.inputs.rss_growth_threshold_bytes }}
+        run: |
+          # Inputs are empty on a scheduled run, so each value falls back to
+          # the same default the workflow_dispatch form advertises.
+          duration="${INPUT_DURATION:-4h}"
+          concurrency="${INPUT_CONCURRENCY:-20}"
+          rss_threshold="${INPUT_RSS_THRESHOLD:-67108864}"
+          printf '%s\n' \
+            "duration=${duration}" \
+            "concurrency=${concurrency}" \
+            "rss_threshold=${rss_threshold}" >> "$GITHUB_OUTPUT"
+
+      - name: Validate the soak script accepts the resolved parameters
+        env:
+          # The resolved values reach the shell only as environment variables,
+          # never as raw `${{ … }}` text inside the script — this closes
+          # zizmor's template-injection audit even though soak.sh re-validates
+          # downstream.
+          SOAK_DURATION: ${{ steps.params.outputs.duration }}
+          SOAK_CONCURRENCY: ${{ steps.params.outputs.concurrency }}
+          SOAK_RSS_THRESHOLD: ${{ steps.params.outputs.rss_threshold }}
+        run: |
+          # Same arguments as the real run, plus --dry-run.
+          soak_args=(
+            --duration "${SOAK_DURATION}"
+            --concurrency "${SOAK_CONCURRENCY}"
+            --rss-growth-threshold-bytes "${SOAK_RSS_THRESHOLD}"
+          )
+          scripts/soak.sh --dry-run "${soak_args[@]}"
+
+      - name: Run soak
+        id: soak
+        env:
+          SOAK_DURATION: ${{ steps.params.outputs.duration }}
+          SOAK_CONCURRENCY: ${{ steps.params.outputs.concurrency }}
+          SOAK_RSS_THRESHOLD: ${{ steps.params.outputs.rss_threshold }}
+        run: |
+          # With no explicit `shell:`, Actions runs this script as `bash -e`
+          # (no pipefail), so the pipeline's status would be tee's and a
+          # failing soak could never fail the job. pipefail propagates
+          # soak.sh's exit status through the pipe.
+          set -o pipefail
+          scripts/soak.sh \
+            --duration "${SOAK_DURATION}" \
+            --concurrency "${SOAK_CONCURRENCY}" \
+            --rss-growth-threshold-bytes "${SOAK_RSS_THRESHOLD}" \
+            | tee soak-output.txt
+
+ - name: Upload soak output
+ if: always()
+ uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+ with:
+ name: soak-output-${{ github.run_id }}
+ path: soak-output.txt
+ retention-days: 90
+
+      - name: Summarize
+        if: always()
+        env:
+          SOAK_DURATION: ${{ steps.params.outputs.duration }}
+          SOAK_CONCURRENCY: ${{ steps.params.outputs.concurrency }}
+          SOAK_RSS_THRESHOLD: ${{ steps.params.outputs.rss_threshold }}
+        run: |
+          # Echo the resolved parameters and the tail of the captured output
+          # into the run summary; the placeholder covers a run where
+          # soak-output.txt was never written.
+          {
+            printf '%s\n' "### Portwing soak"
+            printf -- '- Duration: %s\n' "${SOAK_DURATION}"
+            printf -- '- Concurrency: %s\n' "${SOAK_CONCURRENCY}"
+            printf -- '- RSS threshold: %s bytes\n' "${SOAK_RSS_THRESHOLD}"
+            printf '\n'
+            printf '%s\n' "Last 20 lines of soak output:"
+            printf '%s\n' '```'
+            tail -n 20 soak-output.txt 2>/dev/null || printf '%s\n' "(no output captured)"
+            printf '%s\n' '```'
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b43f9f..8c6d8ae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
+### Added
+
+- **Tier-3 monthly deep fuzz**: `quality-fuzz-monthly.yml` gives each of the five fuzz targets a 1-hour budget on the first of the month (dispatchable to longer budgets before a release), completing the smoke → nightly → monthly fuzz tiering. Crash corpora retain for 180 days.
+- **Weekly soak test**: `quality-soak-weekly.yml` runs the agent (generic adapter) against a mock Docker upstream under a sustained loadgen mix — inventory/version/proxy reads plus SSE subscriber connect/hold/disconnect churn — and fails if its resident set grows past a configurable budget (64 MiB default) over a multi-hour soak. New harness under `benchmarks/cmd/{mockdocker,loadgen}` driven by `scripts/soak.sh`. Catches the long-lived-agent leak profile the unit/integration/fuzz tiers don't.
+- **Monthly benchmark tracking**: Go benchmarks on the per-request hot paths (auth middleware, Argon2id verify — cold derivation vs. warm SHA-256 cache, client-IP extraction, rate limiter) and the parse paths (PHC, image-ref, Drydock labels, trusted-proxy CIDRs, MCP dispatch). `quality-bench-monthly.yml` reruns them with `-benchmem -count=5` on the first of each month and retains the results for 90 days so a ns/op or allocs/op regression shows up month over month. Completes the test-posture parity with sockguard.
+
## [0.3.0] - 2026-06-15
### Added
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2d35beb..e93ca7e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -21,7 +21,7 @@ Questions or help? Open an [issue](https://github.com/CodesWhat/portwing/issues)
4. **Create a branch** from the appropriate base:
- Bug fixes for the current release: branch from `main`
- - New features targeting the next release: branch from the active dev branch (e.g. `dev/0.2.0`)
+ - New features targeting the next release: branch from the active dev branch (e.g. `dev/0.4.0`)
## Development loop
diff --git a/README.md b/README.md
index ad5f1ff..5706d77 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@
> [!WARNING]
> ### 🚧 Alpha software — not yet production-ready
-> Portwing is in **active alpha** (`v0.2.x`). APIs, environment variables, and on-disk/wire formats may change between minor releases **without notice**. Pin to an exact version, review the [CHANGELOG](CHANGELOG.md) before upgrading, and expect breaking changes before `v1.0.0`.
+> Portwing is in **active alpha** (`v0.3.x`). APIs, environment variables, and on-disk/wire formats may change between minor releases **without notice**. Pin to an exact version, review the [CHANGELOG](CHANGELOG.md) before upgrading, and expect breaking changes before `v1.0.0`.
@@ -46,8 +46,6 @@
-
-
@@ -73,7 +71,7 @@
> [!NOTE]
-> **v0.2.0 is the current release.** Ships Ed25519 per-client authentication, key enrollment, Argon2id token hashing, a read-only MCP server, Prometheus metrics, structured audit logging, and hardened CI/supply-chain infrastructure. See [CHANGELOG.md](CHANGELOG.md) for full release notes.
+> **v0.3.0 is the current release.** Adds a startup banner, completes the rename from Lookout to Portwing, migrates the release pipeline to GoReleaser `dockers_v2`, and fixes two edge-mode bugs (reconnect backoff reset, steady-state read deadline). The security foundation from v0.2.0 remains in place: Ed25519 per-client authentication, key enrollment, Argon2id token hashing, a read-only MCP server, Prometheus metrics, structured audit logging, and hardened CI/supply-chain infrastructure. See [CHANGELOG.md](CHANGELOG.md) for full release notes.
```mermaid
flowchart LR
@@ -191,7 +189,7 @@ volumes:
Edge mode variant (outbound WebSocket — early access)
-> **Early access.** Edge mode is usable end-to-end: Drydock 1.5 ships the `/api/portwing/ws` controller endpoint (Ed25519-only) and Portwing signs its hello with an Ed25519 key. Drydock 1.5 and Portwing 0.2.2 are both pre-release; full exec robustness under load lands in Portwing 0.2.2.
+> **Early access.** Edge mode is usable end-to-end as of the current release: Drydock 1.5 ships the `/api/portwing/ws` controller endpoint (Ed25519-only) and Portwing signs its hello with an Ed25519 key. Both Drydock 1.5 and the current Portwing release are pre-release; full exec robustness under load is still being hardened.
For hosts behind NAT or a firewall, [`examples/docker-compose.edge.yml`](examples/docker-compose.edge.yml) has Portwing dial out to your Drydock controller's edge endpoint (`DRYDOCK_URL` + `/api/portwing/ws`); no port is published on the remote host.
@@ -263,6 +261,7 @@ curl -fsSL https://raw.githubusercontent.com/codeswhat/portwing/main/scripts/ins
Latest release highlights
+- **v0.3.0 shipped on 2026-06-15** — startup banner, Lookout→Portwing rename completed, GoReleaser `dockers_v2` migration, and two edge-mode bug fixes (reconnect backoff reset, steady-state read deadline). See [CHANGELOG.md](CHANGELOG.md).
- **v0.2.0 shipped on 2026-06-12** — Ed25519 per-request authentication with signed requests via `X-Portwing-Key-ID` / `X-Portwing-Timestamp` / `X-Portwing-Nonce` / `X-Portwing-Signature` headers, verified against an `authorized_keys` file. Replay protection via nonce LRU and timestamp window, SIGHUP hot-reload of the key file, `portwing keygen` CLI subcommand, and `X-Portwing-Reason` diagnostic header on 401s. Signed edge-mode hello via `PRIVATE_KEY_FILE`.
- **Key enrollment** — optional single-use `ENROLLMENT_TOKEN` (`POST /api/portwing/enroll`) for bootstrapping the first Ed25519 key — burned on first use, rate-limited, and audit-logged.
- **Argon2id token hashing** — `TOKEN_HASH` / `TOKEN_HASH_FILE` with OWASP-recommended parameters; SHA-256 success cache keeps per-request cost flat.
@@ -283,7 +282,7 @@ See [CHANGELOG.md](CHANGELOG.md) for the full itemized history.
| | Feature | Description |
|---|---|---|
-| 🔄 | **Connection Modes** | Standard mode (the Drydock controller connects inbound over HTTP/SSE) is the primary integration. Edge mode (agent dials out over WebSocket, for NAT/firewalled hosts) is usable end-to-end as of Drydock 1.5 + Portwing 0.2.2 (both pre-release). |
+| 🔄 | **Connection Modes** | Standard mode (the Drydock controller connects inbound over HTTP/SSE) is the primary integration. Edge mode (agent dials out over WebSocket, for NAT/firewalled hosts) is usable end-to-end as of the current release with Drydock 1.5 (both pre-release). |
| 🐳 | **Transparent Docker API Proxy** | All Docker Engine API paths forwarded to the local daemon — streaming endpoints, exec session hijacking, and long-lived connections included. |
| 🔑 | **Ed25519 Per-Client Authentication** | Per-request signatures with per-client keys, replay protection via nonce LRU and timestamp window, `authorized_keys`-style rotation via SIGHUP, zero shared secrets. |
| 🔐 | **Argon2id Token Hashing** | Hash your token at rest with OWASP-recommended Argon2id parameters; `TOKEN_HASH_FILE` for Docker secrets support; SHA-256 success cache keeps per-request overhead flat. |
@@ -386,7 +385,7 @@ Portwing runs an HTTP(S) server; the **Drydock controller connects inbound** and
### Edge Mode — early access
-Portwing initiates an outbound WebSocket to the controller's edge endpoint (`DRYDOCK_URL` + `/api/portwing/ws`) for hosts with no inbound port. Both sides are implemented — Drydock 1.5 ships the controller endpoint and Portwing signs an Ed25519 hello — so edge mode is **usable end-to-end**. Drydock 1.5 and Portwing 0.2.2 are pre-release; full exec robustness under load lands in Portwing 0.2.2. The endpoint is **Ed25519-only**: set `PRIVATE_KEY_FILE` and register the public key with Drydock.
+Portwing initiates an outbound WebSocket to the controller's edge endpoint (`DRYDOCK_URL` + `/api/portwing/ws`) for hosts with no inbound port. Both sides are implemented — Drydock 1.5 ships the controller endpoint and Portwing signs an Ed25519 hello — so edge mode is **usable end-to-end** as of the current release. Both Drydock 1.5 and the current Portwing release are pre-release; full exec robustness under load is still being hardened. The endpoint is **Ed25519-only**: set `PRIVATE_KEY_FILE` and register the public key with Drydock.
- Set when `DRYDOCK_URL` is configured along with `TOKEN`, `AUTHORIZED_KEYS`, or `PRIVATE_KEY_FILE`
- Targets hosts behind NAT, firewalls, and dynamic IPs
diff --git a/ROADMAP.md b/ROADMAP.md
index ee71c49..dc08bd8 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -1,11 +1,11 @@
# Portwing Roadmap
-> Portwing is **alpha** software (`v0.2.x`). This roadmap describes direction and
+> Portwing is **alpha** software (`v0.3.x`). This roadmap describes direction and
> priorities — not commitments. Items and ordering may change between releases.
> For the authoritative record of what has shipped, see the
> [CHANGELOG](CHANGELOG.md).
-## Now — `v0.2.x` (hardening the alpha)
+## Now — `v0.3.x` (hardening the alpha)
The current line prioritizes production-readiness of the existing feature set
over new surface area.
@@ -14,8 +14,26 @@ over new surface area.
signing, and resource limits across the Docker proxy and the edge tunnel.
- **Release & supply chain** — reproducible multi-arch builds, cosign-signed
images, SBOMs, build provenance, and a CI-gated tag → release pipeline.
-- **Test coverage** — broaden unit, integration, and fuzz coverage, closing
- gaps in the auth, MCP, and adapter paths.
+- **Test coverage & quality gates** — broaden unit, integration, and fuzz
+ coverage across the auth, MCP, and adapter paths, and bring the CI quality
+ posture to parity with sockguard's:
+ - **Three-tier fuzzing** — *shipped.* 60s smoke per PR (`ci.yml go-fuzz`),
+ 5m nightly (`quality-fuzz-nightly.yml`), and a 1h monthly deep pass
+ (`quality-fuzz-monthly.yml`).
+ - **Soak testing** — *shipped.* `quality-soak-weekly.yml` drives the agent
+ (generic adapter, mock Docker upstream) under a sustained mix of inventory/
+ version/proxy reads plus SSE subscriber connect/hold/disconnect churn, and
+ fails if its resident set grows past a budget (64 MiB default) over a
+ multi-hour soak — the long-lived-agent leak profile the unit/integration
+ tiers don't catch. Harness: `benchmarks/cmd/{mockdocker,loadgen}` +
+ `scripts/soak.sh`.
+ - **Benchmark tracking** — *shipped.* Go benchmarks cover the per-request hot
+ paths (auth middleware, Argon2id verify — cold derivation and warm SHA-256
+ cache, client-IP extraction, rate limiter) and the parse paths (PHC,
+ image-ref, Drydock labels, trusted-proxy CIDRs, MCP dispatch).
+ `quality-bench-monthly.yml` reruns them with `-benchmem -count=5` on the
+ first of each month and keeps the results as a 90-day artifact, so a ns/op
+ or allocs/op regression is visible month over month.
- **Documentation** — keep `SPEC.md`, `README.md`, and the design docs in sync
with the code as behavior settles.
@@ -27,9 +45,10 @@ over new surface area.
and the paired Portwing release are pre-release.
- **Edge tunnel robustness** — ordered exec I/O, backpressure under load, and a
dedicated test harness for the tunnel (auth hello, request fan-out, exec
- sessions). Lands in v0.2.2.
-- **Reproducible base images** — pin runtime base images by digest with
- automated update tracking.
+ sessions). Ongoing.
+- **Reproducible base images** — *shipped.* Both `Dockerfile` and
+ `Dockerfile.release` pin every base image by digest (`wolfi-base`, `alpine`,
+ `golang`), and Dependabot tracks the `docker` ecosystem weekly for updates.
## Later — toward `v1.0`
diff --git a/SECURITY.md b/SECURITY.md
index 323c923..ff0a3bf 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -6,8 +6,8 @@ Security fixes are shipped on the **latest release line only**.
| Version | Supported |
| -------------- | ------------------ |
-| 0.2.x (latest) | :white_check_mark: |
-| < 0.2 | :x: |
+| 0.3.x (latest) | :white_check_mark: |
+| < 0.3 | :x: |
## Reporting a Vulnerability
diff --git a/SPEC.md b/SPEC.md
index 0a9d9f6..d40a7b3 100644
--- a/SPEC.md
+++ b/SPEC.md
@@ -51,7 +51,7 @@ Portwing runs an HTTP(S) server. The Drydock controller connects inbound.
### 2.3 Edge Mode
-Portwing initiates an outbound WebSocket connection to the Drydock controller's edge endpoint (`/api/portwing/ws`). All communication is multiplexed over this single connection. Both sides are implemented: Drydock 1.5 ships the controller endpoint (Ed25519-only, `portwing/1.0`) and Portwing signs its hello with Ed25519. Full exec robustness lands in Portwing 0.2.2; Drydock 1.5 and Portwing 0.2.2 are pre-release.
+Portwing initiates an outbound WebSocket connection to the Drydock controller's edge endpoint (`/api/portwing/ws`). All communication is multiplexed over this single connection. Both sides are implemented: Drydock 1.5 ships the controller endpoint (Ed25519-only, `portwing/1.0`) and Portwing signs its hello with Ed25519. Edge mode is usable end-to-end as of the current release; full exec robustness under load is still being hardened. Drydock 1.5 and the current Portwing release are both pre-release.
- Works behind NAT, firewalls, dynamic IPs
- Auto-reconnect with exponential backoff + jitter
@@ -446,6 +446,6 @@ Packages: `ca-certificates`, `busybox`, `docker-cli`, `docker-compose`, `wget`
## 15. Migration Strategy
1. **Phase 1: Drop-in Standard Mode** -- Replace existing Node.js agent with Portwing binary
-2. **Phase 2: Edge Mode** -- Drydock controller `/api/portwing/ws` WebSocket endpoint shipped in Drydock 1.5; end-to-end edge mode is functional (full exec robustness in Portwing 0.2.2)
+2. **Phase 2: Edge Mode** -- Drydock controller `/api/portwing/ws` WebSocket endpoint shipped in Drydock 1.5; end-to-end edge mode is functional as of the current release (full exec robustness under load still being hardened)
3. **Phase 3: Native WebSocket in Drydock** -- Replace AgentClient SSE with WebSocket
4. **Phase 4: Deprecate SSE** -- Remove SSE endpoints after one release cycle
diff --git a/benchmarks/cmd/loadgen/main.go b/benchmarks/cmd/loadgen/main.go
new file mode 100644
index 0000000..788aba5
--- /dev/null
+++ b/benchmarks/cmd/loadgen/main.go
@@ -0,0 +1,252 @@
+// loadgen hammers a Portwing HTTP endpoint at a configurable concurrency for a
+// fixed duration and prints a single-line JSON summary (p50/p90/p99/max
+// latency, RPS, status/error counts). It keeps one http.Client per worker so we
+// measure steady-state behavior, not per-request transport setup.
+//
+// Two modes:
+//
+// -mode req (default) — fire request, drain body, close, repeat.
+// -mode sse — open the endpoint, hold it open for -sse-hold, then
+// cancel and close. This churns Portwing's SSE
+// subscriber registration/teardown path (one
+// broadcaster + event-stream goroutine per connection),
+// which is the most leak-prone path in a long-lived
+// agent.
+//
+// Output is one line of JSON on stdout so the soak orchestrator can parse it
+// without scraping columnar text.
+package main
+
+import (
+ "context"
+ "encoding/json"
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "os"
+ "sort"
+ "sync"
+ "sync/atomic"
+ "time"
+)
+
+// result is the summary of one loadgen run. main encodes it as a single line
+// of JSON on stdout.
+type result struct {
+	// Echo of the run configuration, copied from options.
+	Scenario    string `json:"scenario"`
+	Base        string `json:"base"`
+	Method      string `json:"method"`
+	Path        string `json:"path"`
+	Mode        string `json:"mode"`
+	Concurrency int    `json:"concurrency"`
+	// DurationSeconds is the measured wall time from worker start until the
+	// last worker returned, not the requested -duration.
+	DurationSeconds float64 `json:"duration_seconds"`
+	// TotalRequests counts every attempt, successful or not; ErrorRequests
+	// counts only the attempts that ended in an error.
+	TotalRequests int64 `json:"total_requests"`
+	ErrorRequests int64 `json:"error_requests"`
+	// ErrorCounts is keyed by the error's message text.
+	ErrorCounts map[string]int64 `json:"error_counts,omitempty"`
+	// StatusCodeCounts is keyed by HTTP status code and covers every attempt
+	// that received a response, whatever the status.
+	StatusCodeCounts map[int]int64 `json:"status_code_counts"`
+	// RPS is TotalRequests divided by DurationSeconds.
+	RPS float64 `json:"rps"`
+	// Latency figures are in microseconds and are computed only over attempts
+	// that received a response.
+	LatencyP50Micros int64 `json:"latency_p50_us"`
+	LatencyP90Micros int64 `json:"latency_p90_us"`
+	LatencyP99Micros int64 `json:"latency_p99_us"`
+	LatencyMaxMicros int64 `json:"latency_max_us"`
+}
+
+// options carries the parsed command-line flags into run and the per-request
+// helpers.
+type options struct {
+	Base        string        // base URL; Path is appended to it verbatim
+	Method      string        // HTTP method for every request
+	Path        string        // request path appended to Base
+	Auth        string        // bearer token; no Authorization header when empty
+	Concurrency int           // number of concurrent workers
+	Duration    time.Duration // how long workers keep starting new iterations
+	Scenario    string        // free-form label echoed into the result
+	Mode        string        // "sse" selects doSSE; any other value selects doReq
+	SSEHold     time.Duration // per-connection deadline used in sse mode
+}
+
+// main parses the flags, validates them, executes a single load run, and
+// reports it twice: one line of JSON on stdout for the soak orchestrator and a
+// compact human-readable line on stderr.
+func main() {
+	var (
+		base        = flag.String("base", "http://127.0.0.1:3000", "base URL of the Portwing server")
+		method      = flag.String("method", "GET", "HTTP method")
+		path        = flag.String("path", "/_portwing/health", "request path")
+		auth        = flag.String("auth", "", "bearer token (sent as Authorization: Bearer …)")
+		concurrency = flag.Int("concurrency", 20, "concurrent workers")
+		duration    = flag.Duration("duration", 20*time.Second, "run duration")
+		scenario    = flag.String("scenario", "custom", "label for this run")
+		mode        = flag.String("mode", "req", "req | sse")
+		sseHold     = flag.Duration("sse-hold", time.Second, "how long each sse connection is held before close")
+	)
+	flag.Parse()
+
+	// run hands the worker count straight to sync.WaitGroup.Add, which panics
+	// on a negative value, and zero workers would emit an all-zero summary
+	// that reads like a clean run. Fail with a clear message instead.
+	if *concurrency < 1 {
+		log.Fatalf("invalid -concurrency %d: must be at least 1", *concurrency)
+	}
+	// Anything other than "sse" is treated as req mode downstream, so a
+	// mistyped mode would silently exercise the wrong code path.
+	if *mode != "req" && *mode != "sse" {
+		log.Fatalf("invalid -mode %q: must be \"req\" or \"sse\"", *mode)
+	}
+
+	out := run(options{
+		Base:        *base,
+		Method:      *method,
+		Path:        *path,
+		Auth:        *auth,
+		Concurrency: *concurrency,
+		Duration:    *duration,
+		Scenario:    *scenario,
+		Mode:        *mode,
+		SSEHold:     *sseHold,
+	})
+
+	// Machine-readable summary: exactly one JSON line on stdout.
+	if err := json.NewEncoder(os.Stdout).Encode(out); err != nil {
+		log.Fatalf("encode: %v", err)
+	}
+	// Human-readable digest on stderr so it never mixes with the JSON.
+	fmt.Fprintf(os.Stderr, "%-18s mode=%-3s conc=%-3d rps=%.0f p50=%dus p99=%dus max=%dus errs=%d\n",
+		out.Scenario, out.Mode, out.Concurrency, out.RPS, out.LatencyP50Micros, out.LatencyP99Micros, out.LatencyMaxMicros, out.ErrorRequests)
+}
+
+// run drives opts.Concurrency workers against opts.Base+opts.Path until
+// opts.Duration elapses, then folds the collected samples into a result.
+//
+// Each worker owns its own http.Client, but every client shares a single
+// http.Transport. Workers check the stop signal only between iterations, so an
+// attempt already in flight when the deadline fires is allowed to finish; the
+// reported duration is the measured wall time, including that tail.
+func run(opts options) result {
+	// done is closed exactly once, by the timer, when the run duration is up.
+	done := make(chan struct{})
+	deadline := time.AfterFunc(opts.Duration, func() { close(done) })
+	defer deadline.Stop()
+
+	sharedTransport := &http.Transport{
+		MaxIdleConns:        opts.Concurrency * 2,
+		MaxIdleConnsPerHost: opts.Concurrency * 2,
+		IdleConnTimeout:     90 * time.Second,
+	}
+	defer sharedTransport.CloseIdleConnections()
+
+	var (
+		requests, failures      atomic.Int64
+		errMu, codeMu, sampleMu sync.Mutex
+		errsByMessage           = map[string]int64{}
+		hitsByStatus            = map[int]int64{}
+		samples                 = make([]int64, 0, 1<<16)
+	)
+
+	// onError counts a failed attempt, bucketed by its error text.
+	onError := func(err error) {
+		requests.Add(1)
+		failures.Add(1)
+		errMu.Lock()
+		errsByMessage[err.Error()]++
+		errMu.Unlock()
+	}
+	// onOK counts an attempt that received a response (whatever its status)
+	// and keeps its latency sample for the percentile computation.
+	onOK := func(status int, micros int64) {
+		requests.Add(1)
+		codeMu.Lock()
+		hitsByStatus[status]++
+		codeMu.Unlock()
+		sampleMu.Lock()
+		samples = append(samples, micros)
+		sampleMu.Unlock()
+	}
+
+	streaming := opts.Mode == "sse"
+
+	var workers sync.WaitGroup
+	worker := func() {
+		defer workers.Done()
+		client := &http.Client{Transport: sharedTransport}
+		issue := doSSE
+		if !streaming {
+			// Only plain requests get a client-wide timeout; each SSE
+			// connection is bounded by its own per-connection deadline.
+			client.Timeout = 10 * time.Second
+			issue = doReq
+		}
+		for {
+			select {
+			case <-done:
+				return
+			default:
+				issue(client, opts, onError, onOK)
+			}
+		}
+	}
+
+	workers.Add(opts.Concurrency)
+	began := time.Now()
+	for w := 0; w < opts.Concurrency; w++ {
+		go worker()
+	}
+	workers.Wait()
+	wall := time.Since(began)
+
+	// All workers have returned, so samples can be read without the mutex.
+	sort.Slice(samples, func(a, b int) bool { return samples[a] < samples[b] })
+	count := len(samples)
+
+	// percentile returns the sample at index floor(q*count), clamped to the
+	// last element, or 0 when nothing was recorded.
+	percentile := func(q float64) int64 {
+		if count == 0 {
+			return 0
+		}
+		at := int(q * float64(count))
+		if at > count-1 {
+			at = count - 1
+		}
+		return samples[at]
+	}
+
+	var peak int64
+	if count > 0 {
+		peak = samples[count-1]
+	}
+
+	total := requests.Load()
+	return result{
+		Scenario:         opts.Scenario,
+		Base:             opts.Base,
+		Method:           opts.Method,
+		Path:             opts.Path,
+		Mode:             opts.Mode,
+		Concurrency:      opts.Concurrency,
+		DurationSeconds:  wall.Seconds(),
+		TotalRequests:    total,
+		ErrorRequests:    failures.Load(),
+		ErrorCounts:      errsByMessage,
+		StatusCodeCounts: hitsByStatus,
+		RPS:              float64(total) / wall.Seconds(),
+		LatencyP50Micros: percentile(0.50),
+		LatencyP90Micros: percentile(0.90),
+		LatencyP99Micros: percentile(0.99),
+		LatencyMaxMicros: peak,
+	}
+}
+
+// newRequest builds the request every worker sends: opts.Method against
+// opts.Base+opts.Path, bound to ctx, with no body. A bearer Authorization
+// header is attached only when a token was supplied.
+func newRequest(ctx context.Context, opts options) (*http.Request, error) {
+	target := opts.Base + opts.Path
+	req, err := http.NewRequestWithContext(ctx, opts.Method, target, nil)
+	if err != nil {
+		return nil, err
+	}
+	if token := opts.Auth; token != "" {
+		req.Header.Set("Authorization", "Bearer "+token)
+	}
+	return req, nil
+}
+
+// doReq performs one request/response cycle: send, drain the body, close.
+// Failures to build or send the request go to recordErr; any response,
+// whatever its status code, goes to recordOK with the time it took client.Do
+// to return (the body drain is not part of the measurement).
+func doReq(client *http.Client, opts options, recordErr func(error), recordOK func(int, int64)) {
+	req, buildErr := newRequest(context.Background(), opts)
+	if buildErr != nil {
+		recordErr(buildErr)
+		return
+	}
+
+	began := time.Now()
+	resp, sendErr := client.Do(req)
+	took := time.Since(began).Microseconds()
+	if sendErr != nil {
+		recordErr(sendErr)
+		return
+	}
+
+	// Drain before closing; errors from the drain and close are ignored.
+	body := resp.Body
+	_, _ = io.Copy(io.Discard, body)
+	_ = body.Close()
+	recordOK(resp.StatusCode, took)
+}
+
+// doSSE runs one connect → hold → teardown cycle. The request context expires
+// after opts.SSEHold, which ends the body read and tears the connection down.
+// The sample passed to recordOK spans the whole cycle, from just before the
+// request is sent until the body has been closed.
+func doSSE(client *http.Client, opts options, recordErr func(error), recordOK func(int, int64)) {
+	holdCtx, release := context.WithTimeout(context.Background(), opts.SSEHold)
+	defer release()
+
+	req, buildErr := newRequest(holdCtx, opts)
+	if buildErr != nil {
+		recordErr(buildErr)
+		return
+	}
+
+	began := time.Now()
+	resp, sendErr := client.Do(req)
+	if sendErr != nil {
+		// Hitting the deadline at this point means no response headers ever
+		// arrived, so it is recorded as an error.
+		recordErr(sendErr)
+		return
+	}
+
+	code := resp.StatusCode
+	// The drain normally ends when the hold deadline cancels the read; the
+	// resulting error is the expected way out and is ignored.
+	stream := resp.Body
+	_, _ = io.Copy(io.Discard, stream)
+	_ = stream.Close()
+	recordOK(code, time.Since(began).Microseconds())
+}
diff --git a/benchmarks/cmd/mockdocker/main.go b/benchmarks/cmd/mockdocker/main.go
new file mode 100644
index 0000000..f023146
--- /dev/null
+++ b/benchmarks/cmd/mockdocker/main.go
@@ -0,0 +1,219 @@
+// mockdocker is a minimal Docker-API-shaped HTTP server that listens on a unix
+// socket. It exists so the Portwing soak benchmark has a stable Docker upstream
+// whose behavior doesn't drift between runs and needs no real daemon.
+//
+// Portwing's docker client negotiates an API version and prefixes most paths
+// with `/v1.NN`, but hits bare `/version` and `/_ping` during negotiation and
+// health checks, so the handler strips an optional leading version segment
+// before routing. It implements just the endpoints the agent touches:
+//
+// GET /version → daemon version (drives version negotiation)
+// GET /_ping → 200 OK
+// GET /info → DockerRootDir
+// GET /containers/json → JSON array of fake containers
+// GET /containers/{id}/json → container inspect
+// GET /containers/{id}/logs → an 8-byte-framed log chunk (multiplexed)
+// GET /containers/{id}/stats→ one-shot stats snapshot
+// GET /events → long-lived stream emitting a container event
+// every 2s until the client disconnects
+//
+// Anything else returns 404. Logs are silenced unless -log is passed.
+package main
+
+import (
+ "encoding/binary"
+ "encoding/json"
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "net"
+ "net/http"
+ "os"
+ "os/signal"
+ "regexp"
+ "strings"
+ "syscall"
+ "time"
+)
+
+// mockContainer is the shape of one fake container. A slice of these is
+// marshaled once at startup and served as the /containers/json payload; the
+// JSON tags give each field its capitalized key in that payload.
+type mockContainer struct {
+ // ID is serialized under the key "Id" (not "ID").
+ ID string `json:"Id"`
+ // Names holds the container names; the fixtures store them with a leading "/".
+ Names []string `json:"Names"`
+ Image string `json:"Image"`
+ State string `json:"State"`
+ Status string `json:"Status"`
+ Labels map[string]string `json:"Labels"`
+}
+
+// fakeContainers is the fixed inventory the mock reports. It is marshaled once
+// in main for the /containers/json response, and streamEvents cycles through it
+// to pick the subject of each emitted event.
+var fakeContainers = []mockContainer{
+ {ID: "c0000000001", Names: []string{"/traefik"}, Image: "traefik:v3", State: "running", Status: "Up 3 days", Labels: map[string]string{"com.docker.compose.project": "infra"}},
+ {ID: "c0000000002", Names: []string{"/grafana"}, Image: "grafana/grafana:10", State: "running", Status: "Up 3 days", Labels: map[string]string{"com.docker.compose.project": "infra"}},
+ {ID: "c0000000003", Names: []string{"/prometheus"}, Image: "prom/prometheus:v2", State: "running", Status: "Up 2 days", Labels: map[string]string{"com.docker.compose.project": "infra"}},
+ {ID: "c0000000004", Names: []string{"/postgres"}, Image: "postgres:17", State: "running", Status: "Up 5 hours", Labels: map[string]string{"com.docker.compose.project": "db"}},
+ {ID: "c0000000005", Names: []string{"/redis"}, Image: "redis:8", State: "running", Status: "Up 5 hours", Labels: map[string]string{"com.docker.compose.project": "db"}},
+}
+
+// versionPrefix matches a leading Docker API version segment like "/v1.44".
+// The request handler strips it so versioned and bare paths route identically.
+var versionPrefix = regexp.MustCompile(`^/v[0-9]+\.[0-9]+`)
+
+// verbose is set by the -log flag; when true, every request is logged.
+var verbose bool
+
+// main parses flags, binds the unix socket, serves the mock Docker API on it,
+// and blocks until SIGINT/SIGTERM, after which it closes the server and removes
+// the socket file.
+func main() {
+ socket := flag.String("socket", "/tmp/portwing-soak-mock.sock", "unix socket path")
+ flag.BoolVar(&verbose, "log", false, "log every request")
+ flag.Parse()
+
+ // Clear any file already at the socket path before listening; the error is
+ // ignored, so a missing file is fine.
+ _ = os.Remove(*socket)
+ ln, err := net.Listen("unix", *socket)
+ if err != nil {
+ log.Fatalf("listen %s: %v", *socket, err)
+ }
+ // Owner-only: the soak runs portwing as the same user, so it can connect
+ // without the world-writable bit gosec (G302) rightly objects to.
+ if err := os.Chmod(*socket, 0o600); err != nil {
+ log.Fatalf("chmod %s: %v", *socket, err)
+ }
+
+ // Marshal the container list once; every /containers/json response reuses
+ // these bytes.
+ containersPayload, err := json.Marshal(fakeContainers)
+ if err != nil {
+ log.Fatalf("marshal containers: %v", err)
+ }
+
+ mux := http.NewServeMux()
+ mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+ // Drop an optional leading "/vN.NN" so versioned and bare paths share
+ // the routing below.
+ path := versionPrefix.ReplaceAllString(r.URL.Path, "")
+ if verbose {
+ // #nosec G706 -- benchmark-only mock; %q quotes the request fields so
+ // control chars can't forge log lines, and -log is opt-in for debugging.
+ log.Printf("method=%q path=%q (raw=%q)", r.Method, path, r.URL.Path)
+ }
+
+ // Routing looks only at the path (the HTTP method is not checked). Exact
+ // matches come first, so the "/containers/json" list route wins over the
+ // "/containers/…/json" inspect route.
+ switch {
+ case path == "/_ping":
+ w.Header().Set("Api-Version", "1.44")
+ w.Header().Set("Content-Type", "text/plain")
+ _, _ = io.WriteString(w, "OK")
+ case path == "/version":
+ writeJSON(w, map[string]string{"Version": "24.0.0-mock", "ApiVersion": "1.44"})
+ case path == "/info":
+ writeJSON(w, map[string]string{"DockerRootDir": "/var/lib/docker"})
+ case path == "/containers/json":
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write(containersPayload)
+ case path == "/events":
+ streamEvents(w, r)
+ case strings.HasPrefix(path, "/containers/") && strings.HasSuffix(path, "/json"):
+ writeInspect(w, containerID(path, "/json"))
+ case strings.HasPrefix(path, "/containers/") && strings.Contains(path, "/logs"):
+ writeLogs(w)
+ case strings.HasPrefix(path, "/containers/") && strings.Contains(path, "/stats"):
+ writeStats(w)
+ default:
+ http.NotFound(w, r)
+ }
+ })
+
+ srv := &http.Server{Handler: mux, ReadHeaderTimeout: 5 * time.Second}
+ // done is closed when the Serve goroutine returns, so shutdown can wait for it.
+ done := make(chan struct{})
+ go func() {
+ defer close(done)
+ // http.ErrServerClosed is the normal result of srv.Close below; any
+ // other error is fatal.
+ if err := srv.Serve(ln); err != nil && err != http.ErrServerClosed {
+ log.Fatalf("serve: %v", err)
+ }
+ }()
+
+ log.Printf("mockdocker listening on %s", *socket)
+
+ // Block until the process is asked to stop.
+ stop := make(chan os.Signal, 1)
+ signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
+ <-stop
+
+ // Close the server, wait for Serve to return, then remove the socket file.
+ _ = srv.Close()
+ <-done
+ _ = os.Remove(*socket)
+}
+
+func writeJSON(w http.ResponseWriter, v any) {
+ w.Header().Set("Content-Type", "application/json")
+ _ = json.NewEncoder(w).Encode(v)
+}
+
+// containerID extracts the id from a path shaped like "/containers/<id><suffix>"
+// by trimming the "/containers/" prefix and the given suffix. A path lacking
+// either part is simply returned with whatever could be trimmed removed.
+func containerID(path, suffix string) string {
+	return strings.TrimSuffix(strings.TrimPrefix(path, "/containers/"), suffix)
+}
+
+// writeInspect responds with a canned container-inspect document. Only "Id" and
+// "Name" depend on id (Name is id with a leading "/"); every other field is a
+// fixed value.
+func writeInspect(w http.ResponseWriter, id string) {
+	state := map[string]any{"Status": "running", "Running": true, "Pid": 4242}
+	config := map[string]any{
+		"Image":  "nginx:latest",
+		"Env":    []string{"A=1", "B=2"},
+		"Labels": map[string]string{"app": "web"},
+	}
+	doc := map[string]any{
+		"Id":      id,
+		"Name":    "/" + id,
+		"Image":   "nginx:latest",
+		"Created": "2026-01-01T00:00:00Z",
+		"State":   state,
+		"Config":  config,
+		"Mounts":  []any{},
+	}
+	writeJSON(w, doc)
+}
+
+// writeLogs writes a single Docker-multiplexed stdout frame: an 8-byte header
+// (stream byte + 3 pad + big-endian payload length) followed by the payload.
+func writeLogs(w http.ResponseWriter) {
+ payload := []byte("soak log line\n")
+ header := make([]byte, 8)
+ header[0] = 1 // stdout
+ binary.BigEndian.PutUint32(header[4:8], uint32(len(payload)))
+ w.Header().Set("Content-Type", "application/octet-stream")
+ _, _ = w.Write(header)
+ _, _ = w.Write(payload)
+}
+
+// writeStats responds with a fixed stats snapshot containing CPU, memory, and
+// per-interface network counters. The values never change between calls.
+func writeStats(w http.ResponseWriter) {
+	cpu := map[string]any{
+		"cpu_usage": map[string]any{"total_usage": 123456789},
+	}
+	memory := map[string]any{"usage": 33554432, "limit": 2147483648}
+	networks := map[string]any{
+		"eth0": map[string]any{"rx_bytes": 1024, "tx_bytes": 2048},
+	}
+	writeJSON(w, map[string]any{
+		"cpu_stats":    cpu,
+		"memory_stats": memory,
+		"networks":     networks,
+	})
+}
+
+// streamEvents keeps the response open and writes one container "start" event
+// every 2s, cycling through fakeContainers, until the request context is done
+// or a write fails. It answers 500 if the ResponseWriter cannot flush.
+func streamEvents(w http.ResponseWriter, r *http.Request) {
+	flusher, canFlush := w.(http.Flusher)
+	if !canFlush {
+		http.Error(w, "streaming unsupported", http.StatusInternalServerError)
+		return
+	}
+
+	// Send the headers right away so the client sees the stream open before
+	// the first event is due.
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	flusher.Flush()
+
+	tick := time.NewTicker(2 * time.Second)
+	defer tick.Stop()
+
+	out := json.NewEncoder(w)
+	for n := 0; ; n++ {
+		select {
+		case <-r.Context().Done():
+			return
+		case <-tick.C:
+		}
+
+		subject := fakeContainers[n%len(fakeContainers)]
+		attrs := map[string]string{
+			"name":  strings.TrimPrefix(subject.Names[0], "/"),
+			"image": subject.Image,
+		}
+		event := map[string]any{
+			"Type":   "container",
+			"Action": "start",
+			"Actor":  map[string]any{"ID": subject.ID, "Attributes": attrs},
+			"time":   time.Now().Unix(),
+		}
+		if err := out.Encode(event); err != nil {
+			return
+		}
+		// Encode already ends the object with a newline, so this adds a blank
+		// line between consecutive events.
+		_, _ = fmt.Fprint(w, "\n")
+		flusher.Flush()
+	}
+}
diff --git a/internal/adapter/containers_bench_test.go b/internal/adapter/containers_bench_test.go
new file mode 100644
index 0000000..c16341a
--- /dev/null
+++ b/internal/adapter/containers_bench_test.go
@@ -0,0 +1,28 @@
+package adapter
+
+import "testing"
+
+// BenchmarkParseImageRef measures Docker image-reference parsing, which runs
+// once per container on every inventory refresh. It's a fuzz target and a pure
+// string-splitting hot path, so we track it across the common reference shapes.
+func BenchmarkParseImageRef(b *testing.B) {
+ cases := []struct {
+ name string
+ ref string
+ }{
+ {"bare_name", "nginx"},
+ {"name_tag", "nginx:latest"},
+ {"registry_org_tag", "registry.example.com/org/image:1.2.3"},
+ {"ghcr", "ghcr.io/owner/repo:v1.2"},
+ {"registry_port", "localhost:5000/team/app:dev"},
+ {"digest", "ubuntu@sha256:cafebabe"},
+ }
+ for _, c := range cases {
+ b.Run(c.name, func(b *testing.B) {
+ b.ReportAllocs()
+ for b.Loop() {
+ _, _, _ = ParseImageRef(c.ref)
+ }
+ })
+ }
+}
diff --git a/internal/adapter/drydock/labels_bench_test.go b/internal/adapter/drydock/labels_bench_test.go
new file mode 100644
index 0000000..eaf3d7c
--- /dev/null
+++ b/internal/adapter/drydock/labels_bench_test.go
@@ -0,0 +1,41 @@
+package drydock
+
+import "testing"
+
+// BenchmarkParseLabels measures extraction of the Drydock-specific labels from a
+// container's label map, which runs once per container on every sync. The "full"
+// case exercises a label set padded with unrelated keys, the realistic shape on
+// a busy host.
+func BenchmarkParseLabels(b *testing.B) {
+ empty := map[string]string{}
+
+ full := map[string]string{
+ LabelDisplayName: "web frontend",
+ LabelDisplayIcon: "mdi:web",
+ LabelTagInclude: "^v\\d+",
+ LabelTagExclude: "rc|beta",
+ LabelTagTransform: "$1",
+ LabelWatch: "true",
+ // Unrelated labels a real container carries alongside the dd.* ones.
+ "com.docker.compose.project": "portwing",
+ "com.docker.compose.service": "web",
+ "org.opencontainers.version": "1.2.3",
+ "maintainer": "ops@example.com",
+ }
+
+ cases := []struct {
+ name string
+ labels map[string]string
+ }{
+ {"empty", empty},
+ {"full", full},
+ }
+ for _, c := range cases {
+ b.Run(c.name, func(b *testing.B) {
+ b.ReportAllocs()
+ for b.Loop() {
+ _ = ParseLabels(c.labels)
+ }
+ })
+ }
+}
diff --git a/internal/mcp/mcp_bench_test.go b/internal/mcp/mcp_bench_test.go
new file mode 100644
index 0000000..61d20fd
--- /dev/null
+++ b/internal/mcp/mcp_bench_test.go
@@ -0,0 +1,38 @@
+package mcp
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "strings"
+ "testing"
+)
+
+// BenchmarkMCPHandler measures the JSON-RPC dispatch hot path for the methods
+// that don't touch Docker — the envelope decode, version check, method switch,
+// and response encode. A nil docker client is safe here for the same reason it
+// is in FuzzMCPHandler: none of these methods reach it.
+func BenchmarkMCPHandler(b *testing.B) {
+ h := &Handler{docker: nil, collector: nil}
+
+ cases := []struct {
+ name string
+ body string
+ }{
+ {"initialize", `{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}`},
+ {"tools_list", `{"jsonrpc":"2.0","id":2,"method":"tools/list"}`},
+ {"ping", `{"jsonrpc":"2.0","id":3,"method":"ping"}`},
+ {"parse_error", `not json at all`},
+ {"method_not_found", `{"jsonrpc":"2.0","id":4,"method":"unknown/method"}`},
+ }
+
+ for _, c := range cases {
+ b.Run(c.name, func(b *testing.B) {
+ b.ReportAllocs()
+ for b.Loop() {
+ req := httptest.NewRequest(http.MethodPost, "/_portwing/mcp", strings.NewReader(c.body))
+ req.Header.Set("Content-Type", "application/json")
+ h.ServeHTTP(httptest.NewRecorder(), req)
+ }
+ })
+ }
+}
diff --git a/internal/server/argon2_bench_test.go b/internal/server/argon2_bench_test.go
new file mode 100644
index 0000000..03183fe
--- /dev/null
+++ b/internal/server/argon2_bench_test.go
@@ -0,0 +1,117 @@
+package server
+
+import (
+	"sync"
+	"testing"
+)
+
+// benchPHC is a real OWASP-parameter Argon2id PHC string (m=19456,t=2,p=1), so
+// the parse/verify benchmarks below run against the genuine production hash
+// shape rather than a hand-rolled constant.
+//
+// It is produced lazily, on the first mustBenchPHC call, instead of in a
+// package-level initializer. A package-level initializer runs whenever the
+// package's test binary starts, so it would charge the HashToken call to every
+// `go test` run of this package, including runs that execute no benchmark.
+var (
+	benchPHCOnce sync.Once
+	benchPHC     string
+	benchPHCErr  error
+)
+
+// mustBenchPHC returns the shared benchmark PHC string, generating it exactly
+// once per process. It fails the calling benchmark if HashToken returned an
+// error.
+func mustBenchPHC(b *testing.B) string {
+	b.Helper()
+	benchPHCOnce.Do(func() {
+		benchPHC, benchPHCErr = HashToken("correct-horse-battery-staple")
+	})
+	if benchPHCErr != nil {
+		b.Fatalf("HashToken: %v", benchPHCErr)
+	}
+	return benchPHC
+}
+
+// BenchmarkParsePHC times ParsePHC on a valid PHC string and on two strings
+// labelled as rejects: one with a non-argon2id prefix and one malformed. The
+// original note describes this as a cheap startup-path decode that is also a
+// fuzz target, tracked here for regressions.
+func BenchmarkParsePHC(b *testing.B) {
+	inputs := []struct{ label, phc string }{
+		{label: "valid", phc: mustBenchPHC(b)},
+		{label: "wrong_prefix", phc: "$argon2i$v=19$m=19456,t=2,p=1$c2FsdHNhbHQ$aGFzaGhhc2g"},
+		{label: "malformed", phc: "$argon2id$v=19$m=19456,t=2$short"},
+	}
+	for _, in := range inputs {
+		b.Run(in.label, func(sb *testing.B) {
+			sb.ReportAllocs()
+			for sb.Loop() {
+				_, _ = ParsePHC(in.phc)
+			}
+		})
+	}
+}
+
+// BenchmarkArgon2idParamsVerify times params.Verify — described by the original
+// note as the full, deliberately expensive Argon2id derivation taken on the
+// first request and on every failed attempt — for both the matching and a
+// non-matching password.
+func BenchmarkArgon2idParamsVerify(b *testing.B) {
+	params, parseErr := ParsePHC(mustBenchPHC(b))
+	if parseErr != nil {
+		b.Fatalf("ParsePHC: %v", parseErr)
+	}
+
+	attempts := []struct{ label, password string }{
+		{label: "correct", password: "correct-horse-battery-staple"},
+		{label: "wrong", password: "wrong-horse-battery-staple"},
+	}
+	for _, at := range attempts {
+		b.Run(at.label, func(sb *testing.B) {
+			sb.ReportAllocs()
+			for sb.Loop() {
+				_ = params.Verify(at.password)
+			}
+		})
+	}
+}
+
+// BenchmarkArgon2VerifierVerify measures the per-request verifier as the server
+// actually uses it: a warmed verifier compares only the SHA-256 of the token
+// (the flat-cost steady state), while a rejected token always falls through to
+// the full Argon2id derivation since wrong tokens never populate the cache.
+func BenchmarkArgon2VerifierVerify(b *testing.B) {
+ params, err := ParsePHC(mustBenchPHC(b))
+ if err != nil {
+ b.Fatalf("ParsePHC: %v", err)
+ }
+
+ b.Run("warm_cache_hit", func(b *testing.B) {
+ v := newArgon2Verifier(params)
+ // Prime the SHA-256 success cache with one real verification.
+ if !v.Verify("correct-horse-battery-staple") {
+ b.Fatal("priming verification failed")
+ }
+ b.ReportAllocs()
+ for b.Loop() {
+ _ = v.Verify("correct-horse-battery-staple")
+ }
+ })
+
+ b.Run("reject", func(b *testing.B) {
+ v := newArgon2Verifier(params)
+ b.ReportAllocs()
+ for b.Loop() {
+ _ = v.Verify("wrong-horse-battery-staple")
+ }
+ })
+}
+
+// BenchmarkRawTokenVerifierVerify times rawTokenVerifier.Verify for a matching
+// and a non-matching token. Per the original note this is the plain-text,
+// constant-time compare used when TOKEN (not TOKEN_HASH) is configured.
+func BenchmarkRawTokenVerifierVerify(b *testing.B) {
+	verifier := &rawTokenVerifier{token: "correct-horse-battery-staple"}
+
+	attempts := []struct{ label, presented string }{
+		{label: "match", presented: "correct-horse-battery-staple"},
+		{label: "mismatch", presented: "wrong-horse-battery-staple"},
+	}
+	for _, at := range attempts {
+		b.Run(at.label, func(sb *testing.B) {
+			sb.ReportAllocs()
+			for sb.Loop() {
+				_ = verifier.Verify(at.presented)
+			}
+		})
+	}
+}
diff --git a/internal/server/middleware_bench_test.go b/internal/server/middleware_bench_test.go
new file mode 100644
index 0000000..cb00dc0
--- /dev/null
+++ b/internal/server/middleware_bench_test.go
@@ -0,0 +1,193 @@
+package server
+
+import (
+ "io"
+ "log/slog"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+
+ "github.com/codeswhat/portwing/internal/audit"
+)
+
+// silenceSlog swaps the process-wide default slog logger for one that writes to
+// io.Discard and registers a cleanup that puts the previous logger back when the
+// benchmark finishes. It keeps warning logs emitted on the rejection path from
+// flooding CI stderr.
+func silenceSlog(b *testing.B) {
+	b.Helper()
+	original := slog.Default()
+	b.Cleanup(func() { slog.SetDefault(original) })
+
+	discard := slog.NewTextHandler(io.Discard, nil)
+	slog.SetDefault(slog.New(discard))
+}
+
+// noopAuditor builds an audit logger from audit.New("") — which, per the
+// original note, yields a disabled logger that writes nowhere — and registers
+// its cleanup function with b. It fails the benchmark if construction errors.
+// Using it keeps log I/O out of the middleware measurements.
+func noopAuditor(b *testing.B) *audit.Logger {
+	b.Helper()
+	logger, release, newErr := audit.New("")
+	if newErr != nil {
+		b.Fatalf("audit.New: %v", newErr)
+	}
+	b.Cleanup(release)
+	return logger
+}
+
+// BenchmarkAuthMiddleware measures the full per-request middleware cost —
+// rate-limit lookup, token extraction, verification, and the statusRecorder
+// wrap — for the authorized, rejected, and no-auth-configured paths. This is the
+// tax every proxied request pays, so it's the most load-bearing benchmark here.
+func BenchmarkAuthMiddleware(b *testing.B) {
+ silenceSlog(b)
+ auditor := noopAuditor(b)
+ next := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+ w.WriteHeader(http.StatusOK)
+ })
+
+ cases := []struct {
+ name string
+ verifier tokenVerifier
+ token string
+ }{
+ {"authorized_raw", &rawTokenVerifier{token: "secret"}, "secret"},
+ {"rejected_raw", &rawTokenVerifier{token: "secret"}, "wrong"},
+ {"passthrough_no_auth", nil, ""},
+ }
+
+ for _, c := range cases {
+ b.Run(c.name, func(b *testing.B) {
+ rl := NewRateLimiter()
+ h := rl.AuthMiddleware(c.verifier, auditor, next)
+ req := httptest.NewRequest(http.MethodGet, "/api/v1/containers", nil)
+ req.RemoteAddr = "192.0.2.10:40000"
+ if c.token != "" {
+ req.Header.Set("Authorization", "Bearer "+c.token)
+ }
+ b.ReportAllocs()
+ for b.Loop() {
+ h.ServeHTTP(httptest.NewRecorder(), req)
+ }
+ })
+ }
+}
+
+// BenchmarkClientIP times RateLimiter.clientIP in three situations: a limiter
+// with no trusted proxies, a limiter with trusted proxies receiving a
+// three-entry X-Forwarded-For chain from a trusted peer, and that same limiter
+// receiving a forwarded header from a peer outside the trusted ranges. The
+// original note calls the trusted-proxy chain walk the most expensive shape.
+func BenchmarkClientIP(b *testing.B) {
+	plain := NewRateLimiter()
+
+	trustedNets, parseErr := ParseTrustedProxies([]string{"10.0.0.0/8", "192.0.2.0/24"})
+	if parseErr != nil {
+		b.Fatalf("ParseTrustedProxies: %v", parseErr)
+	}
+	behindProxy := NewRateLimiter()
+	behindProxy.SetTrustedProxies(trustedNets)
+
+	// request builds a GET / from the given peer address; a non-empty xff is
+	// attached as the X-Forwarded-For header.
+	request := func(peer, xff string) *http.Request {
+		r := httptest.NewRequest(http.MethodGet, "/", nil)
+		r.RemoteAddr = peer
+		if xff != "" {
+			r.Header.Set("X-Forwarded-For", xff)
+		}
+		return r
+	}
+
+	b.Run("direct_no_proxies", func(sb *testing.B) {
+		req := request("203.0.113.5:51000", "")
+		sb.ReportAllocs()
+		for sb.Loop() {
+			_ = plain.clientIP(req)
+		}
+	})
+
+	b.Run("trusted_proxy_xff_chain", func(sb *testing.B) {
+		req := request("192.0.2.1:51000", "203.0.113.7, 10.1.2.3, 192.0.2.9")
+		sb.ReportAllocs()
+		for sb.Loop() {
+			_ = behindProxy.clientIP(req)
+		}
+	})
+
+	b.Run("untrusted_peer", func(sb *testing.B) {
+		req := request("203.0.113.5:51000", "8.8.8.8")
+		sb.ReportAllocs()
+		for sb.Loop() {
+			_ = behindProxy.clientIP(req)
+		}
+	})
+}
+
+// BenchmarkParseTrustedProxies measures the startup parse of the TRUSTED_PROXIES
+// CIDR list (also a fuzz target).
+func BenchmarkParseTrustedProxies(b *testing.B) {
+ cases := []struct {
+ name string
+ entries []string
+ }{
+ {"cidrs", []string{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}},
+ {"bare_ips", []string{"203.0.113.1", "203.0.113.2", "2001:db8::1"}},
+ }
+ for _, c := range cases {
+ b.Run(c.name, func(b *testing.B) {
+ b.ReportAllocs()
+ for b.Loop() {
+ if _, err := ParseTrustedProxies(c.entries); err != nil {
+ b.Fatal(err)
+ }
+ }
+ })
+ }
+}
+
+// BenchmarkAgentToken measures token header extraction across the three accepted
+// schemes, in the order the middleware probes them.
+func BenchmarkAgentToken(b *testing.B) {
+ cases := []struct {
+ name string
+ header string
+ value string
+ }{
+ {"bearer", "Authorization", "Bearer secret-token-value"},
+ {"portwing_header", headerPortwingToken, "secret-token-value"},
+ {"drydock_secret", headerDrydockAgentSecret, "secret-token-value"},
+ }
+ for _, c := range cases {
+ b.Run(c.name, func(b *testing.B) {
+ req := httptest.NewRequest(http.MethodGet, "/", nil)
+ req.Header.Set(c.header, c.value)
+ b.ReportAllocs()
+ for b.Loop() {
+ _ = agentToken(req)
+ }
+ })
+ }
+}
+
+// BenchmarkRateLimiter measures the two hot rate-limiter operations under both
+// sequential and concurrent access, since every request takes the mutex once for
+// the IsRateLimited check and rejected requests take it again to record.
+func BenchmarkRateLimiter(b *testing.B) {
+ b.Run("is_rate_limited", func(b *testing.B) {
+ rl := NewRateLimiter()
+ rl.RecordFailure("203.0.113.5")
+ b.ReportAllocs()
+ for b.Loop() {
+ _ = rl.IsRateLimited("203.0.113.5")
+ }
+ })
+
+ b.Run("record_failure", func(b *testing.B) {
+ rl := NewRateLimiter()
+ b.ReportAllocs()
+ for b.Loop() {
+ rl.RecordFailure("203.0.113.5")
+ }
+ })
+
+ b.Run("is_rate_limited_parallel", func(b *testing.B) {
+ rl := NewRateLimiter()
+ rl.RecordFailure("203.0.113.5")
+ b.ReportAllocs()
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ _ = rl.IsRateLimited("203.0.113.5")
+ }
+ })
+ })
+}
diff --git a/scripts/soak.sh b/scripts/soak.sh
new file mode 100755
index 0000000..edb36d6
--- /dev/null
+++ b/scripts/soak.sh
@@ -0,0 +1,161 @@
+#!/usr/bin/env bash
+#
+# soak.sh — RSS + thread-drift soak for the Portwing agent.
+#
+# Stands up the long-lived topology the unit/integration tiers don't exercise:
+#
+# loadgen (HTTP) ──▶ portwing (generic adapter) ──▶ mockdocker (unix socket)
+#
+# then drives a sustained mixed load — cached-inventory reads, version/info,
+# a raw Docker proxy read, and (the leak stressor) a stream of SSE subscribers
+# that connect, hold, and disconnect — for the configured duration. It samples
+# the agent's resident set over the run and fails if working-set growth from
+# the post-warmup baseline exceeds the threshold. That's the "zero RSS/goroutine
+# growth over a long soak" signal you can't get from a short test.
+#
+# GitHub-hosted runners cap a job at 6h, so CI soaks for 4h by default — long
+# enough that a per-request allocation/goroutine leak shows up as multi-MiB RSS
+# growth well above the 64 MiB threshold. A self-hosted runner can push the
+# duration input toward the 24h target.
+#
+# Usage:
+# scripts/soak.sh [--duration 4h] [--concurrency 20] \
+# [--rss-growth-threshold-bytes 67108864] \
+# [--warmup 30s] [--port 38080] [--dry-run]
+#
+set -euo pipefail
+
+DURATION="4h"
+CONCURRENCY="20"
+RSS_THRESHOLD="67108864" # 64 MiB
+WARMUP="30s"
+PORT="38080"
+DRY_RUN="0"
+
+die() { echo "soak: $*" >&2; exit 2; }
+
+while [ $# -gt 0 ]; do
+ case "$1" in
+ --duration) DURATION="${2:?}"; shift 2;;
+ --concurrency) CONCURRENCY="${2:?}"; shift 2;;
+ --rss-growth-threshold-bytes) RSS_THRESHOLD="${2:?}"; shift 2;;
+ --warmup) WARMUP="${2:?}"; shift 2;;
+ --port) PORT="${2:?}"; shift 2;;
+ --dry-run) DRY_RUN="1"; shift;;
+ -h|--help) sed -n '2,30p' "$0"; exit 0;;
+ *) die "unknown argument: $1";;
+ esac
+done
+
+# Validate the option surface (the workflow dry-runs this before a real soak).
+# Durations are one unit-suffixed integer with an optional second, smaller-unit
+# component (e.g. 4h, 90m, 1h30m); the counts are plain non-negative integers.
+[[ "$DURATION" =~ ^[0-9]+(h|m|s)([0-9]+(m|s))?$ ]] || die "invalid --duration: $DURATION"
+[[ "$WARMUP" =~ ^[0-9]+(h|m|s)([0-9]+(m|s))?$ ]] || die "invalid --warmup: $WARMUP"
+[[ "$CONCURRENCY" =~ ^[0-9]+$ ]] || die "invalid --concurrency: $CONCURRENCY"
+[[ "$RSS_THRESHOLD" =~ ^[0-9]+$ ]] || die "invalid --rss-growth-threshold-bytes: $RSS_THRESHOLD"
+# A port must be numeric AND inside the TCP range 1-65535, so an unusable value
+# is rejected here (and by --dry-run) rather than surfacing later as a startup
+# failure. 10# forces base 10 so a leading zero is not parsed as octal.
+if ! [[ "$PORT" =~ ^[0-9]{1,5}$ ]] || (( 10#$PORT < 1 || 10#$PORT > 65535 )); then
+  die "invalid --port: $PORT"
+fi
+
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+BINDIR="$(mktemp -d)"
+RUNDIR="$(mktemp -d)"
+SOCK="$RUNDIR/mock.sock"
+TOKEN="soak-token"
+BASE="http://127.0.0.1:$PORT"
+
+cleanup() {
+ [ -n "${SAMPLER_PID:-}" ] && kill "$SAMPLER_PID" 2>/dev/null || true
+ [ -n "${PW_PID:-}" ] && kill "$PW_PID" 2>/dev/null || true
+ [ -n "${MOCK_PID:-}" ] && kill "$MOCK_PID" 2>/dev/null || true
+ rm -rf "$BINDIR" "$RUNDIR" 2>/dev/null || true
+}
+trap cleanup EXIT
+
+echo "soak: building mockdocker, loadgen, portwing…"
+( cd "$ROOT" && go build -o "$BINDIR/mockdocker" ./benchmarks/cmd/mockdocker )
+( cd "$ROOT" && go build -o "$BINDIR/loadgen" ./benchmarks/cmd/loadgen )
+( cd "$ROOT" && go build -o "$BINDIR/portwing" ./cmd/portwing )
+
+echo "soak: resolved → duration=$DURATION concurrency=$CONCURRENCY warmup=$WARMUP port=$PORT threshold=${RSS_THRESHOLD}B"
+
+if [ "$DRY_RUN" = "1" ]; then
+ echo "soak: --dry-run OK (binaries build, parameters valid); not running the soak."
+ exit 0
+fi
+
+# rss_kb PID → resident set in KiB (portable: ps works on Linux + macOS).
+# Prints nothing — and still returns 0 — when the process is gone. Without the
+# `|| true`, a failing ps under `set -e` + pipefail makes `VAR="$(rss_kb …)"`
+# abort the whole script on the spot, so a caller's empty-output check (and its
+# diagnostic) would never run.
+rss_kb() { { ps -o rss= -p "$1" 2>/dev/null || true; } | tr -d ' '; }
+
+# Start the mock Docker upstream in the background and wait (50 × 0.1s) for its
+# unix socket to appear.
+echo "soak: starting mockdocker on $SOCK"
+"$BINDIR/mockdocker" -socket "$SOCK" & MOCK_PID=$!
+for _ in $(seq 1 50); do [ -S "$SOCK" ] && break; sleep 0.1; done
+[ -S "$SOCK" ] || die "mockdocker socket never appeared"
+
+# Start portwing in the background, pointed at the mock socket and bound to
+# loopback; all of its configuration is passed through the environment.
+echo "soak: starting portwing (generic adapter) on $BASE"
+TOKEN="$TOKEN" ADAPTER=generic DOCKER_SOCKET="$SOCK" PORT="$PORT" \
+ BIND_ADDRESS=127.0.0.1 LOG_LEVEL=warn REQUEST_TIMEOUT=10 NO_COLOR=1 \
+ "$BINDIR/portwing" & PW_PID=$!
+
+# Poll the health endpoint (60 × 0.5s). Bail out early if the process has
+# already exited instead of waiting out the full timeout.
+ok=""
+for _ in $(seq 1 60); do
+ if curl -fsS "$BASE/_portwing/health" >/dev/null 2>&1; then ok=1; break; fi
+ kill -0 "$PW_PID" 2>/dev/null || die "portwing exited during startup"
+ sleep 0.5
+done
+[ -n "$ok" ] || die "portwing health never went green"
+
+# Warmup load against the inventory endpoint. Its output is discarded and its
+# exit status ignored; it only runs so the baseline is taken after first use.
+echo "soak: warmup ${WARMUP}…"
+"$BINDIR/loadgen" -base "$BASE" -auth "$TOKEN" -path /api/v1/containers \
+ -concurrency "$CONCURRENCY" -duration "$WARMUP" -scenario warmup >/dev/null 2>&1 || true
+
+# Pause briefly after the warmup, then record the baseline that final RSS is
+# compared against.
+sleep 3
+BASELINE_KB="$(rss_kb "$PW_PID")"
+[ -n "$BASELINE_KB" ] || die "could not read portwing RSS (process gone?)"
+echo "soak: post-warmup baseline RSS = ${BASELINE_KB} KiB"
+
+# Background RSS sampler.
+# Logs portwing's RSS once a minute with the elapsed time, and stops by itself
+# once the RSS can no longer be read.
+(
+ start=$(date +%s)
+ while sleep 60; do
+ now=$(date +%s); cur="$(rss_kb "$PW_PID")"
+ [ -n "$cur" ] || break
+ echo "soak: [+$((now - start))s] rss=${cur} KiB"
+ done
+) & SAMPLER_PID=$!
+disown "$SAMPLER_PID" 2>/dev/null || true # silence job-control "Terminated" notice on kill
+
+# Half of the configured concurrency, but never less than one worker.
+half=$(( CONCURRENCY / 2 )); [ "$half" -lt 1 ] && half=1
+
+# Run five loadgen scenarios concurrently for the full duration, each appending
+# its result to the shared summary file: inventory reads, version, a raw proxied
+# Docker path, unauthenticated health checks, and SSE subscribers that hold each
+# connection for 1s before disconnecting.
+echo "soak: driving load for $DURATION (concurrency=$CONCURRENCY)…"
+SUMMARY="$RUNDIR/summary.jsonl"
+: > "$SUMMARY"
+pids=()
+"$BINDIR/loadgen" -base "$BASE" -auth "$TOKEN" -path /api/v1/containers -concurrency "$CONCURRENCY" -duration "$DURATION" -scenario inventory >>"$SUMMARY" & pids+=($!)
+"$BINDIR/loadgen" -base "$BASE" -auth "$TOKEN" -path /api/v1/version -concurrency "$half" -duration "$DURATION" -scenario version >>"$SUMMARY" & pids+=($!)
+"$BINDIR/loadgen" -base "$BASE" -auth "$TOKEN" -path /v1.44/containers/json -concurrency "$half" -duration "$DURATION" -scenario proxy >>"$SUMMARY" & pids+=($!)
+"$BINDIR/loadgen" -base "$BASE" -path /_portwing/health -concurrency 5 -duration "$DURATION" -scenario health >>"$SUMMARY" & pids+=($!)
+"$BINDIR/loadgen" -base "$BASE" -auth "$TOKEN" -path /api/v1/events -mode sse -sse-hold 1s -concurrency "$half" -duration "$DURATION" -scenario sse-churn >>"$SUMMARY" & pids+=($!)
+
+# Wait for every scenario; remember whether any of them exited non-zero.
+fail=0
+for p in "${pids[@]}"; do wait "$p" || fail=1; done
+
+# Stop the sampler and clear its PID so the EXIT handler does not signal it again.
+kill "$SAMPLER_PID" 2>/dev/null || true; SAMPLER_PID=""
+
+# Pause briefly once the load has stopped, then take the final reading.
+sleep 5
+FINAL_KB="$(rss_kb "$PW_PID")"
+[ -n "$FINAL_KB" ] || die "portwing exited during the soak"
+
+# ps reports KiB; convert the delta to bytes to match the threshold's unit.
+# The result is negative when RSS shrank.
+GROWTH_BYTES=$(( (FINAL_KB - BASELINE_KB) * 1024 ))
+
+echo ""
+echo "soak: ───────── per-scenario results ─────────"
+cat "$SUMMARY"
+echo "soak: ─────────────────────────────────────────"
+echo "soak: baseline=${BASELINE_KB} KiB final=${FINAL_KB} KiB growth=${GROWTH_BYTES} B threshold=${RSS_THRESHOLD} B"
+
+# Verdict: a failed scenario exits 2 (via die); RSS growth over budget exits 1.
+if [ "$fail" -ne 0 ]; then
+ die "a loadgen scenario exited non-zero"
+fi
+if [ "$GROWTH_BYTES" -gt "$RSS_THRESHOLD" ]; then
+ echo "soak: FAIL — RSS grew ${GROWTH_BYTES} B, over the ${RSS_THRESHOLD} B budget" >&2
+ exit 1
+fi
+echo "soak: PASS — RSS growth within budget"