diff --git a/README.md b/README.md
index e7e56c2..d343aef 100644
--- a/README.md
+++ b/README.md
@@ -253,6 +253,70 @@ mkdir -p ~/.skvm/profiles
 cp -R skvm-data/profiles/. ~/.skvm/profiles/
 ```
 
+## Sandbox (Docker)
+
+Pass `--sandbox` to any skvm command to run the entire skvm process inside an
+ephemeral Docker container. Default behaviour is unchanged — without
+`--sandbox`, skvm runs on the host as before.
+
+```bash
+skvm run --sandbox --skill=./my-skill --task=./task.json
+skvm bench --sandbox --suite=...
+skvm jit-optimize --sandbox --skill=./foo --target-model=openrouter/...
+```
+
+### Image
+
+The launcher pulls `ghcr.io/sjtu-ipads/skvm-sandbox:<your-skvm-version>` from
+GitHub Container Registry. If the pull fails (offline, no auth, image not yet
+published for your version), build the image locally. The Dockerfile copies
+`dist/skvm` as-is, so the binary must be built for Linux x86_64:
+
+```bash
+bun run build:binary
+docker build -f docker/skvm-sandbox.Dockerfile \
+  -t ghcr.io/sjtu-ipads/skvm-sandbox:$(bun run skvm --version) .
+```
+
+### Cleaning up leaked containers
+
+The launcher reaps containers automatically on the next invocation, but you
+can force-clean any time:
+
+```bash
+docker ps -a --filter label=skvm-sandbox=1 -q | xargs -r docker rm -f
+```
+
+### Mounts
+
+Up to three host paths are bind-mounted into the container (the dataset mount only when `$SKVM_DATA_DIR` is set):
+
+| Host | Inner | Mode |
+| --- | --- | --- |
+| `$(pwd)` | `/workspace` (container `WORKDIR`) | rw |
+| `$SKVM_CACHE` (default `~/.skvm`) | `/skvm-cache` | rw |
+| `$SKVM_DATA_DIR` (if set) | `/skvm-data` | ro |
+
+Path-shaped CLI flags whose values fall outside these roots get a dynamic
+per-flag mount under `/extra/`. The launcher rewrites the value to the
+inner path automatically.
+
+### Making sandbox the default
+
+Set `defaults.sandbox = true` in `~/.skvm/skvm.config.json` (or via
+`skvm config init`). With the default on, every command runs in sandbox
+unless you pass `--sandbox=false`.
+
+### Limits
+
+Default `--cap-drop=ALL`, `--security-opt no-new-privileges`,
+`--network=bridge`, `--memory=2g`, `--cpus=2`, `--pids-limit=512`. Override
+any of these in `sandbox.docker.*` in `skvm.config.json`.
+
+`--sandbox` is incompatible with `native` adapter mode (which imports host
+credentials by design) and with `skvm config init|show|doctor` (which
+manage host-side state).
+
 ## Learn more
 
 - **[docs/usage.md](docs/usage.md)** — full command reference: `profile`, `aot-compile`, `run`, `bench`, `jit-optimize`, `proposals`, and more
diff --git a/docker/skvm-sandbox.Dockerfile b/docker/skvm-sandbox.Dockerfile
new file mode 100644
index 0000000..f7504fe
--- /dev/null
+++ b/docker/skvm-sandbox.Dockerfile
@@ -0,0 +1,48 @@
+# syntax=docker/dockerfile:1.7
+FROM ubuntu:24.04
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    LANG=C.UTF-8 \
+    SKVM_IN_SANDBOX=1
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates curl git python3 python3-pip nodejs npm jq unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Bun
+RUN curl -fsSL https://bun.sh/install | bash \
+    && mv /root/.bun/bin/bun /usr/local/bin/bun \
+    && chmod +x /usr/local/bin/bun
+
+# opencode — not published on npm; installed from GitHub release binary.
+# Pinned to v1.4.3 (anomalyco/opencode). Matches skvm install/opencode-version.json.
+# SHA-256 verified for linux-x64 asset; bump deliberately and update the hash.
+ARG OPENCODE_VERSION=v1.4.3
+ARG OPENCODE_SHA256=34d503ebb029853293be6fd4d441bbb2dbb03919bfa4525e88b1ca55d68f3e17
+RUN set -e \
+    && curl -fsSL \
+        "https://github.com/anomalyco/opencode/releases/download/${OPENCODE_VERSION}/opencode-linux-x64.tar.gz" \
+        -o /tmp/opencode.tar.gz \
+    && echo "${OPENCODE_SHA256}  /tmp/opencode.tar.gz" | sha256sum -c - \
+    && tar -xzf /tmp/opencode.tar.gz -C /tmp \
+    && mv /tmp/opencode /usr/local/bin/opencode \
+    && chmod +x /usr/local/bin/opencode \
+    && rm /tmp/opencode.tar.gz
+
+# @anthropic-ai/claude-code — published on npm. Pin at known version; bump deliberately.
+RUN npm install -g @anthropic-ai/claude-code@2.1.152
+
+# pi / hermes / openclaw are not installed yet; follow-up changes will add
+# them once their install paths are settled (TODO). For now bare-agent +
+# opencode + claude-code is the minimum useful image.
+
+# Baked skvm binary. Build host-side with `bun run build:binary` against the
+# matching skvm version, then copy. (The Dockerfile expects dist/skvm to be a
+# Linux x86_64 binary — cross-compile on the host before docker build.)
+COPY dist/skvm /usr/local/bin/skvm
+RUN chmod +x /usr/local/bin/skvm
+
+WORKDIR /workspace
+
+# Do not bake a USER; the launcher passes -u host-uid:host-gid at run time so
+# bind-mounted writes are owned by the invoking user.
diff --git a/src/cli-config/index.ts b/src/cli-config/index.ts
index 3876027..433f244 100644
--- a/src/cli-config/index.ts
+++ b/src/cli-config/index.ts
@@ -16,6 +16,9 @@ import { spawnSync } from "node:child_process"
 import path from "node:path"
 import { stdin } from "node:process"
 
+import pkgJson from "../../package.json" with { type: "json" }
+import { resolveImageRef } from "../launcher/image.ts"
+
 import { checkbox, confirm, input, password, select } from "@inquirer/prompts"
 import { createPrompt, isEnterKey, useKeypress, useState } from "@inquirer/core"
 
@@ -37,6 +40,8 @@ import {
   getAdapterRepoDir,
   getAdapterSettings,
   getDefaultAdapterConfigMode,
+  getSandboxConfig,
+  getDefaultSandboxMode,
   detectLegacyHeadlessFields,
   invalidateConfigCache,
   resolveConfigWritePath,
@@ -80,7 +85,7 @@ interface AdapterDraft {
 interface ConfigDraft {
   adapters: Partial<Record<AdapterName, AdapterDraft>>
   providers: { routes: RouteDraft[] }
-  defaults?: { adapterConfigMode?: AdapterConfigMode }
+  defaults?: { adapterConfigMode?: AdapterConfigMode; sandbox?: boolean }
   /**
    * Preserved as an opaque passthrough on re-init — the wizard doesn't
    * configure these fields (credentials and endpoints come from
@@ -231,6 +236,23 @@ async function runShow(): Promise<void> {
   const defMode = getDefaultAdapterConfigMode() ?? "(unset → managed)"
   printRow("Adapter mode", String(defMode), "defaults.adapterConfigMode")
 
+  console.log(c.bold("\nSandbox (Docker):"))
+  try {
+    const sandboxSlice = getSandboxConfig()
+    const sandboxDefaultsOn = getDefaultSandboxMode()
+    console.log(`  Default for new invocations: ${sandboxDefaultsOn ? c.green("on") : c.dim("off")}`)
+    console.log(`  Image:    ${sandboxSlice.docker.image ?? c.dim("(built-in default)")}`)
+    console.log(`  Network:  ${sandboxSlice.docker.network}`)
+    console.log(`  Resources: memory=${sandboxSlice.docker.memory}  cpus=${sandboxSlice.docker.cpus}  pids=${sandboxSlice.docker.pidsLimit}`)
+    const xm = sandboxSlice.docker.extraMounts
+    if (xm.length > 0) {
+      console.log(`  Extra mounts:`)
+      for (const m of xm) console.log(`    ${m.host} → ${m.inner} (${m.mode})`)
+    }
+  } catch (e) {
+    console.log(`  ${c.red("✗")} could not parse: ${String(e)}`)
+  }
+
   console.log(c.bold("\nAdapters"))
   const labelW = Math.max(...ALL_ADAPTERS.map(a => a.length))
   for (const a of ALL_ADAPTERS) {
@@ -409,6 +431,7 @@ async function runInit(): Promise<void> {
       if (action.section === "providers") await stepProviders(draft)
       else if (action.section === "mode") await stepDefaultMode(draft)
       else if (action.section === "adapters") await stepAdapters(draft)
+      else if (action.section === "sandbox") await stepSandbox(draft)
     }
 
     tuiClear()
@@ -533,9 +556,14 @@ function loadExistingDraft(): ConfigDraft {
   }
   if (raw.defaults && typeof raw.defaults === "object") {
     const d = raw.defaults as Record<string, unknown>
+    const restored: ConfigDraft["defaults"] = {}
     if (d.adapterConfigMode === "native" || d.adapterConfigMode === "managed") {
-      draft.defaults = { adapterConfigMode: d.adapterConfigMode }
+      restored.adapterConfigMode = d.adapterConfigMode
+    }
+    if (typeof d.sandbox === "boolean") {
+      restored.sandbox = d.sandbox
     }
+    if (Object.keys(restored).length > 0) draft.defaults = restored
   }
   if (raw.providers && typeof raw.providers === "object") {
     const routes = (raw.providers as { routes?: unknown }).routes
@@ -1066,9 +1094,35 @@ async function pickNativeAgent(opts: {
   })).trim() || def
 }
 
+// --- Step 4: sandbox (Docker) ------------------------------------------------
+
+/** Wizard step: print a short explanation, ask whether sandbox should be the default, and store the answer in `draft.defaults.sandbox`. */
+async function stepSandbox(draft: ConfigDraft): Promise<void> {
+  const blurb = [
+    "  When --sandbox is set on a command, skvm re-execs itself inside an",
+    "  ephemeral Docker container. You can opt in per-invocation or make",
+    "  sandbox the default for every command.",
+  ]
+  try {
+    console.log(c.bold("Sandbox (Docker)"))
+    for (const line of blurb) console.log(c.dim(line))
+
+    const enable = await confirm({
+      message: "Make --sandbox the default for every command?",
+      default: draft.defaults?.sandbox ?? false,
+    })
+    const defaults = (draft.defaults ??= {})
+    defaults.sandbox = enable
+    if (enable) console.log(c.dim("  (You can opt out of any single invocation with --sandbox=false.)"))
+  } catch (e) {
+    // An exit signal from the prompt (per isExit) ends the step quietly; anything else propagates.
+    if (!isExit(e)) throw e
+  }
+}
+
 // --- TUI section pager -------------------------------------------------------
 
-type SectionId = "providers" | "mode" | "adapters" | "write"
+type SectionId = "providers" | "mode" | "adapters" | "sandbox" | "write"
 
 interface Section {
   id: SectionId
@@ -1079,6 +1133,7 @@ const SECTIONS: Section[] = [
   { id: "providers", label: "Providers" },
   { id: "mode", label: "Default mode" },
   { id: "adapters", label: "Adapters" },
+  { id: "sandbox", label: "Sandbox" },
   { id: "write", label: "✓ Write & exit" },
 ]
 
@@ -1130,11 +1185,15 @@ function renderSectionBody(draft: ConfigDraft, index: number): string {
     case "adapters":
       return indent(summarizeAdapters(draft).trimStart())
         + "\n\n  " + c.dim("Press Enter to configure adapters.")
+    case "sandbox":
+      return indent(summarizeSandbox(draft).trimStart())
+        + "\n\n  " + c.dim("Press Enter to configure sandbox defaults.")
     case "write": {
       const full = [
         summarizeProviders(draft),
         summarizeDefaultMode(draft),
         summarizeAdapters(draft),
+        summarizeSandbox(draft),
       ].join("\n")
       const target = shortenPath(CONFIG_WRITE_PATH)
       return indent(full.trimStart())
@@ -1185,6 +1244,11 @@ function summarizeAdapters(draft: ConfigDraft): string {
   return lines.join("\n")
 }
 
+function summarizeSandbox(draft: ConfigDraft): string {
+  // Leading "\n" is part of the returned text; only a literal `true` renders as "on".
+  return "\n" + c.bold("Sandbox (Docker):") + " default --sandbox " + (draft.defaults?.sandbox === true ? c.green("on") : c.dim("off"))
+}
+
 // ---------------------------------------------------------------------------
 // `doctor` — environment health check
 // ---------------------------------------------------------------------------
@@ -1391,6 +1455,48 @@ async function runDoctor(): Promise<void> {
     })
   }
 
+  // Sandbox (Docker) checks — rendered as a separate section after the main
+  // results table so the output groups clearly. `sandboxOk` participates in
+  // the overall exit code only when sandbox is the default for all invocations.
+  let sandboxOk = true
+  const sandboxSectionLines: string[] = []
+  let sandboxSlice
+  try {
+    sandboxSlice = getSandboxConfig()
+  } catch (e) {
+    sandboxSectionLines.push(`  ${c.red("✗")} sandbox slice malformed: ${e}`)
+    sandboxOk = false
+  }
+
+  const sandboxDefaultsOn = getDefaultSandboxMode()
+  sandboxSectionLines.push(`  default --sandbox: ${sandboxDefaultsOn ? "on" : "off"}`)
+
+  const dockerCheck = spawnSync("docker", ["--version"], { encoding: "utf-8" })
+  if (dockerCheck.status === 0) {
+    sandboxSectionLines.push(`  ${c.green("✓")} docker available (${dockerCheck.stdout.trim()})`)
+  } else {
+    const sev = sandboxDefaultsOn ? "✗" : "(info)"
+    sandboxSectionLines.push(`  ${sandboxDefaultsOn ? c.red(sev) : c.dim(sev)} docker not on PATH`)
+    if (sandboxDefaultsOn) sandboxOk = false
+  }
+
+  if (sandboxSlice) {
+    const imageRef = resolveImageRef({
+      cliOverride: null,
+      configImage: sandboxSlice.docker.image,
+      skvmVersion: (pkgJson as { version: string }).version,
+    })
+    const inspect = spawnSync("docker", ["image", "inspect", imageRef], { stdio: "ignore" })
+    if (inspect.status === 0) {
+      sandboxSectionLines.push(`  ${c.green("✓")} image present: ${imageRef}`)
+    } else {
+      const sev = sandboxDefaultsOn ? "✗" : "(info)"
+      sandboxSectionLines.push(`  ${sandboxDefaultsOn ? c.red(sev) : c.dim(sev)} image not pulled: ${imageRef}`)
+      sandboxSectionLines.push(`     build with: docker build -f docker/skvm-sandbox.Dockerfile -t ${imageRef} .`)
+      if (sandboxDefaultsOn) sandboxOk = false
+    }
+  }
+
   // Print results
   console.log()
   let fails = 0, warns = 0
@@ -1406,6 +1512,11 @@ async function runDoctor(): Promise<void> {
   }
   console.log()
 
+  // Sandbox section output
+  console.log(c.bold("Sandbox (Docker):"))
+  for (const line of sandboxSectionLines) console.log(line)
+  console.log()
+
   // Migration note: warn if prior opencode proposals exist but the config
   // does not pin headlessAgent.driver (meaning the user may not have noticed
   // that the default flipped from opencode to pi).
@@ -1420,8 +1531,9 @@ async function runDoctor(): Promise<void> {
     ))
   }
 
-  if (fails > 0) {
-    console.log(c.yellow(`${fails} issue(s) to look at.`) + ` See the items above marked ${c.red("✗")}.`)
+  const totalFails = fails + (sandboxOk ? 0 : 1)
+  if (totalFails > 0) {
+    console.log(c.yellow(`${totalFails} issue(s) to look at.`) + ` See the items above marked ${c.red("✗")}.`)
   } else if (warns > 0) {
     console.log(c.yellow(`${warns} warning(s).`) + " Things should work, but read the notes above.")
   } else {
@@ -1465,8 +1577,11 @@ export { appendDiscoveredRoute } from "../core/config-write.ts"
 function serialize(draft: ConfigDraft): string {
   // Drop empty optional fields so the output stays minimal.
   const out: Record<string, unknown> = {}
-  if (draft.defaults && draft.defaults.adapterConfigMode !== undefined) {
-    out.defaults = { adapterConfigMode: draft.defaults.adapterConfigMode }
+  if (draft.defaults && (draft.defaults.adapterConfigMode !== undefined || draft.defaults.sandbox !== undefined)) {
+    const d: Record<string, unknown> = {}
+    if (draft.defaults.adapterConfigMode !== undefined) d.adapterConfigMode = draft.defaults.adapterConfigMode
+    if (draft.defaults.sandbox !== undefined) d.sandbox = draft.defaults.sandbox
+    out.defaults = d
   }
   const adaptersOut: Record<string, unknown> = {}
   for (const [k, v] of Object.entries(draft.adapters)) {
diff --git a/src/core/cli-flags.ts b/src/core/cli-flags.ts
index 516754e..577c00d 100644
--- a/src/core/cli-flags.ts
+++ b/src/core/cli-flags.ts
@@ -14,6 +14,7 @@ export const GLOBAL_FLAGS: ReadonlySet<string> = new Set([
   "verbose",
   "skvm-cache",
   "skvm-data-dir",
+  "sandbox",
 ])
 
 /**
diff --git a/src/core/config.ts b/src/core/config.ts
index 85a2f56..7c807b0 100644
--- a/src/core/config.ts
+++ b/src/core/config.ts
@@ -3,8 +3,10 @@ import { existsSync } from "node:fs"
 import {
   ProvidersConfigSchema,
   HeadlessAgentConfigSchema,
+  SandboxConfigSchema,
   type ProvidersConfig,
   type HeadlessAgentConfig,
+  type SandboxConfig,
   type AdapterConfigMode,
 } from "./types.ts"
 
@@ -273,8 +275,10 @@ interface SkVMConfig {
   proposalsDir?: string
   providers?: unknown
   headlessAgent?: unknown
+  sandbox?: unknown
   defaults?: {
     adapterConfigMode?: AdapterConfigMode
+    sandbox?: boolean
   }
 }
 
@@ -396,6 +400,49 @@ export function getProvidersConfig(): ProvidersConfig {
   return _providersConfigCache
 }
 
+/**
+ * Turn a route match string into an env-var-safe suffix by replacing every
+ * UTF-16 code unit outside `[a-zA-Z0-9]` with `_`. The launcher exports each
+ * route's key as `SKVM_ROUTE_<safeRouteId>_KEY`; the in-container loader reads
+ * the same name when the route's in-config `apiKey` is absent.
+ */
+export function safeRouteId(match: string): string {
+  return match.split("").map(unit => (/[a-zA-Z0-9]/.test(unit) ? unit : "_")).join("")
+}
+
+/**
+ * Resolve a route's API key; an empty string at any step counts as absent. Order:
+ *   1. `route.apiKey` from skvm.config.json
+ *   2. `process.env[SKVM_ROUTE_<safeRouteId>_KEY]` — populated by the launcher
+ *      inside the sandbox so the on-disk config can stay sanitized
+ *   3. `process.env[route.apiKeyEnv]` — the existing user-controlled env hook
+ * Returns `undefined` when no step yields a non-empty key.
+ */
+export function resolveRouteApiKey(route: {
+  match: string
+  apiKey?: string
+  apiKeyEnv?: string
+}): string | undefined {
+  if (route.apiKey) return route.apiKey
+  const injected = process.env[`SKVM_ROUTE_${safeRouteId(route.match)}_KEY`]
+  if (injected) return injected
+  // `||` so an empty user-supplied env var counts as "no key", like steps 1–2.
+  return (route.apiKeyEnv && process.env[route.apiKeyEnv]) || undefined
+}
+
+let _sandboxConfigCache: SandboxConfig | undefined
+
+// Validated `sandbox` config slice: parsed on first call (an absent slice parses as `{}`), then served from the module cache.
+export function getSandboxConfig(): SandboxConfig {
+  return (_sandboxConfigCache ??= SandboxConfigSchema.parse(getProjectConfig().sandbox ?? {}))
+}
+
+/** True only when `defaults.sandbox` in the project config is exactly `true`. */
+export function getDefaultSandboxMode(): boolean {
+  const { defaults } = getProjectConfig()
+  return defaults?.sandbox === true
+}
+
 let _headlessAgentConfigCache: HeadlessAgentConfig | undefined
 
 /**
@@ -449,17 +496,35 @@ export function getDefaultAdapterConfigMode(): AdapterConfigMode | undefined {
  *
  * Throws on an invalid flag value so the user sees a clear error instead of
  * the adapter silently reverting to `"managed"`.
+ *
+ * Sandbox guard: this is the single choke point through which every
+ * adapter-running command resolves its mode, so it is also where the
+ * `--sandbox` + native incompatibility is enforced. Inside the container
+ * (`SKVM_IN_SANDBOX=1`) a resolved `native` mode is a hard error — native
+ * imports host credentials that are deliberately not mounted, which defeats
+ * isolation. Commands that never resolve an adapter mode (e.g. `logs`,
+ * `clean-jit`) are unaffected.
  */
 export function resolveAdapterConfigMode(flagValue: string | undefined): AdapterConfigMode {
+  let mode: AdapterConfigMode
   if (flagValue !== undefined) {
     if (flagValue !== "native" && flagValue !== "managed") {
       throw new Error(
         `--adapter-config must be "native" or "managed" (got "${flagValue}")`,
       )
     }
-    return flagValue
+    mode = flagValue
+  } else {
+    mode = getDefaultAdapterConfigMode() ?? "managed"
+  }
+  if (mode === "native" && process.env.SKVM_IN_SANDBOX === "1") {
+    throw new Error(
+      `--sandbox requires managed adapter mode. Native mode imports host ` +
+      `credentials, which defeats container isolation. Pass ` +
+      `--adapter-config=managed or set defaults.adapterConfigMode = "managed".`,
+    )
   }
-  return getDefaultAdapterConfigMode() ?? "managed"
+  return mode
 }
 
 /**
@@ -503,4 +568,5 @@ export function invalidateConfigCache(): void {
   _configCache = undefined
   _providersConfigCache = undefined
   _headlessAgentConfigCache = undefined
+  _sandboxConfigCache = undefined
 }
diff --git a/src/core/types.ts b/src/core/types.ts
index 0e61b92..057fbda 100644
--- a/src/core/types.ts
+++ b/src/core/types.ts
@@ -518,6 +518,41 @@ export const ProvidersConfigSchema = z.object({
 })
 export type ProvidersConfig = z.infer<typeof ProvidersConfigSchema>
 
+// ---------------------------------------------------------------------------
+// Sandbox Config (docker sandbox slice)
+// ---------------------------------------------------------------------------
+
+export const SandboxNetworkSchema = z.enum(["none", "bridge", "host"])
+
+export const SandboxExtraMountSchema = z.object({
+  host: z.string().min(1),
+  inner: z.string().min(1),
+  mode: z.enum(["ro", "rw"]),
+})
+
+export const SandboxDockerConfigSchema = z.object({
+  image: z.string().nullable().default(null),
+  network: SandboxNetworkSchema.default("bridge"),
+  // Docker `--memory` form: a number with an optional b/k/m/g unit (e.g.
+  // "2g", "512m", "1073741824"). Validated here so a typo like "banana"
+  // fails at config-load with a clear message instead of as a cryptic
+  // docker-daemon error at container start.
+  memory: z.string().regex(/^\d+(\.\d+)?[bkmg]?$/i, "memory must be a docker size like \"2g\", \"512m\", or a byte count").default("2g"),
+  // Docker `--cpus` form: a positive decimal (e.g. "2", "1.5", "0.5"). The (?=.*[1-9]) lookahead rejects "0"/"0.0", which docker would read as "no CPU limit".
+  cpus: z.string().regex(/^(?=.*[1-9])\d+(\.\d+)?$/, "cpus must be a positive number like \"2\" or \"1.5\"").default("2"),
+  pidsLimit: z.number().int().positive().default(512),
+  extraMounts: z.array(SandboxExtraMountSchema).default([]),
+})
+
+export const SandboxConfigSchema = z.object({
+  docker: SandboxDockerConfigSchema.default({}),
+})
+
+export type SandboxNetwork = z.infer<typeof SandboxNetworkSchema>
+export type SandboxExtraMount = z.infer<typeof SandboxExtraMountSchema>
+export type SandboxDockerConfig = z.infer<typeof SandboxDockerConfigSchema>
+export type SandboxConfig = z.infer<typeof SandboxConfigSchema>
+
 // ---------------------------------------------------------------------------
 // Headless Agent Config (jit-optimize / jit-boost agent runs)
 // ---------------------------------------------------------------------------
diff --git a/src/index.ts b/src/index.ts
index febb469..4eb851f 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -43,6 +43,64 @@ function parseFlags(args: string[]): Record<string, string> {
   return flags
 }
 
+export interface SandboxFlagParse {
+  value: boolean
+  present: boolean
+}
+
+/**
+ * Scan argv for `--sandbox` / `--sandbox=<bool>`; the first occurrence wins.
+ * Any `=value` other than "true"/"false" throws: silently treating
+ * `--sandbox=yes` as "flag absent" would run UNSANDBOXED without the user knowing.
+ */
+export function parseSandboxFlag(args: string[]): SandboxFlagParse {
+  const prefix = "--sandbox="
+  const hit = args.find(arg => arg === "--sandbox" || arg.startsWith(prefix))
+  if (hit === undefined) return { value: false, present: false }
+  if (hit === "--sandbox") return { value: true, present: true }
+  const v = hit.slice(prefix.length)
+  if (v !== "true" && v !== "false") {
+    throw new Error(`--sandbox must be "true" or "false" (got "${v}")`)
+  }
+  return { value: v === "true", present: true }
+}
+
+export interface ShouldEnterLauncherArgs {
+  parsed: SandboxFlagParse
+  defaultsSandbox: boolean
+  inSandboxEnv: boolean
+}
+
+// Never re-enter from inside the container; otherwise an explicit flag beats the configured default.
+export function shouldEnterLauncher(o: ShouldEnterLauncherArgs): boolean {
+  if (o.inSandboxEnv) return false
+  return o.parsed.present ? o.parsed.value : o.defaultsSandbox
+}
+
+export interface AssertSandboxCompatibleArgs {
+  sandboxOn: boolean
+  command: string | undefined
+  subcommand: string | undefined
+  adapterMode: "native" | "managed" | undefined
+}
+
+/** Throw if `--sandbox` is combined with a `config` command or with native adapter mode; a no-op when sandbox is off. */
+export function assertSandboxCompatible(o: AssertSandboxCompatibleArgs): void {
+  if (!o.sandboxOn) return
+  if (o.command === "config") {
+    // Join only the parts that exist so a missing subcommand does not leave a double space in the message.
+    const invoked = ["skvm", "config", o.subcommand].filter(Boolean).join(" ")
+    throw new Error(`${invoked} cannot run under --sandbox: config commands always run on host (they manage host-side state).`)
+  }
+  if (o.adapterMode === "native") {
+    throw new Error(
+      `--sandbox requires managed adapter mode. ` +
+      `Native mode imports host credentials, which defeats container isolation. ` +
+      `Pass --adapter-config=managed or set defaults.adapterConfigMode = "managed".`,
+    )
+  }
+}
+
 async function main() {
   // Hidden subcommand for `skvm jit-optimize --detach`. Spawned by the
   // parent CLI with stdio: ignore + IPC channel; takes a JSON-stringified
@@ -60,6 +118,38 @@ async function main() {
 
   if (flags.verbose) setLogLevel("debug")
 
+  // Strategy C — sandbox dispatch. If `--sandbox` is set (or sandbox is the
+  // configured default) and we are not already inside the container, hand off
+  // to the launcher and never return.
+  const sandboxParsed = parseSandboxFlag(args)
+  const inSandboxEnv = process.env.SKVM_IN_SANDBOX === "1"
+  {
+    let defaultsSandbox = false
+    if (!inSandboxEnv && !sandboxParsed.present) {
+      const { getDefaultSandboxMode } = await import("./core/config.ts")
+      defaultsSandbox = getDefaultSandboxMode()
+    }
+    if (shouldEnterLauncher({ parsed: sandboxParsed, defaultsSandbox, inSandboxEnv })) {
+      const forwarded = args.filter(a => a !== "--sandbox" && !a.startsWith("--sandbox="))
+      // Guard: config commands always run on host — reject early before launching
+      // the container. Derive command/subcommand from positional args (not
+      // rawCommand), because parseSandboxFlag scans positionally: `--sandbox`
+      // may precede the subcommand, making rawCommand === "--sandbox".
+      // Note: per-command native-adapter guard (assertSandboxCompatible with
+      // adapterMode resolved) is deferred to each command entry point.
+      const positionals = forwarded.filter(a => !a.startsWith("-"))
+      assertSandboxCompatible({
+        sandboxOn: true,
+        command: positionals[0],
+        subcommand: positionals[1],
+        adapterMode: undefined,
+      })
+      const { runLauncher } = await import("./launcher/index.ts")
+      await runLauncher(forwarded)
+      /* unreachable */ return
+    }
+  }
+
   if (isTopLevelVersion) {
     console.log(pkgJson.version)
     process.exit(0)
@@ -84,6 +174,7 @@ Global Options:
   --skvm-cache=<path>      Override cache root (default: ~/.skvm)
   --skvm-data-dir=<path>   Override dataset root (default: ./skvm-data)
   --verbose                Enable debug logging
+  --sandbox[=false]        Run inside a Docker sandbox (or opt out when defaults.sandbox=true)
   --no-auto-probe          Disable auto-probe for this invocation (also via SKVM_AUTO_PROBE=0)
   --version, -v            Print version and exit
   --help, -h               Print this help and exit
@@ -2048,6 +2139,8 @@ function buildTaskSource(flags: Record<string, string>): import("./jit-optimize/
     return { kind: "real-task", trainTasks, testTasks }
   }
   if (kind === "log" || kind === "execution-log") {
+    // TODO(docker-sandbox): --logs and --failures are comma-separated path lists;
+    // comma-list path flags are not yet handled by PATH_FLAGS in src/launcher/path-flags.ts.
     const raw = flags.logs
     if (!raw) {
       console.error("jit-optimize: --logs is required for --task-source=log")
@@ -2236,7 +2329,17 @@ async function resolveSkillDirs(flags: Record<string, string>): Promise<string[]
   return dirs
 }
 
-main().catch((err) => {
-  console.error(err)
-  process.exit(1)
-})
+if (import.meta.main) {
+  main().catch((err) => {
+    // Print a clean `error: <message>` for expected user-errors (bad flags,
+    // sandbox guards, config validation) instead of a raw stack trace. Pass
+    // --verbose or set SKVM_DEBUG=1 to see the full stack when debugging.
+    const verbose = process.argv.includes("--verbose") || process.env.SKVM_DEBUG === "1"
+    if (verbose && err instanceof Error && err.stack) {
+      console.error(err.stack)
+    } else {
+      console.error(err instanceof Error ? `error: ${err.message}` : String(err))
+    }
+    process.exit(1)
+  })
+}
diff --git a/src/launcher/config-sanitize.ts b/src/launcher/config-sanitize.ts
new file mode 100644
index 0000000..a375ec3
--- /dev/null
+++ b/src/launcher/config-sanitize.ts
@@ -0,0 +1,49 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync, chmodSync } from "node:fs"
+import path from "node:path"
+import { safeRouteId } from "../core/config.ts"
+
+const LAUNCHER_TMP_PREFIX = "/tmp/skvm-launcher-"
+
+/**
+ * Read the host's skvm.config.json and write a key-free copy to a per-host-pid
+ * tmp file (directory 0700, file 0600), returning the copy's path. The caller
+ * bind-mounts it at `/skvm-cache/skvm.config.json:ro` so a `cat` from a tool
+ * call inside the container never sees a literal key.
+ *
+ * For each route that had key material, `apiKey` is dropped and `apiKeyEnv` is
+ * rewritten to `SKVM_ROUTE_<safeRouteId>_KEY`, the env var the launcher injects
+ * (see env.ts). The route stays schema-valid (ProviderRouteSchema requires
+ * `apiKey` or `apiKeyEnv`) and the in-container loader resolves the key via
+ * `resolveRouteApiKey` in core/config.ts. A missing, unparseable, or
+ * non-object config is written as `{}`; non-object route entries pass through.
+ */
+export function writeSanitizedConfig(hostConfigPath: string, hostPid: number): string {
+  const tmpDir = `${LAUNCHER_TMP_PREFIX}${hostPid}`
+  mkdirSync(tmpDir, { recursive: true })
+  chmodSync(tmpDir, 0o700)
+  const outPath = path.join(tmpDir, "skvm.config.json")
+
+  let raw: unknown = {}
+  if (existsSync(hostConfigPath)) {
+    try {
+      raw = JSON.parse(readFileSync(hostConfigPath, "utf-8"))
+    } catch { /* best-effort: fall through with the empty default */ }
+  }
+  // Valid JSON that is not a plain object (`null`, scalar, array) gets the same fallback; `null` used to crash below.
+  const isObj = (v: unknown): v is Record<string, unknown> => typeof v === "object" && v !== null
+  const config: Record<string, unknown> = isObj(raw) && !Array.isArray(raw) ? raw : {}
+  const providers = config.providers
+  if (isObj(providers) && Array.isArray(providers.routes)) {
+    providers.routes = providers.routes.map((r: unknown) => {
+      if (!isObj(r)) return r
+      const { apiKey, apiKeyEnv, ...rest } = r
+      if ((apiKey !== undefined || apiKeyEnv !== undefined) && typeof rest.match === "string") {
+        rest.apiKeyEnv = `SKVM_ROUTE_${safeRouteId(rest.match)}_KEY`
+      }
+      return rest
+    })
+  }
+
+  writeFileSync(outPath, JSON.stringify(config, null, 2), { mode: 0o600 })
+  return outPath
+}
diff --git a/src/launcher/docker-argv.ts b/src/launcher/docker-argv.ts
new file mode 100644
index 0000000..636d353
--- /dev/null
+++ b/src/launcher/docker-argv.ts
@@ -0,0 +1,42 @@
+import type { SandboxNetwork } from "../core/types.ts"
+
+export interface DockerRunArgvOpts {
+  mountArgv: string[]
+  env: Record<string, string>
+  image: string
+  networkMode: SandboxNetwork
+  resourceLimits: { memory: string; cpus: string; pidsLimit: number }
+  hostUid: number
+  hostGid: number
+  hostPid: number
+  command: string[]
+}
+
+/** Assemble the `docker run` argv: fixed hardening flags, limits, labels, mounts, `-e` pairs, then image and inner command. */
+export function buildDockerRunArgv(opts: DockerRunArgvOpts): string[] {
+  const { memory, cpus, pidsLimit } = opts.resourceLimits
+  const envArgv = Object.entries(opts.env).flatMap(([name, value]) => {
+    // Refuse values containing CR, LF, or NUL (e.g. a key with a stray trailing newline) instead of handing docker a corrupt -e argument.
+    if (/[\n\r\0]/.test(value)) {
+      throw new Error(`sandbox env value for "${name}" contains a newline or NUL byte; refusing to pass a corrupt -e argument to docker.`)
+    }
+    return ["-e", `${name}=${value}`]
+  })
+  return [
+    "docker", "run", "--rm", "-i",
+    "-u", `${opts.hostUid}:${opts.hostGid}`,
+    "--cap-drop=ALL",
+    "--security-opt", "no-new-privileges",
+    `--pids-limit=${pidsLimit}`,
+    `--memory=${memory}`,
+    `--cpus=${cpus}`,
+    `--network=${opts.networkMode}`,
+    "--label", "skvm-sandbox=1",
+    "--label", `skvm-sandbox-host-pid=${opts.hostPid}`,
+    "-w", "/workspace",
+    ...opts.mountArgv,
+    ...envArgv,
+    opts.image,
+    ...opts.command,
+  ]
+}
diff --git a/src/launcher/env.ts b/src/launcher/env.ts
new file mode 100644
index 0000000..e820bd1
--- /dev/null
+++ b/src/launcher/env.ts
@@ -0,0 +1,81 @@
+import { safeRouteId, resolveRouteApiKey } from "../core/config.ts"
+
+interface RouteLike {
+  match: string        // route pattern; fed to safeRouteId() to derive the env var name
+  kind: string         // not read by composeEnv
+  apiKey?: string      // forwarded to resolveRouteApiKey()
+  apiKeyEnv?: string   // forwarded to resolveRouteApiKey()
+}
+
+export interface ComposeEnvArgs {
+  routes: RouteLike[]                          // routes whose resolved API keys get injected
+  hostEnv: Record<string, string | undefined>  // host environment; source of toggles and proxy vars
+  /** Whether the launcher mounted the dataset at /skvm-data. When true the
+   *  container is told to resolve its dataset root there. */
+  skvmDataMounted?: boolean
+}
+
+const PROXY_VARS = [  // copied from hostEnv into the container env when set and non-empty
+  "HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY",
+  "http_proxy", "https_proxy", "no_proxy",
+]
+
+export function composeEnv(opts: ComposeEnvArgs): Record<string, string> {
+  const { routes, hostEnv, skvmDataMounted } = opts
+
+  // HOME is /workspace inside the container, so without an explicit
+  // SKVM_CACHE the in-container skvm would look in /workspace/.skvm and miss
+  // the mounted cache (sanitized config, profiles, logs, proposals).
+  const env: Record<string, string> = {
+    SKVM_IN_SANDBOX: "1",
+    HOME: "/workspace",
+    SKVM_CACHE: "/skvm-cache",
+  }
+
+  // Only advertise the dataset root when it was really mounted.
+  if (skvmDataMounted) {
+    env.SKVM_DATA_DIR = "/skvm-data"
+  }
+
+  // `--no-auto-probe` is consumed on the host and re-expressed as
+  // SKVM_AUTO_PROBE=0; forward the variable so the opt-out survives the hop
+  // into the container.
+  const autoProbe = hostEnv.SKVM_AUTO_PROBE
+  if (autoProbe) {
+    env.SKVM_AUTO_PROBE = autoProbe
+  }
+
+  // Copy through whichever proxy variables are set to a non-empty value.
+  for (const name of PROXY_VARS) {
+    const value = hostEnv[name]
+    if (value) env[name] = value
+  }
+
+  // Inject per-route API keys as SKVM_ROUTE_<id>_KEY. safeRouteId collapses
+  // punctuation to `_`, so matches differing only by punctuation (e.g.
+  // "openai-x/*" vs "openai_x/*") share one id and the later key would
+  // silently replace the earlier one. Catch that here and abort the launch.
+  const seen = new Map<string, string>()
+  for (const { match, apiKey, apiKeyEnv } of routes) {
+    const id = safeRouteId(match)
+    const earlier = seen.get(id)
+    if (earlier !== undefined && earlier !== match) {
+      throw new Error(
+        `route match collision: "${earlier}" and "${match}" both map to ` +
+        `SKVM_ROUTE_${id}_KEY. Rename one route's match so the two differ by ` +
+        `more than punctuation.`,
+      )
+    }
+    seen.set(id, match)
+    const key = resolveRouteApiKey({
+      match,
+      apiKey,
+      apiKeyEnv,
+    })
+    if (key) {
+      env[`SKVM_ROUTE_${id}_KEY`] = key
+    }
+  }
+
+  return env
+}
diff --git a/src/launcher/image.ts b/src/launcher/image.ts
new file mode 100644
index 0000000..2af0c40
--- /dev/null
+++ b/src/launcher/image.ts
@@ -0,0 +1,34 @@
+import { spawnSync } from "node:child_process"
+
+export interface ResolveImageRefArgs {
+  cliOverride: string | null   // highest precedence; null/empty falls through
+  configImage: string | null   // used when there is no CLI override
+  skvmVersion: string          // tag of the default ghcr.io image
+}
+
+export function resolveImageRef(opts: ResolveImageRefArgs): string {
+  // Precedence: CLI override, then config, then the version-pinned default.
+  const fallback = `ghcr.io/sjtu-ipads/skvm-sandbox:${opts.skvmVersion}`
+  return opts.cliOverride || opts.configImage || fallback
+}
+
+export function buildBuildCommandHint(ref: string): string {
+  return "docker build -f docker/skvm-sandbox.Dockerfile -t " + ref + " ."
+}
+
+/**
+ * Ensure `ref` is present locally, falling back to `docker pull`; throws with
+ * the build-locally command if both fail, or if `docker` cannot be spawned.
+ */
+export function ensureImagePresent(ref: string): void {
+  const inspect = spawnSync("docker", ["image", "inspect", ref], { stdio: "ignore" })
+  if (inspect.error) throw new Error(`skvm: failed to run docker: ${inspect.error.message}`)
+  if (inspect.status === 0) return
+
+  const pull = spawnSync("docker", ["pull", ref], { stdio: "inherit" })
+  if (pull.status === 0) return
+
+  throw new Error(
+    `skvm: image ${ref} not present locally and pull failed.\n` +
+    `Build it yourself with:\n  ${buildBuildCommandHint(ref)}\n`,
+  )
+}
diff --git a/src/launcher/index.ts b/src/launcher/index.ts
new file mode 100644
index 0000000..b0db0ba
--- /dev/null
+++ b/src/launcher/index.ts
@@ -0,0 +1,198 @@
+import { spawnSync } from "node:child_process"
+import { existsSync, mkdirSync } from "node:fs"
+import path from "node:path"
+
+import {
+  SKVM_CACHE,
+  SKVM_DATA_DIR,
+  getConfigPath,
+  getProvidersConfig,
+  getSandboxConfig,
+} from "../core/config.ts"
+import pkgJson from "../../package.json" with { type: "json" }
+
+import { composeMounts } from "./mounts.ts"
+import { composeEnv } from "./env.ts"
+import { writeSanitizedConfig } from "./config-sanitize.ts"
+import { resolveImageRef, ensureImagePresent } from "./image.ts"
+import { buildDockerRunArgv } from "./docker-argv.ts"
+import { reapLeaked } from "./stale-reap.ts"
+
+/**
+ * Reject `--mount-extra` host paths that would hand the container control of
+ * the host: the Docker socket (→ full host root via the daemon API) and the
+ * host filesystem root. The check is lexical (`path.resolve` only): symlinks
+ * are not followed, and a parent directory that contains the socket is not
+ * rejected — this is friction for a deliberate escape hatch, not a boundary.
+ */
+export function assertMountExtraAllowed(hostPath: string): void {
+  const resolved = path.resolve(hostPath)
+  if (resolved === "/") {
+    throw new Error(`--mount-extra refuses to mount the host root "/" into the sandbox.`)
+  }
+  if (/(^|\/)docker\.sock$/.test(resolved)) {
+    throw new Error(
+      `--mount-extra refuses to mount the Docker socket (${hostPath}); ` +
+      `that grants the container full control of the host Docker daemon.`,
+    )
+  }
+}
+
+/**
+ * Validate every host path in a list of extra mounts against the denylist.
+ * Both CLI `--mount-extra` and config `sandbox.docker.extraMounts` flow through
+ * this so the two escape hatches share one set of rules.
+ */
+export function assertExtraMountsAllowed(mounts: Array<{ host: string }>): void {
+  for (const m of mounts) {
+    assertMountExtraAllowed(m.host)
+  }
+}
+
+/**
+ * Redact the value of any `NAME=VALUE` argv token whose NAME looks like it
+ * carries a secret, so `--debug-sandbox` output is safe to paste into issues,
+ * CI logs, or a screen share. The injected provider keys live in
+ * `SKVM_ROUTE_<id>_KEY=...` env tokens; we also catch generic key/token/
+ * secret/password names defensively.
+ */
+export function redactSecretToken(tok: string): string {
+  const eq = tok.indexOf("=")
+  if (eq <= 0) return tok
+  const name = tok.slice(0, eq)
+  if (name.startsWith("SKVM_ROUTE_") || /key|token|secret|password/i.test(name)) {
+    return `${name}=<redacted>`
+  }
+  return tok
+}
+
+/**
+ * Sandbox-mode dispatch. Composes mounts, env, image ref, and a hardened
+ * `docker run` argv from the user's CLI args, runs it in the foreground, and
+ * exits with the container's status. Never returns; throws on invalid
+ * launcher flags, denied extra mounts, a missing image, or unknown host uid.
+ * args: the full CLI args (slice(2) of process.argv) — `--sandbox` has
+ * already been stripped by the caller in src/index.ts.
+ */
+export async function runLauncher(args: string[]): Promise<never> {
+  reapLeaked()
+
+  const sandboxCfg = getSandboxConfig()
+  const providers = getProvidersConfig()
+  const hostConfigPath = getConfigPath()
+
+  // Config-supplied extra mounts are an escape hatch like --mount-extra, and
+  // must clear the same denylist (Docker socket, host root). Validate before
+  // composing mounts so a malformed config fails loud, not silently inside the
+  // container.
+  assertExtraMountsAllowed(sandboxCfg.docker.extraMounts)
+
+  const sanitizedConfigPath = writeSanitizedConfig(hostConfigPath, process.pid)
+
+  const skvmDataExists = existsSync(SKVM_DATA_DIR) ? SKVM_DATA_DIR : null
+
+  // Launcher-only flags: parsed inline, consumed here, never forwarded to the container.
+  let cliImageOverride: string | null = null
+  let cliNetworkOverride: typeof sandboxCfg.docker.network | null = null
+  const cliExtraMounts: Array<{ host: string; inner: string; mode: "ro" | "rw" }> = []
+  let debugSandbox = false
+  const forwarded: string[] = []
+  for (const a of args) {
+    if (a.startsWith("--docker-image=")) {
+      cliImageOverride = a.slice("--docker-image=".length)
+      continue
+    }
+    if (a.startsWith("--docker-network=")) {
+      const v = a.slice("--docker-network=".length)
+      if (v !== "none" && v !== "bridge" && v !== "host") {
+        throw new Error(`--docker-network must be one of none|bridge|host (got ${v})`)
+      }
+      cliNetworkOverride = v
+      continue
+    }
+    if (a.startsWith("--mount-extra=")) {
+      const triple = a.slice("--mount-extra=".length).split(":")
+      if (triple.length !== 3 || (triple[2] !== "ro" && triple[2] !== "rw")) {
+        throw new Error(`--mount-extra expects host:inner:ro|rw (got ${a})`)
+      }
+      assertMountExtraAllowed(triple[0]!)
+      cliExtraMounts.push({ host: triple[0]!, inner: triple[1]!, mode: triple[2] as "ro" | "rw" })
+      continue
+    }
+    if (a === "--debug-sandbox") { debugSandbox = true; continue }
+    forwarded.push(a)
+  }
+
+  const { argv: mountArgv, rewrittenArgs, ensureDirs } = composeMounts({
+    args: forwarded,
+    roots: {
+      cwd: process.cwd(),
+      skvmCache: SKVM_CACHE,
+      skvmDataDir: skvmDataExists,
+      sanitizedConfigPath,
+    },
+    configExtraMounts: sandboxCfg.docker.extraMounts,
+    cliExtraMounts,
+  })
+
+  // Pre-create managed rw mount sources (cache root + dynamic out-of-root
+  // output dirs) so the daemon doesn't create them as root and lock the
+  // host-uid container out. See ComposeMountsResult.ensureDirs.
+  for (const dir of ensureDirs) {
+    mkdirSync(dir, { recursive: true })
+  }
+
+  const env = composeEnv({
+    routes: providers.routes,
+    hostEnv: process.env as Record<string, string | undefined>,
+    skvmDataMounted: skvmDataExists !== null,
+  })
+
+  const image = resolveImageRef({
+    cliOverride: cliImageOverride,
+    configImage: sandboxCfg.docker.image,
+    skvmVersion: pkgJson.version,
+  })
+
+  ensureImagePresent(image)
+
+  // Run the container as the host user so bind-mounted writes are owned by the
+  // invoker. Refuse to silently fall back to uid 0 (root) when getuid is
+  // unavailable — running the sandbox as root would undermine the isolation
+  // the `-u` flag is meant to provide.
+  const getuid = process.getuid
+  const getgid = process.getgid
+  if (!getuid || !getgid) {
+    throw new Error(
+      `--sandbox: cannot determine host uid/gid on this platform ` +
+      `(process.getuid unavailable); refusing to run the container as root.`,
+    )
+  }
+
+  const argv = buildDockerRunArgv({
+    mountArgv,
+    env,
+    image,
+    networkMode: cliNetworkOverride ?? sandboxCfg.docker.network,
+    resourceLimits: {
+      memory: sandboxCfg.docker.memory,
+      cpus: sandboxCfg.docker.cpus,
+      pidsLimit: sandboxCfg.docker.pidsLimit,
+    },
+    hostUid: getuid(),
+    hostGid: getgid(),
+    hostPid: process.pid,
+    command: ["skvm", ...rewrittenArgs],
+  })
+
+  if (debugSandbox) {
+    for (const tok of argv) console.log(redactSecretToken(tok))
+    process.exit(0)
+  }
+
+  // Run docker in the foreground. spawnSync with stdio: "inherit" shares the
+  // terminal and lets us propagate the exit code (Bun lacks an execvp wrapper).
+  const child = spawnSync(argv[0]!, argv.slice(1), { stdio: "inherit" })
+  if (child.error) console.error(`skvm: failed to launch docker: ${child.error.message}`)
+  process.exit(child.status ?? 1)
+}
diff --git a/src/launcher/mounts.ts b/src/launcher/mounts.ts
new file mode 100644
index 0000000..14324a2
--- /dev/null
+++ b/src/launcher/mounts.ts
@@ -0,0 +1,416 @@
+import path from "node:path"
+import { existsSync as fsExistsSync } from "node:fs"
+import { PATH_FLAGS, resolvePathFlagValue, looksLikePath, type PathFlag } from "./path-flags.ts"
+
+// The composeMounts default is fsExistsSync (real fs check). Tests that
+// exercise non-existent paths inject `() => true` or `() => false`.
+export { fsExistsSync }
+
+export interface HostRoots {
+  cwd: string                  // mounted rw at /workspace
+  skvmCache: string            // mounted rw at /skvm-cache
+  skvmDataDir: string | null   // mounted ro at /skvm-data; null = no data mount
+  sanitizedConfigPath: string  // mounted ro over /skvm-cache/skvm.config.json
+}
+
+export interface DockerMount {
+  host: string         // bind source on the host
+  inner: string        // destination path inside the container
+  mode: "ro" | "rw"    // last field of the `-v host:inner:mode` spec
+}
+
+export interface ComposeMountsArgs {
+  args: string[]                        // CLI args to scan for `--flag=value` path flags
+  roots: HostRoots                      // fixed host roots and the sanitized config file
+  existsSync?: (p: string) => boolean   // injectable for tests; defaults to fs.existsSync
+  configExtraMounts?: Array<{ host: string; inner: string; mode: "ro" | "rw" }>  // emitted after the fixed mounts
+  cliExtraMounts?: Array<{ host: string; inner: string; mode: "ro" | "rw" }>     // emitted after the config extras
+}
+
+export interface ComposeMountsResult {
+  mounts: DockerMount[]     // fixed + extra mounts first, then dynamic /extra/<idx> mounts
+  rewrittenArgs: string[]   // copy of `args` with path-flag values mapped to inner paths
+  argv: string[]            // `mounts` flattened to alternating "-v", "host:inner:mode"
+  /**
+   * Managed `rw` mount sources (the cache root and dynamic out-of-root output
+   * dirs) that do not yet exist on the host. The launcher must `mkdir -p` these
+   * before `docker run`, otherwise the daemon creates the bind source as root
+   * and the container — running as the host uid — cannot write into it, leaving
+   * a root-owned directory behind. Excludes user-controlled extra mounts.
+   */
+  ensureDirs: string[]
+}
+
+/** Inner (in-container) paths for the three fixed host roots. */
+const INNER_WORKSPACE = "/workspace"  // roots.cwd
+const INNER_CACHE = "/skvm-cache"     // roots.skvmCache
+const INNER_DATA = "/skvm-data"       // roots.skvmDataDir, when non-null
+
+/**
+ * Combine two mount modes; the result is "ro" only when both inputs are "ro".
+ */
+function widenMode(a: "ro" | "rw", b: "ro" | "rw"): "ro" | "rw" {
+  return a !== "rw" && b !== "rw" ? "ro" : "rw"
+}
+
+/**
+ * Render a DockerMount as the `host:inner:mode` value of a `-v` flag. The
+ * "-v" itself is not included; callers push it as a separate argv token.
+ */
+function mountToSpec(m: DockerMount): string {
+  return [m.host, m.inner, m.mode].join(":")
+}
+
+/**
+ * Map an absolute host path to its in-container path when it sits at or below
+ * one of the fixed host roots (cwd, cache, optional data dir); return null
+ * when it is outside all of them.
+ *
+ * The longest matching root wins. Roots can nest — notably the cache may live
+ * under the workspace (`--skvm-cache=./.skvm`). Matching cwd first would turn
+ * that value into `/workspace/.skvm`, and because the in-container
+ * `--skvm-cache` flag outranks the `SKVM_CACHE=/skvm-cache` env, the container
+ * would read the *raw* config under the workspace mount and bypass the
+ * sanitized `/skvm-cache` overlay (leaking literal API keys). Picking the most
+ * specific root sends it to `/skvm-cache`, through the overlay.
+ */
+function rewriteUnderFixedRoots(hostPath: string, roots: HostRoots): string | null {
+  const candidates: Array<[string, string]> = [
+    [roots.cwd, INNER_WORKSPACE],
+    [roots.skvmCache, INNER_CACHE],
+  ]
+  if (roots.skvmDataDir !== null) {
+    candidates.push([roots.skvmDataDir, INNER_DATA])
+  }
+
+  let winner: string | null = null
+  let winnerLen = -1
+  for (const [hostRoot, innerRoot] of candidates) {
+    const withSlash = hostRoot.endsWith("/") ? hostRoot : hostRoot + "/"
+    let mapped: string
+    if (hostPath === hostRoot) {
+      mapped = innerRoot
+    } else if (hostPath.startsWith(withSlash)) {
+      mapped = innerRoot + "/" + hostPath.slice(withSlash.length)
+    } else {
+      continue  // not under this root
+    }
+    // Strictly longer root wins; on a tie the earlier candidate is kept.
+    if (hostRoot.length > winnerLen) {
+      winner = mapped
+      winnerLen = hostRoot.length
+    }
+  }
+
+  return winner
+}
+
+/**
+ * Match one raw CLI arg against the known path flags (`--flag=value` form).
+ * Returns the matching flag and the text after "=", or null when none match.
+ */
+function parsePathArg(
+  raw: string,
+): { flag: PathFlag; value: string } | null {
+  // First table entry whose `--flag=` prefix starts the arg.
+  const hit = PATH_FLAGS.find(candidate => raw.startsWith(candidate.flag + "="))
+  if (hit === undefined) {
+    return null
+  }
+  // Skip the flag name plus the "=" separator.
+  return { flag: hit, value: raw.slice(hit.flag.length + 1) }
+}
+
+/**
+ * Pick the host directory to bind-mount for an out-of-root path-flag value:
+ *  - file-kind → the file's parent directory (`dirname(hostPath)`);
+ *  - dir-kind  → the path itself.
+ */
+function getHostRoot(hostPath: string, kind: "file" | "dir"): string {
+  if (kind !== "file") return hostPath
+  return path.dirname(hostPath)
+}
+
+/**
+ * Compose all Docker bind-mount arguments and rewritten CLI args for the
+ * Strategy-C launcher.
+ *
+ * Algorithm:
+ *  1. Emit the fixed mounts: cwd, skvm-cache, skvm-data (if any), the
+ *     sanitised-config overlay, then config and CLI extra mounts.
+ *  2. Walk the input args. For each element of each path-flag:
+ *     a. Resolve the value to an absolute host path.
+ *     b. If it falls under a fixed root, rewrite in place — no new mount.
+ *     c. Otherwise, if the flag is required and the path is missing, throw.
+ *     d. Register it as an out-of-root path entry.
+ *  3. Group out-of-root entries by prefix dedup (see below).
+ *  4. Assign /extra/<idx> indices; emit one mount per group.
+ *  5-6. Reassemble rewritten args; build the `-v` argv and `ensureDirs`.
+ *
+ * Prefix dedup (entries are processed in input / CLI arg order):
+ *  - An entry whose host root equals or lies under a group's root joins it.
+ *  - An entry whose host root is a parent of a group's root joins it and
+ *    becomes that group's (broader) root.
+ *  - Otherwise the entry starts a new group.
+ *  - A group's mode widens to rw if any participant contributes rw.
+ */
+export function composeMounts({
+  args,
+  roots,
+  existsSync = fsExistsSync,
+  configExtraMounts = [],
+  cliExtraMounts = [],
+}: ComposeMountsArgs): ComposeMountsResult {
+  // ── 1. Fixed default mounts ──────────────────────────────────────────────
+  const defaultMounts: DockerMount[] = [
+    { host: roots.cwd, inner: INNER_WORKSPACE, mode: "rw" },
+    { host: roots.skvmCache, inner: INNER_CACHE, mode: "rw" },
+    ...(roots.skvmDataDir !== null
+      ? [{ host: roots.skvmDataDir, inner: INNER_DATA, mode: "ro" as const }]
+      : []),
+    // Sanitized-config overlay: mounts on top of the cache directory.
+    {
+      host: roots.sanitizedConfigPath,
+      inner: INNER_CACHE + "/skvm.config.json",
+      mode: "ro" as const,
+    },
+    // Config-level extra mounts (sandbox.docker.extraMounts).
+    ...configExtraMounts,
+    // CLI-level extra mounts (--mount-extra=host:inner:ro|rw).
+    ...cliExtraMounts,
+  ]
+
+  // ── 2. Walk args for path flags ──────────────────────────────────────────
+  //
+  // Each path-flag arg expands into one or more *elements* (single flags have
+  // one; csv flags split on ","). Every element is rewritten independently:
+  // fixed-root elements are resolved here; out-of-root elements are deferred to
+  // the dynamic-mount grouping below and back-filled into their slot. After
+  // grouping, each arg is reassembled from its (now-complete) element list.
+  interface OutOfRootEntry {
+    argIndex: number    // index in rewrittenArgs array
+    elementSlot: number // which csv element of that arg this is
+    hostPath: string    // absolute resolved host path
+    hostRoot: string    // the path to mount (parent for file-kind, self for dir-kind)
+    kind: "file" | "dir"
+    mode: "ro" | "rw"
+    flagName: string
+  }
+
+  interface PendingArg {
+    argIndex: number
+    flagName: string
+    elements: Array<string | null>  // inner value per element; null = out-of-root pending
+  }
+
+  const rewrittenArgs: string[] = [...args]
+  const outOfRoot: OutOfRootEntry[] = []
+  const pendingByIndex = new Map<number, PendingArg>()
+
+  for (let i = 0; i < args.length; i++) {
+    const raw = args[i]
+    if (raw === undefined) continue
+    const parsed = parsePathArg(raw)
+    if (parsed === null) continue
+
+    const { flag, value } = parsed
+    const rawElements = (flag.shape ?? "single") === "csv" ? value.split(",") : [value]
+    const elements: Array<string | null> = new Array(rawElements.length).fill(null)
+
+    for (let slot = 0; slot < rawElements.length; slot++) {
+      const el = rawElements[slot]!
+
+      // pathLikeOnly: leave non-path tokens (e.g. bench task IDs) verbatim.
+      if (flag.pathLikeOnly && !looksLikePath(el)) {
+        elements[slot] = el
+        continue
+      }
+
+      const hostPath = resolvePathFlagValue(el, roots.cwd)
+
+      // Try to rewrite under a fixed root (no new mount needed).
+      const innerFixed = rewriteUnderFixedRoots(hostPath, roots)
+      if (innerFixed !== null) {
+        elements[slot] = innerFixed
+        continue
+      }
+
+      // Out-of-root path: check existence for required flags before mounting.
+      if (flag.required && !existsSync(hostPath)) {
+        throw new Error(
+          `${flag.flag}: required path does not exist: ${hostPath}`,
+        )
+      }
+
+      // Out-of-root: register for dynamic mount assignment; slot stays null.
+      outOfRoot.push({
+        argIndex: i,
+        elementSlot: slot,
+        hostPath,
+        hostRoot: getHostRoot(hostPath, flag.kind),
+        kind: flag.kind,
+        mode: flag.mode,
+        flagName: flag.flag,
+      })
+    }
+
+    pendingByIndex.set(i, { argIndex: i, flagName: flag.flag, elements })
+  }
+
+  // ── 3. Group out-of-root entries by prefix dedup ─────────────────────────
+  //
+  // A "group" represents one Docker mount. Each group has:
+  //   - hostRoot: the host path to mount (the broadest covering root)
+  //   - mode: widened across all participants
+  //   - members: the out-of-root entries that belong to this group
+  //
+  // Entries are assigned in input order. Groups are scanned in creation order
+  // and the first one that is an ancestor-or-equal of the entry's hostRoot, or
+  // a descendant of it (the entry then becomes the group's broader hostRoot),
+  // takes the entry. If none matches, a new group is created.
+  // NOTE(review): broadening one group does not merge other groups that the
+  // new root now also covers; they stay separate (overlapping) mounts.
+
+  interface MountGroup {
+    hostRoot: string
+    mode: "ro" | "rw"
+    members: OutOfRootEntry[]
+  }
+
+  const groups: MountGroup[] = []
+
+  for (const entry of outOfRoot) {
+    const entryRoot = entry.hostRoot
+
+    // Try to find an existing group where entryRoot is a descendant.
+    let placed = false
+    for (const group of groups) {
+      const gRoot = group.hostRoot
+      if (
+        entryRoot === gRoot ||
+        entryRoot.startsWith(gRoot.endsWith("/") ? gRoot : gRoot + "/")
+      ) {
+        // Entry falls under an existing broader group.
+        group.mode = widenMode(group.mode, entry.mode)
+        group.members.push(entry)
+        placed = true
+        break
+      }
+      // Check if the new entry's root is BROADER (prefix) than the group's root.
+      if (
+        gRoot.startsWith(
+          entryRoot.endsWith("/") ? entryRoot : entryRoot + "/",
+        )
+      ) {
+        // New entry is broader — expand the group's host root.
+        group.hostRoot = entryRoot
+        group.mode = widenMode(group.mode, entry.mode)
+        group.members.push(entry)
+        placed = true
+        break
+      }
+    }
+
+    if (!placed) {
+      groups.push({
+        hostRoot: entryRoot,
+        mode: entry.mode,
+        members: [entry],
+      })
+    }
+  }
+
+  // ── 4. Assign /extra/<idx> indices and build dynamic mounts ──────────────
+  const dynamicMounts: DockerMount[] = []
+
+  for (const [idx, group] of groups.entries()) {
+    const innerGroupRoot = `/extra/${idx}`
+    const singleton = group.members.length === 1
+
+    // Determine the host path to actually mount.
+    // For a singleton:
+    //   - dir-kind: mount the dir itself (hostRoot = hostPath). The inner mount
+    //     path becomes /extra/<idx>/<basename>.
+    //   - file-kind: mount the parent (hostRoot = dirname(hostPath)) at
+    //     /extra/<idx>; the file's inner path is /extra/<idx>/<basename>.
+    // For dedup'd groups: mount the broader hostRoot; inner paths are computed
+    // via relative().
+
+    let mountHost: string
+    let mountInner: string
+
+    if (singleton) {
+      const member = group.members[0]
+      if (member === undefined) continue  // unreachable; satisfies TS
+      if (member.kind === "dir") {
+        // Mount = /extra/<idx>/<basename(hostPath)>
+        const base = path.basename(member.hostPath)
+        mountHost = member.hostPath
+        mountInner = innerGroupRoot + "/" + base
+      } else {
+        // file-kind: mount parent dir at /extra/<idx>
+        mountHost = member.hostRoot  // already dirname(hostPath)
+        mountInner = innerGroupRoot
+      }
+    } else {
+      // Dedup'd group: mount the broader hostRoot at /extra/<idx>.
+      mountHost = group.hostRoot
+      mountInner = innerGroupRoot
+    }
+
+    dynamicMounts.push({
+      host: mountHost,
+      inner: mountInner,
+      mode: group.mode,
+    })
+
+    // Back-fill each member's element slot with its computed inner path.
+    for (const member of group.members) {
+      let innerPath: string
+      if (singleton) {
+        if (member.kind === "dir") {
+          innerPath = mountInner  // /extra/<idx>/<basename>
+        } else {
+          // file-kind: innerGroupRoot + "/" + basename(hostPath)
+          innerPath = innerGroupRoot + "/" + path.basename(member.hostPath)
+        }
+      } else {
+        // Dedup'd: compute relative from the group's host root.
+        const rel = path.relative(group.hostRoot, member.hostPath)
+        innerPath = rel === "" ? mountInner : mountInner + "/" + rel
+      }
+      const pending = pendingByIndex.get(member.argIndex)
+      if (pending !== undefined) pending.elements[member.elementSlot] = innerPath
+    }
+  }
+
+  // ── 5. Reassemble each path-flag arg from its (now-complete) elements ─────
+  for (const pending of pendingByIndex.values()) {
+    const joined = pending.elements.map(e => e ?? "").join(",")
+    rewrittenArgs[pending.argIndex] = pending.flagName + "=" + joined
+  }
+
+  // ── 6. Assemble result ────────────────────────────────────────────────────
+  const allMounts: DockerMount[] = [...defaultMounts, ...dynamicMounts]
+
+  const argv: string[] = []
+  for (const m of allMounts) {
+    argv.push("-v", mountToSpec(m))
+  }
+
+  // Managed rw mount sources to pre-create: cwd, the cache root and dynamic
+  // out-of-root rw mounts, de-duplicated and filtered to those missing on the
+  // host. Extra mounts are excluded — user-controlled, may intentionally be files.
+  const ensureDirs = [
+    roots.cwd,
+    roots.skvmCache,
+    ...dynamicMounts.filter(m => m.mode === "rw").map(m => m.host),
+  ].filter((h, i, a) => a.indexOf(h) === i && !existsSync(h))
+
+  return {
+    mounts: allMounts,
+    rewrittenArgs,
+    argv,
+    ensureDirs,
+  }
+}
diff --git a/src/launcher/path-flags.ts b/src/launcher/path-flags.ts
new file mode 100644
index 0000000..948d787
--- /dev/null
+++ b/src/launcher/path-flags.ts
@@ -0,0 +1,113 @@
+import path from "node:path"
+
+/**
+ * Shape of a path flag's value: one path ("single") or a comma-separated list
+ * ("csv"). `--skill`, `--logs`, `--tasks` etc. accept `a,b,c`; each element is
+ * a path that is mounted / rewritten independently.
+ */
+export type PathValueShape = "single" | "csv"
+
+export interface PathFlag {
+  flag: string                    // e.g. "--skill"
+  kind: "file" | "dir"            // file → its parent dir is mounted; dir → the dir itself
+  mode: "ro" | "rw"               // bind-mount mode used for out-of-root values
+  required: boolean               // is the host path expected to exist?
+  shape?: PathValueShape          // default "single"
+  /**
+   * When true, only rewrite elements that look like filesystem paths; leave
+   * non-path tokens untouched. Used by `--tasks` / `--test-tasks`, whose
+   * values are either bench task IDs (e.g. `bench_foo`) or paths to task JSON
+   * files. The predicate mirrors the JIT/bench resolver: a value is path-like
+   * if it ends in `.json` or contains a `/`.
+   */
+  pathLikeOnly?: boolean
+}
+
+/**
+ * Path-likeness test mirroring the JIT/bench task-ref resolver: a value counts
+ * as a path (and is therefore mounted / rewritten) only if it contains a slash
+ * or ends in `.json`. Bare identifiers like `bench_task_id` are left alone.
+ */
+export function looksLikePath(ref: string): boolean {
+  return ref.includes("/") || ref.endsWith(".json")
+}
+
+/**
+ * Every CLI flag whose value is a filesystem path. The launcher uses this to
+ * decide which arg values need mount placement / path rewriting before the
+ * container starts.
+ *
+ * Adding a path-shaped flag elsewhere in the codebase requires a matching
+ * entry here — otherwise the launcher will pass the host path through
+ * unchanged and the container will see a non-existent path.
+ *
+ * Maintenance: when touching this table, grep for path-shaped CLI flags
+ * across `src/index.ts` and the per-command entry files and verify each is
+ * present below, with the right kind / mode / required.
+ *
+ * NOTE(review): entries are grouped by command but are not currently
+ * alphabetised within each group — sort them or drop that convention.
+ */
+export const PATH_FLAGS: PathFlag[] = [
+  // run / bench / jit-optimize — primary inputs
+  { flag: "--skill",          kind: "dir",  mode: "ro", required: true,  shape: "csv" },
+  { flag: "--skill-list",     kind: "file", mode: "ro", required: false },
+  { flag: "--task",           kind: "file", mode: "ro", required: true  },
+  { flag: "--out",            kind: "dir",  mode: "rw", required: false },
+  { flag: "--workspace",      kind: "dir",  mode: "rw", required: false },
+  { flag: "--workdir",        kind: "dir",  mode: "rw", required: false },
+
+  // global path overrides
+  { flag: "--skvm-cache",     kind: "dir",  mode: "rw", required: false },
+  { flag: "--skvm-data-dir",  kind: "dir",  mode: "ro", required: false },
+  { flag: "--profiles-dir",   kind: "dir",  mode: "rw", required: false },
+  { flag: "--logs-dir",       kind: "dir",  mode: "rw", required: false },
+  { flag: "--proposals-dir",  kind: "dir",  mode: "rw", required: false },
+
+  // aot-compile / pipeline / bench — profile TCP file
+  { flag: "--profile",        kind: "file", mode: "ro", required: false },
+
+  // jit-optimize specifics
+  { flag: "--skill-source",   kind: "dir",  mode: "ro", required: false },
+  { flag: "--log-source",     kind: "file", mode: "ro", required: false },
+  // --task-source=log: comma-separated lists of execution-log / failures files.
+  { flag: "--logs",           kind: "file", mode: "ro", required: true,  shape: "csv" },
+  { flag: "--failures",       kind: "file", mode: "ro", required: false, shape: "csv" },
+  // --task-source=real: comma-separated list of bench task IDs *or* task JSON
+  // paths. pathLikeOnly leaves bare IDs untouched and rewrites only paths.
+  { flag: "--tasks",          kind: "file", mode: "ro", required: false, shape: "csv", pathLikeOnly: true },
+  { flag: "--test-tasks",     kind: "file", mode: "ro", required: false, shape: "csv", pathLikeOnly: true },
+
+  // proposals
+  { flag: "--proposal",       kind: "dir",  mode: "ro", required: false },
+  { flag: "--target",         kind: "dir",  mode: "rw", required: false },
+
+  // bench
+  { flag: "--bench-config",   kind: "file", mode: "ro", required: false },
+  { flag: "--bench-report",   kind: "dir",  mode: "rw", required: false },
+  { flag: "--custom",         kind: "file", mode: "ro", required: false },
+  { flag: "--manifest",       kind: "dir",  mode: "ro", required: false },
+  { flag: "--output-dir",     kind: "dir",  mode: "rw", required: false },
+  { flag: "--path",           kind: "dir",  mode: "ro", required: false },
+  { flag: "--report",         kind: "file", mode: "rw", required: false },
+  { flag: "--skill-path",     kind: "dir",  mode: "ro", required: false },
+
+  // logs / clean
+  { flag: "--log-dir",        kind: "dir",  mode: "rw", required: false },
+]
+
+/**
+ * Resolve a CLI path-flag value to an absolute host path. Handles `~` and
+ * `~/` (via `$HOME`), relative paths (against the provided cwd, not
+ * `process.cwd()`, so the launcher is deterministic in tests), and normalisation.
+ *
+ * Does **not** check that the path exists — that is the caller's job and is
+ * controlled per-flag by `required`.
+ */
+export function resolvePathFlagValue(value: string, cwd: string): string {
+  // Bare "~" is the home directory itself; "~user" forms are left untouched.
+  if (value === "~" || value.startsWith("~/")) {
+    return path.resolve(cwd, path.join(process.env.HOME ?? "", value.slice(2)))
+  }
+  return path.resolve(cwd, value)
+}
diff --git a/src/launcher/stale-reap.ts b/src/launcher/stale-reap.ts
new file mode 100644
index 0000000..86045a4
--- /dev/null
+++ b/src/launcher/stale-reap.ts
@@ -0,0 +1,68 @@
+import { spawnSync } from "node:child_process"
+import { readdirSync, rmSync } from "node:fs"
+import path from "node:path"
+
+const TMP_PREFIX = "skvm-launcher-"
+
+export function isPidAlive(pid: number): boolean {
+  if (pid <= 0) return false // non-positive pids never name a single process
+  try {
+    process.kill(pid, 0) // signal 0 only probes: it throws when the pid cannot be signalled
+    return true
+  } catch (err) { // EPERM: the pid exists but belongs to another user, so it is alive
+    return (err as { code?: string }).code === "EPERM"
+  }
+}
+
+export function parseHostPidFromLabel(label: string): number | null {
+  // Whole-string match: any stray text around the digits disqualifies the label.
+  const match = label.match(/^skvm-sandbox-host-pid=(\d+)$/)
+  return match ? Number.parseInt(match[1]!, 10) : null
+}
+
+interface ContainerInfo { id: string; hostPid: number | null } // hostPid is null when the container has no parsable skvm-sandbox-host-pid label
+
+function listLabeledContainers(): ContainerInfo[] {
+  const dockerArgs = ["ps", "-a", "--filter", "label=skvm-sandbox=1", "--format", "{{.ID}} {{.Labels}}"]
+  const res = spawnSync("docker", dockerArgs, { encoding: "utf-8", timeout: 5000 })
+  // Non-zero status (null on timeout) = daemon down/hung: nothing to reap, never block the launch.
+  if (res.status !== 0) return []
+  const found: ContainerInfo[] = []
+  for (const line of res.stdout.trim().split("\n")) {
+    if (!line) continue
+    // Each row is "<id> <k=v,k=v,...>"; everything after the first space is the label list.
+    const [id, ...rest] = line.split(" ")
+    const pidLabel = rest.join(" ").split(",").map(s => s.trim())
+      .find(s => s.startsWith("skvm-sandbox-host-pid="))
+    found.push({ id: id!, hostPid: pidLabel ? parseHostPidFromLabel(pidLabel) : null })
+  }
+  return found
+}
+
+function reapContainers(): void {
+  // Stale = no parsable host-pid label, or the launcher process that owned it is gone.
+  const stale = listLabeledContainers().filter(c => c.hostPid === null || !isPidAlive(c.hostPid))
+  for (const { id } of stale) {
+    spawnSync("docker", ["rm", "-f", id], { stdio: "ignore", timeout: 10000 })
+  }
+}
+
+function reapTmpDirs(): void {
+  let entries: string[] = []
+  try { entries = readdirSync("/tmp") } catch { return } // unreadable /tmp: nothing to reap
+  for (const name of entries) {
+    if (!name.startsWith(TMP_PREFIX)) continue
+    // Accept only an all-digit suffix: parseInt would read "12-old" as pid 12 and delete it.
+    const pidStr = name.slice(TMP_PREFIX.length)
+    if (!/^\d+$/.test(pidStr) || isPidAlive(Number(pidStr))) continue
+    try { rmSync(path.join("/tmp", name), { recursive: true, force: true }) } catch { /* ignore */ }
+  }
+}
+
+export function reapLeaked(): void {
+  // Best-effort cleanup after earlier crashed runs: every failure (daemon down,
+  // timeout, permission) is swallowed so it can never abort the current launch.
+  for (const step of [reapContainers, reapTmpDirs]) {
+    try { step() } catch { /* best-effort */ }
+  }
+}
diff --git a/src/providers/registry.ts b/src/providers/registry.ts
index 581cb5b..f4a0988 100644
--- a/src/providers/registry.ts
+++ b/src/providers/registry.ts
@@ -1,6 +1,7 @@
 import type { LLMProvider } from "./types.ts"
 import type { ProviderRoute, ProvidersConfig } from "../core/types.ts"
-import { getProvidersConfig, stripRoutingPrefix } from "../core/config.ts"
+import { getProvidersConfig, stripRoutingPrefix, resolveRouteApiKey } from "../core/config.ts"
+export { resolveRouteApiKey } from "../core/config.ts"
 import { OpenRouterProvider } from "./openrouter.ts"
 import { AnthropicProvider } from "./anthropic.ts"
 import { OpenAICompatibleProvider } from "./openai-compatible.ts"
@@ -160,7 +161,7 @@ export function createProviderForModel(
     if (!altBase) {
       return { verdict: { primary: "polluted", alt: "indeterminate" }, altProvider: null, writeRoute: null }
     }
-    const altApiKey = overrides?.apiKey ?? r.apiKey ?? (r.apiKeyEnv ? process.env[r.apiKeyEnv] : undefined)
+    const altApiKey = overrides?.apiKey ?? resolveRouteApiKey(r)
     const altProvider: LLMProvider = new AnthropicProvider({
       apiKey: altApiKey,
       model: stripRoutingPrefix(mid),
@@ -209,25 +210,6 @@ export function globMatch(pattern: string, value: string): boolean {
   return new RegExp(`^${escaped}$`).test(value)
 }
 
-/**
- * Resolve a route's API key as a plain string. Used by env-var injection
- * (envForRoute) and the OPENCODE_CONFIG_CONTENT builder. Returns null when
- * neither `apiKey` nor `apiKeyEnv` yields a usable value — callers then
- * decide whether absence is a failure (instantiate) or just "no help"
- * (env injection — let the spawn inherit). `instantiate` keeps its own
- * branchy resolver because it must raise ProviderAuthError on missing keys
- * (the jit-optimize infraError classification depends on that exception
- * shape).
- */
-export function resolveRouteApiKey(route: ProviderRoute): string | null {
-  if (route.apiKey) return route.apiKey
-  if (route.apiKeyEnv) {
-    const val = process.env[route.apiKeyEnv]
-    if (val) return val
-  }
-  return null
-}
-
 /**
  * Standard SDK env vars to inject into adapter / headless subprocesses so
  * they can reach the backend matched by `providers.routes` without the user
@@ -262,30 +244,29 @@ function instantiate(
   route: ProviderRoute,
   overrides: ProviderOverrides | undefined,
 ): LLMProvider {
-  // Resolve API key. Order: explicit override → route.apiKey (stored in
-  // skvm.config.json by `skvm config init`) → env var named by route.apiKeyEnv.
+  // Resolve API key. Order: explicit override → resolveRouteApiKey (covers
+  // route.apiKey, SKVM_ROUTE_<id>_KEY sandbox injection, and route.apiKeyEnv).
   // A missing key is an infra / config failure, so raise ProviderAuthError —
   // plain Error would bypass the jit-optimize infraError classification and
   // show up as a normal score=0 criterion.
   let apiKey: string
   if (overrides?.apiKey !== undefined) {
     apiKey = overrides.apiKey
-  } else if (route.apiKey) {
-    apiKey = route.apiKey
-  } else if (route.apiKeyEnv) {
-    const val = process.env[route.apiKeyEnv]
-    if (!val) {
+  } else {
+    const resolved = resolveRouteApiKey(route)
+    if (!resolved) {
+      if (route.apiKeyEnv) {
+        throw new ProviderAuthError(
+          `Route "${route.match}" (kind=${route.kind}) requires env var ${route.apiKeyEnv}, which is not set`,
+          route.kind,
+        )
+      }
       throw new ProviderAuthError(
-        `Route "${route.match}" (kind=${route.kind}) requires env var ${route.apiKeyEnv}, which is not set`,
+        `Route "${route.match}" (kind=${route.kind}) has neither apiKey nor apiKeyEnv set`,
         route.kind,
       )
     }
-    apiKey = val
-  } else {
-    throw new ProviderAuthError(
-      `Route "${route.match}" (kind=${route.kind}) has neither apiKey nor apiKeyEnv set`,
-      route.kind,
-    )
+    apiKey = resolved
   }
 
   switch (route.kind) {
diff --git a/test/core/cli-flags.test.ts b/test/core/cli-flags.test.ts
index 877cb80..ef560f2 100644
--- a/test/core/cli-flags.test.ts
+++ b/test/core/cli-flags.test.ts
@@ -50,6 +50,15 @@ describe("assertKnownFlags", () => {
     expect(exitCode).toBeNull()
   })
 
+  test("accepts sandbox as a global flag so --sandbox=false opt-out passes per-command assertKnownFlags", () => {
+    // Regression: when defaults.sandbox=true and user passes --sandbox=false,
+    // dispatch strips it but assertKnownFlags in the subcommand still sees it.
+    // sandbox must be in GLOBAL_FLAGS so it is accepted without per-command declaration.
+    assertKnownFlags("run", { sandbox: "false" }, new Set(["skill", "task", "model"]))
+    expect(exitCode).toBeNull()
+    expect(stderr).toBe("")
+  })
+
   test("rejects an unknown flag with a 'did you mean' hint", () => {
     expect(() => {
       assertKnownFlags("profile", { adpter: "claude-code", model: "x/y" }, new Set(["adapter", "model"]))
diff --git a/test/core/config-sandbox.test.ts b/test/core/config-sandbox.test.ts
new file mode 100644
index 0000000..273ddd6
--- /dev/null
+++ b/test/core/config-sandbox.test.ts
@@ -0,0 +1,150 @@
+import { test, expect, describe, beforeEach, afterEach } from "bun:test"
+import { mkdtempSync, rmSync, writeFileSync } from "node:fs"
+import { tmpdir } from "node:os"
+import path from "node:path"
+import { SandboxConfigSchema } from "../../src/core/types.ts"
+import { invalidateConfigCache, getSandboxConfig, resolveRouteApiKey, safeRouteId, resolveAdapterConfigMode } from "../../src/core/config.ts"
+
+describe("resolveAdapterConfigMode — sandbox native guard", () => {
+  let savedInSandbox: string | undefined
+  beforeEach(() => { savedInSandbox = process.env.SKVM_IN_SANDBOX })
+  afterEach(() => {
+    if (savedInSandbox === undefined) delete process.env.SKVM_IN_SANDBOX
+    else process.env.SKVM_IN_SANDBOX = savedInSandbox
+  })
+
+  test("throws on native mode inside the sandbox", () => {
+    process.env.SKVM_IN_SANDBOX = "1"
+    expect(() => resolveAdapterConfigMode("native")).toThrow(/managed adapter mode/)
+  })
+
+  test("allows managed mode inside the sandbox", () => {
+    process.env.SKVM_IN_SANDBOX = "1"
+    expect(resolveAdapterConfigMode("managed")).toBe("managed")
+  })
+
+  test("allows native mode on the host (not in sandbox)", () => {
+    delete process.env.SKVM_IN_SANDBOX
+    expect(resolveAdapterConfigMode("native")).toBe("native")
+  })
+})
+
+describe("SandboxConfigSchema", () => {
+  test("accepts an empty object and fills defaults", () => {
+    const parsed = SandboxConfigSchema.parse({})
+    expect(parsed.docker.network).toBe("bridge")
+    expect(parsed.docker.memory).toBe("2g")
+    expect(parsed.docker.cpus).toBe("2")
+    expect(parsed.docker.pidsLimit).toBe(512)
+    expect(parsed.docker.image).toBeNull()
+    expect(parsed.docker.extraMounts).toEqual([])
+  })
+
+  test("accepts a fully populated block", () => {
+    const parsed = SandboxConfigSchema.parse({
+      docker: {
+        image: "ghcr.io/sjtu-ipads/skvm-sandbox:0.1.4",
+        network: "none",
+        memory: "4g",
+        cpus: "4",
+        pidsLimit: 1024,
+        extraMounts: [{ host: "/home/x/.ssh", inner: "/root/.ssh", mode: "ro" }],
+      },
+    })
+    expect(parsed.docker.image).toBe("ghcr.io/sjtu-ipads/skvm-sandbox:0.1.4")
+    expect(parsed.docker.extraMounts[0]!.mode).toBe("ro")
+  })
+
+  test("rejects unknown network values", () => {
+    expect(() => SandboxConfigSchema.parse({ docker: { network: "wifi" } })).toThrow()
+  })
+
+  test("rejects malformed memory / cpus values", () => {
+    expect(() => SandboxConfigSchema.parse({ docker: { memory: "banana" } })).toThrow()
+    expect(() => SandboxConfigSchema.parse({ docker: { cpus: "alot" } })).toThrow()
+    // valid forms pass
+    expect(SandboxConfigSchema.parse({ docker: { memory: "512m", cpus: "1.5" } }).docker.memory).toBe("512m")
+  })
+
+  test("rejects extra-mount with bad mode", () => {
+    expect(() =>
+      SandboxConfigSchema.parse({
+        docker: { extraMounts: [{ host: "/x", inner: "/y", mode: "exec" }] },
+      }),
+    ).toThrow()
+  })
+})
+
+describe("getSandboxConfig", () => {
+  let tmp: string
+  let savedCache: string | undefined
+
+  beforeEach(() => {
+    savedCache = process.env.SKVM_CACHE
+    tmp = mkdtempSync(path.join(tmpdir(), "skvm-cfg-"))
+    process.env.SKVM_CACHE = tmp
+    invalidateConfigCache()
+  })
+
+  afterEach(() => {
+    invalidateConfigCache()
+    if (savedCache === undefined) delete process.env.SKVM_CACHE
+    else process.env.SKVM_CACHE = savedCache
+    rmSync(tmp, { recursive: true, force: true })
+  })
+
+  test("returns parsed defaults when the file has no sandbox slice", () => {
+    writeFileSync(path.join(tmp, "skvm.config.json"), JSON.stringify({}))
+    const sb = getSandboxConfig()
+    expect(sb.docker.network).toBe("bridge")
+    expect(sb.docker.memory).toBe("2g")
+  })
+
+  test("throws on malformed sandbox slice", () => {
+    writeFileSync(
+      path.join(tmp, "skvm.config.json"),
+      JSON.stringify({ sandbox: { docker: { network: "wifi" } } }),
+    )
+    expect(() => getSandboxConfig()).toThrow()
+  })
+})
+
+describe("resolveRouteApiKey", () => {
+  // Restore env state between tests so the `SKVM_ROUTE_openai_KEY` etc. don't leak.
+  let savedSandboxKey: string | undefined
+  let savedCustomKey: string | undefined
+  beforeEach(() => {
+    savedSandboxKey = process.env.SKVM_ROUTE_openai_KEY
+    savedCustomKey = process.env.MY_CUSTOM_KEY
+    delete process.env.SKVM_ROUTE_openai_KEY
+    delete process.env.MY_CUSTOM_KEY
+  })
+  afterEach(() => {
+    if (savedSandboxKey === undefined) delete process.env.SKVM_ROUTE_openai_KEY
+    else process.env.SKVM_ROUTE_openai_KEY = savedSandboxKey
+    if (savedCustomKey === undefined) delete process.env.MY_CUSTOM_KEY
+    else process.env.MY_CUSTOM_KEY = savedCustomKey
+  })
+
+  test("returns the in-config apiKey when present", () => {
+    const route = { match: "openai", kind: "openai-compatible" as const, apiKey: "sk-direct" }
+    expect(resolveRouteApiKey(route)).toBe("sk-direct")
+  })
+
+  test("falls back to SKVM_ROUTE_<safe-id>_KEY when apiKey is absent", () => {
+    process.env.SKVM_ROUTE_openai_KEY = "sk-from-env"
+    const route = { match: "openai", kind: "openai-compatible" as const }
+    expect(resolveRouteApiKey(route)).toBe("sk-from-env")
+  })
+
+  test("honours apiKeyEnv when neither apiKey nor the standard fallback env are set", () => {
+    process.env.MY_CUSTOM_KEY = "sk-custom"
+    const route = { match: "openai", kind: "openai-compatible" as const, apiKeyEnv: "MY_CUSTOM_KEY" }
+    expect(resolveRouteApiKey(route)).toBe("sk-custom")
+  })
+
+  test("safe-id replaces each non-alphanumeric character in the route match string", () => {
+    expect(safeRouteId("openrouter/anthropic/claude-sonnet-4.6")).toBe("openrouter_anthropic_claude_sonnet_4_6")
+    expect(safeRouteId("openai/*")).toBe("openai__") // "/" and "*" each map to "_": runs are not collapsed
+  })
+})
diff --git a/test/launcher/config-sanitize.test.ts b/test/launcher/config-sanitize.test.ts
new file mode 100644
index 0000000..b1b7272
--- /dev/null
+++ b/test/launcher/config-sanitize.test.ts
@@ -0,0 +1,44 @@
+import { test, expect, describe } from "bun:test"
+import { mkdtempSync, writeFileSync, readFileSync, existsSync } from "node:fs"
+import { tmpdir } from "node:os"
+import path from "node:path"
+import { writeSanitizedConfig } from "../../src/launcher/config-sanitize.ts"
+
+describe("writeSanitizedConfig", () => {
+  test("drops apiKey and rewrites apiKeyEnv to the injected env var", () => {
+    const dir = mkdtempSync(path.join(tmpdir(), "skvm-sancfg-"))
+    const src = path.join(dir, "skvm.config.json")
+    writeFileSync(src, JSON.stringify({
+      providers: {
+        routes: [
+          { match: "openai/*", kind: "openai-compatible", apiKey: "sk-1" },
+          { match: "x/*", kind: "openai-compatible", apiKeyEnv: "X_KEY" },
+        ],
+      },
+    }))
+    const out = writeSanitizedConfig(src, 99999)
+    expect(existsSync(out)).toBe(true)
+    expect(out).toMatch(/\/tmp\/skvm-launcher-99999\/skvm\.config\.json$/)
+    const parsed = JSON.parse(readFileSync(out, "utf-8"))
+    // No literal secret remains; apiKeyEnv points at the launcher-injected var,
+    // keeping the route schema-valid (requires apiKey or apiKeyEnv).
+    expect(parsed.providers.routes[0]).toEqual({
+      match: "openai/*",
+      kind: "openai-compatible",
+      apiKeyEnv: "SKVM_ROUTE_openai___KEY",
+    })
+    expect(parsed.providers.routes[1]).toEqual({
+      match: "x/*",
+      kind: "openai-compatible",
+      apiKeyEnv: "SKVM_ROUTE_x___KEY",
+    })
+    expect(JSON.stringify(parsed)).not.toContain("sk-1")
+  })
+
+  test("returns an empty-config path when host config is missing", () => {
+    const out = writeSanitizedConfig("/nonexistent/skvm.config.json", 99998)
+    expect(existsSync(out)).toBe(true)
+    const parsed = JSON.parse(readFileSync(out, "utf-8"))
+    expect(parsed).toEqual({})
+  })
+})
diff --git a/test/launcher/dispatch.test.ts b/test/launcher/dispatch.test.ts
new file mode 100644
index 0000000..a680b92
--- /dev/null
+++ b/test/launcher/dispatch.test.ts
@@ -0,0 +1,105 @@
+import { test, expect, describe } from "bun:test"
+import { shouldEnterLauncher, parseSandboxFlag, assertSandboxCompatible } from "../../src/index.ts"
+
+describe("parseSandboxFlag", () => {
+  test("--sandbox alone means true", () => {
+    expect(parseSandboxFlag(["--sandbox", "run"])).toEqual({ value: true, present: true })
+  })
+
+  test("--sandbox=true means true", () => {
+    expect(parseSandboxFlag(["--sandbox=true"])).toEqual({ value: true, present: true })
+  })
+
+  test("--sandbox=false means false (explicit opt-out)", () => {
+    expect(parseSandboxFlag(["--sandbox=false"])).toEqual({ value: false, present: true })
+  })
+
+  test("absent means present:false", () => {
+    expect(parseSandboxFlag(["run", "--skill=/x"])).toEqual({ value: false, present: false })
+  })
+
+  test("throws on an unrecognized --sandbox=<value> instead of running unsandboxed", () => {
+    expect(() => parseSandboxFlag(["--sandbox=yes"])).toThrow(/must be "true" or "false"/)
+    expect(() => parseSandboxFlag(["--sandbox=1", "run"])).toThrow(/got "1"/)
+  })
+})
+
+describe("shouldEnterLauncher", () => {
+  test("explicit --sandbox + not in container → enter launcher", () => {
+    expect(shouldEnterLauncher({
+      parsed: { value: true, present: true },
+      defaultsSandbox: false,
+      inSandboxEnv: false,
+    })).toBe(true)
+  })
+
+  test("default config sandbox=true + flag absent + not in container → enter launcher", () => {
+    expect(shouldEnterLauncher({
+      parsed: { value: false, present: false },
+      defaultsSandbox: true,
+      inSandboxEnv: false,
+    })).toBe(true)
+  })
+
+  test("explicit --sandbox=false overrides config default", () => {
+    expect(shouldEnterLauncher({
+      parsed: { value: false, present: true },
+      defaultsSandbox: true,
+      inSandboxEnv: false,
+    })).toBe(false)
+  })
+
+  test("never enters launcher when SKVM_IN_SANDBOX=1", () => {
+    expect(shouldEnterLauncher({
+      parsed: { value: true, present: true },
+      defaultsSandbox: false,
+      inSandboxEnv: true,
+    })).toBe(false)
+  })
+})
+
+describe("--debug-sandbox flag", () => {
+  test("strips --debug-sandbox from forwarded args", () => {
+    const filtered = ["--sandbox", "--debug-sandbox", "run", "--skill=/x"]
+      .filter(a => a !== "--sandbox" && !a.startsWith("--sandbox=") && a !== "--debug-sandbox")
+    expect(filtered).toEqual(["run", "--skill=/x"])
+  })
+})
+
+describe("assertSandboxCompatible", () => {
+  test("hard-errors on --sandbox + config init", () => {
+    expect(() => assertSandboxCompatible({
+      sandboxOn: true,
+      command: "config",
+      subcommand: "init",
+      adapterMode: undefined,
+    })).toThrow(/config commands always run on host/)
+  })
+
+  test("hard-errors on --sandbox + native adapter mode", () => {
+    expect(() => assertSandboxCompatible({
+      sandboxOn: true,
+      command: "run",
+      subcommand: undefined,
+      adapterMode: "native",
+    })).toThrow(/managed adapter mode/)
+  })
+
+  test("passes on --sandbox + managed adapter", () => {
+    expect(() => assertSandboxCompatible({
+      sandboxOn: true,
+      command: "run",
+      subcommand: undefined,
+      adapterMode: "managed",
+    })).not.toThrow()
+  })
+
+  test("passes on config show when --sandbox is absent", () => {
+    expect(() => assertSandboxCompatible({
+      sandboxOn: false, // sandbox off: the config-command restriction must not fire
+      command: "config",
+      subcommand: "show",
+      adapterMode: undefined,
+    })).not.toThrow()
+  })
+})
diff --git a/test/launcher/docker-argv.test.ts b/test/launcher/docker-argv.test.ts
new file mode 100644
index 0000000..5c381c2
--- /dev/null
+++ b/test/launcher/docker-argv.test.ts
@@ -0,0 +1,72 @@
+import { test, expect, describe } from "bun:test"
+import { buildDockerRunArgv } from "../../src/launcher/docker-argv.ts"
+
+describe("buildDockerRunArgv", () => {
+  const base = {
+    mountArgv: ["-v", "/x:/workspace:rw"],
+    env: { SKVM_IN_SANDBOX: "1", HOME: "/workspace" },
+    image: "skvm-sandbox:0.1.4",
+    networkMode: "bridge" as const,
+    resourceLimits: { memory: "2g", cpus: "2", pidsLimit: 512 },
+    hostUid: 1000,
+    hostGid: 1000,
+    hostPid: 9999,
+    command: ["skvm", "run", "--skill=/workspace/foo"],
+  }
+
+  test("throws when an env value contains a newline", () => {
+    expect(() => buildDockerRunArgv({
+      ...base,
+      env: { ...base.env, SKVM_ROUTE_x_KEY: "sk-abc\n" },
+    })).toThrow(/newline or NUL/)
+  })
+
+  test("includes hardening flags", () => {
+    const argv = buildDockerRunArgv(base)
+    expect(argv).toContain("--rm")
+    expect(argv).toContain("--cap-drop=ALL")
+    expect(argv).toContain("--security-opt")
+    expect(argv).toContain("no-new-privileges")
+    expect(argv).toContain("-u")
+    expect(argv).toContain("1000:1000")
+  })
+
+  test("applies resource limits", () => {
+    const argv = buildDockerRunArgv(base)
+    expect(argv).toContain("--memory=2g")
+    expect(argv).toContain("--cpus=2")
+    expect(argv).toContain("--pids-limit=512")
+  })
+
+  test("applies network mode", () => {
+    const argv = buildDockerRunArgv(base)
+    expect(argv).toContain("--network=bridge")
+  })
+
+  test("labels include host pid for stale-reap", () => {
+    const argv = buildDockerRunArgv(base)
+    expect(argv).toContain("skvm-sandbox=1")
+    expect(argv).toContain("skvm-sandbox-host-pid=9999")
+  })
+
+  test("forwards env via -e", () => {
+    const argv = buildDockerRunArgv(base)
+    expect(argv).toContain("-e")
+    expect(argv).toContain("SKVM_IN_SANDBOX=1")
+    expect(argv).toContain("HOME=/workspace")
+  })
+
+  test("workdir is /workspace", () => {
+    const argv = buildDockerRunArgv(base)
+    expect(argv).toContain("-w")
+    expect(argv).toContain("/workspace")
+  })
+
+  test("image precedes command", () => {
+    const argv = buildDockerRunArgv(base)
+    const i = argv.indexOf("skvm-sandbox:0.1.4")
+    const j = argv.indexOf("skvm")
+    expect(i).toBeGreaterThan(-1)
+    expect(j).toBeGreaterThan(i)
+  })
+})
diff --git a/test/launcher/env.test.ts b/test/launcher/env.test.ts
new file mode 100644
index 0000000..d3d7931
--- /dev/null
+++ b/test/launcher/env.test.ts
@@ -0,0 +1,80 @@
+import { test, expect, describe } from "bun:test"
+import { composeEnv } from "../../src/launcher/env.ts"
+
+describe("composeEnv", () => {
+  test("includes SKVM_IN_SANDBOX=1 and HOME=/workspace", () => {
+    const env = composeEnv({ routes: [], hostEnv: {} })
+    expect(env.SKVM_IN_SANDBOX).toBe("1")
+    expect(env.HOME).toBe("/workspace")
+  })
+
+  test("points SKVM_CACHE at the mounted /skvm-cache", () => {
+    const env = composeEnv({ routes: [], hostEnv: {} })
+    expect(env.SKVM_CACHE).toBe("/skvm-cache")
+  })
+
+  test("sets SKVM_DATA_DIR=/skvm-data only when the dataset is mounted", () => {
+    const without = composeEnv({ routes: [], hostEnv: {} })
+    expect(without.SKVM_DATA_DIR).toBeUndefined()
+    const withData = composeEnv({ routes: [], hostEnv: {}, skvmDataMounted: true })
+    expect(withData.SKVM_DATA_DIR).toBe("/skvm-data")
+  })
+
+  test("forwards HTTP_PROXY, HTTPS_PROXY, NO_PROXY in both cases", () => {
+    const env = composeEnv({
+      routes: [],
+      hostEnv: {
+        HTTP_PROXY: "http://p:1",
+        https_proxy: "http://p:2",
+        no_proxy: "localhost",
+      },
+    })
+    expect(env.HTTP_PROXY).toBe("http://p:1")
+    expect(env.https_proxy).toBe("http://p:2")
+    expect(env.no_proxy).toBe("localhost")
+  })
+
+  test("injects SKVM_ROUTE_<safe>_KEY for each route with a resolved key", () => {
+    const env = composeEnv({
+      routes: [
+        { match: "openai", kind: "openai-compatible", apiKey: "sk-1" },
+        { match: "openrouter/anthropic/claude-sonnet-4.6", kind: "openrouter", apiKey: "sk-2" },
+      ],
+      hostEnv: {},
+    })
+    expect(env.SKVM_ROUTE_openai_KEY).toBe("sk-1")
+    expect(env.SKVM_ROUTE_openrouter_anthropic_claude_sonnet_4_6_KEY).toBe("sk-2")
+  })
+
+  test("skips routes without a resolvable key", () => {
+    const env = composeEnv({
+      routes: [{ match: "x/y", kind: "openai-compatible" }],
+      hostEnv: {},
+    })
+    expect(Object.keys(env).some(k => k.startsWith("SKVM_ROUTE_"))).toBe(false)
+  })
+
+  test("throws on a route-match collision (distinct matches → same env var)", () => {
+    expect(() => composeEnv({
+      routes: [
+        { match: "openai-x/*", kind: "openai-compatible", apiKey: "sk-1" },
+        { match: "openai_x/*", kind: "openai-compatible", apiKey: "sk-2" },
+      ],
+      hostEnv: {},
+    })).toThrow(/route match collision/)
+  })
+
+  test("does not flag the same match string appearing once", () => {
+    expect(() => composeEnv({
+      routes: [{ match: "openai/*", kind: "openai-compatible", apiKey: "sk-1" }],
+      hostEnv: {},
+    })).not.toThrow()
+  })
+
+  test("forwards SKVM_AUTO_PROBE when set on the host (--no-auto-probe opt-out)", () => {
+    const off = composeEnv({ routes: [], hostEnv: { SKVM_AUTO_PROBE: "0" } })
+    expect(off.SKVM_AUTO_PROBE).toBe("0")
+    const unset = composeEnv({ routes: [], hostEnv: {} })
+    expect(unset.SKVM_AUTO_PROBE).toBeUndefined()
+  })
+})
diff --git a/test/launcher/image.test.ts b/test/launcher/image.test.ts
new file mode 100644
index 0000000..13a752a
--- /dev/null
+++ b/test/launcher/image.test.ts
@@ -0,0 +1,37 @@
+import { test, expect, describe } from "bun:test"
+import { resolveImageRef, buildBuildCommandHint } from "../../src/launcher/image.ts"
+
+describe("resolveImageRef", () => {
+  test("cli override wins over config and built-in", () => {
+    expect(resolveImageRef({
+      cliOverride: "custom:tag",
+      configImage: "config:tag",
+      skvmVersion: "0.1.4",
+    })).toBe("custom:tag")
+  })
+
+  test("config wins over built-in when no cli override", () => {
+    expect(resolveImageRef({
+      cliOverride: null,
+      configImage: "config:tag",
+      skvmVersion: "0.1.4",
+    })).toBe("config:tag")
+  })
+
+  test("built-in default uses skvm version", () => {
+    expect(resolveImageRef({
+      cliOverride: null,
+      configImage: null,
+      skvmVersion: "0.1.4",
+    })).toBe("ghcr.io/sjtu-ipads/skvm-sandbox:0.1.4")
+  })
+})
+
+describe("buildBuildCommandHint", () => {
+  test("includes the resolved image ref so the user can copy-paste", () => {
+    const hint = buildBuildCommandHint("ghcr.io/sjtu-ipads/skvm-sandbox:0.1.4")
+    expect(hint).toContain("docker build")
+    expect(hint).toContain("-f docker/skvm-sandbox.Dockerfile")
+    expect(hint).toContain("-t ghcr.io/sjtu-ipads/skvm-sandbox:0.1.4")
+  })
+})
diff --git a/test/launcher/mounts.test.ts b/test/launcher/mounts.test.ts
new file mode 100644
index 0000000..191a253
--- /dev/null
+++ b/test/launcher/mounts.test.ts
@@ -0,0 +1,235 @@
+import { test, expect, describe } from "bun:test"
+import { composeMounts, type HostRoots } from "../../src/launcher/mounts.ts"
+
+const ROOTS: HostRoots = {
+  cwd: "/home/u/proj",
+  skvmCache: "/home/u/.skvm",
+  skvmDataDir: "/home/u/.skvm-data",
+  sanitizedConfigPath: "/tmp/skvm-launcher-1234/skvm.config.json",
+}
+
+describe("composeMounts — defaults", () => {
+  test("emits three default mounts + sanitized-config overlay", () => {
+    const { mounts, argv } = composeMounts({ args: [], roots: ROOTS })
+    expect(argv).toEqual([
+      "-v", "/home/u/proj:/workspace:rw",
+      "-v", "/home/u/.skvm:/skvm-cache:rw",
+      "-v", "/home/u/.skvm-data:/skvm-data:ro",
+      "-v", "/tmp/skvm-launcher-1234/skvm.config.json:/skvm-cache/skvm.config.json:ro",
+    ])
+    expect(mounts.length).toBe(4)
+  })
+
+  test("omits /skvm-data when skvmDataDir is null", () => {
+    const { argv } = composeMounts({
+      args: [],
+      roots: { ...ROOTS, skvmDataDir: null },
+    })
+    expect(argv.find(s => s.includes("/skvm-data"))).toBeUndefined()
+  })
+})
+
+describe("composeMounts — path rewriting under known roots", () => {
+  test("rewrites --skill under cwd to /workspace/", () => {
+    const { rewrittenArgs } = composeMounts({
+      args: ["--skill=/home/u/proj/skills/foo"],
+      roots: ROOTS,
+    })
+    expect(rewrittenArgs).toEqual(["--skill=/workspace/skills/foo"])
+  })
+
+  test("rewrites --profiles-dir under skvm-cache", () => {
+    const { rewrittenArgs } = composeMounts({
+      args: ["--profiles-dir=/home/u/.skvm/profiles"],
+      roots: ROOTS,
+    })
+    expect(rewrittenArgs).toEqual(["--profiles-dir=/skvm-cache/profiles"])
+  })
+})
+
+describe("composeMounts — out-of-root dynamic mounts", () => {
+  test("adds an /extra/ mount for a dir-kind out-of-root --skill", () => {
+    const { argv, rewrittenArgs } = composeMounts({
+      args: ["--skill=/elsewhere/skills/foo"],
+      roots: ROOTS,
+      existsSync: () => true,
+    })
+    expect(argv).toContain("/elsewhere/skills/foo:/extra/0/foo:ro")
+    expect(rewrittenArgs).toEqual(["--skill=/extra/0/foo"])
+  })
+
+  test("adds a parent-dir /extra/ mount for a file-kind out-of-root --task", () => {
+    const { argv, rewrittenArgs } = composeMounts({
+      args: ["--task=/tmp/x/task.json"],
+      roots: ROOTS,
+      existsSync: () => true,
+    })
+    expect(argv).toContain("/tmp/x:/extra/0:ro")
+    expect(rewrittenArgs).toEqual(["--task=/extra/0/task.json"])
+  })
+
+  test("does not dedupe sibling out-of-root paths — each gets its own /extra/ mount", () => {
+    const { argv, rewrittenArgs } = composeMounts({
+      args: ["--skill=/elsewhere/a/skill", "--out=/elsewhere/b/out"],
+      roots: ROOTS,
+      existsSync: () => true,
+    })
+    const extraCount = argv.filter(s => s.startsWith("/elsewhere/")).length
+    expect(extraCount).toBe(2)
+    expect(rewrittenArgs).toEqual([
+      "--skill=/extra/0/skill",
+      "--out=/extra/1/out",
+    ])
+  })
+
+  test("dedupes when one out-of-root path contains another (prefix dedup)", () => {
+    const { argv, rewrittenArgs } = composeMounts({
+      args: ["--out=/elsewhere/work", "--skill=/elsewhere/work/skill"],
+      roots: ROOTS,
+      existsSync: () => true,
+    })
+    const extraCount = argv.filter(s => s.startsWith("/elsewhere/")).length
+    expect(extraCount).toBe(1)
+    expect(rewrittenArgs).toEqual([
+      "--out=/extra/0",
+      "--skill=/extra/0/skill",
+    ])
+    // The broader path's mode (rw, from --out) wins for the merged mount.
+    expect(argv).toContain("/elsewhere/work:/extra/0:rw")
+  })
+})
+
+describe("composeMounts — overlapping roots (longest-prefix wins)", () => {
+  // Cache nested under the workspace: --skvm-cache=./.skvm
+  const NESTED: HostRoots = {
+    cwd: "/home/u/proj",
+    skvmCache: "/home/u/proj/.skvm",
+    skvmDataDir: null,
+    sanitizedConfigPath: "/tmp/skvm-launcher-1/skvm.config.json",
+  }
+
+  test("a --skvm-cache under cwd resolves to /skvm-cache, not /workspace/.skvm", () => {
+    const { rewrittenArgs } = composeMounts({
+      args: ["--skvm-cache=/home/u/proj/.skvm"],
+      roots: NESTED,
+    })
+    // The more specific cache root must win, so the in-container flag routes
+    // through the sanitized overlay instead of the raw config in /workspace.
+    expect(rewrittenArgs).toEqual(["--skvm-cache=/skvm-cache"])
+  })
+
+  test("a profiles dir under the nested cache resolves under /skvm-cache", () => {
+    const { rewrittenArgs } = composeMounts({
+      args: ["--profiles-dir=/home/u/proj/.skvm/profiles"],
+      roots: NESTED,
+    })
+    expect(rewrittenArgs).toEqual(["--profiles-dir=/skvm-cache/profiles"])
+  })
+
+  test("a plain path under cwd (not the cache) still resolves to /workspace", () => {
+    const { rewrittenArgs } = composeMounts({
+      args: ["--skill=/home/u/proj/skills/foo"],
+      roots: NESTED,
+    })
+    expect(rewrittenArgs).toEqual(["--skill=/workspace/skills/foo"])
+  })
+})
+
+describe("composeMounts — ensureDirs (pre-create rw sources)", () => {
+  test("reports a missing dynamic rw output dir and the cache, excludes ro + existing", () => {
+    const { ensureDirs } = composeMounts({
+      args: ["--out=/tmp/new-output", "--profile=/tmp/in/prof.json"],
+      roots: ROOTS,
+      // cwd exists; everything else is treated as missing.
+      existsSync: (p) => p === ROOTS.cwd,
+    })
+    expect(ensureDirs).toContain("/tmp/new-output")  // dynamic rw output
+    expect(ensureDirs).toContain(ROOTS.skvmCache)    // cache root
+    expect(ensureDirs).not.toContain(ROOTS.cwd)      // already exists
+    expect(ensureDirs).not.toContain("/tmp/in")      // --profile is ro, not pre-created
+  })
+
+  test("is empty when every managed rw source already exists", () => {
+    const { ensureDirs } = composeMounts({
+      args: ["--out=/tmp/out"],
+      roots: ROOTS,
+      existsSync: () => true,
+    })
+    expect(ensureDirs).toEqual([])
+  })
+})
+
+describe("composeMounts — hard errors", () => {
+  test("throws when a required path-flag value does not exist", () => {
+    // --skill is required; we point at a non-existent path
+    expect(() =>
+      composeMounts({
+        args: ["--skill=/definitely/not/here"],
+        roots: ROOTS,
+        existsSync: () => false,
+      }),
+    ).toThrow(/--skill/)
+  })
+})
+
+describe("composeMounts — csv path flags", () => {
+  test("rewrites each element of an out-of-root --skill list to its own /extra mount", () => {
+    const { argv, rewrittenArgs } = composeMounts({
+      args: ["--skill=/tmp/a,/tmp/b"],
+      roots: ROOTS,
+      existsSync: () => true,
+    })
+    expect(rewrittenArgs).toEqual(["--skill=/extra/0/a,/extra/1/b"])
+    expect(argv).toContain("/tmp/a:/extra/0/a:ro")
+    expect(argv).toContain("/tmp/b:/extra/1/b:ro")
+  })
+
+  test("rewrites each element of an out-of-root --logs list (file-kind parent mounts)", () => {
+    const { argv, rewrittenArgs } = composeMounts({
+      args: ["--logs=/tmp/a.jsonl,/tmp/b.jsonl"],
+      roots: ROOTS,
+      existsSync: () => true,
+    })
+    // Both files share parent /tmp → prefix dedup into a single /extra/0 mount.
+    expect(rewrittenArgs).toEqual(["--logs=/extra/0/a.jsonl,/extra/0/b.jsonl"])
+    expect(argv).toContain("/tmp:/extra/0:ro")
+  })
+
+  test("mixes fixed-root and out-of-root elements within one csv flag", () => {
+    const { rewrittenArgs } = composeMounts({
+      args: ["--skill=/home/u/proj/skills/in,/tmp/out"],
+      roots: ROOTS,
+      existsSync: () => true,
+    })
+    expect(rewrittenArgs).toEqual(["--skill=/workspace/skills/in,/extra/0/out"])
+  })
+
+  test("--tasks (pathLikeOnly) leaves bare task IDs untouched and rewrites only paths", () => {
+    const { rewrittenArgs } = composeMounts({
+      args: ["--tasks=bench_task_id,/tmp/task.json"],
+      roots: ROOTS,
+      existsSync: () => true,
+    })
+    expect(rewrittenArgs).toEqual(["--tasks=bench_task_id,/extra/0/task.json"])
+  })
+})
+
+describe("composeMounts — extra mounts", () => {
+  test("applies config extraMounts after defaults, before dynamic", () => {
+    const { argv } = composeMounts({
+      args: [],
+      roots: ROOTS,
+      configExtraMounts: [{ host: "/h/.ssh", inner: "/root/.ssh", mode: "ro" }],
+    })
+    expect(argv).toContain("/h/.ssh:/root/.ssh:ro")
+  })
+
+  test("applies CLI --mount-extra triples", () => {
+    const { argv } = composeMounts({
+      args: [],
+      roots: ROOTS,
+      cliExtraMounts: [{ host: "/h/.gitconfig", inner: "/root/.gitconfig", mode: "ro" }],
+    })
+    expect(argv).toContain("/h/.gitconfig:/root/.gitconfig:ro")
+  })
+})
diff --git a/test/launcher/path-flags.test.ts b/test/launcher/path-flags.test.ts
new file mode 100644
index 0000000..8572997
--- /dev/null
+++ b/test/launcher/path-flags.test.ts
@@ -0,0 +1,84 @@
+import { test, expect, describe, beforeEach, afterEach } from "bun:test"
+import { PATH_FLAGS, resolvePathFlagValue, looksLikePath } from "../../src/launcher/path-flags.ts"
+
+describe("PATH_FLAGS", () => {
+  // Shape and content checks over the exported flag table.
+  test("each entry has flag/kind/mode/required", () => {
+    for (const { flag, kind, mode, required } of PATH_FLAGS) {
+      expect(flag).toMatch(/^--[a-z][-a-z0-9]*$/)
+      expect(["file", "dir"]).toContain(kind)
+      expect(["ro", "rw"]).toContain(mode)
+      expect(typeof required).toBe("boolean")
+    }
+  })
+
+  test("flag list has no duplicates", () => {
+    const names = PATH_FLAGS.map(entry => entry.flag)
+    expect(new Set(names).size).toBe(names.length)
+  })
+
+  test("--skill, --task, --out are present", () => {
+    const declared = new Set(PATH_FLAGS.map(entry => entry.flag))
+    for (const name of ["--skill", "--task", "--out"]) expect(declared.has(name)).toBe(true)
+  })
+
+  test("csv list flags are marked shape:csv", () => {
+    const entryByFlag = new Map(PATH_FLAGS.map(entry => [entry.flag, entry]))
+    const csvFlags = ["--skill", "--logs", "--failures", "--tasks", "--test-tasks"]
+    // A flag missing from the table yields undefined through ?. and fails the comparison as well.
+    for (const name of csvFlags) expect(entryByFlag.get(name)?.shape).toBe("csv")
+  })
+
+  test("--tasks / --test-tasks are pathLikeOnly (mixed IDs + paths)", () => {
+    // Both flags must exist in the table and have pathLikeOnly set to exactly true.
+    const entryByFlag = new Map(PATH_FLAGS.map(entry => [entry.flag, entry]))
+    expect(entryByFlag.get("--tasks")?.pathLikeOnly).toBe(true)
+    expect(entryByFlag.get("--test-tasks")?.pathLikeOnly).toBe(true)
+  })
+})
+
+describe("looksLikePath", () => {
+  test("treats .json files and slashed values as paths", () => {
+    // An absolute path, a bare *.json filename, and a relative value containing a slash.
+    const pathLike = ["/tmp/task.json", "task.json", "dir/task"]
+    for (const value of pathLike) expect(looksLikePath(value)).toBe(true)
+  })
+
+  test("treats bare identifiers as non-paths", () => {
+    const bareIds = ["bench_task_id", "pinch_foo"]
+    for (const value of bareIds) expect(looksLikePath(value)).toBe(false)
+  })
+})
+
+describe("resolvePathFlagValue", () => {
+  // Snapshot of HOME taken before each test; the "~/" case below overwrites it.
+  let originalHome: string | undefined
+
+  beforeEach(() => {
+    originalHome = process.env.HOME
+  })
+
+  afterEach(() => {
+    // Restore the snapshot; if HOME was unset beforehand, remove it again instead of assigning.
+    if (originalHome !== undefined) process.env.HOME = originalHome
+    else delete process.env.HOME
+  })
+
+  test("resolves relative path against cwd", () => {
+    const base = "/home/user/proj"
+    expect(resolvePathFlagValue("./skill", base)).toBe("/home/user/proj/skill")
+    expect(resolvePathFlagValue("skill", base)).toBe("/home/user/proj/skill")
+    expect(resolvePathFlagValue("../sibling", base)).toBe("/home/user/sibling")
+  })
+
+  test("returns absolute paths unchanged (modulo normalization)", () => {
+    expect(resolvePathFlagValue("/abs/path", "/cwd")).toBe("/abs/path")
+    // A doubled slash is expected to collapse to a single one.
+    expect(resolvePathFlagValue("/abs//path", "/cwd")).toBe("/abs/path")
+  })
+
+  test("expands ~/ to $HOME", () => {
+    process.env.HOME = "/home/user"
+    expect(resolvePathFlagValue("~/x", "/cwd")).toBe("/home/user/x")
+  })
+})
diff --git a/test/launcher/redact.test.ts b/test/launcher/redact.test.ts
new file mode 100644
index 0000000..3f8233d
--- /dev/null
+++ b/test/launcher/redact.test.ts
@@ -0,0 +1,57 @@
+import { test, expect, describe } from "bun:test"
+import { redactSecretToken, assertMountExtraAllowed, assertExtraMountsAllowed } from "../../src/launcher/index.ts"
+
+describe("assertMountExtraAllowed", () => {
+  // Each case wraps the call in a thunk so expect() can observe whether it throws.
+  test("rejects the host root", () => {
+    expect(() => assertMountExtraAllowed("/")).toThrow(/host root/)
+  })
+
+  test("rejects the docker socket at common paths", () => {
+    const socketPaths = ["/var/run/docker.sock", "/run/docker.sock"]
+    for (const host of socketPaths) expect(() => assertMountExtraAllowed(host)).toThrow(/Docker socket/)
+  })
+
+  test("allows ordinary host paths", () => {
+    for (const host of ["/home/u/.ssh", "/tmp/data"]) expect(() => assertMountExtraAllowed(host)).not.toThrow()
+  })
+})
+
+describe("assertExtraMountsAllowed — config mounts share the CLI denylist", () => {
+  // The mount objects in these cases specify only `host`.
+  test("throws when a config extra mount targets the Docker socket", () => {
+    expect(() => assertExtraMountsAllowed([{ host: "/var/run/docker.sock" }])).toThrow(/Docker socket/)
+  })
+
+  test("throws when a config extra mount targets the host root", () => {
+    expect(() => assertExtraMountsAllowed([{ host: "/" }])).toThrow(/host root/)
+  })
+
+  test("allows ordinary config extra mounts", () => {
+    const ordinaryMounts = [{ host: "/home/u/.ssh" }, { host: "/tmp/d" }]
+    expect(() => assertExtraMountsAllowed(ordinaryMounts)).not.toThrow()
+  })
+})
+
+describe("redactSecretToken", () => {
+  test("redacts injected route key values", () => {
+    expect(redactSecretToken("SKVM_ROUTE_openai_KEY=sk-abc123")).toBe("SKVM_ROUTE_openai_KEY=<redacted>")
+  })
+
+  test("redacts generic secret-looking env names", () => {
+    const secrets = { OPENAI_API_KEY: "sk-x", MY_TOKEN: "t", DB_PASSWORD: "p" }
+    for (const [name, value] of Object.entries(secrets)) {
+      expect(redactSecretToken(`${name}=${value}`)).toBe(`${name}=<redacted>`)
+    }
+  })
+
+  test("leaves non-secret tokens untouched", () => {
+    // An env assignment, a flag with a value, a bare flag, and an image reference.
+    const passthrough = ["HOME=/workspace", "--network=bridge", "-e", "ghcr.io/sjtu-ipads/skvm-sandbox:0.1.4"]
+    for (const token of passthrough) expect(redactSecretToken(token)).toBe(token)
+  })
+
+  test("redacts the entire value even when it contains '='", () => {
+    expect(redactSecretToken("SKVM_ROUTE_x_KEY=sk-a=b=c")).toBe("SKVM_ROUTE_x_KEY=<redacted>")
+  })
+})
diff --git a/test/launcher/stale-reap.test.ts b/test/launcher/stale-reap.test.ts
new file mode 100644
index 0000000..1a63add
--- /dev/null
+++ b/test/launcher/stale-reap.test.ts
@@ -0,0 +1,27 @@
+import { test, expect, describe } from "bun:test"
+import { isPidAlive, parseHostPidFromLabel } from "../../src/launcher/stale-reap.ts"
+
+describe("isPidAlive", () => {
+  // Probes three pids: the current process, pid 0, and 2 ** 30.
+  test("returns true for our own pid", () => {
+    expect(isPidAlive(process.pid)).toBe(true)
+  })
+
+  test("returns false for pid 0 (invalid)", () => { expect(isPidAlive(0)).toBe(false) })
+
+  test("returns false for an obviously-unused high pid", () => {
+    const unusedPid = 2 ** 30
+    expect(isPidAlive(unusedPid)).toBe(false)
+  })
+})
+
+describe("parseHostPidFromLabel", () => {
+  test("extracts numeric pid from docker label output line", () => {
+    expect(parseHostPidFromLabel("skvm-sandbox-host-pid=12345")).toBe(12345)
+  })
+
+  test("returns null for malformed labels", () => {
+    // A label with a different key, and a string with no key=value shape at all.
+    for (const label of ["skvm-sandbox=1", "garbage"]) expect(parseHostPidFromLabel(label)).toBeNull()
+  })
+})