From fa95c126f109baffddd9fdeffe87c1d3291bad00 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Thu, 9 Apr 2026 21:11:28 +0800 Subject: [PATCH 01/19] feat: dockerize phase1 worker execution Run builder and QA through single-node Docker containers. Preserve canonical job bundles, state mirrors, and archive finalization. --- adapters/generic-cli/adapter-capability.json | 16 +- adapters/generic-cli/adapter.json | 2 +- adapters/generic-cli/adapter.ts | 52 +-- adapters/generic-cli/docker-runtime.ts | 316 +++++++++++++++++++ core/contracts/types.ts | 31 ++ core/loop/phase1-local-flow.ts | 8 +- docker/worker-base.Dockerfile | 11 + docker/worker-builder.Dockerfile | 8 + docker/worker-qa.Dockerfile | 8 + 9 files changed, 418 insertions(+), 34 deletions(-) create mode 100644 adapters/generic-cli/docker-runtime.ts create mode 100644 docker/worker-base.Dockerfile create mode 100644 docker/worker-builder.Dockerfile create mode 100644 docker/worker-qa.Dockerfile diff --git a/adapters/generic-cli/adapter-capability.json b/adapters/generic-cli/adapter-capability.json index a040fe3..9576606 100644 --- a/adapters/generic-cli/adapter-capability.json +++ b/adapters/generic-cli/adapter-capability.json @@ -28,10 +28,10 @@ "shell_command": { "mode": "allow", "scope": [ - "local-worker-command" + "docker-worker-command" ], "cost_level": "low", - "risk_level": "medium", + "risk_level": "high", "approval_requirement": "none" }, "git": { @@ -58,11 +58,13 @@ "approval_requirement": "not-supported" }, "container_control": { - "mode": "deny", - "scope": [], - "cost_level": "none", - "risk_level": "low", - "approval_requirement": "not-supported" + "mode": "allow", + "scope": [ + "single-node-local-worker-containers" + ], + "cost_level": "low", + "risk_level": "high", + "approval_requirement": "none" }, "screenshot": { "mode": "deny", diff --git a/adapters/generic-cli/adapter.json b/adapters/generic-cli/adapter.json index 2500292..29c6096 100644 --- 
a/adapters/generic-cli/adapter.json +++ b/adapters/generic-cli/adapter.json @@ -16,7 +16,7 @@ "local-fixture" ], "supported_sandbox_model": [ - "host-process" + "docker-container" ], "supported_artifact_outputs": [ "run-result.json", diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index 7e18048..a5e50c1 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -1,7 +1,7 @@ import { dirname, join } from "node:path"; import { buildArtifactIndex } from "../../ops/archive/artifact-index.ts"; import { writeRunTimings, writeWorkerLog } from "../../ops/archive/run-metadata.ts"; -import { ensureDir, relativePosix, writeJson, writeText } from "../../core/loop/support.ts"; +import { relativePosix, toPosixPath, writeJson, writeText } from "../../core/loop/support.ts"; import type { AdapterExecutionResult, RunEnvelope, @@ -10,20 +10,21 @@ import type { RunTimingMetadata, WorkerOutput, } from "../../core/contracts/types.ts"; +import { DockerWorkerLauncher, materializeContainerizedRunEnvelope } from "./docker-runtime.ts"; -function workerScriptForRole(repoRoot: string, runRole: RunRole): string { +function workerScriptForRole(rootPath: string, runRole: RunRole): string { if (runRole === "builder") { - return join(repoRoot, "ops", "workers", "builder.ts"); + return toPosixPath(join(rootPath, "ops", "workers", "builder.ts")); } if (runRole === "qa") { - return join(repoRoot, "ops", "workers", "qa.ts"); + return toPosixPath(join(rootPath, "ops", "workers", "qa.ts")); } throw new Error(`unsupported run role: ${runRole}`); } -function fallbackWorkerOutput(errorText: string): WorkerOutput { +function fallbackWorkerOutput(errorText: string, status: WorkerOutput["status"] = "FAILED_EXECUTION"): WorkerOutput { return { - status: "FAILED_EXECUTION", + status, completed: [], open: ["Inspect the worker stderr output."], blockers: errorText ? 
[errorText.trim()] : ["worker execution failed"], @@ -109,36 +110,37 @@ function renderHandoff( } export class GenericCliAdapter { - constructor(private readonly repoRoot: string) {} + private readonly dockerLauncher: DockerWorkerLauncher; + + constructor(private readonly repoRoot: string) { + this.dockerLauncher = new DockerWorkerLauncher(repoRoot); + } async execute(envelope: RunEnvelope): Promise { const runRoot = envelope.artifact_path; - const envelopePath = join(envelope.runtime_home, "envelopes", `${envelope.run_id}.json`); const commandLogPath = join(runRoot, "logs", "command-log.txt"); const workerLogPath = join(runRoot, "logs", "worker.log"); const runResultPath = join(runRoot, "metadata", "run-result.json"); const timingsPath = join(runRoot, "metadata", "timings.json"); const artifactIndexPath = join(runRoot, "metadata", "artifact-index.json"); const handoffPath = join(runRoot, "reports", "handoff.en.md"); - const workerScript = workerScriptForRole(this.repoRoot, envelope.run_role); + const materialization = await materializeContainerizedRunEnvelope(envelope); + const workerScript = workerScriptForRole( + materialization.runtime.container_paths.repo_path, + materialization.container_envelope.run_role, + ); - await ensureDir(dirname(envelopePath)); - await writeJson(envelopePath, envelope); - - const command = [process.execPath, workerScript, envelopePath]; const startedAtDate = new Date(); - const processHandle = Bun.spawn({ - cmd: command, - cwd: this.repoRoot, - stdout: "pipe", - stderr: "pipe", + const launchResult = await this.dockerLauncher.launch({ + run_role: materialization.container_envelope.run_role, + image: materialization.runtime.image, + worker_script_path: workerScript, + envelope_path: materialization.runtime.envelope_container_path, + runtime: materialization.runtime, }); - - const stdoutPromise = new Response(processHandle.stdout).text(); - const stderrPromise = new Response(processHandle.stderr).text(); - const exitCode = await 
processHandle.exited; - const stdout = await stdoutPromise; - const stderr = await stderrPromise; + const exitCode = launchResult.exitCode; + const stdout = launchResult.stdout; + const stderr = launchResult.stderr; const endedAtDate = new Date(); let workerOutput: WorkerOutput; @@ -166,7 +168,7 @@ export class GenericCliAdapter { duration_s: Math.max(0, Math.round(durationMs / 1000)), }; - await writeText(commandLogPath, renderCommandLog(command, exitCode, stdout, stderr)); + await writeText(commandLogPath, renderCommandLog(launchResult.command, exitCode, stdout, stderr)); await writeWorkerLog(workerLogPath, { job_id: envelope.job_id, run_id: envelope.run_id, diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts new file mode 100644 index 0000000..dac98a4 --- /dev/null +++ b/adapters/generic-cli/docker-runtime.ts @@ -0,0 +1,316 @@ +import { dirname, isAbsolute, join, resolve } from "node:path"; +import { ensureDir, toPosixPath, uniqueStrings, writeJson } from "../../core/loop/support.ts"; +import type { + ContainerPathMap, + ContainerPathMount, + ContainerRuntimeConfig, + RunEnvelope, + RunRole, +} from "../../core/contracts/types.ts"; + +const CONTAINER_PATHS = { + repo: "/work/repo", + state: "/work/state", + artifacts: "/work/artifacts", + runtimeHome: "/work/runtime-home", +} as const; + +const DEFAULT_BASE_IMAGE = "codingclaw-worker-base:phase1-local"; + +const DEFAULT_ROLE_IMAGES: Record<"builder" | "qa", string> = { + builder: "codingclaw-worker-builder:phase1-local", + qa: "codingclaw-worker-qa:phase1-local", +}; + +const ROLE_IMAGE_ENV_VARS: Record<"builder" | "qa", string> = { + builder: "CODINGCLAW_WORKER_BUILDER_IMAGE", + qa: "CODINGCLAW_WORKER_QA_IMAGE", +}; + +const BASE_IMAGE_ENV_VAR = "CODINGCLAW_WORKER_BASE_IMAGE"; +const DOCKER_BIN_ENV_VAR = "CODINGCLAW_DOCKER_BIN"; + +interface NormalizedContainerPathMount extends ContainerPathMount { + normalized_host_path: string; +} + +export interface RoleImageResolver 
{ + resolve(runRole: RunRole): string; +} + +export interface ContainerizedRunEnvelopeMaterialization { + host_envelope: RunEnvelope; + container_envelope: RunEnvelope; + runtime: ContainerRuntimeConfig; + host_envelope_path: string; + container_envelope_path: string; +} + +export interface DockerWorkerLaunchRequest { + run_role: RunRole; + image: string; + worker_script_path: string; + envelope_path: string; + runtime: ContainerRuntimeConfig; +} + +export interface DockerWorkerLaunchResult { + command: string[]; + exitCode: number; + stdout: string; + stderr: string; +} + +function normalizeHostPath(value: string): string { + return toPosixPath(resolve(value)).replace(/\/+$/u, ""); +} + +function artifactRootFromRunRoot(runRoot: string): string { + return dirname(dirname(runRoot)); +} + +export function resolveDockerWorkerImage(runRole: RunRole): string { + if (runRole !== "builder" && runRole !== "qa") { + throw new Error(`unsupported docker worker role: ${runRole}`); + } + const override = process.env[ROLE_IMAGE_ENV_VARS[runRole]]?.trim(); + return override && override.length > 0 ? 
override : DEFAULT_ROLE_IMAGES[runRole]; +} + +export class DockerPathMapper { + readonly mounts: ContainerPathMount[]; + private readonly normalizedMounts: NormalizedContainerPathMount[]; + + constructor(mounts: ContainerPathMount[]) { + this.mounts = mounts; + this.normalizedMounts = mounts + .map((mount) => ({ + ...mount, + normalized_host_path: normalizeHostPath(mount.host_path), + })) + .sort((left, right) => right.normalized_host_path.length - left.normalized_host_path.length); + } + + mapPath(hostPath: string): string { + if (!hostPath || !isAbsolute(hostPath)) { + return hostPath; + } + const normalizedPath = normalizeHostPath(hostPath); + for (const mount of this.normalizedMounts) { + if (normalizedPath === mount.normalized_host_path) { + return mount.container_path; + } + if (normalizedPath.startsWith(`${mount.normalized_host_path}/`)) { + return `${mount.container_path}${normalizedPath.slice(mount.normalized_host_path.length)}`; + } + } + return hostPath; + } + + mapValue(value: unknown): unknown { + if (typeof value === "string") { + return this.mapPath(value); + } + if (Array.isArray(value)) { + return value.map((entry) => this.mapValue(entry)); + } + if (!value || typeof value !== "object") { + return value; + } + return Object.fromEntries(Object.entries(value).map(([key, entry]) => [key, this.mapValue(entry)])); + } +} + +export function buildDockerPathMapping(envelope: RunEnvelope): DockerPathMapper { + return new DockerPathMapper([ + { + name: "repo", + host_path: envelope.repo_path, + container_path: CONTAINER_PATHS.repo, + read_only: true, + }, + { + name: "state", + host_path: envelope.state_path, + container_path: CONTAINER_PATHS.state, + read_only: false, + }, + { + name: "artifacts", + host_path: artifactRootFromRunRoot(envelope.artifact_path), + container_path: CONTAINER_PATHS.artifacts, + read_only: false, + }, + { + name: "runtime-home", + host_path: envelope.runtime_home, + container_path: CONTAINER_PATHS.runtimeHome, + read_only: false, 
+ }, + ]); +} + +function buildContainerPathMap(envelope: RunEnvelope, mapper: DockerPathMapper): ContainerPathMap { + return { + repo_path: mapper.mapPath(envelope.repo_path), + state_path: mapper.mapPath(envelope.state_path), + artifact_path: mapper.mapPath(envelope.artifact_path), + runtime_home: mapper.mapPath(envelope.runtime_home), + task_packet_path: mapper.mapPath(envelope.task_packet_path), + previous_handoff_path: mapper.mapPath(envelope.previous_handoff_path), + approval_snapshot_path: mapper.mapPath(envelope.approval_snapshot_path), + trace_context: mapper.mapValue(envelope.trace_context) as Record, + }; +} + +export async function materializeContainerizedRunEnvelope( + envelope: RunEnvelope, +): Promise { + const hostEnvelopePath = join(envelope.runtime_home, "envelopes", `${envelope.run_id}.json`); + const containerEnvelopePath = join(envelope.runtime_home, "envelopes", "container", `${envelope.run_id}.json`); + const mapper = buildDockerPathMapping(envelope); + const runtime: ContainerRuntimeConfig = { + runtime: "docker", + image: resolveDockerWorkerImage(envelope.run_role), + workdir: CONTAINER_PATHS.repo, + envelope_host_path: hostEnvelopePath, + envelope_container_path: mapper.mapPath(containerEnvelopePath), + mounts: mapper.mounts, + container_paths: buildContainerPathMap(envelope, mapper), + }; + const requestedCapabilities = uniqueStrings([...envelope.requested_capabilities, "container_control"]); + const hostEnvelope: RunEnvelope = { + ...envelope, + requested_capabilities: requestedCapabilities, + container_runtime: runtime, + }; + const containerEnvelope: RunEnvelope = { + ...hostEnvelope, + repo_path: runtime.container_paths.repo_path, + state_path: runtime.container_paths.state_path, + artifact_path: runtime.container_paths.artifact_path, + runtime_home: runtime.container_paths.runtime_home, + task_packet_path: runtime.container_paths.task_packet_path, + previous_handoff_path: runtime.container_paths.previous_handoff_path, + 
approval_snapshot_path: runtime.container_paths.approval_snapshot_path, + trace_context: runtime.container_paths.trace_context, + }; + + await ensureDir(dirname(hostEnvelopePath)); + await ensureDir(dirname(containerEnvelopePath)); + await writeJson(hostEnvelopePath, hostEnvelope); + await writeJson(containerEnvelopePath, containerEnvelope); + + return { + host_envelope: hostEnvelope, + container_envelope: containerEnvelope, + runtime, + host_envelope_path: hostEnvelopePath, + container_envelope_path: containerEnvelopePath, + }; +} + +function buildImageDockerfile(repoRoot: string, runRole: "base" | "builder" | "qa"): string { + if (runRole === "base") { + return join(repoRoot, "docker", "worker-base.Dockerfile"); + } + return join(repoRoot, "docker", `worker-${runRole}.Dockerfile`); +} + +function mountArg(mount: ContainerPathMount): string { + const mode = mount.read_only ? ",readonly" : ""; + return `type=bind,source=${mount.host_path},target=${mount.container_path}${mode}`; +} + +async function spawnCommand(command: string[], cwd: string): Promise { + const handle = Bun.spawn({ + cmd: command, + cwd, + stdout: "pipe", + stderr: "pipe", + }); + const stdoutPromise = new Response(handle.stdout).text(); + const stderrPromise = new Response(handle.stderr).text(); + const exitCode = await handle.exited; + return { + command, + exitCode, + stdout: await stdoutPromise, + stderr: await stderrPromise, + }; +} + +export class DockerWorkerLauncher implements RoleImageResolver { + private readonly preparedImages = new Set(); + private readonly baseImage: string; + private readonly dockerExecutable: string; + + constructor(private readonly repoRoot: string) { + const override = process.env[BASE_IMAGE_ENV_VAR]?.trim(); + this.baseImage = override && override.length > 0 ? override : DEFAULT_BASE_IMAGE; + const dockerOverride = process.env[DOCKER_BIN_ENV_VAR]?.trim(); + this.dockerExecutable = dockerOverride && dockerOverride.length > 0 ? 
dockerOverride : "docker"; + } + + resolve(runRole: RunRole): string { + return resolveDockerWorkerImage(runRole); + } + + private async ensureImage(image: string, dockerfilePath: string, buildArgs: string[] = []): Promise { + if (this.preparedImages.has(image)) { + return; + } + + const inspect = await spawnCommand([this.dockerExecutable, "image", "inspect", image], this.repoRoot); + if (inspect.exitCode === 0) { + this.preparedImages.add(image); + return; + } + + const build = await spawnCommand( + [this.dockerExecutable, "build", "--file", dockerfilePath, "--tag", image, ...buildArgs, this.repoRoot], + this.repoRoot, + ); + if (build.exitCode !== 0) { + throw new Error(build.stderr.trim() || `failed to build docker image ${image}`); + } + this.preparedImages.add(image); + } + + private async ensureRoleImage(runRole: RunRole, image: string): Promise { + if (runRole !== "builder" && runRole !== "qa") { + throw new Error(`unsupported docker worker role: ${runRole}`); + } + await this.ensureImage(this.baseImage, buildImageDockerfile(this.repoRoot, "base")); + await this.ensureImage(image, buildImageDockerfile(this.repoRoot, runRole), [ + "--build-arg", + `CODINGCLAW_BASE_IMAGE=${this.baseImage}`, + ]); + } + + async launch(request: DockerWorkerLaunchRequest): Promise { + await this.ensureRoleImage(request.run_role, request.image); + + const command = [ + this.dockerExecutable, + "run", + "--rm", + "--network", + "none", + "--workdir", + request.runtime.workdir, + "--env", + "HOME=/work/runtime-home/home", + "--env", + "XDG_CACHE_HOME=/work/runtime-home/cache", + "--env", + "BUN_INSTALL_CACHE_DIR=/work/runtime-home/cache/bun", + ...request.runtime.mounts.flatMap((mount) => ["--mount", mountArg(mount)]), + request.image, + "bun", + request.worker_script_path, + request.envelope_path, + ]; + return spawnCommand(command, this.repoRoot); + } +} diff --git a/core/contracts/types.ts b/core/contracts/types.ts index 2246c50..2e63dc0 100644 --- a/core/contracts/types.ts +++ 
b/core/contracts/types.ts @@ -82,6 +82,36 @@ export interface TaskPacket { story: StoryContract; } +export type ContainerMountName = "repo" | "state" | "artifacts" | "runtime-home"; + +export interface ContainerPathMount { + name: ContainerMountName; + host_path: string; + container_path: string; + read_only: boolean; +} + +export interface ContainerPathMap { + repo_path: string; + state_path: string; + artifact_path: string; + runtime_home: string; + task_packet_path: string; + previous_handoff_path: string; + approval_snapshot_path: string; + trace_context: Record; +} + +export interface ContainerRuntimeConfig { + runtime: "docker"; + image: string; + workdir: string; + envelope_host_path: string; + envelope_container_path: string; + mounts: ContainerPathMount[]; + container_paths: ContainerPathMap; +} + export interface RunEnvelope { job_id: string; freeze_id: string; @@ -106,6 +136,7 @@ export interface RunEnvelope { approval_snapshot_path: string; trace_context: Record; requested_capabilities: string[]; + container_runtime?: ContainerRuntimeConfig | null; } export interface WorkerOutput { diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index 3da4789..a669c71 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -206,7 +206,7 @@ async function buildRunEnvelope( approvalSnapshotPath: string, traceContext: Record, ): Promise { - return materializeJsonTemplate( + const envelope = await materializeJsonTemplate( join(repoRoot, "control", "fixtures", "phase1-local-run-envelope.json"), { __RUN_ID__: taskPacket.run_id, @@ -224,6 +224,12 @@ async function buildRunEnvelope( __TRACE_CONTEXT__: traceContext, }, ); + + return { + ...envelope, + requested_capabilities: uniqueStrings([...envelope.requested_capabilities, "container_control"]), + container_runtime: null, + }; } async function normalizeRunArtifacts( diff --git a/docker/worker-base.Dockerfile b/docker/worker-base.Dockerfile new file mode 100644 index 
0000000..b8a2422 --- /dev/null +++ b/docker/worker-base.Dockerfile @@ -0,0 +1,11 @@ +FROM oven/bun:1 + +WORKDIR /work/repo + +RUN mkdir -p /work/repo /work/state /work/artifacts /work/runtime-home /work/cache + +ENV HOME=/work/runtime-home/home +ENV XDG_CACHE_HOME=/work/runtime-home/cache +ENV BUN_INSTALL_CACHE_DIR=/work/runtime-home/cache/bun + +CMD ["bun", "--version"] diff --git a/docker/worker-builder.Dockerfile b/docker/worker-builder.Dockerfile new file mode 100644 index 0000000..f4db06c --- /dev/null +++ b/docker/worker-builder.Dockerfile @@ -0,0 +1,8 @@ +ARG CODINGCLAW_BASE_IMAGE=codingclaw-worker-base:phase1-local +FROM ${CODINGCLAW_BASE_IMAGE} + +WORKDIR /work/repo + +ENV CODINGCLAW_WORKER_ROLE=builder + +CMD ["bun", "--version"] diff --git a/docker/worker-qa.Dockerfile b/docker/worker-qa.Dockerfile new file mode 100644 index 0000000..3c03d0a --- /dev/null +++ b/docker/worker-qa.Dockerfile @@ -0,0 +1,8 @@ +ARG CODINGCLAW_BASE_IMAGE=codingclaw-worker-base:phase1-local +FROM ${CODINGCLAW_BASE_IMAGE} + +WORKDIR /work/repo + +ENV CODINGCLAW_WORKER_ROLE=qa + +CMD ["bun", "--version"] From c2853e50b776b40b09f0c0fce9cfb30e844918e5 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Thu, 9 Apr 2026 22:41:35 +0800 Subject: [PATCH 02/19] fix: preserve run bundles on docker launcher failures --- adapters/generic-cli/adapter.ts | 49 +++++++-- adapters/generic-cli/docker-runtime.ts | 132 +++++++++++++++++++------ core/contracts/types.ts | 2 +- docker/worker-base.Dockerfile | 4 +- 4 files changed, 147 insertions(+), 40 deletions(-) diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index a5e50c1..0c11aed 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -10,7 +10,7 @@ import type { RunTimingMetadata, WorkerOutput, } from "../../core/contracts/types.ts"; -import { DockerWorkerLauncher, materializeContainerizedRunEnvelope } from "./docker-runtime.ts"; +import { DockerWorkerLauncher, type 
DockerWorkerLaunchResult, materializeContainerizedRunEnvelope } from "./docker-runtime.ts"; function workerScriptForRole(rootPath: string, runRole: RunRole): string { if (runRole === "builder") { @@ -38,6 +38,32 @@ function fallbackWorkerOutput(errorText: string, status: WorkerOutput["status"] }; } +function formatErrorText(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +function launchFailureText(launchResult: DockerWorkerLaunchResult): string { + const stderr = launchResult.stderr.trim(); + if (stderr) { + return stderr; + } + const stdout = launchResult.stdout.trim(); + if (stdout) { + return stdout; + } + return `launcher exited with code ${launchResult.exitCode}`; +} + +function unexpectedLaunchResult(error: unknown): DockerWorkerLaunchResult { + return { + command: [""], + exitCode: -1, + stdout: "", + stderr: formatErrorText(error), + failure_status: "FAILED_INFRA", + }; +} + function renderCommandLog(command: string[], exitCode: number, stdout: string, stderr: string): string { return [ `command: ${command.map((value) => value.replaceAll("\\", "/")).join(" ")}`, @@ -131,13 +157,18 @@ export class GenericCliAdapter { ); const startedAtDate = new Date(); - const launchResult = await this.dockerLauncher.launch({ - run_role: materialization.container_envelope.run_role, - image: materialization.runtime.image, - worker_script_path: workerScript, - envelope_path: materialization.runtime.envelope_container_path, - runtime: materialization.runtime, - }); + let launchResult: DockerWorkerLaunchResult; + try { + launchResult = await this.dockerLauncher.launch({ + run_role: materialization.container_envelope.run_role, + image: materialization.runtime.image, + worker_script_path: workerScript, + envelope_path: materialization.runtime.envelope_container_path, + runtime: materialization.runtime, + }); + } catch (error) { + launchResult = unexpectedLaunchResult(error); + } const exitCode = launchResult.exitCode; const stdout 
= launchResult.stdout; const stderr = launchResult.stderr; @@ -151,7 +182,7 @@ export class GenericCliAdapter { workerOutput = fallbackWorkerOutput("worker output was not valid JSON"); } } else { - workerOutput = fallbackWorkerOutput(stderr); + workerOutput = fallbackWorkerOutput(launchFailureText(launchResult), launchResult.failure_status ?? "FAILED_EXECUTION"); } const durationMs = Math.max(0, endedAtDate.getTime() - startedAtDate.getTime()); diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index dac98a4..365a88b 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -5,6 +5,7 @@ import type { ContainerPathMount, ContainerRuntimeConfig, RunEnvelope, + RunExitStatus, RunRole, } from "../../core/contracts/types.ts"; @@ -13,6 +14,7 @@ const CONTAINER_PATHS = { state: "/work/state", artifacts: "/work/artifacts", runtimeHome: "/work/runtime-home", + cache: "/work/cache", } as const; const DEFAULT_BASE_IMAGE = "codingclaw-worker-base:phase1-local"; @@ -59,6 +61,15 @@ export interface DockerWorkerLaunchResult { exitCode: number; stdout: string; stderr: string; + failure_status: RunExitStatus | null; +} + +interface CommandExecutionResult { + command: string[]; + exitCode: number; + stdout: string; + stderr: string; + spawn_error: string | null; } function normalizeHostPath(value: string): string { @@ -147,6 +158,12 @@ export function buildDockerPathMapping(envelope: RunEnvelope): DockerPathMapper container_path: CONTAINER_PATHS.runtimeHome, read_only: false, }, + { + name: "cache", + host_path: join(envelope.runtime_home, "cache"), + container_path: CONTAINER_PATHS.cache, + read_only: false, + }, ]); } @@ -168,6 +185,7 @@ export async function materializeContainerizedRunEnvelope( ): Promise { const hostEnvelopePath = join(envelope.runtime_home, "envelopes", `${envelope.run_id}.json`); const containerEnvelopePath = join(envelope.runtime_home, "envelopes", "container", 
`${envelope.run_id}.json`); + const cacheHostPath = join(envelope.runtime_home, "cache"); const mapper = buildDockerPathMapping(envelope); const runtime: ContainerRuntimeConfig = { runtime: "docker", @@ -198,6 +216,7 @@ export async function materializeContainerizedRunEnvelope( await ensureDir(dirname(hostEnvelopePath)); await ensureDir(dirname(containerEnvelopePath)); + await ensureDir(cacheHostPath); await writeJson(hostEnvelopePath, hostEnvelope); await writeJson(containerEnvelopePath, containerEnvelope); @@ -222,22 +241,52 @@ function mountArg(mount: ContainerPathMount): string { return `type=bind,source=${mount.host_path},target=${mount.container_path}${mode}`; } -async function spawnCommand(command: string[], cwd: string): Promise { - const handle = Bun.spawn({ - cmd: command, - cwd, - stdout: "pipe", - stderr: "pipe", - }); - const stdoutPromise = new Response(handle.stdout).text(); - const stderrPromise = new Response(handle.stderr).text(); - const exitCode = await handle.exited; - return { - command, - exitCode, - stdout: await stdoutPromise, - stderr: await stderrPromise, - }; +async function spawnCommand(command: string[], cwd: string): Promise { + try { + const handle = Bun.spawn({ + cmd: command, + cwd, + stdout: "pipe", + stderr: "pipe", + }); + const stdoutPromise = new Response(handle.stdout).text(); + const stderrPromise = new Response(handle.stderr).text(); + const exitCode = await handle.exited; + return { + command, + exitCode, + stdout: await stdoutPromise, + stderr: await stderrPromise, + spawn_error: null, + }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { + command, + exitCode: -1, + stdout: "", + stderr: message, + spawn_error: message, + }; + } +} + +function classifyRunFailure(result: CommandExecutionResult): RunExitStatus | null { + if (result.exitCode === 0) { + return null; + } + if (result.spawn_error) { + return "FAILED_INFRA"; + } + const stderr = result.stderr.trim(); + if ( + /^\s*docker:/iu.test(stderr) || + /cannot connect to the docker daemon/iu.test(stderr) || + /error during connect/iu.test(stderr) + ) { + return "FAILED_INFRA"; + } + return "FAILED_EXECUTION"; } export class DockerWorkerLauncher implements RoleImageResolver { @@ -256,40 +305,60 @@ export class DockerWorkerLauncher implements RoleImageResolver { return resolveDockerWorkerImage(runRole); } - private async ensureImage(image: string, dockerfilePath: string, buildArgs: string[] = []): Promise { + private async ensureImage( + image: string, + dockerfilePath: string, + buildArgs: string[] = [], + ): Promise { if (this.preparedImages.has(image)) { - return; + return null; } const inspect = await spawnCommand([this.dockerExecutable, "image", "inspect", image], this.repoRoot); + if (inspect.spawn_error) { + return inspect; + } if (inspect.exitCode === 0) { this.preparedImages.add(image); - return; + return null; } const build = await spawnCommand( [this.dockerExecutable, "build", "--file", dockerfilePath, "--tag", image, ...buildArgs, this.repoRoot], this.repoRoot, ); - if (build.exitCode !== 0) { - throw new Error(build.stderr.trim() || `failed to build docker image ${image}`); + if (build.spawn_error || build.exitCode !== 0) { + return build; } this.preparedImages.add(image); + return null; } - private async ensureRoleImage(runRole: RunRole, image: string): Promise { + private async ensureRoleImage(runRole: RunRole, image: string): Promise { if (runRole !== "builder" && runRole !== "qa") { throw new Error(`unsupported docker worker role: ${runRole}`); } - await this.ensureImage(this.baseImage, 
buildImageDockerfile(this.repoRoot, "base")); - await this.ensureImage(image, buildImageDockerfile(this.repoRoot, runRole), [ + const baseFailure = await this.ensureImage(this.baseImage, buildImageDockerfile(this.repoRoot, "base")); + if (baseFailure) { + return baseFailure; + } + return this.ensureImage(image, buildImageDockerfile(this.repoRoot, runRole), [ "--build-arg", `CODINGCLAW_BASE_IMAGE=${this.baseImage}`, ]); } async launch(request: DockerWorkerLaunchRequest): Promise { - await this.ensureRoleImage(request.run_role, request.image); + const imagePreparationFailure = await this.ensureRoleImage(request.run_role, request.image); + if (imagePreparationFailure) { + return { + command: imagePreparationFailure.command, + exitCode: imagePreparationFailure.exitCode, + stdout: imagePreparationFailure.stdout, + stderr: imagePreparationFailure.stderr, + failure_status: "FAILED_INFRA", + }; + } const command = [ this.dockerExecutable, @@ -302,15 +371,22 @@ export class DockerWorkerLauncher implements RoleImageResolver { "--env", "HOME=/work/runtime-home/home", "--env", - "XDG_CACHE_HOME=/work/runtime-home/cache", + "XDG_CACHE_HOME=/work/cache", "--env", - "BUN_INSTALL_CACHE_DIR=/work/runtime-home/cache/bun", + "BUN_INSTALL_CACHE_DIR=/work/cache/bun", ...request.runtime.mounts.flatMap((mount) => ["--mount", mountArg(mount)]), request.image, "bun", request.worker_script_path, request.envelope_path, ]; - return spawnCommand(command, this.repoRoot); + const run = await spawnCommand(command, this.repoRoot); + return { + command: run.command, + exitCode: run.exitCode, + stdout: run.stdout, + stderr: run.stderr, + failure_status: classifyRunFailure(run), + }; } } diff --git a/core/contracts/types.ts b/core/contracts/types.ts index 2e63dc0..ed3b065 100644 --- a/core/contracts/types.ts +++ b/core/contracts/types.ts @@ -82,7 +82,7 @@ export interface TaskPacket { story: StoryContract; } -export type ContainerMountName = "repo" | "state" | "artifacts" | "runtime-home"; +export 
type ContainerMountName = "repo" | "state" | "artifacts" | "runtime-home" | "cache"; export interface ContainerPathMount { name: ContainerMountName; diff --git a/docker/worker-base.Dockerfile b/docker/worker-base.Dockerfile index b8a2422..d6fdd8f 100644 --- a/docker/worker-base.Dockerfile +++ b/docker/worker-base.Dockerfile @@ -5,7 +5,7 @@ WORKDIR /work/repo RUN mkdir -p /work/repo /work/state /work/artifacts /work/runtime-home /work/cache ENV HOME=/work/runtime-home/home -ENV XDG_CACHE_HOME=/work/runtime-home/cache -ENV BUN_INSTALL_CACHE_DIR=/work/runtime-home/cache/bun +ENV XDG_CACHE_HOME=/work/cache +ENV BUN_INSTALL_CACHE_DIR=/work/cache/bun CMD ["bun", "--version"] From 525c53821c0a2ad28daa16d216c378906eaf7d50 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Thu, 9 Apr 2026 23:09:27 +0800 Subject: [PATCH 03/19] Ensure host-owned run directories exist before worker launch --- adapters/generic-cli/adapter.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index 0c11aed..dabfd9d 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -1,7 +1,7 @@ import { dirname, join } from "node:path"; import { buildArtifactIndex } from "../../ops/archive/artifact-index.ts"; import { writeRunTimings, writeWorkerLog } from "../../ops/archive/run-metadata.ts"; -import { relativePosix, toPosixPath, writeJson, writeText } from "../../core/loop/support.ts"; +import { ensureDir, relativePosix, toPosixPath, writeJson, writeText } from "../../core/loop/support.ts"; import type { AdapterExecutionResult, RunEnvelope, @@ -64,6 +64,13 @@ function unexpectedLaunchResult(error: unknown): DockerWorkerLaunchResult { }; } +async function ensureHostWritableRunLayout(runRoot: string): Promise { + await ensureDir(join(runRoot, "logs")); + await ensureDir(join(runRoot, "reports")); + await ensureDir(join(runRoot, "metadata")); + await ensureDir(join(runRoot, 
"evidence", "test-results")); +} + function renderCommandLog(command: string[], exitCode: number, stdout: string, stderr: string): string { return [ `command: ${command.map((value) => value.replaceAll("\\", "/")).join(" ")}`, @@ -150,6 +157,7 @@ export class GenericCliAdapter { const timingsPath = join(runRoot, "metadata", "timings.json"); const artifactIndexPath = join(runRoot, "metadata", "artifact-index.json"); const handoffPath = join(runRoot, "reports", "handoff.en.md"); + await ensureHostWritableRunLayout(runRoot); const materialization = await materializeContainerizedRunEnvelope(envelope); const workerScript = workerScriptForRole( materialization.runtime.container_paths.repo_path, From 9105242d550b1ca70a5dc073efb6a6afadc80af5 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Fri, 10 Apr 2026 10:49:00 +0800 Subject: [PATCH 04/19] Fix docker worker contract alignment --- adapters/generic-cli/adapter-capability.json | 1 + adapters/generic-cli/docker-runtime.ts | 125 ++++++++++++++++--- core/loop/phase1-local-flow.ts | 32 +++-- 3 files changed, 130 insertions(+), 28 deletions(-) diff --git a/adapters/generic-cli/adapter-capability.json b/adapters/generic-cli/adapter-capability.json index 9576606..d1ca08b 100644 --- a/adapters/generic-cli/adapter-capability.json +++ b/adapters/generic-cli/adapter-capability.json @@ -17,6 +17,7 @@ "filesystem_write": { "mode": "allow", "scope": [ + "repo", "state", "artifacts", "runtime-home" diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index 365a88b..a049fab 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -1,5 +1,5 @@ import { dirname, isAbsolute, join, resolve } from "node:path"; -import { ensureDir, toPosixPath, uniqueStrings, writeJson } from "../../core/loop/support.ts"; +import { ensureDir, readText, sha256Text, toPosixPath, uniqueStrings, writeJson } from "../../core/loop/support.ts"; import type { 
ContainerPathMap, ContainerPathMount, @@ -7,9 +7,10 @@ import type { RunEnvelope, RunExitStatus, RunRole, + TaskPacket, } from "../../core/contracts/types.ts"; -const CONTAINER_PATHS = { +export const CONTAINER_PATHS = { repo: "/work/repo", state: "/work/state", artifacts: "/work/artifacts", @@ -18,6 +19,7 @@ const CONTAINER_PATHS = { } as const; const DEFAULT_BASE_IMAGE = "codingclaw-worker-base:phase1-local"; +const IMAGE_SIGNATURE_LABEL = "io.codingclaw.image-signature"; const DEFAULT_ROLE_IMAGES: Record<"builder" | "qa", string> = { builder: "codingclaw-worker-builder:phase1-local", @@ -72,6 +74,17 @@ interface CommandExecutionResult { spawn_error: string | null; } +interface DockerMappedPaths { + repo_path: string; + state_path: string; + artifact_path: string; + runtime_home: string; +} + +export interface DockerPathMappingRequest extends DockerMappedPaths { + run_role?: RunRole; +} + function normalizeHostPath(value: string): string { return toPosixPath(resolve(value)).replace(/\/+$/u, ""); } @@ -132,41 +145,54 @@ export class DockerPathMapper { } } -export function buildDockerPathMapping(envelope: RunEnvelope): DockerPathMapper { +export function buildDockerPathMapping(paths: DockerPathMappingRequest): DockerPathMapper { return new DockerPathMapper([ { name: "repo", - host_path: envelope.repo_path, + host_path: paths.repo_path, container_path: CONTAINER_PATHS.repo, - read_only: true, + read_only: paths.run_role !== "builder", }, { name: "state", - host_path: envelope.state_path, + host_path: paths.state_path, container_path: CONTAINER_PATHS.state, read_only: false, }, { name: "artifacts", - host_path: artifactRootFromRunRoot(envelope.artifact_path), + host_path: artifactRootFromRunRoot(paths.artifact_path), container_path: CONTAINER_PATHS.artifacts, read_only: false, }, { name: "runtime-home", - host_path: envelope.runtime_home, + host_path: paths.runtime_home, container_path: CONTAINER_PATHS.runtimeHome, read_only: false, }, { name: "cache", - host_path: 
join(envelope.runtime_home, "cache"), + host_path: join(paths.runtime_home, "cache"), container_path: CONTAINER_PATHS.cache, read_only: false, }, ]); } +export function containerizeTaskPacket(taskPacket: TaskPacket, paths: DockerMappedPaths): TaskPacket { + const mapper = buildDockerPathMapping(paths); + return { + ...taskPacket, + repo_path: mapper.mapPath(taskPacket.repo_path), + state_path: mapper.mapPath(taskPacket.state_path), + artifact_path: mapper.mapPath(taskPacket.artifact_path), + runtime_home: mapper.mapPath(taskPacket.runtime_home), + previous_handoff_path: mapper.mapPath(taskPacket.previous_handoff_path), + requested_capabilities: uniqueStrings([...taskPacket.requested_capabilities, "container_control"]), + }; +} + function buildContainerPathMap(envelope: RunEnvelope, mapper: DockerPathMapper): ContainerPathMap { return { repo_path: mapper.mapPath(envelope.repo_path), @@ -241,6 +267,36 @@ function mountArg(mount: ContainerPathMount): string { return `type=bind,source=${mount.host_path},target=${mount.container_path}${mode}`; } +function dockerUserArgs(): string[] { + if (typeof process.getuid !== "function" || typeof process.getgid !== "function") { + return []; + } + return ["--user", `${process.getuid()}:${process.getgid()}`]; +} + +function normalizeImageSignature(value: string): string | null { + const normalized = value.trim(); + if (!normalized || normalized === "") { + return null; + } + return normalized; +} + +async function buildImageSignature( + dockerfilePath: string, + buildArgs: string[], + extraInputs: string[] = [], +): Promise { + return sha256Text( + JSON.stringify({ + dockerfile_path: toPosixPath(dockerfilePath), + dockerfile_text: await readText(dockerfilePath), + build_args: buildArgs, + extra_inputs: extraInputs, + }), + ); +} + async function spawnCommand(command: string[], cwd: string): Promise { try { const handle = Bun.spawn({ @@ -309,28 +365,52 @@ export class DockerWorkerLauncher implements RoleImageResolver { image: 
string, dockerfilePath: string, buildArgs: string[] = [], + extraSignatureInputs: string[] = [], ): Promise { - if (this.preparedImages.has(image)) { + const signature = await buildImageSignature(dockerfilePath, buildArgs, extraSignatureInputs); + const preparedKey = `${image}@${signature}`; + if (this.preparedImages.has(preparedKey)) { return null; } - const inspect = await spawnCommand([this.dockerExecutable, "image", "inspect", image], this.repoRoot); + const inspect = await spawnCommand( + [ + this.dockerExecutable, + "image", + "inspect", + image, + "--format", + `{{index .Config.Labels "${IMAGE_SIGNATURE_LABEL}"}}`, + ], + this.repoRoot, + ); if (inspect.spawn_error) { return inspect; } - if (inspect.exitCode === 0) { - this.preparedImages.add(image); + if (inspect.exitCode === 0 && normalizeImageSignature(inspect.stdout) === signature) { + this.preparedImages.add(preparedKey); return null; } const build = await spawnCommand( - [this.dockerExecutable, "build", "--file", dockerfilePath, "--tag", image, ...buildArgs, this.repoRoot], + [ + this.dockerExecutable, + "build", + "--file", + dockerfilePath, + "--tag", + image, + "--label", + `${IMAGE_SIGNATURE_LABEL}=${signature}`, + ...buildArgs, + this.repoRoot, + ], this.repoRoot, ); if (build.spawn_error || build.exitCode !== 0) { return build; } - this.preparedImages.add(image); + this.preparedImages.add(preparedKey); return null; } @@ -338,14 +418,18 @@ export class DockerWorkerLauncher implements RoleImageResolver { if (runRole !== "builder" && runRole !== "qa") { throw new Error(`unsupported docker worker role: ${runRole}`); } - const baseFailure = await this.ensureImage(this.baseImage, buildImageDockerfile(this.repoRoot, "base")); + const baseDockerfilePath = buildImageDockerfile(this.repoRoot, "base"); + const baseSignature = await buildImageSignature(baseDockerfilePath, []); + const baseFailure = await this.ensureImage(this.baseImage, baseDockerfilePath); if (baseFailure) { return baseFailure; } - return 
this.ensureImage(image, buildImageDockerfile(this.repoRoot, runRole), [ - "--build-arg", - `CODINGCLAW_BASE_IMAGE=${this.baseImage}`, - ]); + return this.ensureImage( + image, + buildImageDockerfile(this.repoRoot, runRole), + ["--build-arg", `CODINGCLAW_BASE_IMAGE=${this.baseImage}`], + [this.baseImage, baseSignature], + ); } async launch(request: DockerWorkerLaunchRequest): Promise { @@ -366,6 +450,7 @@ export class DockerWorkerLauncher implements RoleImageResolver { "--rm", "--network", "none", + ...dockerUserArgs(), "--workdir", request.runtime.workdir, "--env", diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index a669c71..40e9825 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -1,5 +1,6 @@ import { join } from "node:path"; import { GenericCliAdapter } from "../../adapters/generic-cli/adapter.ts"; +import { containerizeTaskPacket } from "../../adapters/generic-cli/docker-runtime.ts"; import { writeApprovalArchive } from "../../ops/archive/approvals.ts"; import { buildEnvironmentSnapshotMetadata, @@ -186,9 +187,15 @@ async function buildTaskPacket( ); await ensureDir(join(artifactRoot, "metadata")); + const containerizedPacket = containerizeTaskPacket(packetWithoutChecksum, { + repo_path: repoRoot, + state_path: stateRoot, + artifact_path: artifactRoot, + runtime_home: runtimeHome, + }); const finalPacket: TaskPacket = { - ...packetWithoutChecksum, - task_packet_sha256: taskPacketDigest(packetWithoutChecksum), + ...containerizedPacket, + task_packet_sha256: taskPacketDigest(containerizedPacket), }; await writeJson(taskPacketPath, finalPacket); @@ -202,6 +209,9 @@ function taskPacketDigest(taskPacket: TaskPacket): string { async function buildRunEnvelope( repoRoot: string, taskPacket: TaskPacket, + stateRoot: string, + artifactRoot: string, + runtimeHome: string, previousHandoffPath: string, approvalSnapshotPath: string, traceContext: Record, @@ -212,12 +222,12 @@ async function buildRunEnvelope( 
__RUN_ID__: taskPacket.run_id, __RUN_ROLE__: taskPacket.run_role, __RUN_ATTEMPT__: taskPacket.run_attempt, - __REPO_PATH__: taskPacket.repo_path, + __REPO_PATH__: repoRoot, __BASE_COMMIT__: taskPacket.base_commit, - __STATE_PATH__: taskPacket.state_path, - __ARTIFACT_PATH__: taskPacket.artifact_path, - __RUNTIME_HOME__: taskPacket.runtime_home, - __TASK_PACKET_PATH__: join(taskPacket.artifact_path, "metadata", "task-packet.en.json"), + __STATE_PATH__: stateRoot, + __ARTIFACT_PATH__: artifactRoot, + __RUNTIME_HOME__: runtimeHome, + __TASK_PACKET_PATH__: join(artifactRoot, "metadata", "task-packet.en.json"), __TASK_PACKET_SHA256__: taskPacket.task_packet_sha256, __PREVIOUS_HANDOFF_PATH__: previousHandoffPath, __APPROVAL_SNAPSHOT_PATH__: approvalSnapshotPath, @@ -227,7 +237,7 @@ async function buildRunEnvelope( return { ...envelope, - requested_capabilities: uniqueStrings([...envelope.requested_capabilities, "container_control"]), + requested_capabilities: taskPacket.requested_capabilities, container_runtime: null, }; } @@ -677,6 +687,9 @@ export async function runPhase1Local(repoRoot: string): Promise Date: Fri, 10 Apr 2026 10:53:05 +0800 Subject: [PATCH 05/19] fix: fix --- prompt_phase1_non_success_containerization.md | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 prompt_phase1_non_success_containerization.md diff --git a/prompt_phase1_non_success_containerization.md b/prompt_phase1_non_success_containerization.md new file mode 100644 index 0000000..a854f07 --- /dev/null +++ b/prompt_phase1_non_success_containerization.md @@ -0,0 +1,66 @@ +You are implementing one bounded Phase 1 task for CodingClaw. 
+ +Read these documents first: +- docs/SYSTEM_BLUEPRINT.md +- docs/ARCHITECTURE_OVERVIEW.md +- docs/DEPLOYMENT_PLAN.md +- docs/STATE_STORE_SPEC.md +- docs/EXECUTOR_ADAPTER_CONTRACT.md +- docs/ARTIFACT_LAYOUT_SPEC.md +- docs/STATUS_MODEL.md +- docs/LOOP_SPEC.md +- docs/QA_CONTRACT.en.md + +Task +Extend the Phase 1 local dockerized loop so the current success-only implementation also closes correctly for `FIXBACK_REQUIRED`, `FAILED_INFRA`, and `TIMEOUT`. + +Goal +Build the smallest working implementation that preserves the current real-Docker happy path while making the Phase 1 loop stop, archive, and mirror state correctly for the allowed non-success run exits. + +In scope +- `core/loop/phase1-local-flow.ts` +- `adapters/generic-cli/adapter.ts` +- `adapters/generic-cli/docker-runtime.ts` +- `core/loop/state-store.ts` +- focused verification in `tests/integration/` and any directly related targeted tests +- canonical outputs under `jobs/<job_id>/artifacts/runs/<run_id>/...` +- canonical outputs under `jobs/<job_id>/state/...` +- canonical outputs under `state/...` +- `job-manifest.json`, `checksums.txt`, and `contract-freeze.sha256` behavior for non-success exits + +Out of scope +- implementing a real fixback executor or multi-round fixback workflow +- new run roles or review executor behavior +- change-request, approval-resume, credential-resume, or takeover expansions beyond preserving current behavior +- new status vocabulary +- unrelated refactors + +Constraints +- Preserve the single-node Phase 1 design. +- Keep builder and QA launched through the existing Docker worker path. +- Stop the loop after the first non-success exit instead of continuing into an unsupported downstream run. +- Reuse the existing run exit to job state mapping from `STATUS_MODEL.md`. +- Keep artifacts and state externalized at the canonical host paths. +- Do not write `artifacts/final/final-summary.en.md` for jobs that do not reach final archive state. +- Do not add new dependencies.
+- English repo-facing artifacts only. + +Deliverables +- updated loop branching that handles builder and QA non-success exits without breaking checksum or manifest closure +- adapter runtime handling that classifies Docker timeout and container launch failure into the correct standard run exit status +- any minimal state-store adjustments required so mirrored `state/` files match the archived run outcome +- focused verification that covers at least one builder-side early stop and one QA-side non-success closeout + +Acceptance criteria +- If builder exits `FIXBACK_REQUIRED`, `FAILED_INFRA`, or `TIMEOUT`, the loop does not dispatch QA, still writes the canonical builder run bundle, updates `job-manifest.json` and mirrored `state/` to the mapped job state, and keeps checksum verification passing. +- If QA exits `FIXBACK_REQUIRED`, `FAILED_INFRA`, or `TIMEOUT`, the loop does not run archive finalization, leaves `artifacts/final/final-summary.en.md` absent, and keeps the manifest plus mirrored state aligned with `STATUS_MODEL.md`. +- Docker launch failures are reported as `FAILED_INFRA`, and enforced runtime timeout exits are reported as `TIMEOUT`, without inventing new machine-readable statuses. +- The command logs still record the `docker run` invocation and the archived outputs remain under the canonical host paths. +- The existing real-Docker success path is not regressed. +- The result matches the existing docs instead of redefining them. + +Execution instructions +1. Explore the relevant docs and current code first, especially `core/loop/phase1-local-flow.ts`, `adapters/generic-cli/adapter.ts`, `adapters/generic-cli/docker-runtime.ts`, and `core/loop/state-store.ts`. +2. Implement the minimum viable vertical slice for these three non-success statuses only. +3. Run focused verification, including real Docker where practical. +4. Report changed files, verification performed, and any unresolved risks. 
From 44b30cd25f0022861b42750d0ee0142597e223ed Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Fri, 10 Apr 2026 11:42:47 +0800 Subject: [PATCH 06/19] feat: close phase1 docker non-success runs correctly --- adapters/generic-cli/adapter.ts | 1 + adapters/generic-cli/docker-runtime.ts | 51 ++- core/loop/phase1-local-flow.ts | 149 +++++---- tests/integration/test_phase1_local_flow.py | 330 +++++++++++++++++++- 4 files changed, 461 insertions(+), 70 deletions(-) diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index dabfd9d..8201484 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -173,6 +173,7 @@ export class GenericCliAdapter { worker_script_path: workerScript, envelope_path: materialization.runtime.envelope_container_path, runtime: materialization.runtime, + time_limits: envelope.time_limits, }); } catch (error) { launchResult = unexpectedLaunchResult(error); diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index a049fab..8faaa03 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -56,6 +56,7 @@ export interface DockerWorkerLaunchRequest { worker_script_path: string; envelope_path: string; runtime: ContainerRuntimeConfig; + time_limits: Record; } export interface DockerWorkerLaunchResult { @@ -72,6 +73,7 @@ interface CommandExecutionResult { stdout: string; stderr: string; spawn_error: string | null; + timed_out: boolean; } interface DockerMappedPaths { @@ -297,7 +299,36 @@ async function buildImageSignature( ); } -async function spawnCommand(command: string[], cwd: string): Promise { +function readNumericLimit(value: unknown): number | null { + if (typeof value === "number" && Number.isFinite(value)) { + return value; + } + if (typeof value === "string" && value.trim().length > 0) { + const parsed = Number(value); + if (Number.isFinite(parsed)) { + return parsed; + } + } + return null; +} + 
+function resolveTimeoutMs(timeLimits: Record): number | null { + const milliseconds = readNumericLimit(timeLimits.milliseconds ?? timeLimits.ms); + if (milliseconds !== null) { + return Math.max(0, Math.round(milliseconds)); + } + const seconds = readNumericLimit(timeLimits.seconds); + if (seconds !== null) { + return Math.max(0, Math.round(seconds * 1000)); + } + const minutes = readNumericLimit(timeLimits.minutes); + if (minutes !== null) { + return Math.max(0, Math.round(minutes * 60_000)); + } + return null; +} + +async function spawnCommand(command: string[], cwd: string, timeoutMs: number | null = null): Promise { try { const handle = Bun.spawn({ cmd: command, @@ -307,13 +338,25 @@ async function spawnCommand(command: string[], cwd: string): Promise { + timedOut = true; + handle.kill(); + }, timeoutMs); const exitCode = await handle.exited; + if (timeoutHandle !== null) { + clearTimeout(timeoutHandle); + } return { command, exitCode, stdout: await stdoutPromise, stderr: await stderrPromise, spawn_error: null, + timed_out: timedOut, }; } catch (error) { const message = error instanceof Error ? 
error.message : String(error); @@ -323,11 +366,15 @@ async function spawnCommand(command: string[], cwd: string): Promise { const taskPacketPath = join(artifactRoot, "metadata", "task-packet.en.json"); const { expectedArtifacts, verificationTargets } = roleArtifacts(runRole); @@ -186,18 +186,14 @@ async function buildTaskPacket( }, ); - await ensureDir(join(artifactRoot, "metadata")); - const containerizedPacket = containerizeTaskPacket(packetWithoutChecksum, { - repo_path: repoRoot, - state_path: stateRoot, - artifact_path: artifactRoot, - runtime_home: runtimeHome, - }); const finalPacket: TaskPacket = { - ...containerizedPacket, - task_packet_sha256: taskPacketDigest(containerizedPacket), + ...packetWithoutChecksum, + task_packet_sha256: taskPacketDigest(packetWithoutChecksum), }; - await writeJson(taskPacketPath, finalPacket); + if (persist) { + await ensureDir(join(artifactRoot, "metadata")); + await writeJson(taskPacketPath, finalPacket); + } return finalPacket; } @@ -237,6 +233,8 @@ async function buildRunEnvelope( return { ...envelope, + budget_limits: taskPacket.budget_limits, + time_limits: taskPacket.time_limits, requested_capabilities: taskPacket.requested_capabilities, container_runtime: null, }; @@ -560,9 +558,9 @@ export interface Phase1RunSummary { job_id: string; job_root: string; builder_run_id: string; - qa_run_id: string; + qa_run_id: string | null; builder_status: string; - qa_status: string; + qa_status: string | null; manifest_path: string; checksums_path: string; } @@ -595,7 +593,7 @@ export async function runPhase1Local(repoRoot: string): Promise Path: return repo_root -def run_phase1(repo_root: Path) -> subprocess.CompletedProcess[str]: +def run_phase1(repo_root: Path, extra_env: dict[str, str] | None = None) -> subprocess.CompletedProcess[str]: + env = os.environ.copy() + if extra_env: + env.update(extra_env) return subprocess.run( ["bun", "run", "phase1"], cwd=repo_root, capture_output=True, text=True, + env=env, ) @@ -51,6 +57,242 @@ 
def dependency_snapshot_digest(repo_root: Path) -> str: return hashlib.sha256("\n".join(inputs).encode("utf-8")).hexdigest() +def write_fake_docker(tmp_path: Path) -> Path: + script_path = tmp_path / "fake-docker.py" + script_path.write_text( + textwrap.dedent( + """\ + #!/usr/bin/env python3 + import json + import os + import sys + import time + from pathlib import Path + + + def parse_mount(raw: str) -> dict[str, str]: + entry: dict[str, str] = {} + for item in raw.split(","): + if "=" in item: + key, value = item.split("=", 1) + entry[key] = value + else: + entry[item] = "true" + return entry + + + def build_mounts(argv: list[str]) -> list[dict[str, str]]: + mounts: list[dict[str, str]] = [] + index = 0 + while index < len(argv): + if argv[index] == "--mount": + mounts.append(parse_mount(argv[index + 1])) + index += 2 + continue + index += 1 + return mounts + + + def map_path(container_path: str, mounts: list[dict[str, str]]) -> str: + for mount in sorted(mounts, key=lambda item: len(item["target"]), reverse=True): + target = mount["target"].rstrip("/") + if container_path == target: + return mount["source"] + if container_path.startswith(f"{target}/"): + return f"{mount['source']}{container_path[len(target):]}" + return container_path + + + def load_json(path: str) -> dict: + return json.loads(Path(path).read_text(encoding="utf-8")) + + + def write_text(path: Path, value: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(value, encoding="utf-8") + + + def write_json(path: Path, value: dict) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(value, indent=2) + "\\n", encoding="utf-8") + + + def task_packet_for(envelope: dict, mounts: list[dict[str, str]]) -> dict: + task_packet_path = map_path(envelope["task_packet_path"], mounts) + return load_json(task_packet_path) + + + def create_builder_outputs(envelope: dict, task_packet: dict, artifact_root: Path) -> dict: + write_text( + artifact_root / 
"reports" / "implementation-summary.en.md", + "\\n".join( + [ + "# Implementation Summary", + "", + f"- job_id: {envelope['job_id']}", + f"- run_id: {envelope['run_id']}", + f"- story_id: {task_packet['story']['story_id']}", + "", + ] + ) + + "\\n", + ) + write_text( + artifact_root / "reports" / "self-check.en.md", + "\\n".join( + [ + "# Self Check", + "", + "- required checks executed:", + "- scope-compliance", + "- artifact-presence", + "", + ] + ) + + "\\n", + ) + write_json( + artifact_root / "evidence" / "test-results" / "builder-check.json", + { + "run_id": envelope["run_id"], + "run_role": envelope["run_role"], + "story_id": task_packet["story"]["story_id"], + "status": "prepared-for-qa", + "checked_items": task_packet["story"]["mandatory_checks"], + }, + ) + return { + "status": "SUCCESS", + "completed": ["builder completed"], + "open": ["run QA"], + "blockers": [], + "next_action": "run QA", + "acceptance_status": "blocked", + "mandatory_check_status": "blocked", + "evidence_paths": [ + "reports/implementation-summary.en.md", + "reports/self-check.en.md", + "evidence/test-results/builder-check.json", + ], + "report_paths": [ + "reports/implementation-summary.en.md", + "reports/self-check.en.md", + ], + "test_result_paths": ["evidence/test-results/builder-check.json"], + "fixback_items": [], + } + + + def create_qa_outputs(envelope: dict, task_packet: dict, artifact_root: Path, status: str) -> dict: + write_text( + artifact_root / "reports" / "qa-report.en.md", + "\\n".join( + [ + "# QA Report", + "", + f"- job_id: {envelope['job_id']}", + f"- run_id: {envelope['run_id']}", + f"- story_id: {task_packet['story']['story_id']}", + f"- QA verdict: {status}", + "", + ] + ) + + "\\n", + ) + write_json( + artifact_root / "evidence" / "test-results" / "qa-check.json", + { + "run_id": envelope["run_id"], + "run_role": envelope["run_role"], + "story_id": task_packet["story"]["story_id"], + "status": status, + }, + ) + write_json( + artifact_root / "metadata" / 
"qa-verdict.json", + { + "story_id": task_packet["story"]["story_id"], + "status_family": "run_exit", + "status": status, + }, + ) + if status == "FIXBACK_REQUIRED": + write_text( + artifact_root / "reports" / "fixback-items.en.md", + "# Fixback Items\\n\\n- Restore builder artifact: reports/self-check.en.md\\n", + ) + return { + "status": status, + "completed": ["qa completed"], + "open": ["archive" if status == "SUCCESS" else "enter fixback"], + "blockers": [] if status == "SUCCESS" else ["Restore builder artifact: reports/self-check.en.md"], + "next_action": "archive" if status == "SUCCESS" else "enter fixback", + "acceptance_status": "pass" if status == "SUCCESS" else "fail", + "mandatory_check_status": "pass" if status == "SUCCESS" else "fail", + "evidence_paths": [ + "reports/qa-report.en.md", + "metadata/qa-verdict.json", + "evidence/test-results/qa-check.json", + *([] if status == "SUCCESS" else ["reports/fixback-items.en.md"]), + ], + "report_paths": [ + "reports/qa-report.en.md", + *([] if status == "SUCCESS" else ["reports/fixback-items.en.md"]), + ], + "test_result_paths": ["evidence/test-results/qa-check.json"], + "fixback_items": [] if status == "SUCCESS" else ["Restore builder artifact: reports/self-check.en.md"], + } + + + def main() -> int: + argv = sys.argv[1:] + if not argv: + return 1 + if argv[0] == "image" and len(argv) > 1 and argv[1] == "inspect": + return 1 + if argv[0] == "build": + return 0 + if argv[0] != "run": + return 1 + + mounts = build_mounts(argv) + envelope = load_json(map_path(argv[-1], mounts)) + task_packet = task_packet_for(envelope, mounts) + artifact_root = Path(map_path(envelope["artifact_path"], mounts)) + mode = os.environ.get("CODINGCLAW_FAKE_DOCKER_MODE", "success") + if mode == "slow_success": + time.sleep(float(os.environ.get("CODINGCLAW_FAKE_DOCKER_SLEEP", "1"))) + + if envelope["run_role"] == "builder": + output = create_builder_outputs(envelope, task_packet, artifact_root) + else: + qa_status = "FIXBACK_REQUIRED" 
if mode == "qa_fixback" else "SUCCESS" + output = create_qa_outputs(envelope, task_packet, artifact_root, qa_status) + + sys.stdout.write(json.dumps(output)) + return 0 + + + raise SystemExit(main()) + """ + ), + encoding="utf-8", + ) + script_path.chmod(0o755) + return script_path + + +def write_time_limit_minutes(repo_root: Path, minutes: float) -> None: + for relative_path in [ + "control/fixtures/phase1-local-run-envelope.json", + "control/fixtures/phase1-local-task-packet.en.json", + ]: + fixture_path = repo_root / relative_path + fixture = json.loads(fixture_path.read_text(encoding="utf-8")) + fixture["time_limits"]["minutes"] = minutes + fixture_path.write_text(json.dumps(fixture, indent=2) + "\n", encoding="utf-8") + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_rerun_rejects_mutating_existing_archive(tmp_path): @@ -87,3 +329,89 @@ def test_phase1_local_freeze_digest_captures_repo_dependency_inputs(tmp_path): freeze = json.loads((repo_root / "jobs" / "job-phase1-local" / "contract-freeze.json").read_text(encoding="utf-8")) assert freeze["dependency_snapshot_digest"] == dependency_snapshot_digest(repo_root) + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_builder_failed_infra_stops_before_qa(tmp_path): + repo_root = export_repo(tmp_path) + result = run_phase1(repo_root, {"CODINGCLAW_DOCKER_BIN": "does-not-exist"}) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + archived_trace = json.loads((job_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + live_trace = json.loads((repo_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + + assert [run["run_role"] for run in manifest["runs"]] == ["builder"] + assert 
[run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_INFRA"] + assert manifest["current_run_id"] == manifest["runs"][0]["run_id"] + assert manifest["stories"][0]["latest_run_role"] == "builder" + assert manifest["stories"][0]["latest_run_status"] == "FAILED_INFRA" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "builder" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == "PENDING" + assert live_trace == archived_trace + assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_qa_fixback_closes_without_final_summary(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": "qa_fixback", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + archived_trace = json.loads((job_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + live_trace = json.loads((repo_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + + assert [run["run_role"] for run in manifest["runs"]] == ["builder", "qa"] + assert [run["run_exit_status"] for run in manifest["runs"]] == ["SUCCESS", "FIXBACK_REQUIRED"] + assert manifest["status"] == "FIXBACK_PENDING" + assert manifest["stories"][0]["latest_run_role"] == "qa" + assert manifest["stories"][0]["latest_run_status"] == "FIXBACK_REQUIRED" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "qa" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == "FIXBACK_REQUIRED" + assert live_trace == archived_trace + assert not (job_root 
/ "artifacts" / "final" / "final-summary.en.md").exists() + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_builder_timeout_stops_before_qa(tmp_path): + repo_root = export_repo(tmp_path) + write_time_limit_minutes(repo_root, 0) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": "slow_success", + "CODINGCLAW_FAKE_DOCKER_SLEEP": "1", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + archived_trace = json.loads((job_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + + assert [run["run_role"] for run in manifest["runs"]] == ["builder"] + assert [run["run_exit_status"] for run in manifest["runs"]] == ["TIMEOUT"] + assert manifest["status"] == "AWAITING_OWNER" + assert manifest["stories"][0]["latest_run_role"] == "builder" + assert manifest["stories"][0]["latest_run_status"] == "TIMEOUT" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "builder" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == "PENDING" + assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() From c3324aa107327200fc8af82ff4ce5be2ef17529e Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Fri, 10 Apr 2026 12:30:14 +0800 Subject: [PATCH 07/19] fix: isolate docker worker artifacts and task packets --- adapters/generic-cli/adapter.ts | 70 ++++++++++++++++++- adapters/generic-cli/docker-runtime.ts | 45 +++++++++++-- core/contracts/types.ts | 2 +- tests/integration/test_phase1_local_flow.py | 74 +++++++++++++++++++++ 4 files changed, 182 insertions(+), 9 deletions(-) diff --git a/adapters/generic-cli/adapter.ts 
b/adapters/generic-cli/adapter.ts index 8201484..8bbbc98 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -1,7 +1,7 @@ -import { dirname, join } from "node:path"; +import { join } from "node:path"; import { buildArtifactIndex } from "../../ops/archive/artifact-index.ts"; import { writeRunTimings, writeWorkerLog } from "../../ops/archive/run-metadata.ts"; -import { ensureDir, relativePosix, toPosixPath, writeJson, writeText } from "../../core/loop/support.ts"; +import { ensureDir, relativePosix, toPosixPath, uniqueStrings, writeJson, writeText } from "../../core/loop/support.ts"; import type { AdapterExecutionResult, RunEnvelope, @@ -64,6 +64,13 @@ function unexpectedLaunchResult(error: unknown): DockerWorkerLaunchResult { }; } +const BUILDER_FALLBACK_REPORT_PATHS = ["reports/implementation-summary.en.md", "reports/self-check.en.md"]; +const BUILDER_FALLBACK_TEST_RESULT_PATHS = ["evidence/test-results/builder-check.json"]; +const BUILDER_FALLBACK_EVIDENCE_PATHS = [ + ...BUILDER_FALLBACK_REPORT_PATHS, + ...BUILDER_FALLBACK_TEST_RESULT_PATHS, +]; + async function ensureHostWritableRunLayout(runRoot: string): Promise { await ensureDir(join(runRoot, "logs")); await ensureDir(join(runRoot, "reports")); @@ -142,6 +149,58 @@ function renderHandoff( ].join("\n"); } +function withBuilderFallbackPaths(workerOutput: WorkerOutput): WorkerOutput { + return { + ...workerOutput, + evidence_paths: uniqueStrings([...workerOutput.evidence_paths, ...BUILDER_FALLBACK_EVIDENCE_PATHS]), + report_paths: uniqueStrings([...workerOutput.report_paths, ...BUILDER_FALLBACK_REPORT_PATHS]), + test_result_paths: uniqueStrings([...workerOutput.test_result_paths, ...BUILDER_FALLBACK_TEST_RESULT_PATHS]), + }; +} + +async function writeBuilderFallbackArtifacts( + runRoot: string, + envelope: RunEnvelope, + workerOutput: WorkerOutput, +): Promise { + await writeText( + join(runRoot, "reports", "implementation-summary.en.md"), + [ + "# Implementation Summary", + "", 
+ `- job_id: ${envelope.job_id}`, + `- run_id: ${envelope.run_id}`, + `- story_id: ${envelope.story_id}`, + `- status: ${workerOutput.status}`, + "- completed work:", + "- worker execution did not produce the builder implementation bundle", + "- adapter synthesized the required builder reports for archival completeness", + "", + ].join("\n"), + ); + await writeText( + join(runRoot, "reports", "self-check.en.md"), + [ + "# Self Check", + "", + "- required checks executed:", + "- scope-compliance: blocked", + "- artifact-presence: failed", + "- evidence-completeness: failed", + `- next required action: ${workerOutput.next_action}`, + "", + ].join("\n"), + ); + await writeJson(join(runRoot, "evidence", "test-results", "builder-check.json"), { + run_id: envelope.run_id, + run_role: envelope.run_role, + story_id: envelope.story_id, + status: workerOutput.status, + checked_items: [], + blockers: workerOutput.blockers, + }); +} + export class GenericCliAdapter { private readonly dockerLauncher: DockerWorkerLauncher; @@ -184,14 +243,17 @@ export class GenericCliAdapter { const endedAtDate = new Date(); let workerOutput: WorkerOutput; + let usedFallbackWorkerOutput = false; if (exitCode === 0) { try { workerOutput = JSON.parse(stdout) as WorkerOutput; } catch { workerOutput = fallbackWorkerOutput("worker output was not valid JSON"); + usedFallbackWorkerOutput = true; } } else { workerOutput = fallbackWorkerOutput(launchFailureText(launchResult), launchResult.failure_status ?? 
"FAILED_EXECUTION"); + usedFallbackWorkerOutput = true; } const durationMs = Math.max(0, endedAtDate.getTime() - startedAtDate.getTime()); @@ -220,6 +282,10 @@ export class GenericCliAdapter { stderr, }); await writeRunTimings(timingsPath, runTimings); + if (envelope.run_role === "builder" && usedFallbackWorkerOutput) { + workerOutput = withBuilderFallbackPaths(workerOutput); + await writeBuilderFallbackArtifacts(runRoot, envelope, workerOutput); + } const runResult: RunResult = { run_id: envelope.run_id, diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index 8faaa03..a2cc10a 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -1,5 +1,5 @@ -import { dirname, isAbsolute, join, resolve } from "node:path"; -import { ensureDir, readText, sha256Text, toPosixPath, uniqueStrings, writeJson } from "../../core/loop/support.ts"; +import { basename, dirname, isAbsolute, join, resolve } from "node:path"; +import { ensureDir, readJson, readText, sha256Text, toPosixPath, uniqueStrings, writeJson } from "../../core/loop/support.ts"; import type { ContainerPathMap, ContainerPathMount, @@ -95,6 +95,14 @@ function artifactRootFromRunRoot(runRoot: string): string { return dirname(dirname(runRoot)); } +function runRootContainerPath(runRoot: string): string { + return `${CONTAINER_PATHS.artifacts}/runs/${basename(normalizeHostPath(runRoot))}`; +} + +function taskPacketDigest(taskPacket: TaskPacket): string { + return sha256Text(`${JSON.stringify({ ...taskPacket, task_packet_sha256: "" }, null, 2)}\n`); +} + export function resolveDockerWorkerImage(runRole: RunRole): string { if (runRole !== "builder" && runRole !== "qa") { throw new Error(`unsupported docker worker role: ${runRole}`); @@ -153,18 +161,24 @@ export function buildDockerPathMapping(paths: DockerPathMappingRequest): DockerP name: "repo", host_path: paths.repo_path, container_path: CONTAINER_PATHS.repo, - read_only: paths.run_role !== 
"builder", + read_only: true, }, { name: "state", host_path: paths.state_path, container_path: CONTAINER_PATHS.state, - read_only: false, + read_only: true, }, { name: "artifacts", host_path: artifactRootFromRunRoot(paths.artifact_path), container_path: CONTAINER_PATHS.artifacts, + read_only: true, + }, + { + name: "run-artifacts", + host_path: paths.artifact_path, + container_path: runRootContainerPath(paths.artifact_path), read_only: false, }, { @@ -184,7 +198,7 @@ export function buildDockerPathMapping(paths: DockerPathMappingRequest): DockerP export function containerizeTaskPacket(taskPacket: TaskPacket, paths: DockerMappedPaths): TaskPacket { const mapper = buildDockerPathMapping(paths); - return { + const mappedTaskPacket: TaskPacket = { ...taskPacket, repo_path: mapper.mapPath(taskPacket.repo_path), state_path: mapper.mapPath(taskPacket.state_path), @@ -193,6 +207,10 @@ export function containerizeTaskPacket(taskPacket: TaskPacket, paths: DockerMapp previous_handoff_path: mapper.mapPath(taskPacket.previous_handoff_path), requested_capabilities: uniqueStrings([...taskPacket.requested_capabilities, "container_control"]), }; + return { + ...mappedTaskPacket, + task_packet_sha256: taskPacketDigest(mappedTaskPacket), + }; } function buildContainerPathMap(envelope: RunEnvelope, mapper: DockerPathMapper): ContainerPathMap { @@ -213,8 +231,17 @@ export async function materializeContainerizedRunEnvelope( ): Promise { const hostEnvelopePath = join(envelope.runtime_home, "envelopes", `${envelope.run_id}.json`); const containerEnvelopePath = join(envelope.runtime_home, "envelopes", "container", `${envelope.run_id}.json`); + const hostContainerTaskPacketPath = join( + envelope.runtime_home, + "envelopes", + "container", + "task-packets", + `${envelope.run_id}.json`, + ); const cacheHostPath = join(envelope.runtime_home, "cache"); const mapper = buildDockerPathMapping(envelope); + const containerTaskPacket = containerizeTaskPacket(await 
readJson(envelope.task_packet_path), envelope); + const containerTaskPacketPath = mapper.mapPath(hostContainerTaskPacketPath); const runtime: ContainerRuntimeConfig = { runtime: "docker", image: resolveDockerWorkerImage(envelope.run_role), @@ -222,7 +249,10 @@ export async function materializeContainerizedRunEnvelope( envelope_host_path: hostEnvelopePath, envelope_container_path: mapper.mapPath(containerEnvelopePath), mounts: mapper.mounts, - container_paths: buildContainerPathMap(envelope, mapper), + container_paths: { + ...buildContainerPathMap(envelope, mapper), + task_packet_path: containerTaskPacketPath, + }, }; const requestedCapabilities = uniqueStrings([...envelope.requested_capabilities, "container_control"]); const hostEnvelope: RunEnvelope = { @@ -237,6 +267,7 @@ export async function materializeContainerizedRunEnvelope( artifact_path: runtime.container_paths.artifact_path, runtime_home: runtime.container_paths.runtime_home, task_packet_path: runtime.container_paths.task_packet_path, + task_packet_sha256: containerTaskPacket.task_packet_sha256, previous_handoff_path: runtime.container_paths.previous_handoff_path, approval_snapshot_path: runtime.container_paths.approval_snapshot_path, trace_context: runtime.container_paths.trace_context, @@ -244,9 +275,11 @@ export async function materializeContainerizedRunEnvelope( await ensureDir(dirname(hostEnvelopePath)); await ensureDir(dirname(containerEnvelopePath)); + await ensureDir(dirname(hostContainerTaskPacketPath)); await ensureDir(cacheHostPath); await writeJson(hostEnvelopePath, hostEnvelope); await writeJson(containerEnvelopePath, containerEnvelope); + await writeJson(hostContainerTaskPacketPath, containerTaskPacket); return { host_envelope: hostEnvelope, diff --git a/core/contracts/types.ts b/core/contracts/types.ts index ed3b065..5331f9f 100644 --- a/core/contracts/types.ts +++ b/core/contracts/types.ts @@ -82,7 +82,7 @@ export interface TaskPacket { story: StoryContract; } -export type 
ContainerMountName = "repo" | "state" | "artifacts" | "runtime-home" | "cache"; +export type ContainerMountName = "repo" | "state" | "artifacts" | "run-artifacts" | "runtime-home" | "cache"; export interface ContainerPathMount { name: ContainerMountName; diff --git a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index e4577ea..81d2b0e 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -117,6 +117,21 @@ def write_json(path: Path, value: dict) -> None: path.write_text(json.dumps(value, indent=2) + "\\n", encoding="utf-8") + def maybe_capture(envelope: dict, task_packet: dict, mounts: list[dict[str, str]]) -> None: + capture_dir = os.environ.get("CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR", "").strip() + if not capture_dir: + return + capture_path = Path(capture_dir) / f"{envelope['run_role']}-{envelope['run_id']}.json" + write_json( + capture_path, + { + "envelope": envelope, + "task_packet": task_packet, + "mounts": mounts, + }, + ) + + def task_packet_for(envelope: dict, mounts: list[dict[str, str]]) -> dict: task_packet_path = map_path(envelope["task_packet_path"], mounts) return load_json(task_packet_path) @@ -269,6 +284,7 @@ def main() -> int: qa_status = "FIXBACK_REQUIRED" if mode == "qa_fixback" else "SUCCESS" output = create_qa_outputs(envelope, task_packet, artifact_root, qa_status) + maybe_capture(envelope, task_packet, mounts) sys.stdout.write(json.dumps(output)) return 0 @@ -293,6 +309,28 @@ def write_time_limit_minutes(repo_root: Path, minutes: float) -> None: fixture_path.write_text(json.dumps(fixture, indent=2) + "\n", encoding="utf-8") +def load_capture(capture_dir: Path, run_role: str) -> dict: + matches = sorted(capture_dir.glob(f"{run_role}-*.json")) + assert matches + return json.loads(matches[0].read_text(encoding="utf-8")) + + +def assert_builder_failure_bundle(job_root: Path) -> None: + run_roots = sorted(path for path in (job_root / "artifacts" / 
"runs").iterdir() if path.is_dir()) + assert len(run_roots) == 1 + builder_run_root = run_roots[0] + artifact_index = json.loads((builder_run_root / "metadata" / "artifact-index.json").read_text(encoding="utf-8")) + indexed_paths = {entry["path"] for entry in artifact_index["artifacts"]} + + for relative_path in [ + "reports/implementation-summary.en.md", + "reports/self-check.en.md", + "evidence/test-results/builder-check.json", + ]: + assert (builder_run_root / relative_path).exists() + assert relative_path in indexed_paths + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_rerun_rejects_mutating_existing_archive(tmp_path): @@ -331,6 +369,40 @@ def test_phase1_local_freeze_digest_captures_repo_dependency_inputs(tmp_path): assert freeze["dependency_snapshot_digest"] == dependency_snapshot_digest(repo_root) +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_builder_container_materialization_uses_read_only_inputs_and_container_paths(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + capture_dir = tmp_path / "captures" + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR": str(capture_dir), + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + builder_capture = load_capture(capture_dir, "builder") + builder_task_packet = builder_capture["task_packet"] + builder_envelope = builder_capture["envelope"] + mounts = {mount["target"]: mount for mount in builder_capture["mounts"]} + + assert builder_task_packet["repo_path"] == "/work/repo" + assert builder_task_packet["state_path"] == "/work/state" + assert builder_task_packet["runtime_home"] == "/work/runtime-home" + assert builder_task_packet["artifact_path"] == builder_envelope["artifact_path"] + assert 
builder_task_packet["artifact_path"].endswith(f"/artifacts/runs/{builder_envelope['run_id']}") + assert str(repo_root) not in json.dumps(builder_task_packet, ensure_ascii=False) + + assert mounts["/work/repo"]["readonly"] == "true" + assert mounts["/work/state"]["readonly"] == "true" + assert mounts["/work/artifacts"]["readonly"] == "true" + assert mounts[builder_envelope["artifact_path"]].get("readonly") != "true" + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_builder_failed_infra_stops_before_qa(tmp_path): @@ -353,6 +425,7 @@ def test_phase1_local_builder_failed_infra_stops_before_qa(tmp_path): assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == "PENDING" assert live_trace == archived_trace assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() + assert_builder_failure_bundle(job_root) @pytest.mark.integration @@ -415,3 +488,4 @@ def test_phase1_local_builder_timeout_stops_before_qa(tmp_path): assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "builder" assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == "PENDING" assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() + assert_builder_failure_bundle(job_root) From 2ab840b2fe2113a2e1cb4e320aebf37eb5b47d11 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Fri, 10 Apr 2026 13:38:32 +0800 Subject: [PATCH 08/19] fix: harden docker worker failure artifacts --- adapters/generic-cli/adapter-capability.json | 3 +- adapters/generic-cli/adapter.ts | 93 +++++++++++++++++++- adapters/generic-cli/docker-runtime.ts | 52 ++++++----- tests/integration/test_phase1_local_flow.py | 81 ++++++++++++++++- 4 files changed, 198 insertions(+), 31 deletions(-) diff --git a/adapters/generic-cli/adapter-capability.json b/adapters/generic-cli/adapter-capability.json index d1ca08b..5e12241 100644 --- 
a/adapters/generic-cli/adapter-capability.json +++ b/adapters/generic-cli/adapter-capability.json @@ -18,8 +18,7 @@ "mode": "allow", "scope": [ "repo", - "state", - "artifacts", + "run-artifacts", "runtime-home" ], "cost_level": "low", diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index 8bbbc98..6984e95 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -1,13 +1,14 @@ import { join } from "node:path"; import { buildArtifactIndex } from "../../ops/archive/artifact-index.ts"; import { writeRunTimings, writeWorkerLog } from "../../ops/archive/run-metadata.ts"; -import { ensureDir, relativePosix, toPosixPath, uniqueStrings, writeJson, writeText } from "../../core/loop/support.ts"; +import { ensureDir, readJson, relativePosix, toPosixPath, uniqueStrings, writeJson, writeText } from "../../core/loop/support.ts"; import type { AdapterExecutionResult, RunEnvelope, RunResult, RunRole, RunTimingMetadata, + TaskPacket, WorkerOutput, } from "../../core/contracts/types.ts"; import { DockerWorkerLauncher, type DockerWorkerLaunchResult, materializeContainerizedRunEnvelope } from "./docker-runtime.ts"; @@ -70,6 +71,14 @@ const BUILDER_FALLBACK_EVIDENCE_PATHS = [ ...BUILDER_FALLBACK_REPORT_PATHS, ...BUILDER_FALLBACK_TEST_RESULT_PATHS, ]; +const QA_FALLBACK_REPORT_PATHS = ["reports/qa-report.en.md", "reports/fixback-items.en.md"]; +const QA_FALLBACK_TEST_RESULT_PATHS = ["evidence/test-results/qa-check.json"]; +const QA_FALLBACK_METADATA_PATHS = ["metadata/qa-verdict.json"]; +const QA_FALLBACK_EVIDENCE_PATHS = [ + ...QA_FALLBACK_REPORT_PATHS, + ...QA_FALLBACK_TEST_RESULT_PATHS, + ...QA_FALLBACK_METADATA_PATHS, +]; async function ensureHostWritableRunLayout(runRoot: string): Promise { await ensureDir(join(runRoot, "logs")); @@ -158,6 +167,27 @@ function withBuilderFallbackPaths(workerOutput: WorkerOutput): WorkerOutput { }; } +function qaFallbackItems(workerOutput: WorkerOutput): string[] { + if 
(workerOutput.fixback_items.length > 0) { + return workerOutput.fixback_items; + } + if (workerOutput.blockers.length > 0) { + return workerOutput.blockers; + } + return ["QA did not produce its required outputs."]; +} + +function withQaFallbackPaths(workerOutput: WorkerOutput): WorkerOutput { + const fixbackItems = qaFallbackItems(workerOutput); + return { + ...workerOutput, + evidence_paths: uniqueStrings([...workerOutput.evidence_paths, ...QA_FALLBACK_EVIDENCE_PATHS]), + report_paths: uniqueStrings([...workerOutput.report_paths, ...QA_FALLBACK_REPORT_PATHS]), + test_result_paths: uniqueStrings([...workerOutput.test_result_paths, ...QA_FALLBACK_TEST_RESULT_PATHS]), + fixback_items: fixbackItems, + }; +} + async function writeBuilderFallbackArtifacts( runRoot: string, envelope: RunEnvelope, @@ -201,6 +231,54 @@ async function writeBuilderFallbackArtifacts( }); } +async function writeQaFallbackArtifacts( + runRoot: string, + envelope: RunEnvelope, + taskPacket: TaskPacket, + workerOutput: WorkerOutput, +): Promise { + await writeText( + join(runRoot, "reports", "qa-report.en.md"), + [ + "# QA Report", + "", + `- job_id: ${envelope.job_id}`, + `- run_id: ${envelope.run_id}`, + `- story_id: ${taskPacket.story.story_id}`, + `- QA verdict: ${workerOutput.status}`, + "- checked artifacts:", + "- none", + "", + "- missing artifacts:", + ...workerOutput.fixback_items.map((value) => `- ${value}`), + "", + ].join("\n"), + ); + await writeText( + join(runRoot, "reports", "fixback-items.en.md"), + ["# Fixback Items", "", ...workerOutput.fixback_items.map((value) => `- ${value}`), ""].join("\n"), + ); + await writeJson(join(runRoot, "evidence", "test-results", "qa-check.json"), { + run_id: envelope.run_id, + run_role: envelope.run_role, + story_id: taskPacket.story.story_id, + status: workerOutput.status, + checked_items: [], + blockers: workerOutput.blockers, + }); + await writeJson(join(runRoot, "metadata", "qa-verdict.json"), { + story_id: taskPacket.story.story_id, + 
status_family: "run_exit", + status: workerOutput.status, + acceptance_closure: { + pass: 0, + fail: 0, + blocked: taskPacket.story.acceptance_ids.length, + total: taskPacket.story.acceptance_ids.length, + }, + }); +} + export class GenericCliAdapter { private readonly dockerLauncher: DockerWorkerLauncher; @@ -282,9 +360,16 @@ export class GenericCliAdapter { stderr, }); await writeRunTimings(timingsPath, runTimings); - if (envelope.run_role === "builder" && usedFallbackWorkerOutput) { - workerOutput = withBuilderFallbackPaths(workerOutput); - await writeBuilderFallbackArtifacts(runRoot, envelope, workerOutput); + if (usedFallbackWorkerOutput) { + if (envelope.run_role === "builder") { + workerOutput = withBuilderFallbackPaths(workerOutput); + await writeBuilderFallbackArtifacts(runRoot, envelope, workerOutput); + } + if (envelope.run_role === "qa") { + workerOutput = withQaFallbackPaths(workerOutput); + const taskPacket = await readJson(envelope.task_packet_path); + await writeQaFallbackArtifacts(runRoot, envelope, taskPacket, workerOutput); + } } const runResult: RunResult = { diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index a2cc10a..8781bad 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -156,12 +156,13 @@ export class DockerPathMapper { } export function buildDockerPathMapping(paths: DockerPathMappingRequest): DockerPathMapper { + const repoReadOnly = paths.run_role === "builder" ? 
false : true; return new DockerPathMapper([ { name: "repo", host_path: paths.repo_path, container_path: CONTAINER_PATHS.repo, - read_only: true, + read_only: repoReadOnly, }, { name: "state", @@ -309,6 +310,30 @@ function dockerUserArgs(): string[] { return ["--user", `${process.getuid()}:${process.getgid()}`]; } +function buildRunCommand(dockerExecutable: string, request: DockerWorkerLaunchRequest): string[] { + return [ + dockerExecutable, + "run", + "--rm", + "--network", + "none", + ...dockerUserArgs(), + "--workdir", + request.runtime.workdir, + "--env", + "HOME=/work/runtime-home/home", + "--env", + "XDG_CACHE_HOME=/work/cache", + "--env", + "BUN_INSTALL_CACHE_DIR=/work/cache/bun", + ...request.runtime.mounts.flatMap((mount) => ["--mount", mountArg(mount)]), + request.image, + "bun", + request.worker_script_path, + request.envelope_path, + ]; +} + function normalizeImageSignature(value: string): string | null { const normalized = value.trim(); if (!normalized || normalized === "") { @@ -513,38 +538,17 @@ export class DockerWorkerLauncher implements RoleImageResolver { } async launch(request: DockerWorkerLaunchRequest): Promise { + const command = buildRunCommand(this.dockerExecutable, request); const imagePreparationFailure = await this.ensureRoleImage(request.run_role, request.image); if (imagePreparationFailure) { return { - command: imagePreparationFailure.command, + command, exitCode: imagePreparationFailure.exitCode, stdout: imagePreparationFailure.stdout, stderr: imagePreparationFailure.stderr, failure_status: "FAILED_INFRA", }; } - - const command = [ - this.dockerExecutable, - "run", - "--rm", - "--network", - "none", - ...dockerUserArgs(), - "--workdir", - request.runtime.workdir, - "--env", - "HOME=/work/runtime-home/home", - "--env", - "XDG_CACHE_HOME=/work/cache", - "--env", - "BUN_INSTALL_CACHE_DIR=/work/cache/bun", - ...request.runtime.mounts.flatMap((mount) => ["--mount", mountArg(mount)]), - request.image, - "bun", - 
request.worker_script_path, - request.envelope_path, - ]; const run = await spawnCommand(command, this.repoRoot, resolveTimeoutMs(request.time_limits)); return { command: run.command, diff --git a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index 81d2b0e..04a6220 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -277,10 +277,15 @@ def main() -> int: mode = os.environ.get("CODINGCLAW_FAKE_DOCKER_MODE", "success") if mode == "slow_success": time.sleep(float(os.environ.get("CODINGCLAW_FAKE_DOCKER_SLEEP", "1"))) + if mode == "qa_timeout" and envelope["run_role"] == "qa": + time.sleep(float(os.environ.get("CODINGCLAW_FAKE_DOCKER_SLEEP", "1"))) if envelope["run_role"] == "builder": output = create_builder_outputs(envelope, task_packet, artifact_root) else: + if mode == "qa_failed_infra": + sys.stderr.write("docker: qa container failed before worker start\\n") + return 125 qa_status = "FIXBACK_REQUIRED" if mode == "qa_fixback" else "SUCCESS" output = create_qa_outputs(envelope, task_packet, artifact_root, qa_status) @@ -397,10 +402,17 @@ def test_phase1_local_builder_container_materialization_uses_read_only_inputs_an assert builder_task_packet["artifact_path"].endswith(f"/artifacts/runs/{builder_envelope['run_id']}") assert str(repo_root) not in json.dumps(builder_task_packet, ensure_ascii=False) - assert mounts["/work/repo"]["readonly"] == "true" + capability_manifest = json.loads( + (repo_root / "adapters" / "generic-cli" / "adapter-capability.json").read_text(encoding="utf-8") + ) + filesystem_write_scope = set(capability_manifest["capabilities"]["filesystem_write"]["scope"]) + + assert mounts["/work/repo"].get("readonly") != "true" assert mounts["/work/state"]["readonly"] == "true" assert mounts["/work/artifacts"]["readonly"] == "true" assert mounts[builder_envelope["artifact_path"]].get("readonly") != "true" + assert mounts["/work/runtime-home"].get("readonly") 
!= "true" + assert filesystem_write_scope == {"repo", "run-artifacts", "runtime-home"} @pytest.mark.integration @@ -428,6 +440,23 @@ def test_phase1_local_builder_failed_infra_stops_before_qa(tmp_path): assert_builder_failure_bundle(job_root) +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_builder_failed_infra_command_log_records_docker_run(tmp_path): + repo_root = export_repo(tmp_path) + result = run_phase1(repo_root, {"CODINGCLAW_DOCKER_BIN": "does-not-exist"}) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + run_root = next(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + command_log = (run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") + + assert "command: does-not-exist run --rm --network none" in command_log + assert " image inspect " not in command_log + assert " build --file " not in command_log + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_qa_fixback_closes_without_final_summary(tmp_path): @@ -459,6 +488,56 @@ def test_phase1_local_qa_fixback_closes_without_final_summary(tmp_path): assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() +@pytest.mark.integration +@pytest.mark.parametrize( + ("mode", "expected_status"), + [("qa_failed_infra", "FAILED_INFRA"), ("qa_timeout", "TIMEOUT")], +) +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_qa_non_success_writes_required_qa_bundle(tmp_path, mode, expected_status): + repo_root = export_repo(tmp_path) + if mode == "qa_timeout": + write_time_limit_minutes(repo_root, 0.001) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": mode, + 
"CODINGCLAW_FAKE_DOCKER_SLEEP": "1", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + archived_trace = json.loads((job_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + run_roots = sorted(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + qa_run_root = run_roots[-1] + artifact_index = json.loads((qa_run_root / "metadata" / "artifact-index.json").read_text(encoding="utf-8")) + indexed_paths = {entry["path"] for entry in artifact_index["artifacts"]} + + assert [run["run_role"] for run in manifest["runs"]] == ["builder", "qa"] + assert [run["run_exit_status"] for run in manifest["runs"]] == ["SUCCESS", expected_status] + assert manifest["status"] == "AWAITING_OWNER" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "qa" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == expected_status + assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() + + for relative_path in [ + "reports/qa-report.en.md", + "reports/fixback-items.en.md", + "metadata/qa-verdict.json", + "evidence/test-results/qa-check.json", + ]: + assert (qa_run_root / relative_path).exists() + assert relative_path in indexed_paths + + qa_verdict = json.loads((qa_run_root / "metadata" / "qa-verdict.json").read_text(encoding="utf-8")) + assert qa_verdict["status"] == expected_status + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_builder_timeout_stops_before_qa(tmp_path): From bf6ff3e06261d484e3dd236e540cbcbb4252c0a0 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Fri, 10 Apr 2026 13:39:31 +0800 Subject: [PATCH 09/19] doc: progress prompt --- ...mpt_phase1_non_success_containerization.md | 0 
docs/progress/prompt_phase1_recovery_cards.md | 71 +++++++++++++++++++ 2 files changed, 71 insertions(+) rename prompt_phase1_non_success_containerization.md => docs/progress/prompt_phase1_non_success_containerization.md (100%) create mode 100644 docs/progress/prompt_phase1_recovery_cards.md diff --git a/prompt_phase1_non_success_containerization.md b/docs/progress/prompt_phase1_non_success_containerization.md similarity index 100% rename from prompt_phase1_non_success_containerization.md rename to docs/progress/prompt_phase1_non_success_containerization.md diff --git a/docs/progress/prompt_phase1_recovery_cards.md b/docs/progress/prompt_phase1_recovery_cards.md new file mode 100644 index 0000000..e89e709 --- /dev/null +++ b/docs/progress/prompt_phase1_recovery_cards.md @@ -0,0 +1,71 @@ +You are implementing one bounded Phase 1 task for CodingClaw. + +Read these documents first: +- docs/SYSTEM_BLUEPRINT.md +- docs/ARCHITECTURE_OVERVIEW.md +- docs/DEPLOYMENT_PLAN.md +- docs/STATE_STORE_SPEC.md +- docs/EXECUTOR_ADAPTER_CONTRACT.md +- docs/ARTIFACT_LAYOUT_SPEC.md +- docs/STATUS_MODEL.md +- docs/APPROVAL_CARD_SPEC.md +- docs/JOB_MANIFEST_SCHEMA.md +- docs/LANGUAGE_BOUNDARY_POLICY.md + +Task +Implement Phase 1 recovery-card archival and `pause_context` population for suspended run exits in the local loop. + +Goal +Build the smallest working implementation that preserves the current local Phase 1 builder-to-QA flow while adding the missing control-plane recovery objects and manifest/state recovery context required by the docs. 
+ +In scope +- `core/loop/phase1-local-flow.ts` +- `core/loop/state-store.ts` +- `ops/archive/approvals.ts` +- minimal new helpers under `ops/recovery/` if they reduce branching in the loop +- `core/contracts/types.ts` only for minimal type additions required by recovery-card payloads or archive records +- focused verification in `tests/integration/` and any directly related harness fixtures or smoke tests +- recovery control-plane artifacts under `jobs/<job_id>/approvals/<card_id>/...` +- `job-manifest.json` `approvals[]` and `pause_context` +- mirrored `jobs/<job_id>/state/...` and `state/...` recovery-facing files + +Out of scope +- mobile channel delivery or webhook integration +- owner decision intake, pause/resume commands, or actual resume execution +- takeover packet generation or Wuying session orchestration +- review executor behavior +- fixback or change-request workflow redesign +- policy-engine expansion for `FAILED_POLICY` +- unrelated refactors + +Constraints +- Preserve the single-node Phase 1 design. +- Reuse the existing job-state and run-exit vocabularies from `STATUS_MODEL.md`. +- Generate recovery cards only when the latest run leaves the job in a waiting state that needs owner or takeover recovery context. +- Keep recovery cards as control-plane artifacts under `approvals/`, not under `artifacts/runs/<run_id>/`. +- Keep repository-facing outputs in English; Chinese is allowed only for owner-facing recovery summaries in control-plane artifacts. +- Do not overwrite the existing approved plan/freeze approval archives when creating a recovery card. +- If a pending recovery card does not yet have a real owner decision, do not fabricate a fake resolved decision just to satisfy the current helper shape; adapt the archive flow minimally and honestly. +- Do not add new dependencies. +- Do not regress the current success path or the recently fixed non-success run bundle behavior.
+ +Deliverables +- minimal recovery-card creation and archival for suspended Phase 1 runs +- any minimal approval-archive support required so pending recovery cards can be stored without breaking existing decided approval archives +- `job-manifest.json` updates so `pause_context` explains suspended jobs and references the related recovery card +- mirrored `state/progress.en.md`, `state/decisions.en.md`, and `state/risk-register.en.md` updates that reflect the recovery gate +- focused verification for at least one waiting-owner recovery case and one waiting-takeover or approval-interrupt recovery case + +Acceptance criteria +- When builder or QA exits `FAILED_EXECUTION`, `FAILED_INFRA`, `TIMEOUT`, or `BUDGET_EXCEEDED`, the job maps to `AWAITING_OWNER`, archives a recovery card under `approvals/<card_id>/`, records the new approval entry in `job-manifest.json`, and populates `pause_context` with a non-empty reason, waiting target, resume action, paused timestamp, and related card ID. +- When builder or QA exits `AWAITING_APPROVAL`, `AWAITING_CREDENTIALS`, or `AWAITING_TAKEOVER`, the loop still stops without dispatching downstream work, and the mapped waiting state plus `pause_context` align with `STATUS_MODEL.md`. +- The archived recovery card contains the required recovery context from `APPROVAL_CARD_SPEC.md`: last exit reason, current freeze version, current story, latest evidence path, recommended next action, and resume gate. +- Recovery card artifacts stay outside per-run artifact indexes and remain referenced through canonical job-root-relative paths in `job-manifest.json`. +- Existing approved plan/freeze approval archives still work, and the current successful builder-plus-QA path is not regressed. +- The result matches the existing docs instead of redefining them. + +Execution instructions +1.
Explore the relevant docs and current code first, especially `core/loop/phase1-local-flow.ts`, `core/loop/state-store.ts`, `ops/archive/approvals.ts`, and the manifest/pause-context rules in `docs/APPROVAL_CARD_SPEC.md` plus `docs/JOB_MANIFEST_SCHEMA.md`. +2. Implement the minimum viable vertical slice for recovery-card archival and `pause_context` only. +3. Run focused verification for suspended-run recovery cases and the existing happy path. +4. Report changed files, verification performed, and any unresolved risks. From 87db173d1a629295bfd2da794eb334745548ea4a Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Fri, 10 Apr 2026 13:59:57 +0800 Subject: [PATCH 10/19] fix: fix --- adapters/generic-cli/adapter.ts | 2 +- adapters/generic-cli/docker-runtime.ts | 2 + core/contracts/types.ts | 16 +- core/loop/phase1-local-flow.ts | 225 +++++++++++++++++--- ops/archive/approvals.ts | 44 ++-- tests/integration/test_phase1_local_flow.py | 107 ++++++++++ 6 files changed, 348 insertions(+), 48 deletions(-) diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index 6984e95..9a01477 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -397,7 +397,7 @@ export class GenericCliAdapter { artifactIndex, workerOutput, runRoot, - taskPacketPath: envelope.task_packet_path, + taskPacketPath: materialization.host_task_packet_path, runResultPath, artifactIndexPath, commandLogPath, diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index 8781bad..b355b2b 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -48,6 +48,7 @@ export interface ContainerizedRunEnvelopeMaterialization { runtime: ContainerRuntimeConfig; host_envelope_path: string; container_envelope_path: string; + host_task_packet_path: string; } export interface DockerWorkerLaunchRequest { @@ -288,6 +289,7 @@ export async function materializeContainerizedRunEnvelope( 
runtime, host_envelope_path: hostEnvelopePath, container_envelope_path: containerEnvelopePath, + host_task_packet_path: hostContainerTaskPacketPath, }; } diff --git a/core/contracts/types.ts b/core/contracts/types.ts index 5331f9f..af24f9c 100644 --- a/core/contracts/types.ts +++ b/core/contracts/types.ts @@ -327,6 +327,16 @@ export interface ApprovalCardSnapshot { timeout_at: string; created_at: string; evidence_refs: string[]; + recovery_context?: { + last_exit_reason: RunExitStatus; + current_freeze_version: string; + current_story: string; + latest_evidence_path: string; + recommended_next_action: string; + resume_gate: "owner" | "takeover"; + paused_run_id: string; + paused_run_role: RunRole; + } | null; } export interface ApprovalDecisionReceipt { @@ -434,11 +444,11 @@ export interface JobManifestApprovalRecord { card_state: ApprovalCardState; card_type: string; requested_action: string; - decision: string; + decision: string | null; snapshot_path: string; - decision_path: string; + decision_path: string | null; summary_zh_ref: string; - decided_at: string; + decided_at: string | null; } export interface JobManifestArtifactRecord { diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index 3eb83d3..1dd5305 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -1,5 +1,6 @@ import { join } from "node:path"; import { GenericCliAdapter } from "../../adapters/generic-cli/adapter.ts"; +import { containerizeTaskPacket } from "../../adapters/generic-cli/docker-runtime.ts"; import { writeApprovalArchive } from "../../ops/archive/approvals.ts"; import { buildEnvironmentSnapshotMetadata, @@ -37,11 +38,13 @@ import type { ContractFreezeMetadata, JobManifest, JobManifestApprovalRecord, + JobManifestPauseContext, JobManifestRunRecord, JobManifestStoryRecord, JobState, RunResult, RunEnvelope, + RunExitStatus, RunRole, TaskPacket, } from "../contracts/types.ts"; @@ -156,6 +159,7 @@ async function 
assertFreshJobRoot(layout: ReturnType { const taskPacketPath = join(artifactRoot, "metadata", "task-packet.en.json"); const { expectedArtifacts, verificationTargets } = roleArtifacts(runRole); - const baseCommit = detectBaseCommit(repoRoot); const packetWithoutChecksum = await materializeJsonTemplate( join(repoRoot, "control", "fixtures", "phase1-local-task-packet.en.json"), @@ -202,6 +205,10 @@ function taskPacketDigest(taskPacket: TaskPacket): string { return sha256Text(`${JSON.stringify({ ...taskPacket, task_packet_sha256: "" }, null, 2)}\n`); } +function executionTaskPacket(taskPacket: TaskPacket): TaskPacket { + return containerizeTaskPacket(taskPacket, taskPacket); +} + async function buildRunEnvelope( repoRoot: string, taskPacket: TaskPacket, @@ -330,6 +337,20 @@ function buildFreezeMetadata( }; } +function approvalRelativePaths( + layout: ReturnType, + approvalRecord: Awaited>, +): string[] { + const relativePaths = [ + layout.relativeToJobRoot(approvalRecord.snapshot_path), + layout.relativeToJobRoot(approvalRecord.summary_path), + ]; + if (approvalRecord.decision_path !== null) { + relativePaths.push(layout.relativeToJobRoot(approvalRecord.decision_path)); + } + return uniqueStrings(relativePaths); +} + function buildManifestApprovalRecord( layout: ReturnType, approvalRecord: Awaited>, @@ -341,7 +362,8 @@ function buildManifestApprovalRecord( requested_action: approvalRecord.requested_action, decision: approvalRecord.decision, snapshot_path: layout.relativeToJobRoot(approvalRecord.snapshot_path), - decision_path: layout.relativeToJobRoot(approvalRecord.decision_path), + decision_path: + approvalRecord.decision_path === null ? 
null : layout.relativeToJobRoot(approvalRecord.decision_path), summary_zh_ref: layout.relativeToJobRoot(approvalRecord.summary_path), decided_at: approvalRecord.decided_at, }; @@ -399,6 +421,111 @@ function buildManifestStoryRecord( }; } +function emptyPauseContext(): JobManifestPauseContext { + return { + is_paused: false, + pause_reason: null, + waiting_on: null, + resume_action: null, + paused_at: null, + related_card_id: null, + expires_at: null, + }; +} + +function waitingOnForState(state: JobState): string | null { + if (state === "AWAITING_OWNER") { + return "owner"; + } + if (state === "AWAITING_TAKEOVER") { + return "takeover"; + } + return null; +} + +function latestEvidencePath(execution: AdapterExecutionResult): string { + const relativePath = execution.workerOutput.evidence_paths[0] ?? "reports/handoff.en.md"; + return `artifacts/runs/${execution.runResult.run_id}/${relativePath}`; +} + +function recoveryRiskLevel(status: RunExitStatus): string { + if ( + status === "FAILED_POLICY" || + status === "FAILED_EXECUTION" || + status === "FAILED_INFRA" || + status === "TIMEOUT" || + status === "BUDGET_EXCEEDED" + ) { + return "high"; + } + return "medium"; +} + +function buildRecoveryCard(taskPacket: TaskPacket, execution: AdapterExecutionResult): ApprovalCardSnapshot { + const jobState = mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); + const resumeGate = jobState === "AWAITING_TAKEOVER" ? 
"takeover" : "owner"; + const latestEvidenceRef = latestEvidencePath(execution); + const timeoutAt = new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString(); + return { + job_id: taskPacket.job_id, + card_id: `card-recovery-${execution.runResult.run_id}`, + card_state: "PENDING", + card_type: "recovery", + story_id: taskPacket.story.story_id, + freeze_version: taskPacket.freeze_version, + risk_level: recoveryRiskLevel(execution.runResult.status), + summary_zh: `运行 ${execution.runResult.run_id} 已以 ${execution.runResult.status} 停止,当前故事需要${resumeGate === "takeover" ? "接管" : "人工决策"}后继续。`, + requested_action: + resumeGate === "takeover" + ? "Review the blocked run and trigger takeover before continuing." + : "Review the blocked run and decide how to resume the job.", + candidate_actions: + resumeGate === "takeover" + ? ["trigger takeover", "resume job", "request revision"] + : ["resume job", "request revision", "trigger takeover"], + timeout_at: timeoutAt, + created_at: execution.runResult.ended_at, + evidence_refs: uniqueStrings([ + latestEvidenceRef, + `artifacts/runs/${execution.runResult.run_id}/metadata/run-result.json`, + `artifacts/runs/${execution.runResult.run_id}/reports/handoff.en.md`, + ]), + recovery_context: { + last_exit_reason: execution.runResult.status, + current_freeze_version: taskPacket.freeze_version, + current_story: taskPacket.story.story_id, + latest_evidence_path: latestEvidenceRef, + recommended_next_action: execution.workerOutput.next_action, + resume_gate: resumeGate, + paused_run_id: execution.runResult.run_id, + paused_run_role: execution.runResult.run_role, + }, + }; +} + +function buildPauseContext( + execution: AdapterExecutionResult | null, + approvalRecord: Awaited> | null, +): JobManifestPauseContext { + if (execution === null) { + return emptyPauseContext(); + } + const jobState = mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); + const waitingOn = waitingOnForState(jobState); + if (waitingOn === 
null) { + return emptyPauseContext(); + } + return { + is_paused: true, + pause_reason: execution.runResult.status, + waiting_on: waitingOn, + resume_action: execution.workerOutput.next_action, + paused_at: execution.runResult.ended_at, + related_card_id: approvalRecord?.card_id ?? null, + expires_at: approvalRecord?.timeout_at ?? null, + }; +} + function buildJobManifest( layout: ReturnType, taskPacket: TaskPacket, @@ -406,13 +533,16 @@ function buildJobManifest( planRecord: Awaited>, freezeRecord: Awaited>, approvalRecord: Awaited>, + approvalRecords: Array>>, executions: AdapterExecutionResult[], adapterId: string, ): JobManifest { const runs = buildManifestRunRecords(layout, executions); const storyRecord = buildManifestStoryRecord(taskPacket, executions); const latestRunId = runs.length > 0 ? runs[runs.length - 1].run_id : ""; - const createdAt = approvalRecord.decided_at; + const latestExecution = executions.length > 0 ? executions[executions.length - 1] : null; + const latestApprovalRecord = approvalRecords.length > 0 ? approvalRecords[approvalRecords.length - 1] : null; + const createdAt = approvalRecord.decided_at ?? nowIso(); const updatedAt = executions.length > 0 ? 
executions[executions.length - 1].runResult.ended_at : nowIso(); const status = storyRecord.queue_state; @@ -429,15 +559,7 @@ function buildJobManifest( active_story_id: taskPacket.story.story_id, current_run_id: latestRunId, approved_adapter_set: [adapterId], - pause_context: { - is_paused: false, - pause_reason: null, - waiting_on: null, - resume_action: null, - paused_at: null, - related_card_id: null, - expires_at: null, - }, + pause_context: buildPauseContext(latestExecution, latestApprovalRecord), language_policy: taskPacket.language_policy, budget_limits: taskPacket.budget_limits, time_limits: taskPacket.time_limits, @@ -452,7 +574,7 @@ function buildJobManifest( approval_card_id: approvalRecord.card_id, summary_zh_ref: layout.relativeToJobRoot(approvalRecord.summary_path), approval_state: approvalRecord.card_state, - approved_at: approvalRecord.decided_at, + approved_at: approvalRecord.decided_at ?? createdAt, }, freeze: { freeze_id: taskPacket.freeze_id, @@ -462,11 +584,11 @@ function buildJobManifest( checksum_path: layout.relativeToJobRoot(freezeRecord.checksum_path), hash: freezeRecord.hash, approval_card_id: approvalRecord.card_id, - approved_at: approvalRecord.decided_at, + approved_at: approvalRecord.decided_at ?? 
createdAt, }, stories: [storyRecord], runs, - approvals: [buildManifestApprovalRecord(layout, approvalRecord)], + approvals: approvalRecords.map((record) => buildManifestApprovalRecord(layout, record)), artifacts: { runs_root: "artifacts/runs", sessions_root: "artifacts/sessions", @@ -476,9 +598,7 @@ function buildJobManifest( layout.relativeToJobRoot(freezeRecord.path), layout.relativeToJobRoot(freezeRecord.json_path), layout.relativeToJobRoot(freezeRecord.checksum_path), - layout.relativeToJobRoot(approvalRecord.snapshot_path), - layout.relativeToJobRoot(approvalRecord.decision_path), - layout.relativeToJobRoot(approvalRecord.summary_path), + ...approvalRecords.flatMap((record) => approvalRelativePaths(layout, record)), "job-manifest.json", "checksums.txt", ]), @@ -525,7 +645,7 @@ async function buildChecksumRecords( layout: ReturnType, planRecord: Awaited>, freezeRecord: Awaited>, - approvalRecord: Awaited>, + approvalRecords: Array>>, executions: AdapterExecutionResult[], extraRelativePaths: string[] = [], ): Promise { @@ -536,6 +656,10 @@ async function buildChecksumRecords( for (const file of files) { runRelativePaths.push(`${runRootRelative}/${file}`); } + const taskPacketPath = layout.relativeToJobRoot(execution.taskPacketPath); + if (!taskPacketPath.startsWith(`${runRootRelative}/`)) { + runRelativePaths.push(taskPacketPath); + } } const relativePaths = uniqueStrings([ @@ -543,9 +667,7 @@ async function buildChecksumRecords( layout.relativeToJobRoot(freezeRecord.path), layout.relativeToJobRoot(freezeRecord.json_path), layout.relativeToJobRoot(freezeRecord.checksum_path), - layout.relativeToJobRoot(approvalRecord.snapshot_path), - layout.relativeToJobRoot(approvalRecord.decision_path), - layout.relativeToJobRoot(approvalRecord.summary_path), + ...approvalRecords.flatMap((record) => approvalRelativePaths(layout, record)), "job-manifest.json", ...runRelativePaths, ...extraRelativePaths, @@ -586,6 +708,7 @@ export async function runPhase1Local(repoRoot: 
string): Promise>> = [approvalRecord]; const allExpectedArtifacts = uniqueStrings([ ...builderTaskPacket.story.expected_artifacts, ...qaTaskPacketPreview.story.expected_artifacts, @@ -654,8 +782,8 @@ export async function runPhase1Local(repoRoot: string): Promise { const snapshotPath = `${approvalRoot}/approval-card.json`; - const decisionPath = `${approvalRoot}/decision.json`; + const decisionPath = decision === null ? null : `${approvalRoot}/decision.json`; const summaryPath = `${approvalRoot}/summary.zh.md`; const summaryText = renderApprovalSummary(card, decision); const snapshotText = `${JSON.stringify(card, null, 2)}\n`; - const decisionText = `${JSON.stringify(decision, null, 2)}\n`; await writeJson(snapshotPath, card); - await writeJson(decisionPath, decision); + if (decisionPath !== null) { + await writeJson(decisionPath, decision); + } await writeText(summaryPath, summaryText); return { @@ -53,12 +72,13 @@ export async function writeApprovalArchive( decision_path: decisionPath, summary_path: summaryPath, snapshot_checksum: sha256Text(snapshotText), - decision_checksum: sha256Text(decisionText), + decision_checksum: decision === null ? null : sha256Text(`${JSON.stringify(decision, null, 2)}\n`), summary_checksum: sha256Text(summaryText), card_state: card.card_state, card_type: card.card_type, requested_action: card.requested_action, - decision: decision.decision, - decided_at: decision.decided_at, + decision: decision?.decision ?? null, + decided_at: decision?.decided_at ?? 
null, + timeout_at: card.timeout_at, }; } diff --git a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index 04a6220..b68c554 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -57,6 +57,21 @@ def dependency_snapshot_digest(repo_root: Path) -> str: return hashlib.sha256("\n".join(inputs).encode("utf-8")).hexdigest() +def init_git_repo(repo_root: Path) -> str: + subprocess.run(["git", "init", "-b", "main"], cwd=repo_root, capture_output=True, text=True, check=True) + subprocess.run(["git", "config", "user.email", "tests@example.com"], cwd=repo_root, capture_output=True, text=True, check=True) + subprocess.run(["git", "config", "user.name", "Tests"], cwd=repo_root, capture_output=True, text=True, check=True) + subprocess.run(["git", "add", "."], cwd=repo_root, capture_output=True, text=True, check=True) + subprocess.run(["git", "commit", "-m", "initial"], cwd=repo_root, capture_output=True, text=True, check=True) + return subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo_root, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + + def write_fake_docker(tmp_path: Path) -> Path: script_path = tmp_path / "fake-docker.py" script_path.write_text( @@ -65,6 +80,7 @@ def write_fake_docker(tmp_path: Path) -> Path: #!/usr/bin/env python3 import json import os + import subprocess import sys import time from pathlib import Path @@ -137,6 +153,15 @@ def task_packet_for(envelope: dict, mounts: list[dict[str, str]]) -> dict: return load_json(task_packet_path) + def mutate_head(repo_path: str) -> None: + subprocess.run(["git", "config", "user.email", "tests@example.com"], cwd=repo_path, capture_output=True, text=True, check=True) + subprocess.run(["git", "config", "user.name", "Tests"], cwd=repo_path, capture_output=True, text=True, check=True) + marker = Path(repo_path) / ".phase1-head-shift" + marker.write_text("shift\\n", encoding="utf-8") + 
subprocess.run(["git", "add", ".phase1-head-shift"], cwd=repo_path, capture_output=True, text=True, check=True) + subprocess.run(["git", "commit", "-m", "builder-shift"], cwd=repo_path, capture_output=True, text=True, check=True) + + def create_builder_outputs(envelope: dict, task_packet: dict, artifact_root: Path) -> dict: write_text( artifact_root / "reports" / "implementation-summary.en.md", @@ -282,6 +307,8 @@ def main() -> int: if envelope["run_role"] == "builder": output = create_builder_outputs(envelope, task_packet, artifact_root) + if mode == "builder_head_shift": + mutate_head(map_path(envelope["repo_path"], mounts)) else: if mode == "qa_failed_infra": sys.stderr.write("docker: qa container failed before worker start\\n") @@ -336,6 +363,34 @@ def assert_builder_failure_bundle(job_root: Path) -> None: assert relative_path in indexed_paths +def assert_recovery_pause_context(manifest: dict, job_root: Path, expected_status: str) -> None: + pause_context = manifest["pause_context"] + assert pause_context["is_paused"] is True + assert pause_context["pause_reason"] == expected_status + assert pause_context["waiting_on"] == "owner" + assert pause_context["resume_action"] + assert pause_context["paused_at"] + assert pause_context["related_card_id"] + assert pause_context["expires_at"] + + recovery_record = manifest["approvals"][-1] + assert recovery_record["card_id"] == pause_context["related_card_id"] + assert recovery_record["card_type"] == "recovery" + assert recovery_record["card_state"] == "PENDING" + assert recovery_record["decision"] is None + assert recovery_record["decision_path"] is None + + recovery_root = job_root / "approvals" / recovery_record["card_id"] + recovery_card = json.loads((recovery_root / "approval-card.json").read_text(encoding="utf-8")) + + assert (recovery_root / "summary.zh.md").exists() + assert not (recovery_root / "decision.json").exists() + assert recovery_card["recovery_context"]["last_exit_reason"] == expected_status + assert 
recovery_card["recovery_context"]["latest_evidence_path"] + assert recovery_card["recovery_context"]["recommended_next_action"] + assert recovery_card["recovery_context"]["resume_gate"] == "owner" + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_rerun_rejects_mutating_existing_archive(tmp_path): @@ -414,6 +469,56 @@ def test_phase1_local_builder_container_materialization_uses_read_only_inputs_an assert mounts["/work/runtime-home"].get("readonly") != "true" assert filesystem_write_scope == {"repo", "run-artifacts", "runtime-home"} + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + freeze = json.loads((job_root / "contract-freeze.json").read_text(encoding="utf-8")) + builder_run = next(run for run in manifest["runs"] if run["run_role"] == "builder") + expected_task_packet_path = builder_run["task_packet_path"] + + assert expected_task_packet_path.endswith(f"/envelopes/container/task-packets/{builder_envelope['run_id']}.json") + assert builder_run["task_packet_path"] != f"artifacts/runs/{builder_envelope['run_id']}/metadata/task-packet.en.json" + assert json.loads((job_root / expected_task_packet_path).read_text(encoding="utf-8")) == builder_task_packet + assert expected_task_packet_path in (job_root / "checksums.txt").read_text(encoding="utf-8") + assert freeze["task_packet_digests"][builder_envelope["run_id"]] == builder_task_packet["task_packet_sha256"] + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_qa_packet_keeps_freeze_base_commit_when_builder_moves_head(tmp_path): + repo_root = export_repo(tmp_path) + initial_commit = init_git_repo(repo_root) + fake_docker = write_fake_docker(tmp_path) + capture_dir = tmp_path / "captures" + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + 
"CODINGCLAW_FAKE_DOCKER_MODE": "builder_head_shift", + "CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR": str(capture_dir), + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + current_head = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo_root, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + job_root = repo_root / "jobs" / "job-phase1-local" + freeze = json.loads((job_root / "contract-freeze.json").read_text(encoding="utf-8")) + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + qa_capture = load_capture(capture_dir, "qa") + + assert current_head != initial_commit + assert freeze["base_commit"] == initial_commit + assert manifest["base_commit"] == initial_commit + assert qa_capture["task_packet"]["base_commit"] == initial_commit + assert qa_capture["task_packet"]["base_commit"] != current_head + assert freeze["task_packet_digests"][qa_capture["task_packet"]["run_id"]] == qa_capture["task_packet"]["task_packet_sha256"] + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") @@ -438,6 +543,7 @@ def test_phase1_local_builder_failed_infra_stops_before_qa(tmp_path): assert live_trace == archived_trace assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() assert_builder_failure_bundle(job_root) + assert_recovery_pause_context(manifest, job_root, "FAILED_INFRA") @pytest.mark.integration @@ -536,6 +642,7 @@ def test_phase1_local_qa_non_success_writes_required_qa_bundle(tmp_path, mode, e qa_verdict = json.loads((qa_run_root / "metadata" / "qa-verdict.json").read_text(encoding="utf-8")) assert qa_verdict["status"] == expected_status + assert_recovery_pause_context(manifest, job_root, expected_status) @pytest.mark.integration From 5497fafa1da4139a4fd512e5968380046d18cff5 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Fri, 10 Apr 2026 14:48:20 +0800 Subject: [PATCH 11/19] fix: canonicalize phase1 task 
packets and recovery mirrors - bind freeze, manifest and checksums to canonical run-root task packets - archive recovery cards before mirroring state and expose recovery metadata - update integration coverage, harness fixtures and progress prompts --- adapters/generic-cli/adapter.ts | 2 +- adapters/generic-cli/docker-runtime.ts | 48 +--------- core/loop/phase1-local-flow.ts | 92 ++++++++----------- core/loop/state-store.ts | 76 ++++++++++++--- ...mpt_phase1_non_success_containerization.md | 7 ++ docs/progress/prompt_phase1_recovery_cards.md | 7 ++ ops/archive/approvals.ts | 6 ++ tests/harness/fixtures/samples.py | 23 +++-- tests/harness/runners/scenario.py | 17 ++-- tests/integration/test_phase1_local_flow.py | 38 ++++++-- 10 files changed, 186 insertions(+), 130 deletions(-) diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index 9a01477..165d712 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -397,7 +397,7 @@ export class GenericCliAdapter { artifactIndex, workerOutput, runRoot, - taskPacketPath: materialization.host_task_packet_path, + taskPacketPath: materialization.canonical_task_packet_path, runResultPath, artifactIndexPath, commandLogPath, diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index b355b2b..5489a3d 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -1,5 +1,5 @@ import { basename, dirname, isAbsolute, join, resolve } from "node:path"; -import { ensureDir, readJson, readText, sha256Text, toPosixPath, uniqueStrings, writeJson } from "../../core/loop/support.ts"; +import { ensureDir, readText, sha256Text, toPosixPath, writeJson } from "../../core/loop/support.ts"; import type { ContainerPathMap, ContainerPathMount, @@ -7,7 +7,6 @@ import type { RunEnvelope, RunExitStatus, RunRole, - TaskPacket, } from "../../core/contracts/types.ts"; export const CONTAINER_PATHS = { @@ -48,7 +47,7 @@ export 
interface ContainerizedRunEnvelopeMaterialization { runtime: ContainerRuntimeConfig; host_envelope_path: string; container_envelope_path: string; - host_task_packet_path: string; + canonical_task_packet_path: string; } export interface DockerWorkerLaunchRequest { @@ -100,10 +99,6 @@ function runRootContainerPath(runRoot: string): string { return `${CONTAINER_PATHS.artifacts}/runs/${basename(normalizeHostPath(runRoot))}`; } -function taskPacketDigest(taskPacket: TaskPacket): string { - return sha256Text(`${JSON.stringify({ ...taskPacket, task_packet_sha256: "" }, null, 2)}\n`); -} - export function resolveDockerWorkerImage(runRole: RunRole): string { if (runRole !== "builder" && runRole !== "qa") { throw new Error(`unsupported docker worker role: ${runRole}`); @@ -198,23 +193,6 @@ export function buildDockerPathMapping(paths: DockerPathMappingRequest): DockerP ]); } -export function containerizeTaskPacket(taskPacket: TaskPacket, paths: DockerMappedPaths): TaskPacket { - const mapper = buildDockerPathMapping(paths); - const mappedTaskPacket: TaskPacket = { - ...taskPacket, - repo_path: mapper.mapPath(taskPacket.repo_path), - state_path: mapper.mapPath(taskPacket.state_path), - artifact_path: mapper.mapPath(taskPacket.artifact_path), - runtime_home: mapper.mapPath(taskPacket.runtime_home), - previous_handoff_path: mapper.mapPath(taskPacket.previous_handoff_path), - requested_capabilities: uniqueStrings([...taskPacket.requested_capabilities, "container_control"]), - }; - return { - ...mappedTaskPacket, - task_packet_sha256: taskPacketDigest(mappedTaskPacket), - }; -} - function buildContainerPathMap(envelope: RunEnvelope, mapper: DockerPathMapper): ContainerPathMap { return { repo_path: mapper.mapPath(envelope.repo_path), @@ -233,17 +211,8 @@ export async function materializeContainerizedRunEnvelope( ): Promise { const hostEnvelopePath = join(envelope.runtime_home, "envelopes", `${envelope.run_id}.json`); const containerEnvelopePath = join(envelope.runtime_home, 
"envelopes", "container", `${envelope.run_id}.json`); - const hostContainerTaskPacketPath = join( - envelope.runtime_home, - "envelopes", - "container", - "task-packets", - `${envelope.run_id}.json`, - ); const cacheHostPath = join(envelope.runtime_home, "cache"); const mapper = buildDockerPathMapping(envelope); - const containerTaskPacket = containerizeTaskPacket(await readJson(envelope.task_packet_path), envelope); - const containerTaskPacketPath = mapper.mapPath(hostContainerTaskPacketPath); const runtime: ContainerRuntimeConfig = { runtime: "docker", image: resolveDockerWorkerImage(envelope.run_role), @@ -251,15 +220,10 @@ export async function materializeContainerizedRunEnvelope( envelope_host_path: hostEnvelopePath, envelope_container_path: mapper.mapPath(containerEnvelopePath), mounts: mapper.mounts, - container_paths: { - ...buildContainerPathMap(envelope, mapper), - task_packet_path: containerTaskPacketPath, - }, + container_paths: buildContainerPathMap(envelope, mapper), }; - const requestedCapabilities = uniqueStrings([...envelope.requested_capabilities, "container_control"]); const hostEnvelope: RunEnvelope = { ...envelope, - requested_capabilities: requestedCapabilities, container_runtime: runtime, }; const containerEnvelope: RunEnvelope = { @@ -269,7 +233,7 @@ export async function materializeContainerizedRunEnvelope( artifact_path: runtime.container_paths.artifact_path, runtime_home: runtime.container_paths.runtime_home, task_packet_path: runtime.container_paths.task_packet_path, - task_packet_sha256: containerTaskPacket.task_packet_sha256, + task_packet_sha256: envelope.task_packet_sha256, previous_handoff_path: runtime.container_paths.previous_handoff_path, approval_snapshot_path: runtime.container_paths.approval_snapshot_path, trace_context: runtime.container_paths.trace_context, @@ -277,11 +241,9 @@ export async function materializeContainerizedRunEnvelope( await ensureDir(dirname(hostEnvelopePath)); await 
ensureDir(dirname(containerEnvelopePath)); - await ensureDir(dirname(hostContainerTaskPacketPath)); await ensureDir(cacheHostPath); await writeJson(hostEnvelopePath, hostEnvelope); await writeJson(containerEnvelopePath, containerEnvelope); - await writeJson(hostContainerTaskPacketPath, containerTaskPacket); return { host_envelope: hostEnvelope, @@ -289,7 +251,7 @@ export async function materializeContainerizedRunEnvelope( runtime, host_envelope_path: hostEnvelopePath, container_envelope_path: containerEnvelopePath, - host_task_packet_path: hostContainerTaskPacketPath, + canonical_task_packet_path: envelope.task_packet_path, }; } diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index 1dd5305..fe9ea86 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -1,6 +1,5 @@ import { join } from "node:path"; import { GenericCliAdapter } from "../../adapters/generic-cli/adapter.ts"; -import { containerizeTaskPacket } from "../../adapters/generic-cli/docker-runtime.ts"; import { writeApprovalArchive } from "../../ops/archive/approvals.ts"; import { buildEnvironmentSnapshotMetadata, @@ -205,10 +204,6 @@ function taskPacketDigest(taskPacket: TaskPacket): string { return sha256Text(`${JSON.stringify({ ...taskPacket, task_packet_sha256: "" }, null, 2)}\n`); } -function executionTaskPacket(taskPacket: TaskPacket): TaskPacket { - return containerizeTaskPacket(taskPacket, taskPacket); -} - async function buildRunEnvelope( repoRoot: string, taskPacket: TaskPacket, @@ -518,14 +513,27 @@ function buildPauseContext( return { is_paused: true, pause_reason: execution.runResult.status, - waiting_on: waitingOn, - resume_action: execution.workerOutput.next_action, + waiting_on: approvalRecord?.waiting_on ?? waitingOn, + resume_action: approvalRecord?.resume_action ?? execution.workerOutput.next_action, paused_at: execution.runResult.ended_at, related_card_id: approvalRecord?.card_id ?? null, expires_at: approvalRecord?.timeout_at ?? 
null, }; } +function buildRunRecoveryState( + approvalRecord: Awaited> | null, +): { card_id: string; waiting_on: "owner" | "takeover"; resume_action: string } | null { + if (approvalRecord === null || approvalRecord.waiting_on === null || approvalRecord.resume_action === null) { + return null; + } + return { + card_id: approvalRecord.card_id, + waiting_on: approvalRecord.waiting_on, + resume_action: approvalRecord.resume_action, + }; +} + function buildJobManifest( layout: ReturnType, taskPacket: TaskPacket, @@ -656,10 +664,6 @@ async function buildChecksumRecords( for (const file of files) { runRelativePaths.push(`${runRootRelative}/${file}`); } - const taskPacketPath = layout.relativeToJobRoot(execution.taskPacketPath); - if (!taskPacketPath.startsWith(`${runRootRelative}/`)) { - runRelativePaths.push(taskPacketPath); - } } const relativePaths = uniqueStrings([ @@ -782,8 +786,8 @@ export async function runPhase1Local(repoRoot: string): Promise> | null = null; + const builderJobState = mapRunExitToJobState(builderExecution.runResult.status, builderExecution.runResult.run_role); + if (builderJobState === "AWAITING_OWNER" || builderJobState === "AWAITING_TAKEOVER") { const recoveryCard = buildRecoveryCard(builderTaskPacket, builderExecution); - approvalRecords.push(await writeApprovalArchive(layout.approvalRoot(recoveryCard.card_id), recoveryCard, null)); + builderRecoveryRecord = await writeApprovalArchive(layout.approvalRoot(recoveryCard.card_id), recoveryCard, null); + approvalRecords.push(builderRecoveryRecord); } - const builderManifestWithRecovery = buildJobManifest( + await stateStore.recordRun(builderTaskPacket, builderExecution, buildRunRecoveryState(builderRecoveryRecord)); + + const builderManifest = buildJobManifest( layout, builderTaskPacket, baseBranch, @@ -856,11 +852,11 @@ export async function runPhase1Local(repoRoot: string): Promise> | null = null; + const qaJobState = mapRunExitToJobState(qaExecution.runResult.status, 
qaExecution.runResult.run_role); + if (qaJobState === "AWAITING_OWNER" || qaJobState === "AWAITING_TAKEOVER") { const recoveryCard = buildRecoveryCard(qaTaskPacket, qaExecution); - approvalRecords.push(await writeApprovalArchive(layout.approvalRoot(recoveryCard.card_id), recoveryCard, null)); + qaRecoveryRecord = await writeApprovalArchive(layout.approvalRoot(recoveryCard.card_id), recoveryCard, null); + approvalRecords.push(qaRecoveryRecord); } - const finalManifestWithRecovery = buildJobManifest( + await stateStore.recordRun(qaTaskPacket, qaExecution, buildRunRecoveryState(qaRecoveryRecord)); + + const finalManifest = buildJobManifest( layout, qaTaskPacket, baseBranch, @@ -921,16 +909,16 @@ export async function runPhase1Local(repoRoot: string): Promise { + async recordRun( + taskPacket: TaskPacket, + execution: AdapterExecutionResult, + recoveryState: RunRecoveryState | null = null, + ): Promise { const jobState = mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); const loopMetrics = await readJson(this.loopMetricsPath); const storyQueue = await readJson(this.storyQueuePath); @@ -432,6 +474,10 @@ export class StateStore { ? "The builder completed the local slice and handed off to QA." : execution.runResult.run_role === "qa" && execution.runResult.status === "SUCCESS" ? "QA closed the local proof-of-concept story and the job is ready to archive." + : jobState === "AWAITING_OWNER" && recoveryState !== null + ? `${execution.runResult.run_role} ended with status ${execution.runResult.status}. Waiting for owner decision via recovery card ${recoveryState.card_id}.` + : jobState === "AWAITING_TAKEOVER" && recoveryState !== null + ? `${execution.runResult.run_role} ended with status ${execution.runResult.status}. 
Waiting for takeover via recovery card ${recoveryState.card_id}.` : `${execution.runResult.run_role} ended with status ${execution.runResult.status}.`; const existingDecisions = await readText(this.decisionsPath); @@ -439,11 +485,15 @@ export class StateStore { await this.writeMirroredJson(this.loopMetricsPath, "loop-metrics.json", loopMetrics); await this.writeMirroredJson(this.traceIndexPath, "trace-index.json", traceIndex); await this.writeMirroredText(this.handoffPath, "handoff.en.md", handoffContent); - await this.writeMirroredText(this.riskRegisterPath, "risk-register.en.md", renderRiskRegister(blockers)); + await this.writeMirroredText( + this.riskRegisterPath, + "risk-register.en.md", + renderRiskRegister(jobState, blockers, recoveryState), + ); await this.writeMirroredText( this.decisionsPath, "decisions.en.md", - `${existingDecisions.trimEnd()}\n\n${runDecisionEntry(execution.runResult.run_id, jobState)}`, + `${existingDecisions.trimEnd()}\n\n${runDecisionEntry(execution.runResult.run_id, jobState, recoveryState)}`, ); await this.writeMirroredText( this.progressPath, @@ -467,7 +517,11 @@ export class StateStore { const existingDecisions = await readText(this.decisionsPath); await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); - await this.writeMirroredText(this.riskRegisterPath, "risk-register.en.md", renderRiskRegister([reason])); + await this.writeMirroredText( + this.riskRegisterPath, + "risk-register.en.md", + renderRiskRegister("INTEGRITY_FAILED", [reason]), + ); await this.writeMirroredText( this.decisionsPath, "decisions.en.md", diff --git a/docs/progress/prompt_phase1_non_success_containerization.md b/docs/progress/prompt_phase1_non_success_containerization.md index a854f07..c504e06 100644 --- a/docs/progress/prompt_phase1_non_success_containerization.md +++ b/docs/progress/prompt_phase1_non_success_containerization.md @@ -1,5 +1,12 @@ You are implementing one bounded Phase 1 task for CodingClaw. 
+Current behavior +- Dockerized Phase 1 now keeps exactly one task packet per run at `artifacts/runs//metadata/task-packet.en.json`. +- Container execution reads that canonical packet through the mapped `RunEnvelope.task_packet_path`; path translation lives in `RunEnvelope.container_runtime.container_paths`. +- `runtime-home/.../envelopes/container/task-packets/` transport copies are no longer generated, and the task packet is no longer rewritten with containerized paths or injected `container_control`. +- `contract-freeze.json.task_packet_digests`, `job-manifest.json.runs[].task_packet_path`, and `checksums.txt` bind only the canonical run-root packet. +- Builder or QA non-success exits still stop the loop before unsupported downstream work, archive the required run bundle, and skip `artifacts/final/final-summary.en.md` unless QA reaches `SUCCESS`. + Read these documents first: - docs/SYSTEM_BLUEPRINT.md - docs/ARCHITECTURE_OVERVIEW.md diff --git a/docs/progress/prompt_phase1_recovery_cards.md b/docs/progress/prompt_phase1_recovery_cards.md index e89e709..b6dfece 100644 --- a/docs/progress/prompt_phase1_recovery_cards.md +++ b/docs/progress/prompt_phase1_recovery_cards.md @@ -1,5 +1,12 @@ You are implementing one bounded Phase 1 task for CodingClaw. +Current behavior +- Recovery cards are archived under `approvals//` before archived `state/` and mirrored `state/` files are updated. +- Pending recovery archive records now expose `waiting_on`, `resume_action`, and `paused_run_id` so loop state can consume them directly. +- `job-manifest.json.pause_context` is populated from the archived recovery card when the latest run leaves the job in `AWAITING_OWNER` or `AWAITING_TAKEOVER`. +- `state/decisions.en.md`, `state/progress.en.md`, and `state/risk-register.en.md` now mirror the recovery gate with the archived `card_id` and waiting target instead of reusing fixback wording. 
+- Waiting-owner states describe owner recovery review, and waiting-takeover states describe takeover gating. + Read these documents first: - docs/SYSTEM_BLUEPRINT.md - docs/ARCHITECTURE_OVERVIEW.md diff --git a/ops/archive/approvals.ts b/ops/archive/approvals.ts index a09ec4a..9bc3f1a 100644 --- a/ops/archive/approvals.ts +++ b/ops/archive/approvals.ts @@ -15,6 +15,9 @@ export interface ApprovalArchiveRecord { decision: string | null; decided_at: string | null; timeout_at: string; + waiting_on: "owner" | "takeover" | null; + resume_action: string | null; + paused_run_id: string | null; } function renderApprovalSummary(card: ApprovalCardSnapshot, decision: ApprovalDecisionReceipt | null): string { @@ -80,5 +83,8 @@ export async function writeApprovalArchive( decision: decision?.decision ?? null, decided_at: decision?.decided_at ?? null, timeout_at: card.timeout_at, + waiting_on: card.recovery_context?.resume_gate ?? null, + resume_action: card.recovery_context === null || card.recovery_context === undefined ? null : card.requested_action, + paused_run_id: card.recovery_context?.paused_run_id ?? 
null, }; } diff --git a/tests/harness/fixtures/samples.py b/tests/harness/fixtures/samples.py index 5d697d6..8f392a4 100644 --- a/tests/harness/fixtures/samples.py +++ b/tests/harness/fixtures/samples.py @@ -269,14 +269,23 @@ def build_job_manifest( approval_records: list[dict], ) -> dict: job_state = map_run_exit_to_job_state(run_result["status"]) + paused = job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"} + waiting_on = "owner" if job_state == "AWAITING_OWNER" else ("takeover" if job_state == "AWAITING_TAKEOVER" else None) + latest_approval = approval_records[-1] if paused and approval_records else None pause_context = { - "is_paused": job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"}, - "pause_reason": run_result["status"] if job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"} else "", - "waiting_on": "owner" if job_state == "AWAITING_OWNER" else ("takeover" if job_state == "AWAITING_TAKEOVER" else ""), - "resume_action": "resume-run" if job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"} else "", - "paused_at": "2026-04-08T00:05:00Z" if job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"} else "", - "related_card_id": approval_records[-1]["card_id"] if approval_records else "", - "expires_at": "", + "is_paused": paused, + "pause_reason": run_result["status"] if paused else None, + "waiting_on": (latest_approval or {}).get("waiting_on", waiting_on) if paused else None, + "resume_action": ( + (latest_approval or {}).get("resume_action") + or (latest_approval or {}).get("requested_action") + or ("Wait for owner input before continuing." 
if waiting_on == "owner" else "Wait for takeover before continuing.") + ) + if paused + else None, + "paused_at": "2026-04-08T00:05:00Z" if paused else None, + "related_card_id": latest_approval["card_id"] if latest_approval else None, + "expires_at": (latest_approval or {}).get("timeout_at") if paused else None, } return { "job_id": job_id, diff --git a/tests/harness/runners/scenario.py b/tests/harness/runners/scenario.py index 78df314..97098d3 100644 --- a/tests/harness/runners/scenario.py +++ b/tests/harness/runners/scenario.py @@ -81,7 +81,7 @@ def _default_mandatory_statuses(exit_status: str) -> dict[str, str]: def _expected_artifacts(scenario: Scenario) -> list[str]: artifacts = [ - "task-packet.json", + "metadata/task-packet.en.json", "logs/command-log.txt", "metadata/run-result.json", "metadata/trace-index.json", @@ -178,7 +178,7 @@ def materialize_scenario(root: Path, scenario: Scenario) -> dict: freeze_json["base_commit"] = "tampered-commit" _write_json(freeze_json_path, freeze_json) - _write_json(run_root / "task-packet.json", task_packet) + _write_json(run_root / "metadata" / "task-packet.en.json", task_packet) acceptance_statuses = scenario.acceptance_statuses or _default_acceptance_statuses(story, scenario.exit_status) mandatory_check_statuses = scenario.mandatory_check_statuses or _default_mandatory_statuses(scenario.exit_status) @@ -241,11 +241,14 @@ def materialize_scenario(root: Path, scenario: Scenario) -> dict: "card_state": approval_card["status"], "card_type": "approval", "requested_action": approval_card["requested_action"], - "decision": "", + "decision": None, "snapshot_path": f"artifacts/runs/{scenario.run_id}/approvals/{scenario.approval_card_id}/approval-card.json", - "decision_path": "", + "decision_path": None, "summary_zh_ref": f"artifacts/runs/{scenario.run_id}/approvals/{scenario.approval_card_id}/summary.zh.md", - "decided_at": "", + "decided_at": None, + "waiting_on": "owner" if scenario.exit_status != "AWAITING_TAKEOVER" else 
"takeover", + "resume_action": approval_card["requested_action"], + "timeout_at": "2026-04-09T00:00:00Z", } if scenario.approval_decided: decision_payload = build_approval_card( @@ -303,7 +306,7 @@ def materialize_scenario(root: Path, scenario: Scenario) -> dict: freeze_checksum=sha256_file(freeze_path), run_result=run_result, run_root=f"artifacts/runs/{scenario.run_id}", - task_packet_path=f"artifacts/runs/{scenario.run_id}/task-packet.json", + task_packet_path=f"artifacts/runs/{scenario.run_id}/metadata/task-packet.en.json", artifact_index_path=f"artifacts/runs/{scenario.run_id}/metadata/artifact-index.json", handoff_path=f"artifacts/runs/{scenario.run_id}/reports/handoff.en.md", active_story=story, @@ -334,7 +337,7 @@ def materialize_scenario(root: Path, scenario: Scenario) -> dict: "freeze_path": freeze_path, "freeze_json_path": freeze_json_path, "freeze_checksum_path": freeze_checksum_path, - "task_packet_path": run_root / "task-packet.json", + "task_packet_path": run_root / "metadata" / "task-packet.en.json", "run_result_path": run_root / "metadata" / "run-result.json", "trace_index_path": run_root / "metadata" / "trace-index.json", "artifact_index_path": run_root / "metadata" / "artifact-index.json", diff --git a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index b68c554..51fe671 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -391,6 +391,18 @@ def assert_recovery_pause_context(manifest: dict, job_root: Path, expected_statu assert recovery_card["recovery_context"]["resume_gate"] == "owner" +def assert_recovery_state_mirror(job_root: Path, recovery_card_id: str) -> None: + decisions = (job_root / "state" / "decisions.en.md").read_text(encoding="utf-8") + progress = (job_root / "state" / "progress.en.md").read_text(encoding="utf-8") + risk_register = (job_root / "state" / "risk-register.en.md").read_text(encoding="utf-8") + + assert f"- card_id: 
{recovery_card_id}" in decisions + assert "Wait for owner input before continuing." in decisions + assert "Wait for owner input before continuing." in progress + assert "fixback" not in risk_register.lower() + assert "owner review is required" in risk_register.lower() + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_rerun_rejects_mutating_existing_archive(tmp_path): @@ -448,14 +460,21 @@ def test_phase1_local_builder_container_materialization_uses_read_only_inputs_an builder_capture = load_capture(capture_dir, "builder") builder_task_packet = builder_capture["task_packet"] builder_envelope = builder_capture["envelope"] + container_paths = builder_envelope["container_runtime"]["container_paths"] mounts = {mount["target"]: mount for mount in builder_capture["mounts"]} + job_root = repo_root / "jobs" / "job-phase1-local" - assert builder_task_packet["repo_path"] == "/work/repo" - assert builder_task_packet["state_path"] == "/work/state" - assert builder_task_packet["runtime_home"] == "/work/runtime-home" - assert builder_task_packet["artifact_path"] == builder_envelope["artifact_path"] - assert builder_task_packet["artifact_path"].endswith(f"/artifacts/runs/{builder_envelope['run_id']}") - assert str(repo_root) not in json.dumps(builder_task_packet, ensure_ascii=False) + assert builder_task_packet["repo_path"] == repo_root.as_posix() + assert builder_task_packet["state_path"] == (job_root / "state").as_posix() + assert builder_task_packet["runtime_home"] == (job_root / "runtime-home" / "phase1-local").as_posix() + assert builder_task_packet["artifact_path"] == (job_root / "artifacts" / "runs" / builder_envelope["run_id"]).as_posix() + assert "container_control" not in builder_task_packet["requested_capabilities"] + assert container_paths["repo_path"] == "/work/repo" + assert container_paths["state_path"] == "/work/state" + assert container_paths["runtime_home"] == "/work/runtime-home" + 
assert container_paths["artifact_path"] == builder_envelope["artifact_path"] + assert container_paths["task_packet_path"] == builder_envelope["task_packet_path"] + assert builder_envelope["task_packet_path"].endswith(f"/artifacts/runs/{builder_envelope['run_id']}/metadata/task-packet.en.json") capability_manifest = json.loads( (repo_root / "adapters" / "generic-cli" / "adapter-capability.json").read_text(encoding="utf-8") @@ -469,15 +488,14 @@ def test_phase1_local_builder_container_materialization_uses_read_only_inputs_an assert mounts["/work/runtime-home"].get("readonly") != "true" assert filesystem_write_scope == {"repo", "run-artifacts", "runtime-home"} - job_root = repo_root / "jobs" / "job-phase1-local" manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) freeze = json.loads((job_root / "contract-freeze.json").read_text(encoding="utf-8")) builder_run = next(run for run in manifest["runs"] if run["run_role"] == "builder") expected_task_packet_path = builder_run["task_packet_path"] - assert expected_task_packet_path.endswith(f"/envelopes/container/task-packets/{builder_envelope['run_id']}.json") - assert builder_run["task_packet_path"] != f"artifacts/runs/{builder_envelope['run_id']}/metadata/task-packet.en.json" + assert expected_task_packet_path == f"artifacts/runs/{builder_envelope['run_id']}/metadata/task-packet.en.json" assert json.loads((job_root / expected_task_packet_path).read_text(encoding="utf-8")) == builder_task_packet + assert not (job_root / "runtime-home" / "phase1-local" / "envelopes" / "container" / "task-packets").exists() assert expected_task_packet_path in (job_root / "checksums.txt").read_text(encoding="utf-8") assert freeze["task_packet_digests"][builder_envelope["run_id"]] == builder_task_packet["task_packet_sha256"] @@ -544,6 +562,7 @@ def test_phase1_local_builder_failed_infra_stops_before_qa(tmp_path): assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() 
assert_builder_failure_bundle(job_root) assert_recovery_pause_context(manifest, job_root, "FAILED_INFRA") + assert_recovery_state_mirror(job_root, manifest["pause_context"]["related_card_id"]) @pytest.mark.integration @@ -643,6 +662,7 @@ def test_phase1_local_qa_non_success_writes_required_qa_bundle(tmp_path, mode, e qa_verdict = json.loads((qa_run_root / "metadata" / "qa-verdict.json").read_text(encoding="utf-8")) assert qa_verdict["status"] == expected_status assert_recovery_pause_context(manifest, job_root, expected_status) + assert_recovery_state_mirror(job_root, manifest["pause_context"]["related_card_id"]) @pytest.mark.integration From b4f095b42907e7d3944d587ad8917bc6c2e6ec99 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Fri, 10 Apr 2026 15:27:56 +0800 Subject: [PATCH 12/19] fix: harden phase1 docker archive boundaries - protect canonical task packets from worker-side rewrites through repo aliases\n- persist the QA canonical packet so freeze digests always resolve\n- archive recovery approval requests and preserve the real resume action --- adapters/generic-cli/docker-runtime.ts | 85 ++++++++++++++++-- core/contracts/types.ts | 29 ++++++- core/loop/phase1-local-flow.ts | 96 ++++++++++++++++++--- ops/archive/approvals.ts | 15 +++- tests/integration/test_phase1_local_flow.py | 78 ++++++++++++++++- 5 files changed, 283 insertions(+), 20 deletions(-) diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index 5489a3d..d7a86d2 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -35,6 +35,7 @@ const DOCKER_BIN_ENV_VAR = "CODINGCLAW_DOCKER_BIN"; interface NormalizedContainerPathMount extends ContainerPathMount { normalized_host_path: string; + mount_order: number; } export interface RoleImageResolver { @@ -99,6 +100,22 @@ function runRootContainerPath(runRoot: string): string { return 
`${CONTAINER_PATHS.artifacts}/runs/${basename(normalizeHostPath(runRoot))}`; } +function taskPacketPathFromRunRoot(runRoot: string): string { + return join(runRoot, "metadata", "task-packet.en.json"); +} + +function repoContainerPathForHostPath(repoRoot: string, hostPath: string): string | null { + const normalizedRepoRoot = normalizeHostPath(repoRoot); + const normalizedHostPath = normalizeHostPath(hostPath); + if (normalizedHostPath === normalizedRepoRoot) { + return CONTAINER_PATHS.repo; + } + if (!normalizedHostPath.startsWith(`${normalizedRepoRoot}/`)) { + return null; + } + return `${CONTAINER_PATHS.repo}${normalizedHostPath.slice(normalizedRepoRoot.length)}`; +} + export function resolveDockerWorkerImage(runRole: RunRole): string { if (runRole !== "builder" && runRole !== "qa") { throw new Error(`unsupported docker worker role: ${runRole}`); @@ -114,11 +131,15 @@ export class DockerPathMapper { constructor(mounts: ContainerPathMount[]) { this.mounts = mounts; this.normalizedMounts = mounts - .map((mount) => ({ + .map((mount, mountOrder) => ({ ...mount, normalized_host_path: normalizeHostPath(mount.host_path), + mount_order: mountOrder, })) - .sort((left, right) => right.normalized_host_path.length - left.normalized_host_path.length); + .sort( + (left, right) => + right.normalized_host_path.length - left.normalized_host_path.length || left.mount_order - right.mount_order, + ); } mapPath(hostPath: string): string { @@ -153,13 +174,30 @@ export class DockerPathMapper { export function buildDockerPathMapping(paths: DockerPathMappingRequest): DockerPathMapper { const repoReadOnly = paths.run_role === "builder" ? 
false : true; - return new DockerPathMapper([ + const jobRoot = dirname(paths.state_path); + const runRootContainer = runRootContainerPath(paths.artifact_path); + const taskPacketPath = taskPacketPathFromRunRoot(paths.artifact_path); + const repoJobRootPath = repoContainerPathForHostPath(paths.repo_path, jobRoot); + const repoRunRootPath = repoContainerPathForHostPath(paths.repo_path, paths.artifact_path); + const repoTaskPacketPath = repoContainerPathForHostPath(paths.repo_path, taskPacketPath); + const repoRuntimeHomePath = repoContainerPathForHostPath(paths.repo_path, paths.runtime_home); + const mounts: ContainerPathMount[] = [ { name: "repo", host_path: paths.repo_path, container_path: CONTAINER_PATHS.repo, read_only: repoReadOnly, }, + ...(repoJobRootPath === null + ? [] + : [ + { + name: "repo-job-root" as const, + host_path: jobRoot, + container_path: repoJobRootPath, + read_only: true, + }, + ]), { name: "state", host_path: paths.state_path, @@ -175,22 +213,59 @@ export function buildDockerPathMapping(paths: DockerPathMappingRequest): DockerP { name: "run-artifacts", host_path: paths.artifact_path, - container_path: runRootContainerPath(paths.artifact_path), + container_path: runRootContainer, read_only: false, }, + ...(repoRunRootPath === null + ? [] + : [ + { + name: "repo-run-artifacts" as const, + host_path: paths.artifact_path, + container_path: repoRunRootPath, + read_only: false, + }, + ]), + { + name: "task-packet", + host_path: taskPacketPath, + container_path: `${runRootContainer}/metadata/task-packet.en.json`, + read_only: true, + }, + ...(repoTaskPacketPath === null + ? [] + : [ + { + name: "repo-task-packet" as const, + host_path: taskPacketPath, + container_path: repoTaskPacketPath, + read_only: true, + }, + ]), { name: "runtime-home", host_path: paths.runtime_home, container_path: CONTAINER_PATHS.runtimeHome, read_only: false, }, + ...(repoRuntimeHomePath === null + ? 
[] + : [ + { + name: "repo-runtime-home" as const, + host_path: paths.runtime_home, + container_path: repoRuntimeHomePath, + read_only: false, + }, + ]), { name: "cache", host_path: join(paths.runtime_home, "cache"), container_path: CONTAINER_PATHS.cache, read_only: false, }, - ]); + ]; + return new DockerPathMapper(mounts); } function buildContainerPathMap(envelope: RunEnvelope, mapper: DockerPathMapper): ContainerPathMap { diff --git a/core/contracts/types.ts b/core/contracts/types.ts index af24f9c..0a7a9f9 100644 --- a/core/contracts/types.ts +++ b/core/contracts/types.ts @@ -82,7 +82,18 @@ export interface TaskPacket { story: StoryContract; } -export type ContainerMountName = "repo" | "state" | "artifacts" | "run-artifacts" | "runtime-home" | "cache"; +export type ContainerMountName = + | "repo" + | "repo-job-root" + | "state" + | "artifacts" + | "run-artifacts" + | "repo-run-artifacts" + | "task-packet" + | "repo-task-packet" + | "runtime-home" + | "repo-runtime-home" + | "cache"; export interface ContainerPathMount { name: ContainerMountName; @@ -313,6 +324,20 @@ export interface LoopMetricsFile { runs: LoopMetricEntry[]; } +export interface ApprovalRequestSnapshot { + request_id: string; + job_id: string; + story_id: string; + run_id: string; + run_role: RunRole; + action_summary: string; + reason: string; + risk_level: string; + requested_capability: string; + suggested_alternatives: string[]; + timeout_at: string; +} + export interface ApprovalCardSnapshot { job_id: string; card_id: string; @@ -327,6 +352,7 @@ export interface ApprovalCardSnapshot { timeout_at: string; created_at: string; evidence_refs: string[]; + approval_request?: ApprovalRequestSnapshot | null; recovery_context?: { last_exit_reason: RunExitStatus; current_freeze_version: string; @@ -449,6 +475,7 @@ export interface JobManifestApprovalRecord { decision_path: string | null; summary_zh_ref: string; decided_at: string | null; + approval_request?: ApprovalRequestSnapshot | null; } export 
interface JobManifestArtifactRecord { diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index fe9ea86..2c378c6 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -33,6 +33,7 @@ import type { AdapterExecutionResult, ApprovalCardSnapshot, ApprovalDecisionReceipt, + ApprovalRequestSnapshot, ChecksumRecord, ContractFreezeMetadata, JobManifest, @@ -361,6 +362,7 @@ function buildManifestApprovalRecord( approvalRecord.decision_path === null ? null : layout.relativeToJobRoot(approvalRecord.decision_path), summary_zh_ref: layout.relativeToJobRoot(approvalRecord.summary_path), decided_at: approvalRecord.decided_at, + ...(approvalRecord.approval_request === null ? {} : { approval_request: approvalRecord.approval_request }), }; } @@ -456,11 +458,89 @@ function recoveryRiskLevel(status: RunExitStatus): string { return "medium"; } +function recoveryRequestedAction(status: RunExitStatus, resumeGate: "owner" | "takeover"): string { + if (status === "AWAITING_APPROVAL") { + return "Review the executor approval request before continuing."; + } + if (status === "AWAITING_CREDENTIALS") { + return "Provide the required credential or choose an alternative before continuing."; + } + if (resumeGate === "takeover") { + return "Review the blocked run and trigger takeover before continuing."; + } + return "Review the blocked run and decide how to resume the job."; +} + +function recoveryCandidateActions(status: RunExitStatus, resumeGate: "owner" | "takeover"): string[] { + if (status === "AWAITING_APPROVAL") { + return ["approve requested action", "request revision", "trigger takeover"]; + } + if (status === "AWAITING_CREDENTIALS") { + return ["provide credentials", "request revision", "trigger takeover"]; + } + if (resumeGate === "takeover") { + return ["trigger takeover", "resume job", "request revision"]; + } + return ["resume job", "request revision", "trigger takeover"]; +} + +function approvalRequestReason(execution: 
AdapterExecutionResult): string { + const blocker = execution.workerOutput.blockers.find((value) => value.trim().length > 0); + if (blocker !== undefined) { + return blocker; + } + if (execution.workerOutput.next_action.trim().length > 0) { + return execution.workerOutput.next_action; + } + return `worker reported ${execution.runResult.status}`; +} + +function buildRecoveryApprovalRequest( + taskPacket: TaskPacket, + execution: AdapterExecutionResult, + timeoutAt: string, + riskLevel: string, +): ApprovalRequestSnapshot | null { + if (execution.runResult.status === "AWAITING_APPROVAL") { + return { + request_id: `approval-request-${execution.runResult.run_id}`, + job_id: taskPacket.job_id, + story_id: taskPacket.story.story_id, + run_id: execution.runResult.run_id, + run_role: execution.runResult.run_role, + action_summary: `${execution.runResult.run_role} requested owner approval before continuing.`, + reason: approvalRequestReason(execution), + risk_level: riskLevel, + requested_capability: "interactive_approval", + suggested_alternatives: ["request revision", "trigger takeover"], + timeout_at: timeoutAt, + }; + } + if (execution.runResult.status === "AWAITING_CREDENTIALS") { + return { + request_id: `approval-request-${execution.runResult.run_id}`, + job_id: taskPacket.job_id, + story_id: taskPacket.story.story_id, + run_id: execution.runResult.run_id, + run_role: execution.runResult.run_role, + action_summary: `${execution.runResult.run_role} requested credentials before continuing.`, + reason: approvalRequestReason(execution), + risk_level: riskLevel, + requested_capability: "secret_injection", + suggested_alternatives: ["continue without credentials", "request revision", "trigger takeover"], + timeout_at: timeoutAt, + }; + } + return null; +} + function buildRecoveryCard(taskPacket: TaskPacket, execution: AdapterExecutionResult): ApprovalCardSnapshot { const jobState = mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); const 
resumeGate = jobState === "AWAITING_TAKEOVER" ? "takeover" : "owner"; const latestEvidenceRef = latestEvidencePath(execution); const timeoutAt = new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString(); + const riskLevel = recoveryRiskLevel(execution.runResult.status); + const approvalRequest = buildRecoveryApprovalRequest(taskPacket, execution, timeoutAt, riskLevel); return { job_id: taskPacket.job_id, card_id: `card-recovery-${execution.runResult.run_id}`, @@ -468,16 +548,10 @@ function buildRecoveryCard(taskPacket: TaskPacket, execution: AdapterExecutionRe card_type: "recovery", story_id: taskPacket.story.story_id, freeze_version: taskPacket.freeze_version, - risk_level: recoveryRiskLevel(execution.runResult.status), + risk_level: riskLevel, summary_zh: `运行 ${execution.runResult.run_id} 已以 ${execution.runResult.status} 停止,当前故事需要${resumeGate === "takeover" ? "接管" : "人工决策"}后继续。`, - requested_action: - resumeGate === "takeover" - ? "Review the blocked run and trigger takeover before continuing." - : "Review the blocked run and decide how to resume the job.", - candidate_actions: - resumeGate === "takeover" - ? ["trigger takeover", "resume job", "request revision"] - : ["resume job", "request revision", "trigger takeover"], + requested_action: recoveryRequestedAction(execution.runResult.status, resumeGate), + candidate_actions: recoveryCandidateActions(execution.runResult.status, resumeGate), timeout_at: timeoutAt, created_at: execution.runResult.ended_at, evidence_refs: uniqueStrings([ @@ -485,6 +559,7 @@ function buildRecoveryCard(taskPacket: TaskPacket, execution: AdapterExecutionRe `artifacts/runs/${execution.runResult.run_id}/metadata/run-result.json`, `artifacts/runs/${execution.runResult.run_id}/reports/handoff.en.md`, ]), + ...(approvalRequest === null ? 
{} : { approval_request: approvalRequest }), recovery_context: { last_exit_reason: execution.runResult.status, current_freeze_version: taskPacket.freeze_version, @@ -729,7 +804,6 @@ export async function runPhase1Local(repoRoot: string): Promise int: if envelope["run_role"] == "builder": output = create_builder_outputs(envelope, task_packet, artifact_root) + if mode == "builder_awaiting_approval": + output["status"] = "AWAITING_APPROVAL" + output["open"] = ["owner approval required"] + output["blockers"] = ["Privileged action requires owner approval"] + output["next_action"] = "approve requested action or choose an alternative" + if mode == "builder_awaiting_credentials": + output["status"] = "AWAITING_CREDENTIALS" + output["open"] = ["credentials required"] + output["blockers"] = ["A credential is required before continuing"] + output["next_action"] = "provide the requested credential or choose an alternative" if mode == "builder_head_shift": mutate_head(map_path(envelope["repo_path"], mounts)) else: @@ -349,8 +359,7 @@ def load_capture(capture_dir: Path, run_role: str) -> dict: def assert_builder_failure_bundle(job_root: Path) -> None: run_roots = sorted(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) - assert len(run_roots) == 1 - builder_run_root = run_roots[0] + builder_run_root = next(path for path in run_roots if path.name.startswith("run-builder-")) artifact_index = json.loads((builder_run_root / "metadata" / "artifact-index.json").read_text(encoding="utf-8")) indexed_paths = {entry["path"] for entry in artifact_index["artifacts"]} @@ -389,6 +398,7 @@ def assert_recovery_pause_context(manifest: dict, job_root: Path, expected_statu assert recovery_card["recovery_context"]["latest_evidence_path"] assert recovery_card["recovery_context"]["recommended_next_action"] assert recovery_card["recovery_context"]["resume_gate"] == "owner" + assert pause_context["resume_action"] == 
recovery_card["recovery_context"]["recommended_next_action"] def assert_recovery_state_mirror(job_root: Path, recovery_card_id: str) -> None: @@ -480,12 +490,20 @@ def test_phase1_local_builder_container_materialization_uses_read_only_inputs_an (repo_root / "adapters" / "generic-cli" / "adapter-capability.json").read_text(encoding="utf-8") ) filesystem_write_scope = set(capability_manifest["capabilities"]["filesystem_write"]["scope"]) + repo_job_root_path = f"/work/repo/jobs/job-phase1-local" + repo_run_root_path = f"{repo_job_root_path}/artifacts/runs/{builder_envelope['run_id']}" + task_packet_target = f"{builder_envelope['artifact_path']}/metadata/task-packet.en.json" assert mounts["/work/repo"].get("readonly") != "true" + assert mounts[repo_job_root_path]["readonly"] == "true" assert mounts["/work/state"]["readonly"] == "true" assert mounts["/work/artifacts"]["readonly"] == "true" assert mounts[builder_envelope["artifact_path"]].get("readonly") != "true" + assert mounts[repo_run_root_path].get("readonly") != "true" + assert mounts[task_packet_target]["readonly"] == "true" + assert mounts[f"{repo_run_root_path}/metadata/task-packet.en.json"]["readonly"] == "true" assert mounts["/work/runtime-home"].get("readonly") != "true" + assert mounts[f"{repo_job_root_path}/runtime-home/phase1-local"].get("readonly") != "true" assert filesystem_write_scope == {"repo", "run-artifacts", "runtime-home"} manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) @@ -548,8 +566,12 @@ def test_phase1_local_builder_failed_infra_stops_before_qa(tmp_path): job_root = repo_root / "jobs" / "job-phase1-local" manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + freeze = json.loads((job_root / "contract-freeze.json").read_text(encoding="utf-8")) archived_trace = json.loads((job_root / "state" / "trace-index.json").read_text(encoding="utf-8")) live_trace = json.loads((repo_root / "state" / 
"trace-index.json").read_text(encoding="utf-8")) + qa_run_id = next(run_id for run_id in freeze["task_packet_digests"] if run_id.startswith("run-qa-")) + qa_task_packet_path = job_root / "artifacts" / "runs" / qa_run_id / "metadata" / "task-packet.en.json" + qa_task_packet = json.loads(qa_task_packet_path.read_text(encoding="utf-8")) assert [run["run_role"] for run in manifest["runs"]] == ["builder"] assert [run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_INFRA"] @@ -559,12 +581,64 @@ def test_phase1_local_builder_failed_infra_stops_before_qa(tmp_path): assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "builder" assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == "PENDING" assert live_trace == archived_trace + assert qa_task_packet["task_packet_sha256"] == freeze["task_packet_digests"][qa_run_id] + assert f"artifacts/runs/{qa_run_id}/metadata/task-packet.en.json" in (job_root / "checksums.txt").read_text(encoding="utf-8") + assert sorted(path.relative_to(job_root / "artifacts" / "runs" / qa_run_id).as_posix() for path in qa_task_packet_path.parent.rglob("*") if path.is_file()) == [ + "metadata/task-packet.en.json" + ] assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() assert_builder_failure_bundle(job_root) assert_recovery_pause_context(manifest, job_root, "FAILED_INFRA") assert_recovery_state_mirror(job_root, manifest["pause_context"]["related_card_id"]) +@pytest.mark.integration +@pytest.mark.parametrize( + ("mode", "expected_status", "expected_capability"), + [ + ("builder_awaiting_approval", "AWAITING_APPROVAL", "interactive_approval"), + ("builder_awaiting_credentials", "AWAITING_CREDENTIALS", "secret_injection"), + ], +) +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_waiting_recovery_card_preserves_approval_request_details( + tmp_path, + mode, + expected_status, + expected_capability, +): + repo_root = 
export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": mode, + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + recovery_record = manifest["approvals"][-1] + recovery_card = json.loads( + (job_root / "approvals" / recovery_record["card_id"] / "approval-card.json").read_text(encoding="utf-8") + ) + approval_request = recovery_card["approval_request"] + + assert [run["run_role"] for run in manifest["runs"]] == ["builder"] + assert [run["run_exit_status"] for run in manifest["runs"]] == [expected_status] + assert manifest["status"] == "AWAITING_OWNER" + assert manifest["pause_context"]["waiting_on"] == "owner" + assert approval_request["run_id"] == manifest["runs"][0]["run_id"] + assert approval_request["run_role"] == "builder" + assert approval_request["requested_capability"] == expected_capability + assert approval_request["reason"] + assert approval_request["suggested_alternatives"] + assert approval_request == recovery_record["approval_request"] + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_builder_failed_infra_command_log_records_docker_run(tmp_path): From 657fa0305ba2d62b56ef9bef6848caab88e1f403 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Mon, 13 Apr 2026 17:02:16 +0800 Subject: [PATCH 13/19] fix: align phase1 runtime plans and docker worker flow --- adapters/generic-cli/docker-runtime.ts | 66 +++- codingclaw_ironclaw_revision_blueprint_cn.md | 50 +-- .../fixtures/phase1-local-task-packet.en.json | 2 +- core/loop/phase1-local-flow.ts | 37 +- docs/ARCHITECTURE_OVERVIEW.md | 24 +- docs/BUILDER_CONTRACT.en.md | 2 + docs/DEPLOYMENT_PLAN.md | 20 +- 
docs/DEVELOPMENT_PLAN_TEMPLATE.en.md | 2 +- docs/EXECUTOR_ADAPTER_CONTRACT.md | 9 +- docs/GUI_EXCEPTION_POLICY.md | 24 +- docs/LIGHTWEIGHT_RUNTIME_PLAN.md | 363 ++++++++++++++++++ docs/LOOP_SPEC.md | 2 +- docs/OFFICIAL_REFERENCE_NOTES.md | 20 +- docs/QA_CONTRACT.en.md | 2 + docs/README.md | 5 +- docs/SYSTEM_BLUEPRINT.md | 13 +- docs/TAKEOVER_FLOW.md | 9 +- docs/TAKEOVER_PACKET_TEMPLATE.en.md | 2 +- docs/UBUNTU_GUI_RUNTIME_PLAN.md | 69 ++++ docs/WUYING_INTEGRATION_PLAN.md | 64 --- docs/progress/prompt_phase1_recovery_cards.md | 2 +- prompt_template.md | 2 +- 22 files changed, 619 insertions(+), 170 deletions(-) create mode 100644 docs/LIGHTWEIGHT_RUNTIME_PLAN.md create mode 100644 docs/UBUNTU_GUI_RUNTIME_PLAN.md delete mode 100644 docs/WUYING_INTEGRATION_PLAN.md diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index d7a86d2..0922a4e 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -1,4 +1,4 @@ -import { basename, dirname, isAbsolute, join, resolve } from "node:path"; +import { basename, dirname, extname, isAbsolute, join, resolve } from "node:path"; import { ensureDir, readText, sha256Text, toPosixPath, writeJson } from "../../core/loop/support.ts"; import type { ContainerPathMap, @@ -19,6 +19,7 @@ export const CONTAINER_PATHS = { const DEFAULT_BASE_IMAGE = "codingclaw-worker-base:phase1-local"; const IMAGE_SIGNATURE_LABEL = "io.codingclaw.image-signature"; +const WINDOWS_SCRIPT_TIMEOUT_GRACE_MS = 500; const DEFAULT_ROLE_IMAGES: Record<"builder" | "qa", string> = { builder: "codingclaw-worker-builder:phase1-local", @@ -426,9 +427,12 @@ function resolveTimeoutMs(timeLimits: Record): number | null { } async function spawnCommand(command: string[], cwd: string, timeoutMs: number | null = null): Promise { + const spawnedCommand = normalizeSpawnCommand(command); + const effectiveTimeoutMs = + timeoutMs === null ? 
null : timeoutMs + timeoutGraceMs(command, spawnedCommand); try { const handle = Bun.spawn({ - cmd: command, + cmd: spawnedCommand, cwd, stdout: "pipe", stderr: "pipe", @@ -437,12 +441,12 @@ async function spawnCommand(command: string[], cwd: string, timeoutMs: number | const stderrPromise = new Response(handle.stderr).text(); let timedOut = false; const timeoutHandle = - timeoutMs === null + effectiveTimeoutMs === null ? null : setTimeout(() => { timedOut = true; handle.kill(); - }, timeoutMs); + }, effectiveTimeoutMs); const exitCode = await handle.exited; if (timeoutHandle !== null) { clearTimeout(timeoutHandle); @@ -468,6 +472,60 @@ async function spawnCommand(command: string[], cwd: string, timeoutMs: number | } } +function pythonLauncherCommand(scriptPath: string): string[] | null { + const python = Bun.which("python") ?? Bun.which("py"); + if (python === null) { + return null; + } + const launcher = basename(python).toLowerCase(); + if (launcher === "py" || launcher === "py.exe") { + return [python, "-3", scriptPath]; + } + return [python, scriptPath]; +} + +function normalizeSpawnCommand(command: string[]): string[] { + if (process.platform !== "win32" || command.length === 0) { + return command; + } + const executable = command[0]; + if (!isAbsolute(executable)) { + return command; + } + const extension = extname(executable).toLowerCase(); + if (extension === ".py") { + const launcher = pythonLauncherCommand(executable); + return launcher === null ? 
command : [...launcher, ...command.slice(1)]; + } + if (extension === ".cmd" || extension === ".bat") { + const shell = process.env.ComSpec?.trim() || "cmd.exe"; + return [shell, "/d", "/s", "/c", executable, ...command.slice(1)]; + } + if (extension === ".ps1") { + return [ + "powershell.exe", + "-NoLogo", + "-NoProfile", + "-ExecutionPolicy", + "Bypass", + "-File", + executable, + ...command.slice(1), + ]; + } + return command; +} + +function timeoutGraceMs(command: string[], spawnedCommand: string[]): number { + if (process.platform !== "win32" || command.length === 0) { + return 0; + } + if (command.length === spawnedCommand.length && command.every((value, index) => value === spawnedCommand[index])) { + return 0; + } + return WINDOWS_SCRIPT_TIMEOUT_GRACE_MS; +} + function classifyRunFailure(result: CommandExecutionResult): RunExitStatus | null { if (result.timed_out) { return "TIMEOUT"; diff --git a/codingclaw_ironclaw_revision_blueprint_cn.md b/codingclaw_ironclaw_revision_blueprint_cn.md index 49cc5ad..07258af 100644 --- a/codingclaw_ironclaw_revision_blueprint_cn.md +++ b/codingclaw_ironclaw_revision_blueprint_cn.md @@ -12,7 +12,7 @@ CodingClaw 是基于 **IronClaw** 思路进行二次设计与定制化扩展的 - 以冻结合同为范围控制基础 - 以 Builder / QA 双轨机制提升质量 - 以 Docker 隔离和轨迹归档保证复现性 -- 在必要时引入云桌面进行 GUI 特例处理与人工接管 +- 在需要真实图形界面时,允许系统在本地 Ubuntu 图形会话中自动执行 GUI 步骤 的 **7×24 小时自动编码编排系统**。 @@ -87,7 +87,7 @@ v0.2 的实现方式明确为: - **Coding Loop = Execution Kernel** - **Builder / QA Executors = Role-specific Workers** - **Artifact Storage = Audit Plane** -- **Wuying Desktop = GUI Exception Plane** +- **Local Ubuntu GUI Session = GUI Execution Surface** ### 3.3 文件状态优先于长上下文记忆 @@ -162,10 +162,10 @@ v0.2 的实现方式明确为: |- Metadata DB Optional: -[Aliyun Wuying Desktop] - |- GUI Exception Handling - |- Human Takeover - |- Remote Assistance +[Local Ubuntu GUI Session] + |- Headed GUI Automation + |- Local Human Takeover + |- Rendered Evidence Capture ``` ## 4.2 四层模型 @@ -785,7 +785,8 @@ codingclaw/ contracts/ adapters/ - codex/ + 
claude-code-builder/ + codex-qa/ aider/ generic-cli/ @@ -973,16 +974,16 @@ artifacts/ --- -## 17. 阿里无影云电脑集成策略 +## 17. 本地 Ubuntu 图形执行面策略 ### 17.1 定位 -阿里无影不是主编码执行面,而是: +本地 Ubuntu 图形会话不是旁路特例,而是默认 GUI 执行面: -- GUI 特例平面 -- 人工接管平面 -- 远程协助平面 -- Windows-only / 桌面类工具处理平面 +- Headed Browser / GUI 自动化平面 +- 渲染证据采集平面 +- 本地人工接管平面 +- Linux 桌面类工具执行平面 ### 17.2 推荐使用顺序 @@ -995,9 +996,9 @@ artifacts/ ### 17.3 不推荐方式 -不推荐将无影作为: +不推荐将图形执行面设计成: -- 主 Builder 执行器 +- 云桌面或远程桌面桥接 - 全天候唯一编码环境 - 全链路自动点击执行核心 @@ -1100,7 +1101,7 @@ Phase 1 明确不纳入: - 多通道同时接入 - 多执行器并行接入 -- 无影桥接 +- 云桌面桥接 - Dashboard - 历史任务复用 - 生产级多任务调度 @@ -1120,13 +1121,14 @@ Phase 1 明确不纳入: 目标: -- Codex 适配器 +- Claude Code Builder 适配器 +- Codex QA 适配器 - Aider 适配器 - 通用 CLI 适配器 - role packs - test matrix 支持 -### Phase 4:无影与人工接管 +### Phase 4:本地 GUI 自动化与人工接管 目标: @@ -1184,9 +1186,9 @@ Phase 1 明确不纳入: - `CHECKSUM_POLICY.md` - `JOB_MANIFEST_SCHEMA.md` -### 20.5 无影相关文档 +### 20.5 本地 GUI 相关文档 -- `WUYING_INTEGRATION_PLAN.md` +- `UBUNTU_GUI_RUNTIME_PLAN.md` - `TAKEOVER_FLOW.md` - `TAKEOVER_PACKET_TEMPLATE.en.md` - `GUI_EXCEPTION_POLICY.md` @@ -1198,7 +1200,7 @@ Phase 1 明确不纳入: 以下内容不属于当前版本目标: - 跳过用户审批直接开始编码 -- 让 GUI 云桌面成为主编码执行面 +- 依赖云桌面或远程桌面桥接作为主编码执行面 - 依赖单一长会话连续完成所有任务 - 不做预算、不做熔断地无限循环 - 伪造执行器轨迹代替真实运行痕迹 @@ -1216,13 +1218,13 @@ Phase 1 明确不纳入: 3. 跑通单轮 **Builder / QA**,确保 story traceability 与 QA verdict 成立。 4. 补充归档、checksums、失败恢复、审批卡片与预算约束。 5. 再扩展执行器适配层。 -6. 最后接入无影桥接与高级运营能力。 +6. 最后补齐本地 GUI 自动化与高级运营能力。 --- ## 23. 一句话架构定义 -> CodingClaw 是一个以 IronClaw Fork 为控制外壳、以 coding loop 为执行内核、以冻结合同为范围边界、以 Builder/QA 双轨为质量机制、以 Docker 隔离和归档为审计基础、并在必要时借助阿里无影完成 GUI 特例处理与人工接管的自动编码代理系统。 +> CodingClaw 是一个以 IronClaw Fork 为控制外壳、以 coding loop 为执行内核、以冻结合同为范围边界、以 Builder/QA 双轨为质量机制、以 Docker 隔离和归档为审计基础、并在本地 Ubuntu 图形会话中完成自动 GUI 执行与必要时人工接管的自动编码代理系统。 --- @@ -1478,7 +1480,7 @@ v0.2 不追求一次接完所有执行器。 1. 先冻结 `Executor Adapter Contract` 2. 先实现一个 `Generic CLI Adapter` -3. 再实现 `Codex Adapter` +3. 再实现 `Claude Code Builder Adapter` 与 `Codex QA Adapter` 4. 
再实现 `Aider Adapter` --- diff --git a/control/fixtures/phase1-local-task-packet.en.json b/control/fixtures/phase1-local-task-packet.en.json index fbee9c4..7a124d4 100644 --- a/control/fixtures/phase1-local-task-packet.en.json +++ b/control/fixtures/phase1-local-task-packet.en.json @@ -57,7 +57,7 @@ "out_of_scope_checklist": [ "Mobile channel input", "Production deployment hardening", - "Wuying integration", + "Cloud desktop vendor integration", "Distributed scheduling", "Dashboard UI" ], diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index 2c378c6..276f873 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -25,6 +25,7 @@ import { readJson, readText, sha256Text, + toPosixPath, uniqueStrings, writeJson, writeText, @@ -169,6 +170,11 @@ async function buildTaskPacket( persist = true, ): Promise { const taskPacketPath = join(artifactRoot, "metadata", "task-packet.en.json"); + const normalizedRepoRoot = toPosixPath(repoRoot); + const normalizedStateRoot = toPosixPath(stateRoot); + const normalizedArtifactRoot = toPosixPath(artifactRoot); + const normalizedRuntimeHome = toPosixPath(runtimeHome); + const normalizedPreviousHandoffPath = previousHandoffPath.length > 0 ? 
toPosixPath(previousHandoffPath) : ""; const { expectedArtifacts, verificationTargets } = roleArtifacts(runRole); const packetWithoutChecksum = await materializeJsonTemplate( @@ -177,13 +183,13 @@ async function buildTaskPacket( __RUN_ROLE__: runRole, __RUN_ATTEMPT__: 1, __RUN_ID__: runId, - __REPO_PATH__: repoRoot, + __REPO_PATH__: normalizedRepoRoot, __BASE_COMMIT__: baseCommit, - __STATE_PATH__: stateRoot, - __ARTIFACT_PATH__: artifactRoot, - __RUNTIME_HOME__: runtimeHome, + __STATE_PATH__: normalizedStateRoot, + __ARTIFACT_PATH__: normalizedArtifactRoot, + __RUNTIME_HOME__: normalizedRuntimeHome, __TASK_PACKET_SHA256__: "", - __PREVIOUS_HANDOFF_PATH__: previousHandoffPath, + __PREVIOUS_HANDOFF_PATH__: normalizedPreviousHandoffPath, __VERIFICATION_TARGETS__: verificationTargets, __EXPECTED_ARTIFACTS__: expectedArtifacts, }, @@ -215,21 +221,28 @@ async function buildRunEnvelope( approvalSnapshotPath: string, traceContext: Record, ): Promise { + const normalizedRepoRoot = toPosixPath(repoRoot); + const normalizedStateRoot = toPosixPath(stateRoot); + const normalizedArtifactRoot = toPosixPath(artifactRoot); + const normalizedRuntimeHome = toPosixPath(runtimeHome); + const normalizedTaskPacketPath = toPosixPath(join(artifactRoot, "metadata", "task-packet.en.json")); + const normalizedPreviousHandoffPath = previousHandoffPath.length > 0 ? 
toPosixPath(previousHandoffPath) : ""; + const normalizedApprovalSnapshotPath = toPosixPath(approvalSnapshotPath); const envelope = await materializeJsonTemplate( join(repoRoot, "control", "fixtures", "phase1-local-run-envelope.json"), { __RUN_ID__: taskPacket.run_id, __RUN_ROLE__: taskPacket.run_role, __RUN_ATTEMPT__: taskPacket.run_attempt, - __REPO_PATH__: repoRoot, + __REPO_PATH__: normalizedRepoRoot, __BASE_COMMIT__: taskPacket.base_commit, - __STATE_PATH__: stateRoot, - __ARTIFACT_PATH__: artifactRoot, - __RUNTIME_HOME__: runtimeHome, - __TASK_PACKET_PATH__: join(artifactRoot, "metadata", "task-packet.en.json"), + __STATE_PATH__: normalizedStateRoot, + __ARTIFACT_PATH__: normalizedArtifactRoot, + __RUNTIME_HOME__: normalizedRuntimeHome, + __TASK_PACKET_PATH__: normalizedTaskPacketPath, __TASK_PACKET_SHA256__: taskPacket.task_packet_sha256, - __PREVIOUS_HANDOFF_PATH__: previousHandoffPath, - __APPROVAL_SNAPSHOT_PATH__: approvalSnapshotPath, + __PREVIOUS_HANDOFF_PATH__: normalizedPreviousHandoffPath, + __APPROVAL_SNAPSHOT_PATH__: normalizedApprovalSnapshotPath, __TRACE_CONTEXT__: traceContext, }, ); diff --git a/docs/ARCHITECTURE_OVERVIEW.md b/docs/ARCHITECTURE_OVERVIEW.md index ef80ef6..304a01c 100644 --- a/docs/ARCHITECTURE_OVERVIEW.md +++ b/docs/ARCHITECTURE_OVERVIEW.md @@ -19,11 +19,11 @@ This document describes the runtime architecture of CodingClaw and the boundarie |- Policy Guard |- Budget Guard |- Channel Adapters - |- GUI Exception Gate + |- GUI Runtime Gate | - +------> [GUI Exception Plane] - | |- Wuying Bridge - | |- Takeover Session + +------> [Local GUI Runtime Plane] + | |- Ubuntu Graphical Session + | |- Headed Automation Session | v [Coding Loop Kernel] @@ -34,8 +34,8 @@ This document describes the runtime architecture of CodingClaw and the boundarie | v [Execution Workers] - |- Builder Worker - |- QA Worker + |- Builder Worker (Claude Code) + |- QA Worker (Codex) |- Optional Review Worker | v @@ -84,11 +84,11 @@ This document describes the 
runtime architecture of CodingClaw and the boundarie - serve as the only long-lived memory source - support recovery after crash, timeout, or handoff -### GUI Exception Plane +### Local GUI Runtime Plane -- handle approved GUI-only interruptions -- bridge human takeover back into the control shell -- remain an exception path rather than a default execution surface +- run approved headed browser or desktop automation on the same Ubuntu host +- capture rendered evidence without a cloud desktop bridge +- allow governed local takeover only when automation cannot proceed ## Runtime Objects @@ -182,11 +182,11 @@ codingclaw/ Phase 1 ships a single-node topology: -- control shell on one host +- control shell on one Ubuntu host with a graphical session - local or same-host Docker workers - one active job at a time - one active story at a time -- GUI exception handling documented as a governed pause-and-takeover path +- local GUI automation available on the same host when the story requires a real browser or desktop surface - local artifact volume plus SQLite or Postgres metadata This keeps the first release small enough to verify end-to-end governance before scaling scheduler complexity. diff --git a/docs/BUILDER_CONTRACT.en.md b/docs/BUILDER_CONTRACT.en.md index 314be29..d51da6b 100644 --- a/docs/BUILDER_CONTRACT.en.md +++ b/docs/BUILDER_CONTRACT.en.md @@ -4,6 +4,8 @@ The Builder implements one approved story within the boundary of the active Contract Freeze. +Phase 1 binds the builder role to the Claude Code execution profile. + ## Inputs The Builder reads: diff --git a/docs/DEPLOYMENT_PLAN.md b/docs/DEPLOYMENT_PLAN.md index c1b7e0b..fcef7b7 100644 --- a/docs/DEPLOYMENT_PLAN.md +++ b/docs/DEPLOYMENT_PLAN.md @@ -2,7 +2,7 @@ ## Goal -This document defines the recommended deployment path for CodingClaw from local development to a stable single-node production baseline. 
+This document defines the recommended deployment path for CodingClaw from local development to a stable single-node production baseline on one Ubuntu host with a graphical session. Official feasibility references for Docker, SQLite, and PostgreSQL are collected in [OFFICIAL_REFERENCE_NOTES.md](OFFICIAL_REFERENCE_NOTES.md). @@ -24,14 +24,15 @@ The control host runs: - approval queue manager - scheduler - budget and policy guards +- the local graphical session used by headed automation when a story requires a real GUI surface ### Worker Runtime Workers run in Docker with role-specific images: - `worker-base` -- `worker-builder` -- `worker-qa` +- `worker-builder` with the Claude Code builder profile +- `worker-qa` with the Codex QA profile Each worker mounts: @@ -84,16 +85,17 @@ Redis is optional and not required for the first release. ### Stage 3: Adapter Expansion -- Codex adapter +- Claude Code builder adapter +- Codex QA adapter - Aider adapter - richer capability manifests - optional review executor -### Stage 4: GUI Exception Integration +### Stage 4: Local GUI Automation Hardening -- Wuying bridge -- takeover packet flow -- assisted resume path +- host display bootstrap +- headed browser or desktop automation evidence capture +- takeover packet flow for exceptional local recovery only ## Environment Separation @@ -143,6 +145,6 @@ The following items are intentionally deferred beyond Phase 1: - multi-tenant job queues - horizontally scaled schedulers -- browser or GUI automation as a primary surface +- cloud desktop bridges or remote-assistance control planes - dashboard analytics - shared warm workers diff --git a/docs/DEVELOPMENT_PLAN_TEMPLATE.en.md b/docs/DEVELOPMENT_PLAN_TEMPLATE.en.md index 4dc1cd6..71fd766 100644 --- a/docs/DEVELOPMENT_PLAN_TEMPLATE.en.md +++ b/docs/DEVELOPMENT_PLAN_TEMPLATE.en.md @@ -44,7 +44,7 @@ This document defines the required section order for `DEVELOPMENT_PLAN.en.md`. 
- planned adapters - expected run roles -- any GUI exception expectation +- any local GUI automation expectation ### 8. Architecture Direction diff --git a/docs/EXECUTOR_ADAPTER_CONTRACT.md b/docs/EXECUTOR_ADAPTER_CONTRACT.md index b1d1ab9..a4cee0c 100644 --- a/docs/EXECUTOR_ADAPTER_CONTRACT.md +++ b/docs/EXECUTOR_ADAPTER_CONTRACT.md @@ -91,6 +91,11 @@ The envelope must include: Phase 1 implementations are required to support `builder` and `qa`. `review` remains optional until the review executor is enabled. +Phase 1 role binding is profile-specific: + +- builder should use a Claude Code adapter profile +- qa should use a Codex adapter profile + ## Task Packet Boundary `task-packet.en.json` must follow [TASK_PACKET_TEMPLATE.en.md](TASK_PACKET_TEMPLATE.en.md). @@ -201,4 +206,6 @@ Before a new adapter is accepted for live use, it must pass a consistency check Phase 1 should freeze this contract first, then implement: 1. one generic CLI adapter -2. later adapters such as Codex or Aider +2. one Claude Code builder adapter profile +3. one Codex QA adapter profile +4. later adapters such as Aider diff --git a/docs/GUI_EXCEPTION_POLICY.md b/docs/GUI_EXCEPTION_POLICY.md index 78657d8..475c1a7 100644 --- a/docs/GUI_EXCEPTION_POLICY.md +++ b/docs/GUI_EXCEPTION_POLICY.md @@ -2,20 +2,20 @@ ## Purpose -This policy defines when GUI handling is allowed and how it is governed. +This policy defines when local GUI automation is allowed and how it is governed. ## GUI Exception Definition -A GUI exception is a task step that cannot reasonably be completed through approved APIs, CLI tools, or non-interactive automation. +A GUI execution step is a task step that needs a real display, browser window, or desktop application and cannot reasonably be completed through pure CLI or API automation alone. 
## Allowed Cases Allowed cases may include: +- headed browser automation on the Ubuntu host +- Linux desktop-only tooling - mandatory interactive login -- desktop-only tooling -- remote approval inside a managed desktop -- visual validation that requires a real GUI surface +- visual validation that requires a real rendered surface ## Disallowed Cases @@ -24,13 +24,15 @@ GUI handling must not be used to: - replace normal builder execution - bypass missing CLI automation that should be implemented - hide unlogged actions +- depend on a cloud desktop bridge for the normal execution path - avoid approval and evidence requirements ## Governance Rules -- GUI entry requires explicit approval -- the main loop must leave its active `RUNNING_*` state and enter `AWAITING_TAKEOVER` before takeover starts -- the human or assisted operator must record what changed +- in-scope local GUI automation may run automatically on the approved Ubuntu host when the active adapter profile declares the required capability +- manual takeover still requires explicit approval +- the main loop must leave its active `RUNNING_*` state and enter `AWAITING_TAKEOVER` before manual takeover starts +- the human operator must record what changed - resulting artifacts must be archived - the resumed loop must reference the takeover output @@ -38,10 +40,10 @@ GUI handling must not be used to: GUI-related interruptions should map to: -- `AWAITING_TAKEOVER` when waiting for takeover +- `AWAITING_TAKEOVER` when waiting for manual takeover - `AWAITING_APPROVAL` when approval is missing -- `FAILED_EXECUTION` or `FAILED_INFRA` when the desktop session fails +- `FAILED_EXECUTION` or `FAILED_INFRA` when the local GUI session fails ## Phase 1 Rule -Phase 1 treats GUI handling as documented policy only. Operational automation is deferred until the core loop is stable. +Phase 1 allows local GUI automation on the supported Ubuntu host. Remote desktop orchestration remains out of scope. 
diff --git a/docs/LIGHTWEIGHT_RUNTIME_PLAN.md b/docs/LIGHTWEIGHT_RUNTIME_PLAN.md new file mode 100644 index 0000000..bec2ae2 --- /dev/null +++ b/docs/LIGHTWEIGHT_RUNTIME_PLAN.md @@ -0,0 +1,363 @@ +# Lightweight Runtime Plan + +## Purpose + +This document defines the minimum runtime hardening plan for CodingClaw. + +The target is a lightweight runtime with harder boundaries, not a bridge into another product shell and not a full fork of an external agent host. + +`SYSTEM_BLUEPRINT.md` remains the canonical top-level definition. This document is an implementation plan for the next runtime slice. + +## Decision Summary + +CodingClaw should keep the current Phase 1 control shell and the existing builder to QA loop. + +The next runtime work should harden three boundaries: + +- state scope and atomic persistence +- adapter capability enforcement +- host-side shell and credential guards + +External reuse priority should be: + +- borrow TypeScript state path and planning gate ideas from oh-my-codex +- borrow capability, secret, and shell guard ideas from IronClaw +- do not import either product shell, team runtime, memory system, or orchestration stack + +## Why This Fits The Current Repository + +The repository already points in this direction: + +- [SYSTEM_BLUEPRINT.md](SYSTEM_BLUEPRINT.md) already defines control shell, files over memory, executor-agnostic adapters, and short-lived workers +- [ARCHITECTURE_OVERVIEW.md](ARCHITECTURE_OVERVIEW.md) already preserves the builder to QA loop and the single-node Phase 1 shape +- [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md) already defines a capability manifest boundary +- [SECURITY_POLICY.md](SECURITY_POLICY.md) already requires secret injection, log redaction, and approval interception + +This plan does not replace the current architecture. It tightens the boundaries the current architecture already claims. 
+ +## What Already Exists + +The current codebase already has the right shell, but several boundaries are still declarative instead of enforced: + +- `core/loop/phase1-local-flow.ts` already owns the Phase 1 builder to QA control path +- `core/loop/state-store.ts` already mirrors archived state back into `state/` +- `adapters/generic-cli/adapter-capability.json` already declares capability intent +- `control/fixtures/phase1-local-task-packet.en.json` and `control/fixtures/phase1-local-run-envelope.json` already carry `requested_capabilities` +- `docs/SECURITY_POLICY.md` already states that secrets must not be written into task packets + +Current gaps: + +- `StateStore` writes directly, without an atomic write path or a write lock +- state has one root and one mirror, but no root, session, and run scope resolver +- capability data is passed through, but `GenericCliAdapter` does not enforce default-deny execution against the manifest +- shell and secret rules exist as policy text, but not as host-side runtime guards + +## External Adoption Targets + +### Borrow From Oh My Codex + +Adopt the state boundary ideas, not the product shell: + +- session ID validation +- Windows and WSL path normalization +- working-directory allowlist checks before resolving state paths +- root scope plus session scope plus current-session fallback reads +- atomic file writes through temp file then rename +- write locking around state mutations + +Adopt the planning gate rules, not the planner prompt wording: + +- inspect the repository before asking the user for code facts +- use an adaptive step count instead of a fixed five-step template +- do not execute before explicit user approval + +### Borrow From IronClaw + +Adopt the security boundary ideas, not the Rust host shell: + +- default-deny capability objects +- missing capability sidecar means no permissions +- host-boundary credential injection, where tools never see secret values +- shell policy split into blocked commands, 
dangerous patterns, never-auto-approve patterns, and safe environment allowlist + +## Target Runtime Shape + +```text +[Control Shell] + | + +--> [Planner Gate] + | + +--> [State Scope Resolver] + | |- root scope + | |- session scope + | `- run scope + | + +--> [Task Packet + Run Envelope] + | + `--> [Adapter Capability Gate] + | + +--> [shell-policy] + +--> [credential-injector] + `--> [Generic CLI Worker] + | + +--> Builder + `--> QA +``` + +The lightweight runtime should keep the current archive-first model: + +- canonical job state remains under `jobs/<job_id>/state/` +- latest live mirror remains under repository `state/` +- session-local state becomes optional and isolated +- run-local state becomes archived and replayable + +## State Scope Model + +The new state model should define three scopes: + +### Root Scope + +Canonical job state under `jobs/<job_id>/state/`. + +This remains the only authoritative control-shell state for: + +- `progress.en.md` +- `story-queue.json` +- `active-story.json` +- `handoff.en.md` +- `risk-register.en.md` +- `loop-metrics.json` +- `decisions.en.md` +- `trace-index.json` + +### Session Scope + +Optional adapter session state under `jobs/<job_id>/state/sessions/<session_id>/`. + +This scope should exist only for executor-specific resumability and should never redefine contract scope. + +Read behavior should support: + +- explicit session ID +- current session fallback from `jobs/<job_id>/state/current-session.json` +- fallback to root scope when no session state exists + +### Run Scope + +Archived run-local state under `artifacts/runs/<run_id>/metadata/state/`. + +This scope should hold transient state snapshots needed for replay, recovery, and forensic review. 
+ +### Resolution Rules + +- path resolution must normalize Windows and WSL forms before validation +- resolved paths must stay inside the allowed job root +- session IDs must use a strict safe pattern +- the control shell may promote selected session or run outputs back into root scope +- workers may read scoped state, but they must not rewrite control-shell canonical files directly + +## Required Modules + +The minimum useful implementation should add these modules: + +- `core/loop/state-scope.ts` +- `core/loop/state-write.ts` +- `adapters/generic-cli/capability-gate.ts` +- `ops/guards/shell-policy.ts` +- `ops/guards/credential-injector.ts` + +Responsibilities: + +- `state-scope.ts`: resolve root, session, and run paths, validate session IDs, normalize Windows and WSL paths, and provide read scope order +- `state-write.ts`: provide write lock and atomic write primitives +- `capability-gate.ts`: load adapter capabilities, intersect them with requested capabilities, and reject undeclared actions by default +- `shell-policy.ts`: classify commands and environment variables before execution +- `credential-injector.ts`: translate secret handles and host allowlists into host-side request injection without exposing raw secrets to workers + +## Existing File Changes + +The first implementation pass should update only the following existing files: + +- `core/loop/state-store.ts` +- `core/loop/phase1-local-flow.ts` +- `adapters/generic-cli/adapter.ts` +- `adapters/generic-cli/adapter-capability.json` +- `core/contracts/types.ts` + +The goal is to keep the diff narrow and preserve the current Phase 1 control path. + +## Capability Enforcement Rules + +Capability handling should become executable policy, not metadata decoration. 
+ +The adapter gate should enforce: + +- effective capability set equals `adapter-capability.json` allow entries intersected with `requested_capabilities` +- any missing capability entry is deny +- any denied capability request fails before worker launch +- any privileged capability outside the approved profile returns a standard approval or policy failure path + +The first enforced capabilities should be: + +- `shell_command` +- `container_control` +- `secret_injection` +- `interactive_approval` + +## Shell And Secret Guard Rules + +### Shell Policy + +The shell guard should run before any worker command is launched. + +It should evaluate: + +- exact blocked commands +- dangerous substrings or command patterns +- commands that can never be auto-approved +- environment variable allowlist + +Phase 1 does not need a perfect shell sandbox. It needs a host-side deny path for obvious unsafe commands and obvious secret exfiltration attempts. + +### Credential Injection + +Secrets must never enter: + +- `task-packet.en.json` +- run envelopes +- archived reports +- worker stdout or stderr + +The task packet should carry only: + +- secret handle +- credential alias +- allowed host patterns +- injection mode metadata + +The host should resolve the secret and inject it only at the outgoing request boundary. + +## Planner Gate Follow-Up + +Planner hardening is a control-shell follow-up, not the first runtime blocker. + +When the live planner role is implemented, it should inherit three rules: + +- repo inspection before user questions about code facts +- scope-matched step count +- explicit approval before execution handoff + +This is a policy borrow from oh-my-codex, not a prompt import project. + +## Implementation Sequence + +### Step 1 + +Add state scope resolution plus atomic state writes. 
+ +Acceptance: + +- root, session, and run scope paths resolve deterministically +- direct state writes are replaced by atomic write helpers +- concurrent writes cannot truncate or partially overwrite canonical state files + +### Step 2 + +Add default-deny capability enforcement in `GenericCliAdapter`. + +Acceptance: + +- adapter launch fails when a requested capability is undeclared or denied +- capability checks happen before worker start +- failure surfaces through the existing run status vocabulary + +### Step 3 + +Add shell policy and credential injection guards. + +Acceptance: + +- unsafe shell commands are blocked before launch +- environment scrubbing is enforced for worker execution +- secret handles can be resolved and injected without writing secret values into artifacts + +### Step 4 + +Wire planner gate rules into the future live planner implementation. + +Acceptance: + +- planner flow inspects the repo before asking for code facts +- plan length matches actual scope +- no execution starts without owner approval + +## Collaboration Model + +The work can be split into three lanes: + +| Lane | Modules Touched | Depends On | +|------|-----------------|------------| +| A | `core/loop/` | - | +| B | `ops/guards/` | - | +| C | `adapters/generic-cli/`, `core/contracts/` | A, B | + +Recommended execution order: + +- launch Lane A and Lane B in parallel +- merge both +- implement Lane C after the state and guard interfaces are stable + +Conflict notes: + +- Lane C touches adapter launch and will likely need the final guard interfaces from Lane B +- Lane A should avoid changing archive layout semantics beyond scoped state additions + +## Failure Modes To Design For + +| Codepath | Production Failure | Test Need | Error Handling Need | User Outcome | +|----------|--------------------|-----------|---------------------|--------------| +| state write | partial file write on crash | yes | yes | explicit failure, never silent corruption | +| state resolve | path 
escape or invalid session ID | yes | yes | fail closed | +| capability gate | undeclared capability still executes | yes | yes | fail before worker launch | +| shell policy | dangerous command bypasses deny list | yes | yes | approval or policy failure | +| credential injection | secret leaks into packet or logs | yes | yes | policy failure and redacted evidence | + +Any implementation that leaves a silent secret leak or a silent capability bypass is below the minimum bar. + +## Not In Scope + +This plan explicitly excludes: + +- bridging CodingClaw into oh-my-codex runtime +- forking IronClaw as the new host shell +- tmux, HUD, multi-agent team runtime, or worktree orchestration from oh-my-codex +- WASM tool ecosystems, channels, routines, web gateways, or long-lived memory layers from IronClaw +- multi-job concurrency or multi-adapter parallel execution +- browser QA expansion, large GUI automation expansion, or remote desktop vendor integration + +## Source Basis + +Internal references: + +- [SYSTEM_BLUEPRINT.md](SYSTEM_BLUEPRINT.md) +- [ARCHITECTURE_OVERVIEW.md](ARCHITECTURE_OVERVIEW.md) +- [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md) +- [SECURITY_POLICY.md](SECURITY_POLICY.md) + +External references: + +- oh-my-codex state scope source: https://github.com/Yeachan-Heo/oh-my-codex/blob/main/src/mcp/state-paths.ts +- oh-my-codex atomic state write source: https://github.com/Yeachan-Heo/oh-my-codex/blob/main/src/mcp/state-server.ts +- oh-my-codex planner prompt source: https://github.com/Yeachan-Heo/oh-my-codex/blob/main/prompts/planner.md +- IronClaw capability source: https://github.com/nearai/ironclaw/blob/staging/src/tools/wasm/capabilities.rs +- IronClaw capability loader source: https://github.com/nearai/ironclaw/blob/staging/src/tools/wasm/loader.rs +- IronClaw credential injection source: https://github.com/nearai/ironclaw/blob/staging/src/tools/wasm/credential_injector.rs +- IronClaw shell policy source: 
https://github.com/nearai/ironclaw/blob/staging/src/tools/builtin/shell.rs + +## Related Documents + +- [README.md](README.md) +- [STATE_STORE_SPEC.md](STATE_STORE_SPEC.md) +- [LOOP_SPEC.md](LOOP_SPEC.md) +- [STATUS_MODEL.md](STATUS_MODEL.md) +- [UBUNTU_GUI_RUNTIME_PLAN.md](UBUNTU_GUI_RUNTIME_PLAN.md) diff --git a/docs/LOOP_SPEC.md b/docs/LOOP_SPEC.md index 954b0d6..a5e7380 100644 --- a/docs/LOOP_SPEC.md +++ b/docs/LOOP_SPEC.md @@ -72,7 +72,7 @@ The task packet must: - the loop launches only one role at a time - the worker may be builder, QA, or review -- Phase 1 requires builder and QA only. Review remains optional until a later phase enables it. +- Phase 1 requires builder and QA only. Builder is expected to run through Claude Code, QA through Codex. Review remains optional until a later phase enables it. - workers must return standard output objects and a standard exit status - unapproved privileged actions must interrupt execution and return approval-needed status diff --git a/docs/OFFICIAL_REFERENCE_NOTES.md b/docs/OFFICIAL_REFERENCE_NOTES.md index 589a5f1..bf70595 100644 --- a/docs/OFFICIAL_REFERENCE_NOTES.md +++ b/docs/OFFICIAL_REFERENCE_NOTES.md @@ -13,8 +13,8 @@ Repository documents remain normative. External documents in this file are suppo - Docker volumes fit cache or container-owned persistent data better than additional bind mounts. - SQLite is a reasonable Phase 1 metadata store only for single-node, local metadata with low write concurrency. - PostgreSQL is the correct upgrade path once the system needs multi-process coordination, multi-client access, or higher write concurrency. -- Wuying is feasible as a governed GUI exception and human takeover surface. -- The current Wuying Phase 1 strategy remains correct: document the handoff path first, do not depend on undocumented automation interfaces. +- The supported Phase 1 GUI surface is a single Ubuntu host with a local graphical session. 
+- The system should not depend on a cloud desktop vendor or remote-assistance bridge for normal automation. - The executor-agnostic adapter contract is feasible, but capability declarations must stay profile-specific rather than assume one universal tool surface. ## Docker Worker Runtime @@ -45,19 +45,11 @@ Repository documents remain normative. External documents in this file are suppo - `DEPLOYMENT_PLAN.md`: PostgreSQL MVCC and advisory locks make it a better fit for scheduler coordination, queue ownership, and other multi-process control-plane workflows. References: [MVCC Introduction](https://www.postgresql.org/docs/current/mvcc-intro.html), [Explicit Locking](https://www.postgresql.org/docs/current/explicit-locking.html) -## Wuying GUI Exception Plane +## Local Ubuntu GUI Runtime Baseline -- `WUYING_INTEGRATION_PLAN.md`, `GUI_EXCEPTION_POLICY.md`: Wuying is positioned by Alibaba Cloud as desktop-as-a-service for end users, not as a generic server control plane. - Reference: [What is Elastic Desktop Service](https://help.aliyun.com/zh/wuying-workspace/product-overview/what-is-elastic-desktop-service) +- `UBUNTU_GUI_RUNTIME_PLAN.md`, `GUI_EXCEPTION_POLICY.md`, `DEPLOYMENT_PLAN.md`: the repository design keeps the control shell, workers, artifacts, and headed GUI surface on one Ubuntu host so audit paths and evidence paths stay local and deterministic. -- `WUYING_INTEGRATION_PLAN.md`, `TAKEOVER_FLOW.md`: remote assistance supports an approval-and-accept flow that matches a governed takeover path better than a silent automation path. - Reference: [Use remote assistance and collaboration session](https://help.aliyun.com/zh/wtc/user-guide/use-remote-assitance-and-collaboration-session) - -- `WUYING_INTEGRATION_PLAN.md`, `SECURITY_POLICY.md`: Wuying login and access controls support SSO, MFA, client validation, and organization-scoped access control. 
- References: [Certification overview](https://help.aliyun.com/zh/wuying-workspace/user-guide/certification-overview), [Web client](https://help.aliyun.com/zh/wtc/user-guide/web-client) - -- `WUYING_INTEGRATION_PLAN.md`: the Web client is a convenient access path but has usage limits such as internet-only access and no local disk mapping, which supports keeping it as an exception surface rather than the default coding environment. - Reference: [Web client](https://help.aliyun.com/zh/wtc/user-guide/web-client) +- `TAKEOVER_FLOW.md`: human takeover remains a fallback path for blocked interactive steps. It is not the normal execution surface and does not require a cloud desktop vendor to exist. ## Executor And Adapter Boundary @@ -73,5 +65,5 @@ Repository documents remain normative. External documents in this file are suppo ## Limits Of External Proof - No official Docker, SQLite, PostgreSQL, Alibaba Cloud, or OpenAI document defines CodingClaw's freeze contract, approval card schema, artifact archive structure, or trace index schema. -- No official Wuying document found in this review directly proves a stable native API for the full `takeover packet -> resume semantics` workflow defined by this repository. +- No official external document in this review defines the full `takeover packet -> resume semantics` workflow or the exact local GUI orchestration rules used by this repository. - These governance and audit objects remain internal platform design decisions. They are feasible, but their correctness must be validated by implementation and integration tests, not by vendor documentation alone. diff --git a/docs/QA_CONTRACT.en.md b/docs/QA_CONTRACT.en.md index 861351a..6ac4d50 100644 --- a/docs/QA_CONTRACT.en.md +++ b/docs/QA_CONTRACT.en.md @@ -4,6 +4,8 @@ The QA executor validates that a builder output is reproducible, inside scope, and supported by evidence. +Phase 1 binds the QA role to the Codex execution profile. 
+ ## Inputs QA reads: diff --git a/docs/README.md b/docs/README.md index ce5d078..236228d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -32,6 +32,7 @@ Start with these documents in order: - [LOOP_SPEC.md](LOOP_SPEC.md) - [STATE_STORE_SPEC.md](STATE_STORE_SPEC.md) +- [LIGHTWEIGHT_RUNTIME_PLAN.md](LIGHTWEIGHT_RUNTIME_PLAN.md) - [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md) - [PLANNER_CONTRACT.en.md](PLANNER_CONTRACT.en.md) - [BUILDER_CONTRACT.en.md](BUILDER_CONTRACT.en.md) @@ -47,9 +48,9 @@ Start with these documents in order: - [CHECKSUM_POLICY.md](CHECKSUM_POLICY.md) - [JOB_MANIFEST_SCHEMA.md](JOB_MANIFEST_SCHEMA.md) -## Takeover And Wuying +## GUI Runtime And Takeover -- [WUYING_INTEGRATION_PLAN.md](WUYING_INTEGRATION_PLAN.md) +- [UBUNTU_GUI_RUNTIME_PLAN.md](UBUNTU_GUI_RUNTIME_PLAN.md) - [TAKEOVER_FLOW.md](TAKEOVER_FLOW.md) - [TAKEOVER_PACKET_TEMPLATE.en.md](TAKEOVER_PACKET_TEMPLATE.en.md) diff --git a/docs/SYSTEM_BLUEPRINT.md b/docs/SYSTEM_BLUEPRINT.md index 438a6b9..81d78fb 100644 --- a/docs/SYSTEM_BLUEPRINT.md +++ b/docs/SYSTEM_BLUEPRINT.md @@ -55,9 +55,9 @@ Workers run in isolated environments. Builder produces implementation artifacts. This plane stores all long-lived state, reports, logs, evidence, sessions, checksums, and manifests required for replay and audit. -### GUI Exception Plane +### Local GUI Runtime Plane -Aliyun Wuying Desktop is reserved for GUI-only tasks, human takeover, and assisted recovery. It is not a primary coding surface. +A single Ubuntu host with a graphical session is the supported GUI execution surface. Headed browser or desktop automation runs locally on that host. Manual takeover remains an exceptional fallback, not the primary path. ## Mandatory Lifecycle @@ -125,8 +125,9 @@ Phase 1 is the minimum working product. 
It includes: - one Chinese mobile entry channel - one control shell - one generic CLI adapter -- one builder worker -- one QA worker +- one Claude Code builder worker +- one Codex QA worker +- one Ubuntu host with a graphical session for local full automation when a story needs a real GUI surface - contract binding to `base_commit` - traceability from story to acceptance to QA verdict - local artifact archival and checksums @@ -138,7 +139,7 @@ Phase 1 excludes: - multi-channel concurrency - multi-adapter parallel execution - mandatory review executor in the live path -- Wuying automation +- cloud desktop bridges or vendor-specific remote desktop orchestration - dashboards - historical job reuse - production-scale multi-tenant scheduling @@ -158,5 +159,5 @@ Phase 1 excludes: - [APPROVAL_CARD_SPEC.md](APPROVAL_CARD_SPEC.md) - [REVIEW_CONTRACT.en.md](REVIEW_CONTRACT.en.md) - [ARTIFACT_LAYOUT_SPEC.md](ARTIFACT_LAYOUT_SPEC.md) -- [WUYING_INTEGRATION_PLAN.md](WUYING_INTEGRATION_PLAN.md) +- [UBUNTU_GUI_RUNTIME_PLAN.md](UBUNTU_GUI_RUNTIME_PLAN.md) - [TAKEOVER_PACKET_TEMPLATE.en.md](TAKEOVER_PACKET_TEMPLATE.en.md) diff --git a/docs/TAKEOVER_FLOW.md b/docs/TAKEOVER_FLOW.md index f5af52a..ebaf06f 100644 --- a/docs/TAKEOVER_FLOW.md +++ b/docs/TAKEOVER_FLOW.md @@ -2,15 +2,14 @@ ## Purpose -This document defines the controlled handoff from automated execution to human takeover. +This document defines the controlled handoff from automated execution to human takeover when local automation on the Ubuntu host cannot proceed. 
## Trigger Conditions Takeover may be triggered when: -- a GUI-only step blocks progress +- a local GUI step blocks progress after normal automation has been attempted - credentials require an interactive login -- a Windows-only tool is required - policy demands human confirmation inside a live UI ## Standard Flow @@ -20,8 +19,8 @@ DETECT_GUI_EXCEPTION -> PAUSE_MAIN_LOOP -> PREPARE_TAKEOVER_PACKET -> ISSUE_APPROVAL_CARD - -> OPEN_WUYING_BRIDGE - -> HUMAN_OR_ASSISTED_ACTION + -> OPEN_LOCAL_GUI_SESSION + -> HUMAN_ACTION -> CAPTURE_RESULT -> WRITE_HANDOFF -> RESUME_OR_TERMINATE diff --git a/docs/TAKEOVER_PACKET_TEMPLATE.en.md b/docs/TAKEOVER_PACKET_TEMPLATE.en.md index 51d7cb1..10ada30 100644 --- a/docs/TAKEOVER_PACKET_TEMPLATE.en.md +++ b/docs/TAKEOVER_PACKET_TEMPLATE.en.md @@ -2,7 +2,7 @@ ## Purpose -`takeover-packet.en.md` is the standard human handoff document for a governed GUI or interactive interruption. +`takeover-packet.en.md` is the standard human handoff document for a governed local GUI or interactive interruption. ## Archive Location diff --git a/docs/UBUNTU_GUI_RUNTIME_PLAN.md b/docs/UBUNTU_GUI_RUNTIME_PLAN.md new file mode 100644 index 0000000..c864f5e --- /dev/null +++ b/docs/UBUNTU_GUI_RUNTIME_PLAN.md @@ -0,0 +1,69 @@ +# Ubuntu GUI Runtime Plan + +## Purpose + +This document defines how CodingClaw runs on a single Ubuntu host with a local graphical session. + +Supporting feasibility notes for this positioning are collected in [OFFICIAL_REFERENCE_NOTES.md](OFFICIAL_REFERENCE_NOTES.md). + +## Positioning + +The Ubuntu GUI host is the default graphical execution surface. It exists for: + +- fully automated headed browser workflows +- fully automated Linux desktop tooling +- screenshot and rendered evidence capture +- exceptional local takeover when automation is blocked + +## Preferred Order of Use + +1. API or CLI automation +2. local headed browser or desktop automation on the Ubuntu host +3. 
local human takeover + +## Integration Components + +The integration should define: + +- local display and session bootstrap +- builder launch with the Claude Code profile +- QA launch with the Codex profile +- takeover packet generation following `TAKEOVER_PACKET_TEMPLATE.en.md` +- secure local access handoff when takeover is required +- result collection +- resume signal back into the control shell + +## Control Rules + +- the supported deployment target is one Ubuntu host with a graphical session +- standard in-scope local GUI automation may run automatically when the active adapter profile declares the required capability +- builder uses Claude Code and QA uses Codex in Phase 1 +- main loop execution must transition into `AWAITING_TAKEOVER` during manual takeover unless the task is explicitly parallel-safe +- all takeover results must be written back under `artifacts/runs/<run_id>/takeover/` and referenced by handoff and manifest records + +## Phase Plan + +### Phase 1 + +- one Ubuntu host with a graphical session +- automated builder flow through Claude Code +- automated QA flow through Codex +- local headed browser or GUI execution when a story requires a real rendered surface +- no dependency on a cloud desktop vendor + +### Phase 2 + +- hardened host display bootstrap +- managed takeover packet format and archive path +- stable resume semantics + +### Phase 3 + +- broader local desktop tooling support +- stronger evidence capture for local GUI flows + +## Non-Goals + +- depending on a cloud desktop bridge +- depending on Windows-only tools for normal execution +- replacing auditable local automation with manual remote sessions diff --git a/docs/WUYING_INTEGRATION_PLAN.md b/docs/WUYING_INTEGRATION_PLAN.md deleted file mode 100644 index 3595896..0000000 --- a/docs/WUYING_INTEGRATION_PLAN.md +++ /dev/null @@ -1,64 +0,0 @@ -# Wuying Integration Plan - -## Purpose - -This document defines how Aliyun Wuying Desktop fits into CodingClaw as a GUI exception and human 
takeover surface. - -Official feasibility references for this positioning are collected in [OFFICIAL_REFERENCE_NOTES.md](OFFICIAL_REFERENCE_NOTES.md). - -## Positioning - -Wuying is not the primary builder environment. It exists for: - -- GUI-only workflows -- Windows-only tools -- human takeover -- remote assistance - -## Preferred Order of Use - -1. API or management SDK control -2. web bridge access -3. human takeover -4. assisted GUI operation - -## Integration Components - -The integration should define: - -- Wuying session provisioning or lookup -- takeover packet generation following `TAKEOVER_PACKET_TEMPLATE.en.md` -- secure access handoff -- result collection -- resume signal back into the control shell - -## Control Rules - -- Wuying access must be explicitly approved -- main loop execution must transition into `AWAITING_TAKEOVER` during takeover unless the task is explicitly parallel-safe -- all takeover results must be written back under `artifacts/runs//takeover/` and referenced by handoff and manifest records - -## Phase Plan - -### Phase 1 - -- no active Wuying automation -- only reserve document hooks and policy definitions -- keep Wuying limited to approved takeover and remote assistance paths -- do not depend on undocumented Wuying-native orchestration for resume control - -### Phase 2 - -- managed takeover packet format and archive path -- stable resume semantics - -### Phase 3 - -- controlled bridge integration -- assisted GUI support - -## Non-Goals - -- using Wuying as the default coding environment -- replacing Docker workers with desktop sessions -- turning GUI automation into the main execution path diff --git a/docs/progress/prompt_phase1_recovery_cards.md b/docs/progress/prompt_phase1_recovery_cards.md index b6dfece..d2816d9 100644 --- a/docs/progress/prompt_phase1_recovery_cards.md +++ b/docs/progress/prompt_phase1_recovery_cards.md @@ -39,7 +39,7 @@ In scope Out of scope - mobile channel delivery or webhook integration - owner decision 
intake, pause/resume commands, or actual resume execution -- takeover packet generation or Wuying session orchestration +- takeover packet generation or local GUI session orchestration - review executor behavior - fixback or change-request workflow redesign - policy-engine expansion for `FAILED_POLICY` diff --git a/prompt_template.md b/prompt_template.md index 4cf31d2..7b0ecd1 100644 --- a/prompt_template.md +++ b/prompt_template.md @@ -22,7 +22,7 @@ In scope Out of scope - mobile channel integration -- Wuying automation +- cloud desktop or remote desktop vendor integration - review executor - multi-tenant scheduling - dashboards From 820701ae503d3a3ac5da6bc058d7913a46c70bb6 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Mon, 13 Apr 2026 17:42:28 +0800 Subject: [PATCH 14/19] fix: restore phase1 docker worker CI coverage --- adapters/generic-cli/adapter.ts | 117 ++++++-- adapters/generic-cli/capability-gate.ts | 73 +++++ .../phase1-local-approval-decision.json | 12 + core/contracts/types.ts | 1 + core/loop/phase1-local-flow.ts | 42 +-- ops/workers/builder.ts | 119 +++++--- ops/workers/qa.ts | 256 ++++++++++++++---- tests/integration/test_phase1_local_flow.py | 169 +++++++++++- 8 files changed, 660 insertions(+), 129 deletions(-) create mode 100644 adapters/generic-cli/capability-gate.ts create mode 100644 control/fixtures/phase1-local-approval-decision.json diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index 165d712..6a5f837 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -12,6 +12,7 @@ import type { WorkerOutput, } from "../../core/contracts/types.ts"; import { DockerWorkerLauncher, type DockerWorkerLaunchResult, materializeContainerizedRunEnvelope } from "./docker-runtime.ts"; +import { CapabilityGate } from "./capability-gate.ts"; function workerScriptForRole(rootPath: string, runRole: RunRole): string { if (runRole === "builder") { @@ -44,6 +45,12 @@ function 
formatErrorText(error: unknown): string { } function launchFailureText(launchResult: DockerWorkerLaunchResult): string { + if (launchResult.failure_status === "TIMEOUT") { + return "worker timed out before producing a complete result"; + } + if (launchResult.failure_status === "BUDGET_EXCEEDED") { + return "worker exceeded the configured budget"; + } const stderr = launchResult.stderr.trim(); if (stderr) { return stderr; @@ -158,6 +165,59 @@ function renderHandoff( ].join("\n"); } +function renderTakeoverPacket(envelope: RunEnvelope, workerOutput: WorkerOutput): string { + const approvalCardId = String((envelope.approval_context.approval_card_id ?? "n/a") as string); + const evidenceDestination = `artifacts/runs/${envelope.run_id}/takeover/result.en.md`; + const blockedStep = workerOutput.open[0] ?? workerOutput.next_action; + const reason = workerOutput.blockers[0] ?? `worker returned ${workerOutput.status}`; + return [ + "# Takeover Packet", + "", + "## Run Identity", + "", + `- job ID: ${envelope.job_id}`, + `- run ID: ${envelope.run_id}`, + `- freeze version: ${envelope.freeze_version}`, + `- story ID: ${envelope.story_id}`, + `- triggering run role: ${envelope.run_role}`, + `- triggering exit status: ${workerOutput.status}`, + "", + "## Blocked Step", + "", + `- exact blocked action: ${blockedStep}`, + `- reason automation cannot continue: ${reason}`, + "- current page, tool, or environment when relevant: generic-cli worker container", + "", + "## Required Human Action", + "", + `- concrete human task: ${workerOutput.next_action}`, + "- allowed action boundary: stay inside the active story, freeze, and run root", + "- forbidden actions: do not widen scope, mutate approvals, or bypass evidence capture", + "- expected completion signal: write the takeover outcome into the archived result record", + "", + "## Access And Approval Context", + "", + `- approval card ID: ${approvalCardId}`, + "- approved access method: local governed takeover", + "- credential 
handling rule: do not expose long-lived secrets in artifacts", + `- timeout or expiry condition: ${new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString()}`, + "", + "## Expected Result", + "", + `- expected output: ${workerOutput.next_action}`, + `- artifact destination: ${evidenceDestination}`, + `- evidence destination: ${evidenceDestination}`, + "- resume criteria: result is archived under the same run_id takeover root and referenced by the manifest", + "", + "## Resume Notes", + "", + `- next loop role: ${envelope.run_role}`, + "- next command or check: review the takeover result and decide whether to resume or terminate", + "- rollback instruction if the takeover fails: stop the job and return control to owner review", + "", + ].join("\n"); +} + function withBuilderFallbackPaths(workerOutput: WorkerOutput): WorkerOutput { return { ...workerOutput, @@ -281,9 +341,11 @@ async function writeQaFallbackArtifacts( export class GenericCliAdapter { private readonly dockerLauncher: DockerWorkerLauncher; + private readonly capabilityGate: CapabilityGate; constructor(private readonly repoRoot: string) { this.dockerLauncher = new DockerWorkerLauncher(repoRoot); + this.capabilityGate = new CapabilityGate(repoRoot); } async execute(envelope: RunEnvelope): Promise { @@ -295,25 +357,38 @@ export class GenericCliAdapter { const artifactIndexPath = join(runRoot, "metadata", "artifact-index.json"); const handoffPath = join(runRoot, "reports", "handoff.en.md"); await ensureHostWritableRunLayout(runRoot); - const materialization = await materializeContainerizedRunEnvelope(envelope); - const workerScript = workerScriptForRole( - materialization.runtime.container_paths.repo_path, - materialization.container_envelope.run_role, - ); + let taskPacketPath = envelope.task_packet_path; const startedAtDate = new Date(); let launchResult: DockerWorkerLaunchResult; - try { - launchResult = await this.dockerLauncher.launch({ - run_role: materialization.container_envelope.run_role, - image: 
materialization.runtime.image, - worker_script_path: workerScript, - envelope_path: materialization.runtime.envelope_container_path, - runtime: materialization.runtime, - time_limits: envelope.time_limits, - }); - } catch (error) { - launchResult = unexpectedLaunchResult(error); + const capabilityDecision = await this.capabilityGate.evaluate(envelope.requested_capabilities); + if (!capabilityDecision.allowed) { + launchResult = { + command: ["", ...envelope.requested_capabilities], + exitCode: 1, + stdout: "", + stderr: capabilityDecision.reason ?? "capability gate rejected the launch", + failure_status: capabilityDecision.status ?? "FAILED_POLICY", + }; + } else { + try { + const materialization = await materializeContainerizedRunEnvelope(envelope); + taskPacketPath = materialization.canonical_task_packet_path; + const workerScript = workerScriptForRole( + materialization.runtime.container_paths.repo_path, + materialization.container_envelope.run_role, + ); + launchResult = await this.dockerLauncher.launch({ + run_role: materialization.container_envelope.run_role, + image: materialization.runtime.image, + worker_script_path: workerScript, + envelope_path: materialization.runtime.envelope_container_path, + runtime: materialization.runtime, + time_limits: envelope.time_limits, + }); + } catch (error) { + launchResult = unexpectedLaunchResult(error); + } } const exitCode = launchResult.exitCode; const stdout = launchResult.stdout; @@ -388,6 +463,13 @@ export class GenericCliAdapter { const archivedHandoffPath = relativePosix(this.repoRoot, handoffPath); await writeJson(runResultPath, runResult); await writeText(handoffPath, renderHandoff(envelope, workerOutput.status, workerOutput, archivedHandoffPath)); + const takeoverPacketPath = + workerOutput.status === "AWAITING_TAKEOVER" + ? 
join(runRoot, "takeover", "takeover-packet.en.md") + : null; + if (takeoverPacketPath !== null) { + await writeText(takeoverPacketPath, renderTakeoverPacket(envelope, workerOutput)); + } const artifactIndex = await buildArtifactIndex(runRoot, envelope.run_id, envelope.run_role); await writeJson(artifactIndexPath, artifactIndex); @@ -397,11 +479,12 @@ export class GenericCliAdapter { artifactIndex, workerOutput, runRoot, - taskPacketPath: materialization.canonical_task_packet_path, + taskPacketPath, runResultPath, artifactIndexPath, commandLogPath, handoffPath, + takeoverPacketPath, }; } } diff --git a/adapters/generic-cli/capability-gate.ts b/adapters/generic-cli/capability-gate.ts new file mode 100644 index 0000000..3fa8cef --- /dev/null +++ b/adapters/generic-cli/capability-gate.ts @@ -0,0 +1,73 @@ +import { join } from "node:path"; +import { readJson, uniqueStrings } from "../../core/loop/support.ts"; +import type { RunExitStatus } from "../../core/contracts/types.ts"; + +interface CapabilityEntry { + mode: string; + approval_requirement: string; +} + +interface CapabilityManifest { + profile_id: string; + capabilities: Record; +} + +export interface CapabilityGateDecision { + allowed: boolean; + reason: string | null; + status: RunExitStatus | null; +} + +function deniedCapabilityStatus(entry: CapabilityEntry): RunExitStatus { + return entry.approval_requirement !== "none" && entry.approval_requirement !== "not-supported" + ? 
"AWAITING_APPROVAL" + : "FAILED_POLICY"; +} + +export class CapabilityGate { + private manifestPromise: Promise | null = null; + + constructor(private readonly repoRoot: string) {} + + private async loadManifest(): Promise { + if (this.manifestPromise === null) { + this.manifestPromise = readJson( + join(this.repoRoot, "adapters", "generic-cli", "adapter-capability.json"), + ); + } + return this.manifestPromise; + } + + async evaluate(requestedCapabilities: string[]): Promise { + const manifest = await this.loadManifest(); + for (const capability of uniqueStrings(requestedCapabilities)) { + const entry = manifest.capabilities[capability]; + if (entry === undefined) { + return { + allowed: false, + reason: `requested capability is undeclared for profile ${manifest.profile_id}: ${capability}`, + status: "FAILED_POLICY", + }; + } + if (entry.mode !== "allow") { + return { + allowed: false, + reason: `requested capability is denied for profile ${manifest.profile_id}: ${capability}`, + status: deniedCapabilityStatus(entry), + }; + } + if (entry.approval_requirement !== "none") { + return { + allowed: false, + reason: `requested capability needs explicit approval before launch: ${capability}`, + status: "AWAITING_APPROVAL", + }; + } + } + return { + allowed: true, + reason: null, + status: null, + }; + } +} diff --git a/control/fixtures/phase1-local-approval-decision.json b/control/fixtures/phase1-local-approval-decision.json new file mode 100644 index 0000000..c976b54 --- /dev/null +++ b/control/fixtures/phase1-local-approval-decision.json @@ -0,0 +1,12 @@ +{ + "job_id": "job-phase1-local", + "card_id": "card-phase1-local-001", + "card_type": "plan-approval", + "story_id": "STORY-PHASE1-LOCAL-001", + "freeze_version": "1", + "decision": "approve", + "actor": "local-owner", + "decided_at": "2026-04-08T00:00:00Z", + "card_state": "DECIDED", + "requested_action": "Approve the fixed local Phase 1 proof-of-concept story and allow the builder-to-QA execution flow." 
+} diff --git a/core/contracts/types.ts b/core/contracts/types.ts index 0a7a9f9..b8d5143 100644 --- a/core/contracts/types.ts +++ b/core/contracts/types.ts @@ -526,4 +526,5 @@ export interface AdapterExecutionResult { artifactIndexPath: string; commandLogPath: string; handoffPath: string; + takeoverPacketPath: string | null; } diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index 276f873..4e6f12a 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -111,19 +111,17 @@ function roleArtifacts(runRole: RunRole): { expectedArtifacts: string[]; verific }; } -function buildApprovalDecision(card: ApprovalCardSnapshot): ApprovalDecisionReceipt { - return { - job_id: card.job_id, - card_id: card.card_id, - card_type: card.card_type, - story_id: card.story_id, - freeze_version: card.freeze_version, - decision: "approve", - actor: "local-owner", - decided_at: "2026-04-08T00:00:00Z", - card_state: "DECIDED", - requested_action: card.requested_action, - }; +async function loadApprovalDecision(repoRoot: string, card: ApprovalCardSnapshot): Promise { + const decision = await readJson( + join(repoRoot, "control", "fixtures", "phase1-local-approval-decision.json"), + ); + if (decision.job_id !== card.job_id || decision.card_id !== card.card_id) { + throw new Error("phase1 approval decision fixture does not match the plan approval card"); + } + if (decision.card_state !== "DECIDED") { + throw new Error("phase1 approval decision fixture must be decided before freeze generation"); + } + return decision; } async function loadAdapterInfo(repoRoot: string): Promise<{ adapter_id: string; adapter_version: string }> { @@ -393,7 +391,8 @@ function buildManifestRunRecords( run_result_path: layout.relativeToJobRoot(execution.runResultPath), artifact_index_path: layout.relativeToJobRoot(execution.artifactIndexPath), handoff_path: layout.relativeToJobRoot(execution.handoffPath), - takeover_packet_path: null, + takeover_packet_path: + 
execution.takeoverPacketPath === null ? null : layout.relativeToJobRoot(execution.takeoverPacketPath), started_at: execution.runResult.started_at, ended_at: execution.runResult.ended_at, })); @@ -786,7 +785,6 @@ export async function runPhase1Local(repoRoot: string): Promise( join(repoRoot, "control", "fixtures", "phase1-local-approval-card.json"), ); - const decision = buildApprovalDecision(approvalCard); const layout = resolveJobRootLayout(repoRoot, approvalCard.job_id); await assertFreshJobRoot(layout); await ensureJobRootLayout(layout); @@ -819,11 +817,6 @@ export async function runPhase1Local(repoRoot: string): Promise>> = [approvalRecord]; const allExpectedArtifacts = uniqueStrings([ ...builderTaskPacket.story.expected_artifacts, ...qaTaskPacketPreview.story.expected_artifacts, @@ -865,6 +858,15 @@ export async function runPhase1Local(repoRoot: string): Promise>> = [approvalRecord]; const freezeRecord = await writeContractFreeze(layout.freezePath, layout.freezeJsonPath, { metadata: buildFreezeMetadata( diff --git a/ops/workers/builder.ts b/ops/workers/builder.ts index 58e7997..c9bfcc6 100644 --- a/ops/workers/builder.ts +++ b/ops/workers/builder.ts @@ -1,62 +1,103 @@ -import { writeJson, writeText } from "../../core/loop/support.ts"; +import { pathExists, writeJson, writeText } from "../../core/loop/support.ts"; import type { WorkerOutput } from "../../core/contracts/types.ts"; import { emitWorkerOutput, loadWorkerContext } from "./common.ts"; +const CHINESE_TEXT = /[\u4e00-\u9fff]/u; + async function main(): Promise { const { envelope, taskPacket, runRoot } = await loadWorkerContext(); const implementationSummaryPath = `${runRoot}/reports/implementation-summary.en.md`; const selfCheckPath = `${runRoot}/reports/self-check.en.md`; const builderCheckPath = `${runRoot}/evidence/test-results/builder-check.json`; + const implementationSummaryText = [ + "# Implementation Summary", + "", + `- job_id: ${envelope.job_id}`, + `- run_id: ${envelope.run_id}`, + `- 
story_id: ${taskPacket.story.story_id}`, + `- objective: ${taskPacket.story.story_objective}`, + `- freeze_version: ${taskPacket.freeze_version}`, + `- base_commit: ${taskPacket.base_commit}`, + `- requested_capabilities: ${taskPacket.requested_capabilities.join(", ")}`, + `- expected_artifacts: ${taskPacket.story.expected_artifacts.join(", ")}`, + "", + ].join("\n"); + const selfCheckText = [ + "# Self Check", + "", + "- required checks executed:", + "- approval-context", + "- language-boundary", + "- artifact-presence", + "- evidence-completeness", + "- next required action: run QA against the same story", + "", + ].join("\n"); - await writeText( - implementationSummaryPath, - [ - "# Implementation Summary", - "", - `- job_id: ${envelope.job_id}`, - `- run_id: ${envelope.run_id}`, - `- story_id: ${taskPacket.story.story_id}`, - `- objective: ${taskPacket.story.story_objective}`, - "- completed work:", - "- materialized the fixed local Phase 1 builder slice", - "- wrote the required builder reports and evidence under the run root", - "", - ].join("\n"), - ); + await writeText(implementationSummaryPath, implementationSummaryText); + await writeText(selfCheckPath, selfCheckText); - await writeText( - selfCheckPath, - [ - "# Self Check", - "", - "- required checks executed:", - "- scope-compliance", - "- artifact-presence", - "- evidence-completeness", - "- next required action: run QA against the same story", - "", - ].join("\n"), - ); + const producedArtifacts = [ + "reports/implementation-summary.en.md", + "reports/self-check.en.md", + ]; + const missingArtifacts: string[] = []; + for (const relativePath of producedArtifacts) { + if (!(await pathExists(`${runRoot}/${relativePath}`))) { + missingArtifacts.push(relativePath); + } + } + const languageViolations = [ + ["reports/implementation-summary.en.md", implementationSummaryText], + ["reports/self-check.en.md", selfCheckText], + ] + .filter(([, content]) => CHINESE_TEXT.test(content)) + .map(([relativePath]) => 
relativePath); + const approvalState = String(taskPacket.approval_context.approval_state ?? ""); + const approvalCardId = String(taskPacket.approval_context.approval_card_id ?? ""); + const blockers = [ + ...(approvalState === "DECIDED" ? [] : [`approval context is not decided: ${approvalState || "missing"}`]), + ...missingArtifacts.map((relativePath) => `required builder artifact missing after self-check: ${relativePath}`), + ...languageViolations.map((relativePath) => `non-English repository-facing content: ${relativePath}`), + ]; + const status: WorkerOutput["status"] = blockers.length === 0 ? "SUCCESS" : "FAILED_POLICY"; await writeJson(builderCheckPath, { run_id: envelope.run_id, run_role: envelope.run_role, story_id: taskPacket.story.story_id, - status: "prepared-for-qa", + status: status === "SUCCESS" ? "prepared-for-qa" : "blocked", + approval_context: { + approval_card_id: approvalCardId, + approval_state: approvalState || "missing", + }, + self_checks: { + approval_context: approvalState === "DECIDED" ? "pass" : "fail", + language_boundary: languageViolations.length === 0 ? "pass" : "fail", + artifact_presence: missingArtifacts.length === 0 ? "pass" : "fail", + evidence_completeness: missingArtifacts.length === 0 ? "pass" : "fail", + }, + verification_targets: taskPacket.story.verification_targets, + produced_artifacts: [...producedArtifacts, "evidence/test-results/builder-check.json"], + missing_artifacts: missingArtifacts, + language_violations: languageViolations, checked_items: taskPacket.story.mandatory_checks, }); const output: WorkerOutput = { - status: "SUCCESS", - completed: [ - "Read the fixed local task packet.", - "Wrote the builder reports and local verification evidence.", - ], - open: ["Run QA against the same story."], - blockers: [], - next_action: "run QA", + status, + completed: + status === "SUCCESS" + ? 
[ + "Read the fixed local task packet and approval context.", + "Wrote the builder reports and local verification evidence.", + ] + : ["Stopped after the builder self-check found contract or policy violations."], + open: status === "SUCCESS" ? ["Run QA against the same story."] : ["Resolve the builder blockers before re-running QA."], + blockers, + next_action: status === "SUCCESS" ? "run QA" : "stop and review builder blockers", acceptance_status: "blocked", - mandatory_check_status: "blocked", + mandatory_check_status: status === "SUCCESS" ? "pass" : "fail", evidence_paths: [ "reports/implementation-summary.en.md", "reports/self-check.en.md", diff --git a/ops/workers/qa.ts b/ops/workers/qa.ts index d13789d..5cb0e96 100644 --- a/ops/workers/qa.ts +++ b/ops/workers/qa.ts @@ -1,13 +1,39 @@ -import { readJson, pathExists, writeJson, writeText } from "../../core/loop/support.ts"; -import type { RunResult, WorkerOutput } from "../../core/contracts/types.ts"; +import { collectRelativeFiles, pathExists, readJson, readText, writeJson, writeText } from "../../core/loop/support.ts"; +import type { ArtifactIndex, RunResult, TaskPacket, WorkerOutput } from "../../core/contracts/types.ts"; import { emitWorkerOutput, loadWorkerContext } from "./common.ts"; +const CHINESE_TEXT = /[\u4e00-\u9fff]/u; + +function countAcceptanceStatuses(verdicts: Record): { + pass: number; + fail: number; + blocked: number; + total: number; +} { + const counts = { pass: 0, fail: 0, blocked: 0, total: 0 }; + for (const verdict of Object.values(verdicts)) { + counts.total += 1; + counts[verdict.status] += 1; + } + return counts; +} + async function main(): Promise { const { envelope, taskPacket, runRoot } = await loadWorkerContext(); const builderRunRoot = String(envelope.trace_context.builder_run_root ?? ""); const builderRunResultPath = String(envelope.trace_context.builder_run_result_path ?? 
""); + const builderTaskPacketPath = `${builderRunRoot}/metadata/task-packet.en.json`; + const builderArtifactIndexPath = `${builderRunRoot}/metadata/artifact-index.json`; - const requiredBuilderArtifacts = [ + const qaReportPath = `${runRoot}/reports/qa-report.en.md`; + const qaCheckPath = `${runRoot}/evidence/test-results/qa-check.json`; + const qaVerdictPath = `${runRoot}/metadata/qa-verdict.json`; + const fixbackItemsPath = `${runRoot}/reports/fixback-items.en.md`; + const builderTaskPacket = + builderTaskPacketPath && (await pathExists(builderTaskPacketPath)) + ? await readJson(builderTaskPacketPath) + : null; + const requiredBuilderArtifacts = builderTaskPacket?.story.expected_artifacts ?? [ "metadata/task-packet.en.json", "metadata/timings.json", "metadata/run-result.json", @@ -19,71 +45,192 @@ async function main(): Promise { "reports/self-check.en.md", "evidence/test-results/builder-check.json", ]; - - const missingArtifacts: string[] = []; - for (const relativePath of requiredBuilderArtifacts) { - const absolutePath = `${builderRunRoot}/${relativePath}`; - if (!(await pathExists(absolutePath))) { - missingArtifacts.push(relativePath); - } - } - + const builderProducedArtifacts = builderRunRoot && (await pathExists(builderRunRoot)) ? await collectRelativeFiles(builderRunRoot) : []; + const missingArtifacts = requiredBuilderArtifacts.filter((relativePath) => !builderProducedArtifacts.includes(relativePath)); + const builderArtifactIndex = + builderArtifactIndexPath && (await pathExists(builderArtifactIndexPath)) + ? await readJson(builderArtifactIndexPath) + : null; + const indexedArtifacts = new Set((builderArtifactIndex?.artifacts ?? 
[]).map((entry) => entry.path)); + const unindexedArtifacts = builderProducedArtifacts.filter((relativePath) => !indexedArtifacts.has(relativePath)); + const undeclaredArtifacts = builderProducedArtifacts.filter((relativePath) => !requiredBuilderArtifacts.includes(relativePath)); let builderStatus = "UNKNOWN"; + let builderRunResult: RunResult | null = null; if (builderRunResultPath && (await pathExists(builderRunResultPath))) { - const builderResult = await readJson(builderRunResultPath); - builderStatus = builderResult.status; + builderRunResult = await readJson(builderRunResultPath); + builderStatus = builderRunResult.status; } else { missingArtifacts.push("metadata/run-result.json"); } + const reproducibilityIssues = [ + ...(builderTaskPacket === null ? ["missing builder task packet"] : []), + ...(builderTaskPacket !== null && builderTaskPacket.story.story_id !== taskPacket.story.story_id + ? ["builder task packet story does not match QA story"] + : []), + ...(builderTaskPacket !== null && builderTaskPacket.freeze_version !== taskPacket.freeze_version + ? ["builder task packet freeze version does not match QA freeze"] + : []), + ...(builderTaskPacket !== null && builderTaskPacket.base_commit !== taskPacket.base_commit + ? ["builder task packet base commit does not match QA base commit"] + : []), + ...(builderRunResult !== null && builderRunResult.story_id !== taskPacket.story.story_id + ? 
["builder run result story does not match QA story"] + : []), + ]; + const textSurfaces = ( + await Promise.all( + ["reports/handoff.en.md", "reports/implementation-summary.en.md", "reports/self-check.en.md"].map(async (relativePath) => { + const absolutePath = `${builderRunRoot}/${relativePath}`; + if (!(await pathExists(absolutePath))) { + return null; + } + return { + relativePath, + content: await readText(absolutePath), + }; + }), + ) + ).filter((value): value is { relativePath: string; content: string } => value !== null); + const languageViolations = textSurfaces + .filter((surface) => CHINESE_TEXT.test(surface.content)) + .map((surface) => surface.relativePath); + const fixbackItems = [ + ...missingArtifacts.map((relativePath) => `Restore builder artifact: ${relativePath}`), + ...unindexedArtifacts.map((relativePath) => `Add artifact-index entry for builder output: ${relativePath}`), + ...undeclaredArtifacts.map((relativePath) => `Move or remove out-of-scope builder artifact: ${relativePath}`), + ...reproducibilityIssues.map((issue) => `Restore reproducibility contract: ${issue}`), + ...languageViolations.map((relativePath) => `Rewrite repository-facing output in English: ${relativePath}`), + ...(builderStatus === "SUCCESS" ? [] : [`Builder did not finish successfully: ${builderStatus}`]), + ]; + const status: WorkerOutput["status"] = + undeclaredArtifacts.length > 0 + ? "CHANGE_REQUEST_REQUIRED" + : fixbackItems.length === 0 + ? "SUCCESS" + : "FIXBACK_REQUIRED"; + const acceptanceVerdicts = Object.fromEntries( + taskPacket.story.acceptance_ids.map((acceptanceId, index) => { + const passEvidence = + index === 0 + ? ["reports/implementation-summary.en.md", "evidence/test-results/builder-check.json"] + : ["reports/qa-report.en.md", "metadata/qa-verdict.json", "evidence/test-results/qa-check.json"]; + const failEvidence = status === "CHANGE_REQUEST_REQUIRED" ? 
["reports/fixback-items.en.md", "reports/qa-report.en.md"] : passEvidence; + return [ + acceptanceId, + { + status: status === "SUCCESS" ? "pass" : status === "CHANGE_REQUEST_REQUIRED" ? "blocked" : "fail", + evidence_paths: status === "SUCCESS" ? passEvidence : failEvidence, + }, + ]; + }), + ) as Record; + const mandatoryChecks = { + "scope-compliance": { + status: undeclaredArtifacts.length === 0 ? "pass" : "fail", + evidence_paths: + undeclaredArtifacts.length === 0 ? ["metadata/qa-verdict.json"] : ["reports/fixback-items.en.md", "reports/qa-report.en.md"], + }, + "artifact-presence": { + status: missingArtifacts.length === 0 ? "pass" : "fail", + evidence_paths: + missingArtifacts.length === 0 + ? ["reports/qa-report.en.md", "metadata/qa-verdict.json"] + : ["reports/fixback-items.en.md", "reports/qa-report.en.md"], + }, + "evidence-completeness": { + status: unindexedArtifacts.length === 0 ? "pass" : "fail", + evidence_paths: + unindexedArtifacts.length === 0 + ? ["evidence/test-results/qa-check.json", "metadata/qa-verdict.json"] + : ["reports/fixback-items.en.md", "reports/qa-report.en.md"], + }, + "acceptance-closure": { + status: Object.values(acceptanceVerdicts).every((entry) => entry.status === "pass") ? "pass" : "fail", + evidence_paths: ["metadata/qa-verdict.json", "reports/qa-report.en.md"], + }, + }; + const acceptanceClosure = countAcceptanceStatuses(acceptanceVerdicts); + const qaReportText = [ + "# QA Report", + "", + `- job_id: ${envelope.job_id}`, + `- run_id: ${envelope.run_id}`, + `- story_id: ${taskPacket.story.story_id}`, + `- verified builder run root: ${builderRunRoot.replaceAll("\\", "/")}`, + `- builder run status: ${builderStatus}`, + `- QA verdict: ${status}`, + "", + "## Contract Checks", + "", + `- scope-compliance: ${mandatoryChecks["scope-compliance"].status}`, + `- build-or-install reproducibility: ${reproducibilityIssues.length === 0 ? "pass" : "fail"}`, + `- language boundary compliance: ${languageViolations.length === 0 ? 
"pass" : "fail"}`, + `- evidence completeness: ${mandatoryChecks["evidence-completeness"].status}`, + `- artifact presence: ${mandatoryChecks["artifact-presence"].status}`, + `- acceptance closure: ${mandatoryChecks["acceptance-closure"].status}`, + "", + "- checked artifacts:", + ...requiredBuilderArtifacts.map((value) => `- ${value}`), + "", + "- missing artifacts:", + ...(missingArtifacts.length === 0 ? ["- none"] : missingArtifacts.map((value) => `- ${value}`)), + "", + "- undeclared builder artifacts:", + ...(undeclaredArtifacts.length === 0 ? ["- none"] : undeclaredArtifacts.map((value) => `- ${value}`)), + "", + "- language violations:", + ...(languageViolations.length === 0 ? ["- none"] : languageViolations.map((value) => `- ${value}`)), + "", + "- reproducibility issues:", + ...(reproducibilityIssues.length === 0 ? ["- none"] : reproducibilityIssues.map((value) => `- ${value}`)), + "", + ].join("\n"); - const status = missingArtifacts.length === 0 && builderStatus === "SUCCESS" ? "SUCCESS" : "FIXBACK_REQUIRED"; - const qaReportPath = `${runRoot}/reports/qa-report.en.md`; - const qaCheckPath = `${runRoot}/evidence/test-results/qa-check.json`; - const qaVerdictPath = `${runRoot}/metadata/qa-verdict.json`; - const fixbackItemsPath = `${runRoot}/reports/fixback-items.en.md`; - - await writeText( - qaReportPath, - [ - "# QA Report", - "", - `- job_id: ${envelope.job_id}`, - `- run_id: ${envelope.run_id}`, - `- story_id: ${taskPacket.story.story_id}`, - `- verified builder run root: ${builderRunRoot.replaceAll("\\", "/")}`, - `- builder run status: ${builderStatus}`, - `- QA verdict: ${status}`, - "- checked artifacts:", - ...requiredBuilderArtifacts.map((value) => `- ${value}`), - "", - "- missing artifacts:", - ...(missingArtifacts.length === 0 ? 
["- none"] : missingArtifacts.map((value) => `- ${value}`)), - "", - ].join("\n"), - ); + await writeText(qaReportPath, qaReportText); await writeJson(qaCheckPath, { run_id: envelope.run_id, run_role: envelope.run_role, story_id: taskPacket.story.story_id, + scope_validation: { + undeclared_builder_artifacts: undeclaredArtifacts, + unindexed_builder_artifacts: unindexedArtifacts, + }, + reproducibility: { + issues: reproducibilityIssues, + builder_task_packet_path: builderTaskPacketPath, + builder_run_result_path: builderRunResultPath, + }, + language_validation: { + violations: languageViolations, + checked_surfaces: textSurfaces.map((surface) => surface.relativePath), + }, verified_builder_artifacts: requiredBuilderArtifacts, missing_builder_artifacts: missingArtifacts, builder_status: builderStatus, }); - const acceptanceClosure = - status === "SUCCESS" - ? { pass: taskPacket.story.acceptance_ids.length, fail: 0, blocked: 0, total: taskPacket.story.acceptance_ids.length } - : { pass: 0, fail: taskPacket.story.acceptance_ids.length, blocked: 0, total: taskPacket.story.acceptance_ids.length }; - await writeJson(qaVerdictPath, { story_id: taskPacket.story.story_id, status_family: "run_exit", status, acceptance_closure: acceptanceClosure, + acceptance_verdicts: acceptanceVerdicts, + mandatory_checks: mandatoryChecks, + scope_validation: { + undeclared_builder_artifacts: undeclaredArtifacts, + unindexed_builder_artifacts: unindexedArtifacts, + }, + reproducibility: { + status: reproducibilityIssues.length === 0 ? "pass" : "fail", + issues: reproducibilityIssues, + }, + language_validation: { + status: languageViolations.length === 0 ? "pass" : "fail", + violations: languageViolations, + }, }); - const fixbackItems = missingArtifacts.map((value) => `Restore builder artifact: ${value}`); if (fixbackItems.length > 0) { await writeText( fixbackItemsPath, @@ -96,14 +243,21 @@ async function main(): Promise { completed: status === "SUCCESS" ? 
[ - "Verified the builder artifact bundle against the same approved story.", - "Closed acceptance and mandatory checks with QA evidence.", + "Verified the builder artifact bundle against the same approved story and freeze.", + "Closed acceptance, scope, reproducibility, language, and evidence checks with QA evidence.", ] - : ["Ran QA against the builder bundle and found missing required artifacts."], - open: status === "SUCCESS" ? ["Archive the local proof-of-concept story."] : ["Run an in-scope fixback for the missing builder artifacts."], + : status === "CHANGE_REQUEST_REQUIRED" + ? ["Ran QA against the builder bundle and found scope drift."] + : ["Ran QA against the builder bundle and found fixback work inside the active scope."], + open: + status === "SUCCESS" + ? ["Archive the local proof-of-concept story."] + : status === "CHANGE_REQUEST_REQUIRED" + ? ["Create a change request before continuing."] + : ["Run an in-scope fixback for the active QA findings."], blockers: fixbackItems, - next_action: status === "SUCCESS" ? "archive" : "enter fixback", - acceptance_status: status === "SUCCESS" ? "pass" : "fail", + next_action: status === "SUCCESS" ? "archive" : status === "CHANGE_REQUEST_REQUIRED" ? "request change" : "enter fixback", + acceptance_status: status === "SUCCESS" ? "pass" : status === "CHANGE_REQUEST_REQUIRED" ? "blocked" : "fail", mandatory_check_status: status === "SUCCESS" ? 
"pass" : "fail", evidence_paths: [ "reports/qa-report.en.md", diff --git a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index b68dd1f..9dce852 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -19,13 +19,15 @@ def export_repo(tmp_path: Path) -> Path: repo_root = tmp_path / "repo" repo_root.mkdir() tracked_files = subprocess.run( - ["git", "ls-files"], + ["git", "ls-files", "--cached", "--others", "--exclude-standard"], cwd=REPO_ROOT, capture_output=True, text=True, check=True, ).stdout.splitlines() for relative_path in tracked_files: + if relative_path.startswith(".omx/"): + continue source = REPO_ROOT / relative_path target = repo_root / relative_path target.parent.mkdir(parents=True, exist_ok=True) @@ -304,6 +306,17 @@ def main() -> int: time.sleep(float(os.environ.get("CODINGCLAW_FAKE_DOCKER_SLEEP", "1"))) if mode == "qa_timeout" and envelope["run_role"] == "qa": time.sleep(float(os.environ.get("CODINGCLAW_FAKE_DOCKER_SLEEP", "1"))) + if mode == "real_worker": + worker_script = map_path(argv[-2], mounts) + container_envelope_path = map_path(argv[-1], mounts) + host_envelope_path = envelope.get("container_runtime", {}).get( + "envelope_host_path", + container_envelope_path, + ) + result = subprocess.run(["bun", worker_script, host_envelope_path], capture_output=True, text=True) + sys.stdout.write(result.stdout) + sys.stderr.write(result.stderr) + return result.returncode if envelope["run_role"] == "builder": output = create_builder_outputs(envelope, task_packet, artifact_root) @@ -317,6 +330,11 @@ def main() -> int: output["open"] = ["credentials required"] output["blockers"] = ["A credential is required before continuing"] output["next_action"] = "provide the requested credential or choose an alternative" + if mode == "builder_awaiting_takeover": + output["status"] = "AWAITING_TAKEOVER" + output["open"] = ["manual takeover required"] + output["blockers"] = 
["A local GUI step requires governed takeover"] + output["next_action"] = "open the takeover session and archive the result" if mode == "builder_head_shift": mutate_head(map_path(envelope["repo_path"], mounts)) else: @@ -351,6 +369,24 @@ def write_time_limit_minutes(repo_root: Path, minutes: float) -> None: fixture_path.write_text(json.dumps(fixture, indent=2) + "\n", encoding="utf-8") +def write_requested_capabilities(repo_root: Path, requested_capabilities: list[str]) -> None: + for relative_path in [ + "control/fixtures/phase1-local-run-envelope.json", + "control/fixtures/phase1-local-task-packet.en.json", + ]: + fixture_path = repo_root / relative_path + fixture = json.loads(fixture_path.read_text(encoding="utf-8")) + fixture["requested_capabilities"] = requested_capabilities + fixture_path.write_text(json.dumps(fixture, indent=2) + "\n", encoding="utf-8") + + +def write_plan_approval_decision(repo_root: Path, decision: str) -> None: + decision_path = repo_root / "control" / "fixtures" / "phase1-local-approval-decision.json" + payload = json.loads(decision_path.read_text(encoding="utf-8")) + payload["decision"] = decision + decision_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + + def load_capture(capture_dir: Path, run_role: str) -> dict: matches = sorted(capture_dir.glob(f"{run_role}-*.json")) assert matches @@ -413,6 +449,24 @@ def assert_recovery_state_mirror(job_root: Path, recovery_card_id: str) -> None: assert "owner review is required" in risk_register.lower() +def assert_takeover_pause_context(manifest: dict, job_root: Path, expected_status: str) -> None: + pause_context = manifest["pause_context"] + assert pause_context["is_paused"] is True + assert pause_context["pause_reason"] == expected_status + assert pause_context["waiting_on"] == "takeover" + assert pause_context["resume_action"] + assert pause_context["paused_at"] + assert pause_context["related_card_id"] + assert pause_context["expires_at"] + + recovery_record = 
manifest["approvals"][-1] + assert recovery_record["card_id"] == pause_context["related_card_id"] + assert recovery_record["card_type"] == "recovery" + assert recovery_record["card_state"] == "PENDING" + recovery_card = json.loads((job_root / "approvals" / recovery_record["card_id"] / "approval-card.json").read_text(encoding="utf-8")) + assert recovery_card["recovery_context"]["resume_gate"] == "takeover" + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_rerun_rejects_mutating_existing_archive(tmp_path): @@ -438,6 +492,25 @@ def test_phase1_local_rerun_rejects_mutating_existing_archive(tmp_path): assert run_dirs_before == sorted(path.name for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_requires_approved_plan_decision_before_freeze_and_execution(tmp_path): + repo_root = export_repo(tmp_path) + write_plan_approval_decision(repo_root, "reject") + result = run_phase1(repo_root) + combined_output = "\n".join(part for part in [result.stdout, result.stderr] if part) + + assert result.returncode != 0 + assert "requires an approved Development Plan" in combined_output + + job_root = repo_root / "jobs" / "job-phase1-local" + assert (job_root / "DEVELOPMENT_PLAN.en.md").exists() + assert (job_root / "approvals" / "card-phase1-local-001" / "approval-card.json").exists() + assert (job_root / "approvals" / "card-phase1-local-001" / "decision.json").exists() + assert not (job_root / "contract-freeze.json").exists() + assert not (job_root / "job-manifest.json").exists() + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_freeze_digest_captures_repo_dependency_inputs(tmp_path): @@ -451,6 +524,38 @@ def test_phase1_local_freeze_digest_captures_repo_dependency_inputs(tmp_path): assert 
freeze["dependency_snapshot_digest"] == dependency_snapshot_digest(repo_root) +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_success_records_contract_checks_and_acceptance_mapping(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": "real_worker", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + run_roots = sorted(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + builder_run_root = next(path for path in run_roots if path.name.startswith("run-builder-")) + qa_run_root = next(path for path in run_roots if path.name.startswith("run-qa-")) + builder_check = json.loads((builder_run_root / "evidence" / "test-results" / "builder-check.json").read_text(encoding="utf-8")) + qa_verdict = json.loads((qa_run_root / "metadata" / "qa-verdict.json").read_text(encoding="utf-8")) + + assert builder_check["self_checks"]["approval_context"] == "pass" + assert builder_check["self_checks"]["artifact_presence"] == "pass" + assert builder_check["language_violations"] == [] + assert sorted(qa_verdict["acceptance_verdicts"]) == ["ACC-PHASE1-BUILDER", "ACC-PHASE1-QA"] + assert qa_verdict["reproducibility"]["status"] == "pass" + assert qa_verdict["language_validation"]["status"] == "pass" + assert qa_verdict["scope_validation"]["undeclared_builder_artifacts"] == [] + assert qa_verdict["mandatory_checks"]["acceptance-closure"]["status"] == "pass" + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_builder_container_materialization_uses_read_only_inputs_and_container_paths(tmp_path): @@ -639,6 +744,65 @@ def 
test_phase1_local_waiting_recovery_card_preserves_approval_request_details( assert approval_request == recovery_record["approval_request"] +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_capability_gate_stops_undeclared_or_denied_requests_before_worker_launch(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + capture_dir = tmp_path / "captures" + write_requested_capabilities(repo_root, ["filesystem_read", "filesystem_write", "shell_command", "browser"]) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR": str(capture_dir), + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + run_root = next(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + command_log = (run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") + + assert [run["run_role"] for run in manifest["runs"]] == ["builder"] + assert [run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_POLICY"] + assert manifest["status"] == "AWAITING_OWNER" + assert " filesystem_read filesystem_write shell_command browser" in command_log + assert not capture_dir.exists() or not list(capture_dir.iterdir()) + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_takeover_run_writes_takeover_packet_and_manifest_reference(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": "builder_awaiting_takeover", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / 
"jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + run_record = manifest["runs"][0] + takeover_packet_path = job_root / run_record["takeover_packet_path"] + artifact_index = json.loads((job_root / run_record["artifact_index_path"]).read_text(encoding="utf-8")) + indexed_paths = {entry["path"] for entry in artifact_index["artifacts"]} + + assert [run["run_exit_status"] for run in manifest["runs"]] == ["AWAITING_TAKEOVER"] + assert manifest["status"] == "AWAITING_TAKEOVER" + assert takeover_packet_path.exists() + assert run_record["takeover_packet_path"].endswith("takeover/takeover-packet.en.md") + assert "takeover/takeover-packet.en.md" in indexed_paths + assert_takeover_pause_context(manifest, job_root, "AWAITING_TAKEOVER") + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_builder_failed_infra_command_log_records_docker_run(tmp_path): @@ -648,7 +812,8 @@ def test_phase1_local_builder_failed_infra_command_log_records_docker_run(tmp_pa assert result.returncode == 0, result.stderr or result.stdout job_root = repo_root / "jobs" / "job-phase1-local" - run_root = next(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + run_roots = sorted(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + run_root = next(path for path in run_roots if path.name.startswith("run-builder-")) command_log = (run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") assert "command: does-not-exist run --rm --network none" in command_log From cc766eb1ce5a43fe28b27d2ba88b4a3a10be7131 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Mon, 13 Apr 2026 17:45:23 +0800 Subject: [PATCH 15/19] fix: harden phase1 recovery control flow --- adapters/generic-cli/adapter.ts | 11 +++++- core/loop/phase1-local-flow.ts | 64 +++++++++++++++++++++++++++++++++ 2 files changed, 74 
insertions(+), 1 deletion(-) diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index 6a5f837..ac4c8cd 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -361,7 +361,16 @@ export class GenericCliAdapter { const startedAtDate = new Date(); let launchResult: DockerWorkerLaunchResult; - const capabilityDecision = await this.capabilityGate.evaluate(envelope.requested_capabilities); + let capabilityDecision; + try { + capabilityDecision = await this.capabilityGate.evaluate(envelope.requested_capabilities); + } catch (error) { + capabilityDecision = { + allowed: false, + reason: `capability gate could not load adapter policy: ${formatErrorText(error)}`, + status: "FAILED_POLICY", + }; + } if (!capabilityDecision.allowed) { launchResult = { command: ["", ...envelope.requested_capabilities], diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index 4e6f12a..4f6e7c5 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -585,6 +585,64 @@ function buildRecoveryCard(taskPacket: TaskPacket, execution: AdapterExecutionRe }; } +async function writeRecoveryTakeoverPacket( + taskPacket: TaskPacket, + execution: AdapterExecutionResult, + approvalRecord: Awaited>, +): Promise { + const takeoverPacketPath = execution.takeoverPacketPath ?? join(execution.runRoot, "takeover", "takeover-packet.en.md"); + execution.takeoverPacketPath = takeoverPacketPath; + await writeText( + takeoverPacketPath, + [ + "# Takeover Packet", + "", + "## Run Identity", + "", + `- job ID: ${taskPacket.job_id}`, + `- run ID: ${execution.runResult.run_id}`, + `- freeze version: ${taskPacket.freeze_version}`, + `- story ID: ${taskPacket.story.story_id}`, + `- triggering run role: ${execution.runResult.run_role}`, + `- triggering exit status: ${execution.runResult.status}`, + "", + "## Blocked Step", + "", + `- exact blocked action: ${execution.workerOutput.open[0] ?? 
execution.workerOutput.next_action}`, + `- reason automation cannot continue: ${approvalRequestReason(execution)}`, + "- current page, tool, or environment when relevant: generic-cli worker container", + "", + "## Required Human Action", + "", + `- concrete human task: ${execution.workerOutput.next_action}`, + "- allowed action boundary: stay inside the active story, freeze, and archived run root", + "- forbidden actions: do not widen scope, rewrite approvals, or bypass evidence capture", + "- expected completion signal: archive the takeover outcome under the same run_id takeover root", + "", + "## Access And Approval Context", + "", + `- approval card ID: ${approvalRecord.card_id}`, + "- approved access method: governed local takeover", + "- credential handling rule: do not place long-lived secrets in takeover artifacts", + `- timeout or expiry condition: ${approvalRecord.timeout_at}`, + "", + "## Expected Result", + "", + `- expected output: ${execution.workerOutput.next_action}`, + `- artifact destination: artifacts/runs/${execution.runResult.run_id}/takeover/result.en.md`, + `- evidence destination: artifacts/runs/${execution.runResult.run_id}/takeover/result.en.md`, + `- resume criteria: update pause context via ${approvalRecord.card_id} and reference the same run_id takeover root in the manifest`, + "", + "## Resume Notes", + "", + `- next loop role: ${execution.runResult.run_role}`, + "- next command or check: review the archived takeover result and decide whether to resume or terminate", + "- rollback instruction if the takeover fails: stop the job and return control to owner review", + "", + ].join("\n"), + ); +} + function buildPauseContext( execution: AdapterExecutionResult | null, approvalRecord: Awaited> | null, @@ -926,6 +984,9 @@ export async function runPhase1Local(repoRoot: string): Promise Date: Mon, 13 Apr 2026 17:46:59 +0800 Subject: [PATCH 16/19] test: cover phase1 recovery edge cases --- tests/integration/test_phase1_local_flow.py | 34 
+++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index 9dce852..8e1083c 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -380,6 +380,10 @@ def write_requested_capabilities(repo_root: Path, requested_capabilities: list[s fixture_path.write_text(json.dumps(fixture, indent=2) + "\n", encoding="utf-8") +def remove_capability_manifest(repo_root: Path) -> None: + (repo_root / "adapters" / "generic-cli" / "adapter-capability.json").unlink() + + def write_plan_approval_decision(repo_root: Path, decision: str) -> None: decision_path = repo_root / "control" / "fixtures" / "phase1-local-approval-decision.json" payload = json.loads(decision_path.read_text(encoding="utf-8")) @@ -773,6 +777,34 @@ def test_phase1_local_capability_gate_stops_undeclared_or_denied_requests_before assert not capture_dir.exists() or not list(capture_dir.iterdir()) +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_capability_gate_manifest_load_failure_returns_failed_policy(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + capture_dir = tmp_path / "captures" + remove_capability_manifest(repo_root) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR": str(capture_dir), + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + run_root = next(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + command_log = (run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") + + assert [run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_POLICY"] + assert 
manifest["status"] == "AWAITING_OWNER" + assert "capability gate could not load adapter policy" in command_log + assert not capture_dir.exists() or not list(capture_dir.iterdir()) + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_takeover_run_writes_takeover_packet_and_manifest_reference(tmp_path): @@ -792,6 +824,7 @@ def test_phase1_local_takeover_run_writes_takeover_packet_and_manifest_reference manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) run_record = manifest["runs"][0] takeover_packet_path = job_root / run_record["takeover_packet_path"] + takeover_packet = takeover_packet_path.read_text(encoding="utf-8") artifact_index = json.loads((job_root / run_record["artifact_index_path"]).read_text(encoding="utf-8")) indexed_paths = {entry["path"] for entry in artifact_index["artifacts"]} @@ -801,6 +834,7 @@ def test_phase1_local_takeover_run_writes_takeover_packet_and_manifest_reference assert run_record["takeover_packet_path"].endswith("takeover/takeover-packet.en.md") assert "takeover/takeover-packet.en.md" in indexed_paths assert_takeover_pause_context(manifest, job_root, "AWAITING_TAKEOVER") + assert manifest["pause_context"]["related_card_id"] in takeover_packet @pytest.mark.integration From 8b51e56daa4567cb4a3dcbeeef711285810b3aaa Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Mon, 13 Apr 2026 17:56:49 +0800 Subject: [PATCH 17/19] fix: fix docs and bugs --- adapters/generic-cli/adapter.ts | 56 -- core/loop/phase1-local-flow.ts | 4 + docs/GUI_EXCEPTION_POLICY.md | 2 +- docs/LIGHTWEIGHT_RUNTIME_PLAN.md | 4 +- docs/LOOP_SPEC.md | 3 +- docs/SYSTEM_BLUEPRINT.md | 12 +- docs/UBUNTU_GUI_RUNTIME_PLAN.md | 15 +- docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.md | 395 +++++++++++++ .../TEAM_WORKFLOW_MIGRATION_PLAN.zh-CN.md | 526 ++++++++++++++++++ tests/integration/test_phase1_local_flow.py | 3 + 10 files changed, 948 insertions(+), 72 
deletions(-) create mode 100644 docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.md create mode 100644 docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.zh-CN.md diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index ac4c8cd..ea4df35 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -165,59 +165,6 @@ function renderHandoff( ].join("\n"); } -function renderTakeoverPacket(envelope: RunEnvelope, workerOutput: WorkerOutput): string { - const approvalCardId = String((envelope.approval_context.approval_card_id ?? "n/a") as string); - const evidenceDestination = `artifacts/runs/${envelope.run_id}/takeover/result.en.md`; - const blockedStep = workerOutput.open[0] ?? workerOutput.next_action; - const reason = workerOutput.blockers[0] ?? `worker returned ${workerOutput.status}`; - return [ - "# Takeover Packet", - "", - "## Run Identity", - "", - `- job ID: ${envelope.job_id}`, - `- run ID: ${envelope.run_id}`, - `- freeze version: ${envelope.freeze_version}`, - `- story ID: ${envelope.story_id}`, - `- triggering run role: ${envelope.run_role}`, - `- triggering exit status: ${workerOutput.status}`, - "", - "## Blocked Step", - "", - `- exact blocked action: ${blockedStep}`, - `- reason automation cannot continue: ${reason}`, - "- current page, tool, or environment when relevant: generic-cli worker container", - "", - "## Required Human Action", - "", - `- concrete human task: ${workerOutput.next_action}`, - "- allowed action boundary: stay inside the active story, freeze, and run root", - "- forbidden actions: do not widen scope, mutate approvals, or bypass evidence capture", - "- expected completion signal: write the takeover outcome into the archived result record", - "", - "## Access And Approval Context", - "", - `- approval card ID: ${approvalCardId}`, - "- approved access method: local governed takeover", - "- credential handling rule: do not expose long-lived secrets in artifacts", - `- timeout or expiry condition: 
${new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString()}`, - "", - "## Expected Result", - "", - `- expected output: ${workerOutput.next_action}`, - `- artifact destination: ${evidenceDestination}`, - `- evidence destination: ${evidenceDestination}`, - "- resume criteria: result is archived under the same run_id takeover root and referenced by the manifest", - "", - "## Resume Notes", - "", - `- next loop role: ${envelope.run_role}`, - "- next command or check: review the takeover result and decide whether to resume or terminate", - "- rollback instruction if the takeover fails: stop the job and return control to owner review", - "", - ].join("\n"); -} - function withBuilderFallbackPaths(workerOutput: WorkerOutput): WorkerOutput { return { ...workerOutput, @@ -476,9 +423,6 @@ export class GenericCliAdapter { workerOutput.status === "AWAITING_TAKEOVER" ? join(runRoot, "takeover", "takeover-packet.en.md") : null; - if (takeoverPacketPath !== null) { - await writeText(takeoverPacketPath, renderTakeoverPacket(envelope, workerOutput)); - } const artifactIndex = await buildArtifactIndex(runRoot, envelope.run_id, envelope.run_role); await writeJson(artifactIndexPath, artifactIndex); diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index 4f6e7c5..1a448ea 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -1,5 +1,6 @@ import { join } from "node:path"; import { GenericCliAdapter } from "../../adapters/generic-cli/adapter.ts"; +import { buildArtifactIndex } from "../../ops/archive/artifact-index.ts"; import { writeApprovalArchive } from "../../ops/archive/approvals.ts"; import { buildEnvironmentSnapshotMetadata, @@ -641,6 +642,9 @@ async function writeRecoveryTakeoverPacket( "", ].join("\n"), ); + const artifactIndex = await buildArtifactIndex(execution.runRoot, execution.runResult.run_id, execution.runResult.run_role); + execution.artifactIndex = artifactIndex; + await writeJson(execution.artifactIndexPath, 
artifactIndex); } function buildPauseContext( diff --git a/docs/GUI_EXCEPTION_POLICY.md b/docs/GUI_EXCEPTION_POLICY.md index 475c1a7..d876fb9 100644 --- a/docs/GUI_EXCEPTION_POLICY.md +++ b/docs/GUI_EXCEPTION_POLICY.md @@ -46,4 +46,4 @@ GUI-related interruptions should map to: ## Phase 1 Rule -Phase 1 allows local GUI automation on the supported Ubuntu host. Remote desktop orchestration remains out of scope. +Phase 1 local execution records GUI and takeover contract artifacts only. Automated local GUI execution remains disabled until the active adapter profile explicitly enables browser and screenshot capabilities. Remote desktop orchestration remains out of scope. diff --git a/docs/LIGHTWEIGHT_RUNTIME_PLAN.md b/docs/LIGHTWEIGHT_RUNTIME_PLAN.md index bec2ae2..7348aeb 100644 --- a/docs/LIGHTWEIGHT_RUNTIME_PLAN.md +++ b/docs/LIGHTWEIGHT_RUNTIME_PLAN.md @@ -49,8 +49,8 @@ Current gaps: - `StateStore` writes directly, without an atomic write path or a write lock - state has one root and one mirror, but no root, session, and run scope resolver -- capability data is passed through, but `GenericCliAdapter` does not enforce default-deny execution against the manifest -- shell and secret rules exist as policy text, but not as host-side runtime guards +- shell and secret rules still exist mostly as policy text, not as host-side runtime guards +- local GUI automation remains a later profile enablement rather than an active Phase 1 runtime path ## External Adoption Targets diff --git a/docs/LOOP_SPEC.md b/docs/LOOP_SPEC.md index a5e7380..42803f0 100644 --- a/docs/LOOP_SPEC.md +++ b/docs/LOOP_SPEC.md @@ -156,4 +156,5 @@ After each run, the loop must update: - fixback should usually remain within the same story - fixback must not silently expand scope -- Phase 1 should cap fixback at 2 or 3 rounds per story +- the current Phase 1 local slice stops at `FIXBACK_PENDING` and requires an explicit next-step decision before another run is scheduled +- an automated fixback retry ceiling 
applies only after multi-round fixback scheduling is implemented diff --git a/docs/SYSTEM_BLUEPRINT.md b/docs/SYSTEM_BLUEPRINT.md index 81d78fb..0c2ec9e 100644 --- a/docs/SYSTEM_BLUEPRINT.md +++ b/docs/SYSTEM_BLUEPRINT.md @@ -41,7 +41,7 @@ CodingClaw is not: ### Control Shell -The control shell accepts Chinese commands, normalizes requirements, manages approval gates, budgets, policy guards, and dispatches loop work through adapters governed by [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md). +The target control shell accepts Chinese commands, normalizes requirements, manages approval gates, budgets, policy guards, and dispatches loop work through adapters governed by [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md). The current Phase 1 local slice exposes a repository-invoked `bun run phase1` proof command instead of the live intake surface. ### Coding Loop Kernel @@ -57,7 +57,7 @@ This plane stores all long-lived state, reports, logs, evidence, sessions, check ### Local GUI Runtime Plane -A single Ubuntu host with a graphical session is the supported GUI execution surface. Headed browser or desktop automation runs locally on that host. Manual takeover remains an exceptional fallback, not the primary path. +A single Ubuntu host with a graphical session is the planned GUI execution surface. The current Phase 1 local slice records takeover contracts and waiting states, but it does not yet enable live headed browser or desktop automation in the active adapter profile. ## Mandatory Lifecycle @@ -71,7 +71,7 @@ INTAKE -> STORY_QUEUE_READY -> BUILD_EXECUTION -> QA_VALIDATION - -> FIXBACK(optional) + -> FIXBACK(optional, manual in the current Phase 1 local slice) -> FINAL_APPROVAL(optional) -> ARCHIVE ``` @@ -122,12 +122,12 @@ At archive finalization: Phase 1 is the minimum working product. 
It includes: -- one Chinese mobile entry channel +- one repository-invoked local control command for the fixed proof slice - one control shell - one generic CLI adapter - one Claude Code builder worker - one Codex QA worker -- one Ubuntu host with a graphical session for local full automation when a story needs a real GUI surface +- takeover packet and waiting-state support for future local GUI execution, without live browser or desktop automation yet - contract binding to `base_commit` - traceability from story to acceptance to QA verdict - local artifact archival and checksums @@ -139,6 +139,8 @@ Phase 1 excludes: - multi-channel concurrency - multi-adapter parallel execution - mandatory review executor in the live path +- live Chinese mobile intake +- live local browser or desktop automation - cloud desktop bridges or vendor-specific remote desktop orchestration - dashboards - historical job reuse diff --git a/docs/UBUNTU_GUI_RUNTIME_PLAN.md b/docs/UBUNTU_GUI_RUNTIME_PLAN.md index c864f5e..ec544da 100644 --- a/docs/UBUNTU_GUI_RUNTIME_PLAN.md +++ b/docs/UBUNTU_GUI_RUNTIME_PLAN.md @@ -8,7 +8,7 @@ Supporting feasibility notes for this positioning are collected in [OFFICIAL_REF ## Positioning -The Ubuntu GUI host is the default graphical execution surface. It exists for: +The Ubuntu GUI host is the planned graphical execution surface. 
It exists for: - fully automated headed browser workflows - fully automated Linux desktop tooling @@ -36,7 +36,7 @@ The integration should define: ## Control Rules - the supported deployment target is one Ubuntu host with a graphical session -- standard in-scope local GUI automation may run automatically when the active adapter profile declares the required capability +- standard in-scope local GUI automation may run automatically only after the active adapter profile declares and enables the required capability - builder uses Claude Code and QA uses Codex in Phase 1 - main loop execution must transition into `AWAITING_TAKEOVER` during manual takeover unless the task is explicitly parallel-safe - all takeover results must be written back under `artifacts/runs/<run_id>/takeover/` and referenced by handoff and manifest records @@ -45,16 +45,17 @@ The integration should define: ### Phase 1 -- one Ubuntu host with a graphical session -- automated builder flow through Claude Code -- automated QA flow through Codex -- local headed browser or GUI execution when a story requires a real rendered surface +- one Ubuntu host with a graphical session may be prepared for later runtime rollout +- takeover packet generation and archive path for blocked GUI work +- no live browser or desktop automation in the active adapter profile yet - no dependency on a cloud desktop vendor ### Phase 2 +- automated builder flow through Claude Code +- automated QA flow through Codex +- local headed browser or GUI execution when a story requires a real rendered surface - hardened host display bootstrap -- managed takeover packet format and archive path - stable resume semantics ### Phase 3 diff --git a/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.md b/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.md new file mode 100644 index 0000000..a980f32 --- /dev/null +++ b/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.md @@ -0,0 +1,395 @@ +# Team Workflow Migration Plan + +## Purpose + +This document defines the recommended path
to move CodingClaw from a prompt2repo-style solo development workflow into a team collaboration workflow without breaking the current Phase 1 kernel. + +The target is not to replace the current control shell and loop model with an external multi-agent runtime. The target is to preserve the current contract-first, artifact-first, short-loop kernel and add a collaboration shell around it for issue intake, worktree isolation, PR review, merge governance, and deployment. + +## Executive Decision + +CodingClaw should adopt a two-layer operating model: + +- keep the current `Development Plan -> Contract Freeze -> one-story loop -> Builder -> QA -> archive` kernel as the execution core +- add a GitHub-centered collaboration shell around the kernel for issue routing, worktree isolation, branch governance, PR review, merge control, and deployment gates +- run two independent CodingClaw deployments when team scale or trust boundaries require it +- allow a `codingclaw-custom` fork for company-specific workflow policy, private adapters, internal agent systems, and deployment rules + +This is the best fit because the current repository explicitly keeps the loop kernel small and auditable, and explicitly avoids importing an external product shell, team runtime, memory layer, or orchestration stack into the core runtime. + +## Current-State Findings + +### Boundaries To Preserve + +- `docs/SYSTEM_BLUEPRINT.md` defines non-negotiable operating rules: plan before code, freeze before execution, one story per loop, files over memory, Builder and QA separation, and English repository-facing deliverables. +- `docs/LOOP_SPEC.md` defines a single-story, short-lived, auditable loop and does not allow one run to merge multiple independent stories. +- `docs/LIGHTWEIGHT_RUNTIME_PLAN.md` explicitly says CodingClaw should borrow ideas from oh-my-codex and IronClaw, but must not import their full shell, team runtime, or orchestration stack into the core runtime. 
+- `docs/DEPLOYMENT_PLAN.md` keeps the supported runtime small: one control host, Docker workers, one Ubuntu GUI surface, and no distributed complexity in Phase 1. + +### Existing Assets To Reuse + +- `.github/ISSUE_TEMPLATE/` already provides structured issue intake. +- `.github/pull_request_template.md` already asks for scope, contract impact, verification, evidence, and risks. +- `.github/workflows/verify.yml` already provides a base required verification workflow. +- `.github/workflows/enforce-repo-gate.yml` and `scripts/github_repo_gate.py` already bootstrap repository protection. +- `docs/REVIEW_CONTRACT.en.md` already defines an independent review role and can be promoted into a real PR review lane. +- `state/`, `task-packet`, `run-result`, `artifact-index`, `handoff`, and checksum conventions already provide the audit substrate required for team work. + +### Gaps To Close + +- repository protection is still closer to solo development than team governance because `scripts/github_repo_gate.py` does not require PR reviews or CODEOWNERS approval +- the live path still ends at QA and does not yet provide a first-class review, merge, or deploy lane +- the current runtime plan still treats atomic state writes, scoped state, capability enforcement, and shell/credential guards as hardening work rather than mandatory team-concurrency controls +- there is no first-class contract for `issue -> branch -> PR -> review -> merge -> deploy` + +## External Research Summary + +### Oh My Codex + +- the public documentation positions `Ralph` as the persistence loop that continues until completion and architect verification +- the public documentation positions `Team` as a conductor-led execution layer with explicit planning, execution, verification, and fix phases +- the public documentation recommends isolated worktree-based team execution and a workflow that handles parallel issues through separate branches and PRs + +Implication: + +- CodingClaw should use `Ralph` as the 
close-out loop for one approved story +- CodingClaw should use team or worktree-based parallelism outside the kernel, not inside one loop run + +### GitHub + +- rulesets and protected branches are the correct place for branch interaction policy, required checks, review requirements, linear history, and merge restrictions +- CODEOWNERS is the correct path-based ownership model, and owner review only works when the file exists on the PR base branch +- merge queue is the correct solution once multiple PRs contend for the same protected branch, and CI must support `merge_group` +- reusable workflows are the correct abstraction for repeatable multi-job PR, verify, and deploy lanes +- self-hosted runners are the correct boundary when the team needs custom runtime, internal network access, or heavyweight agent systems +- deployment environments are the correct boundary for staged secrets, required reviewers, and promotion gates +- fork PRs are the correct collaboration boundary for a public upstream plus a private company fork, but workflow and secrets changes from forks must be treated as high risk + +Implication: + +- CodingClaw should put team policy into GitHub repository governance instead of trying to encode all collaboration logic inside the loop runtime + +## Recommended Target Operating Model + +## 1. Kernel And Shell Split + +Keep the current CodingClaw kernel unchanged in responsibility: + +- intake normalization +- development plan approval +- contract freeze binding to `base_commit` +- one-story execution +- Builder execution +- QA validation +- artifact archival + +Move team collaboration concerns into a separate shell: + +- issue triage +- story slicing +- worktree creation +- branch naming +- PR creation +- review assignment +- merge queue entry +- environment promotion +- upstream or fork synchronization + +This split preserves current repository intent and avoids mixing prompt execution concerns with organization-level workflow governance. + +## 2. 
Default Work Unit + +The default unit of team work should be: + +- one GitHub issue +- one story queue derived from that issue when the issue is larger than one story +- one approved story per execution branch +- one isolated worktree per active story +- one freeze, one run, and one artifact set per story +- one PR per story by default + +The loop kernel must still execute one story at a time. Team throughput comes from multiple isolated worktrees and PRs in parallel, not from enlarging one loop. If one issue expands into multiple stories, split it before execution and let each story run through its own freeze, run, artifacts, review, and merge path. + +## 3. Recommended Branch Topology + +Use this structure by default in `codingclaw-custom`: + +- `main`: release and archive branch, strongest protection, merge queue added once PR concurrency justifies it +- `dev`: integration branch for team issue PRs, required verification, required review, no direct pushes, merges go through merge queue +- `story/<issue-id>-<slug>`: short-lived branch created from `dev` +- `hotfix/<issue-id>-<slug>`: emergency fixes, merged back into `main` and `dev` +- `sync/upstream-<date>`: short-lived branch used only to bring upstream changes into `codingclaw-custom` + +Why this topology: + +- `dev` absorbs frequent small-scope issue PRs without destabilizing `main` +- `main` remains the promotion target with the strictest release and deploy gates +- upstream sync remains explicit and reviewable + +Use this structure in the public upstream by default: + +- `main`: the only long-lived branch, strongly protected +- short-lived feature or fix branches only + +Add `dev` to the public upstream only if upstream PR concurrency becomes high enough to justify a dedicated integration branch and merge queue. + +## 4. 
Recommended Daily Workflow + +### Intake And Planning + +- open a GitHub issue using the existing issue forms +- convert the issue into a story queue when necessary +- execute only one approved story per branch and per loop +- produce or update `DEVELOPMENT_PLAN.en.md` +- approve the plan +- generate or update `CONTRACT_FREEZE.en.md` and freeze artifacts + +### Implementation + +- create a dedicated worktree from `dev` for one approved story +- run `Ralph` for that single story scope +- keep all local execution evidence attached to the run artifact set +- require the worktree to stay single-purpose + +### Review + +- open a PR from `story/<issue-id>-<slug>` into `dev` +- require `verify` +- require at least one independent review +- require CODEOWNERS approval for touched paths +- run an explicit review lane based on `docs/REVIEW_CONTRACT.en.md` + +### Merge And Promotion + +- merge into `dev` through merge queue +- run integration verification on `dev` +- promote from `dev` to `main` through a release PR +- require deployment environment approval before production release + +### Handoff And Recovery + +- if work pauses mid-issue, checkpoint the run state and preserve the artifact trail +- if takeover is required, use the existing takeover packet and approval card model instead of ad hoc chat handoff + +## 5. 
Oh My Codex Role Mapping + +Use oh-my-codex outside the kernel in this shape: + +- `ralplan`: convert issue scope into a bounded execution plan when the issue is ambiguous or cross-cutting +- `team`: coordinate multiple parallel issues or multiple bounded lanes when the work naturally splits +- `ralph`: drive one story-scoped branch until implementation, verification, and architect-style sign-off are complete +- `review`: run a pre-landing review pass against the PR diff +- `ship`: push branch and create the PR +- `checkpoint`: preserve and resume team worktrees cleanly during interruptions +- `trace`: inspect multi-agent execution history when a run or review lane becomes hard to explain + +Operating rule: + +- `Ralph` owns completion for one approved story +- `Team` owns parallelism across issues +- GitHub owns merge and deploy governance + +## 6. GitHub Governance Changes + +### Repository Rules + +Replace the current minimal gate with branch rules or rulesets that enforce at least: + +- required status checks +- required conversation resolution +- required linear history +- force-push disabled +- branch deletion disabled +- required pull request before merge +- at least one approving review and required CODEOWNERS review on `dev` +- at least two approvals and required CODEOWNERS review on `main` +- stale review dismissal on new commits +- CODEOWNERS review on owned paths + +`scripts/github_repo_gate.py` can remain as a bootstrap script, but it should no longer be the only policy surface. + +### CODEOWNERS + +Add a real `.github/CODEOWNERS` file and map at least: + +- `core/` to loop kernel owners +- `control/` to control shell owners +- `adapters/` to adapter owners +- `ops/` to runtime and policy owners +- `.github/` to platform owners +- `docs/` to contract owners + +### Merge Queue + +Enable merge queue on `dev` and later on `main` when PR concurrency justifies it. 
+ +Required follow-up: + +- update `.github/workflows/verify.yml` to run on `merge_group` in addition to `push` and `pull_request` + +### Reusable Workflows + +Split the current workflow surface into reusable units: + +- `_verify.yml` +- `_review-lane.yml` +- `_deploy.yml` +- `_repo-governance.yml` + +Then let repository workflows call those modules with pinned references. + +### Deployment Environments + +Create at least: + +- `staging` +- `production` + +Use them for: + +- environment-specific secrets +- required reviewers +- deployment promotion gates +- branch restrictions +- trusted-branch-only production access + +### Runners + +Use two runner classes: + +- GitHub-hosted runners for normal repository verification +- self-hosted or ephemeral runners for heavy backend agent integration, GUI automation, internal systems, or private network dependencies + +Trust boundary rules: + +- fork PRs run GitHub-hosted, read-only, minimum verification only +- fork PRs must not reach self-hosted runners +- fork PRs must not access protected deployment environments +- changes under `.github/workflows/**` must be reviewed and merged into a trusted branch before any private runner or protected environment can execute them +- production environments accept deployments from trusted protected branches only + +Recommended labels: + +- `codingclaw-build` +- `codingclaw-qa` +- `codingclaw-review` +- `codingclaw-agent` + +## 7. 
Dual Deployment Strategy + +When the team needs both public evolution and private company customization, run two independent CodingClaw deployments: + +### Deployment A: Upstream Or Public Baseline + +- tracks the canonical repository +- stays close to public contracts and generic capabilities +- uses public-safe workflows and public-safe secrets only +- serves as the clean upstream for broadly useful features +- defaults to protected `main` only unless upstream throughput later justifies `dev` + +### Deployment B: Company Collaboration Stack + +- runs against `codingclaw-custom` +- contains company-only adapters, workflow rules, internal backend agent integrations, and deployment policies +- uses self-hosted runners and private secrets +- integrates with internal systems that should never exist in the public baseline + +### Fork Policy + +Use `codingclaw-custom` only for changes that are truly company-specific: + +- private adapters +- private skill or policy wrappers +- internal deployment integrations +- internal approval and audit hooks +- company-specific `.github/` automation + +Push generic improvements upstream whenever possible: + +- contract clarifications +- runtime hardening +- generic review lane logic +- generic runner abstractions +- generic governance improvements + +This minimizes long-term fork drag. + +## 8. Implementation Roadmap + +### Phase 0: Governance Baseline + +- add `.github/CODEOWNERS` +- upgrade repository rules beyond the current `github_repo_gate.py` baseline +- add `merge_group` support to verification +- define ownership for `.github/`, `docs/`, `core/`, `control/`, `ops/`, and `adapters/` + +### Phase 1: Runtime Hardening Baseline + +- finish atomic state writes +- finish scoped state resolution +- enforce capability deny-by-default +- finish shell and credential guards + +This phase is the safety floor before parallel team execution expands. 
+ +### Phase 2: Team Shell + +- define the formal mapping from issue to story to branch to PR +- add worktree discipline for every active issue +- standardize branch naming and PR metadata +- define when to use `Ralph` and when to use `Team` + +### Phase 3: Review And Merge Lane + +- turn `docs/REVIEW_CONTRACT.en.md` into a real review lane +- require independent review evidence before merge +- wire release promotion from `dev` to `main` + +### Phase 4: Dual Deployment + +- stand up the private `codingclaw-custom` line if needed +- separate public and private workflows, runners, secrets, and environments +- document upstream sync cadence + +## 9. Non-Goals + +- importing oh-my-codex team runtime directly into the CodingClaw kernel +- turning one loop into a multi-story multi-agent long-running session +- replacing freeze and approval artifacts with PR discussion alone +- adding distributed schedulers, multi-tenant queues, or cloud desktop complexity before governance and audit paths are stable + +## 10. 
Success Criteria + +- every small-scope issue can move through `issue -> story -> worktree -> branch -> PR -> review -> merge -> deploy` without ad hoc process +- each loop still executes exactly one approved story with its own freeze, run, and artifact set +- `main` is always protected, reviewable, and releasable +- team members can collaborate in parallel without sharing the same working tree +- public and private automation can diverge without corrupting the core kernel +- generic improvements can still flow upstream with manageable fork maintenance cost + +## Source Basis + +### Repository Evidence + +- [SYSTEM_BLUEPRINT.md](../SYSTEM_BLUEPRINT.md) +- [LIGHTWEIGHT_RUNTIME_PLAN.md](../LIGHTWEIGHT_RUNTIME_PLAN.md) +- [LOOP_SPEC.md](../LOOP_SPEC.md) +- [DEPLOYMENT_PLAN.md](../DEPLOYMENT_PLAN.md) +- [REVIEW_CONTRACT.en.md](../REVIEW_CONTRACT.en.md) +- [.github/pull_request_template.md](../../.github/pull_request_template.md) +- [.github/workflows/verify.yml](../../.github/workflows/verify.yml) +- [scripts/github_repo_gate.py](../../scripts/github_repo_gate.py) + +### External References + +- Oh My Codex documentation: https://yeachan-heo.github.io/oh-my-codex-website/docs.html +- GitHub protected branches: https://docs.github.com/github/administering-a-repository/about-protected-branches +- GitHub rulesets: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-rulesets/about-rulesets +- GitHub CODEOWNERS: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +- GitHub merge queue: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue +- GitHub reusable workflows: https://docs.github.com/en/actions/sharing-automations/reusing-workflows +- GitHub self-hosted runners: 
https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners +- GitHub deployment environments: https://docs.github.com/actions/deployment/targeting-different-environments +- GitHub fork workflow: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo +- GitHub syncing a fork: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork +- Git worktree: https://git-scm.com/docs/git-worktree diff --git a/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.zh-CN.md b/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.zh-CN.md new file mode 100644 index 0000000..296614c --- /dev/null +++ b/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.zh-CN.md @@ -0,0 +1,526 @@ +# CodingClaw 团队协作工作流迁移方案 + +## 目的 + +本文档定义了 CodingClaw 从 prompt2repo 风格的单人开发,迁移到团队协作开发工作流的推荐路径。 + +目标不是把当前 Phase 1 内核替换成外部多 agent runtime,而是在不破坏当前内核边界的前提下,在外层增加团队协作壳层,用于承接 issue、worktree 隔离、PR 审查、合并治理和部署门禁。 + +## 给负责人的结论 + +- 保留当前内核,不重写。 +- 团队协作能力放在 GitHub 和 oh-my-codex 的外层工作流上,不塞进 loop kernel。 +- 默认工作单元不是“大任务”,而是“一个 issue 拆成一个或多个 story,每个 story 单独 freeze、单独执行、单独 PR”。 +- 如果要支撑多人并行,必须先补 runtime hardening,再扩 team shell。 +- 如果公司内部流程和公开仓库流程差异大,应该拆成两套独立部署,并允许维护 `codingclaw-custom` 私有协作线。 + +一句话版本: + +CodingClaw 的正确演进方向不是“把它改造成一个重型团队 agent 平台”,而是“保留当前可审计的单 story 执行内核,在外层加上团队治理、分支治理、PR 治理和部署治理”。 + +## 核心决策 + +CodingClaw 应采用双层模型: + +- 内层保留现有 `Development Plan -> Contract Freeze -> one-story loop -> Builder -> QA -> archive` 执行内核 +- 外层增加以 GitHub 为中心的团队协作壳层,负责 issue 路由、worktree 隔离、分支治理、PR 审查、合并控制和部署门禁 +- 当团队规模、权限边界或公司内部流程需要时,运行两套独立的 CodingClaw 部署 +- 允许维护 `codingclaw-custom` fork,用于承接公司特有的流程策略、私有 adapter、内部 agent 系统和部署规则 + +这是当前仓库最匹配的路线,因为仓库现有设计已经明确: + +- 内核应保持小而可审计 +- 不应把外部产品壳、team runtime、memory layer、orchestration stack 直接导入核心运行时 + +## 当前仓库中必须保留的边界 + +### 1. 
计划先于编码 + +`docs/SYSTEM_BLUEPRINT.md` 已经定义了最重要的制度边界: + +- 先计划再编码 +- 先 freeze 再执行 +- 一次 loop 只做一个 story +- 状态落文件,不依赖会话记忆 +- Builder 和 QA 必须分离 +- 仓库交付物统一英文 + +这些边界不能因为“团队协作”而被弱化。 + +### 2. One Story Per Loop + +`docs/LOOP_SPEC.md` 已明确规定,一次 loop 不允许把多个独立 story 合并执行。 + +因此未来的团队协作模型必须是: + +- `issue -> story queue` +- `story -> freeze` +- `story -> run` +- `story -> artifact` +- `story -> PR` + +而不是: + +- 一个 issue 下多人同时往一个大 branch 里堆改动 +- 一个 loop 同时处理多个 story + +### 3. 内核只借鉴,不吞并外部 runtime + +`docs/LIGHTWEIGHT_RUNTIME_PLAN.md` 已明确说明: + +- 可以借鉴 oh-my-codex 的状态、规划和 guard 思想 +- 可以借鉴 IronClaw 的 capability 和安全边界思想 +- 但不能把它们的 product shell、team runtime、memory system、orchestration stack 直接导入内核 + +所以团队化改造必须走“外层壳”路线,而不是“整体替换内核”路线。 + +### 4. 归档与证据链优先 + +现有仓库已经围绕这些对象建立了审计体系: + +- `task-packet` +- `run-result` +- `artifact-index` +- `handoff` +- `checksums` +- 各类 reports + +团队协作不能退化成“靠聊天记录和 PR 对话驱动”,而必须继续保留这套文件化证据链。 + +## 当前仓库已经具备的可复用资产 + +- `.github/ISSUE_TEMPLATE/` 已有结构化 issue 入口 +- `.github/pull_request_template.md` 已包含 scope、contract impact、verification、evidence、risks +- `.github/workflows/verify.yml` 已有基础验证 workflow +- `.github/workflows/enforce-repo-gate.yml` 与 `scripts/github_repo_gate.py` 已有最小仓库门禁基础 +- `docs/REVIEW_CONTRACT.en.md` 已具备独立 review lane 的雏形 +- `state/`、`task-packet`、`run-result`、`artifact-index`、`handoff`、checksum 体系已经可以承接团队协作的审计面 + +这意味着 CodingClaw 不是“完全没有团队化基础”,而是“缺少外层协作治理层”。 + +## 当前最明显的缺口 + +### 1. 仓库保护还偏单人模式 + +当前 `scripts/github_repo_gate.py` 主要还是: + +- required status check +- linear history +- conversation resolution +- 禁止 force push +- 禁止删除分支 + +但还没有真正进入团队 PR 治理所需的配置,例如: + +- required reviews +- CODEOWNERS review +- stale review dismissal +- merge queue + +### 2. Live path 仍停在 QA + +当前 live path 仍然主要是: + +- Builder +- QA + +独立 review、merge、deploy 还没有真正进入第一方执行路径。 + +### 3. 
Team 并发前的基础硬化还没完成 + +仓库当前仍把以下能力视为“下一阶段硬化项”: + +- atomic state writes +- scoped state resolution +- capability deny-by-default +- shell guard +- credential guard + +如果在这些没收口之前就把多人并发、worktree 并发、team shell 大规模铺开,会把团队流程建在未封口的状态和权限模型之上。 + +## 外部调研结论 + +## 1. Oh My Codex 的启发 + +公开文档显示: + +- `Ralph` 适合承担“持续推进直到完成和验证通过”的闭环角色 +- `Team` 适合承担“并行分工执行”的外层协调角色 +- 推荐 workflow 本身就强调 worktree 隔离、并行 issue、独立 PR 和最后收口 + +对 CodingClaw 的直接启发是: + +- `Ralph` 负责一个 story 的完成闭环 +- `Team` 负责多个 story 或多个 issue 的并行协调 +- 并行性放在外层,不放进单个 loop 内核 + +## 2. GitHub 的启发 + +官方文档足够支撑以下协作治理组件: + +- rulesets / protected branches +- CODEOWNERS +- merge queue +- reusable workflows +- self-hosted runners +- deployment environments +- fork PR + +对 CodingClaw 的直接启发是: + +- 团队治理主要应当落在 GitHub 的仓库规则、分支规则、审核规则、runner 边界和 deployment environment 上 +- 不应把这些组织级治理逻辑硬编码进 loop runtime + +## 推荐目标模型 + +## 1. 内核与协作壳分离 + +内核继续负责: + +- requirement normalization +- development plan approval +- contract freeze 绑定 `base_commit` +- one-story execution +- Builder +- QA +- artifact archive + +协作壳负责: + +- issue triage +- story slicing +- worktree creation +- branch naming +- PR creation +- review assignment +- merge queue +- environment promotion +- upstream / fork sync + +这个分层是整个方案最关键的设计原则。 + +## 2. 默认工作单元 + +团队默认工作单元应定义为: + +- 一个 GitHub issue +- 当 issue 超过一个 story 时,先拆成 story queue +- 每个 branch 只承载一个已批准 story +- 每个 active story 使用一个独立 worktree +- 每个 story 有自己独立的 freeze、run、artifact set +- 默认每个 story 对应一个 PR + +必须明确: + +- 一个 issue 可以拆成多个 story +- 但一个 loop 只能执行一个 story +- 一个 PR 默认也不应打包多个 story + +## 3. 
推荐分支拓扑 + +### 在 `codingclaw-custom` 中默认采用 + +- `main`: 发布与归档主分支,保护最强 +- `dev`: 团队集成分支,小 scope issue 先合到这里 +- `story/-`: 单 story 短期分支 +- `hotfix/-`: 紧急修复分支 +- `sync/upstream-`: 从上游同步时使用的短期分支 + +这样做的好处: + +- `dev` 用于承接团队高频并行提交 +- `main` 用于承接强门禁发布 +- upstream sync 保持显式且可审查 + +### 在公开 upstream 中默认采用 + +- 只保留一个长期受保护的 `main` +- 其他仅使用短期 feature / fix 分支 + +只有当上游自身 PR 并发量足够大时,才考虑引入 `dev` 和 merge queue。 + +这样可以避免 upstream 和 custom fork 同时维护双长期分支,降低 fork drag。 + +## 4. 推荐日常工作流 + +### Intake And Planning + +- 用现有 issue forms 创建 issue +- 如有必要,将 issue 拆成 story queue +- 每次只选择一个已批准 story 进入执行 +- 产出或更新 `DEVELOPMENT_PLAN.en.md` +- 完成 owner approval +- 产出或更新 `CONTRACT_FREEZE.en.md` 及相关 freeze artifacts + +### Implementation + +- 从 `dev` 拉出一个 story 专属 worktree +- 使用 `Ralph` 推进该单 story 的实现闭环 +- 所有本地执行证据继续归档到 run artifact set +- worktree 必须保持单一用途 + +### Review + +- 从 `story/-` 向 `dev` 发 PR +- 要求 `verify` +- 要求至少一个独立 reviewer +- 要求 CODEOWNERS review +- 运行基于 `docs/REVIEW_CONTRACT.en.md` 的 review lane + +### Merge And Promotion + +- 先通过 merge queue 合入 `dev` +- 在 `dev` 上做集成验证 +- 再通过 release PR 从 `dev` 提升到 `main` +- 生产发布必须经过 deployment environment approval + +### Handoff And Recovery + +- 中断时用 checkpoint 保留状态 +- takeover 场景继续使用现有 takeover packet 和 approval card 模型 +- 不使用临时聊天记录替代正式 handoff + +## 5. Oh My Codex 角色映射 + +建议在外层这样使用 oh-my-codex: + +- `ralplan`: 当 issue 跨模块或不够清晰时,先把它收敛成可执行计划 +- `team`: 当多个 issue 或多个 story 可以并行时,负责分工协调 +- `ralph`: 负责一个已批准 story 的完成闭环 +- `review`: 用于 PR 落地前的审查 +- `ship`: 用于推分支和发 PR +- `checkpoint`: 用于中断后的恢复 +- `trace`: 用于回看多 agent 执行轨迹 + +一句话职责划分: + +- `Ralph` 负责“一个 story 做到底” +- `Team` 负责“多个 story 并行推进” +- GitHub 负责“谁能合、何时合、能否发” + +## 6. 
GitHub 治理改造建议 + +### Repository Rules + +在当前最小 gate 之上,补齐这些规则: + +- required status checks +- required conversation resolution +- required linear history +- 禁止 force push +- 禁止删除分支 +- 强制走 pull request +- `dev` 至少 1 个 approval 且要求 CODEOWNERS review +- `main` 至少 2 个 approvals 且要求 CODEOWNERS review +- 新 commit 进入后自动 dismiss stale reviews + +这里要注意,规则必须写成可落地的 AND 语义,不能写成模糊的 OR 语义。 + +### CODEOWNERS + +建议新增 `.github/CODEOWNERS`,至少覆盖: + +- `core/` +- `control/` +- `adapters/` +- `ops/` +- `.github/` +- `docs/` + +### Merge Queue + +建议在 `dev` 先启用 merge queue,后续再视情况扩到 `main`。 + +必要前提: + +- `.github/workflows/verify.yml` 需要支持 `merge_group` + +### Reusable Workflows + +建议把 workflow 拆成可复用模块: + +- `_verify.yml` +- `_review-lane.yml` +- `_deploy.yml` +- `_repo-governance.yml` + +### Deployment Environments + +至少建立: + +- `staging` +- `production` + +用于承接: + +- environment-specific secrets +- required reviewers +- deployment promotion gates +- branch restrictions +- 仅允许 trusted branches 访问 production + +### Runners + +建议使用两类 runner: + +- GitHub-hosted runners:普通仓库验证 +- self-hosted 或 ephemeral runners:重型后端 agent、GUI automation、内网系统、私有依赖 + +建议 runner labels: + +- `codingclaw-build` +- `codingclaw-qa` +- `codingclaw-review` +- `codingclaw-agent` + +### 信任边界必须写死 + +- fork PR 只跑 GitHub-hosted 的只读最小验证 +- fork PR 不允许触达 self-hosted runners +- fork PR 不允许访问受保护 deployment environments +- `.github/workflows/**` 改动必须先进入 trusted branch,之后才能触发私有 runner 或受保护 environment +- production environment 只接受受信保护分支 + +## 7. 
双部署策略 + +当你既要维护公开方向,又要维护公司内协作流时,建议运行两套独立 CodingClaw: + +### Deployment A: Upstream / Public Baseline + +- 跟随 canonical repository +- 尽量保持公共 contract 和通用能力 +- 只使用 public-safe workflows 与 public-safe secrets +- 默认只维护受保护的 `main` + +### Deployment B: Company Collaboration Stack + +- 跑在 `codingclaw-custom` +- 承载公司私有 adapter、workflow rules、内部 backend agent 集成和部署策略 +- 使用 self-hosted runners 和 private secrets +- 对接公司内部系统 + +### Fork Policy + +`codingclaw-custom` 只承载真正公司特有的东西: + +- private adapters +- private skill / policy wrappers +- internal deployment integrations +- internal approval hooks +- company-specific `.github/` automation + +而以下内容应尽量回推 upstream: + +- contract clarifications +- runtime hardening +- generic review lane logic +- generic runner abstractions +- generic governance improvements + +这样才能控制长期 fork 维护成本。 + +## 8. 实施路线 + +### Phase 0: Governance Baseline + +- 增加 `.github/CODEOWNERS` +- 升级仓库规则,不再只依赖当前 `github_repo_gate.py` +- 给 `verify` 加上 `merge_group` 支持 +- 明确 `.github/`、`docs/`、`core/`、`control/`、`ops/`、`adapters/` 的 ownership + +### Phase 1: Runtime Hardening Baseline + +- 完成 atomic state writes +- 完成 scoped state resolution +- 完成 capability deny-by-default +- 完成 shell and credential guards + +这是团队并发扩张前的安全地板,不能后置。 + +### Phase 2: Team Shell + +- 正式定义 `issue -> story -> branch -> PR` 契约 +- 为每个 active story 建立 worktree discipline +- 固化 branch naming 和 PR metadata +- 明确什么场景用 `Ralph`,什么场景用 `Team` + +### Phase 3: Review And Merge Lane + +- 把 `docs/REVIEW_CONTRACT.en.md` 升级成真实 review lane +- 合并前必须有独立 review evidence +- 打通 `dev -> main` 的 release promotion + +### Phase 4: Dual Deployment + +- 如有需要,搭建 `codingclaw-custom` +- 分离 public/private workflows、runners、secrets、environments +- 固化 upstream sync cadence + +## 9. 非目标 + +以下方向当前都不应作为主路线: + +- 把 oh-my-codex team runtime 直接塞进 CodingClaw kernel +- 把一个 loop 改造成多 story、多 agent 的长运行 session +- 用 PR 对话代替 freeze、approval 和 artifact 体系 +- 在治理和审计面没稳定前,先上分布式调度、多租户队列、远程桌面复杂度 + +## 10. 
成功标准 + +- 每个小 scope issue 都能稳定经过 `issue -> story -> worktree -> branch -> PR -> review -> merge -> deploy` +- 每个 loop 仍然只执行一个已批准 story,并保留独立 freeze、run、artifact set +- `main` 始终处于可审查、可发布、可追溯状态 +- 团队成员可以并行协作而不共享同一个工作目录 +- 公共能力和公司私有能力可以分层演进,而不破坏内核边界 +- generic improvements 仍能以可控成本回推 upstream + +## 建议你作为负责人优先推动的 5 件事 + +### 第一优先级 + +- 先补 `.github/CODEOWNERS` +- 先把 `verify` 补上 `merge_group` +- 先把仓库规则升到 team 级别 + +### 第二优先级 + +- 先补 runtime hardening,不要抢先上多人并发协作壳 + +### 第三优先级 + +- 确认 `codingclaw-custom` 是否真的需要独立存在 +- 如果需要,就从第一天开始把“哪些必须回推 upstream”定清楚 + +### 第四优先级 + +- 固化 `story/-` 分支命名 +- 固化“一 story 一 PR”纪律 + +### 第五优先级 + +- 再去扩展 review lane、deploy lane、internal agent lane + +## 参考基础 + +### 仓库内部依据 + +- [SYSTEM_BLUEPRINT.md](../SYSTEM_BLUEPRINT.md) +- [LIGHTWEIGHT_RUNTIME_PLAN.md](../LIGHTWEIGHT_RUNTIME_PLAN.md) +- [LOOP_SPEC.md](../LOOP_SPEC.md) +- [DEPLOYMENT_PLAN.md](../DEPLOYMENT_PLAN.md) +- [REVIEW_CONTRACT.en.md](../REVIEW_CONTRACT.en.md) +- [TEAM_WORKFLOW_MIGRATION_PLAN.md](./TEAM_WORKFLOW_MIGRATION_PLAN.md) + +### 外部资料 + +- Oh My Codex documentation: https://yeachan-heo.github.io/oh-my-codex-website/docs.html +- Git protected branches: https://docs.github.com/github/administering-a-repository/about-protected-branches +- GitHub rulesets: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-rulesets/about-rulesets +- GitHub CODEOWNERS: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +- GitHub merge queue: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue +- GitHub reusable workflows: https://docs.github.com/en/actions/sharing-automations/reusing-workflows +- GitHub self-hosted runners: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners +- GitHub deployment environments: 
https://docs.github.com/actions/deployment/targeting-different-environments +- GitHub fork workflow: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo +- GitHub syncing a fork: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork +- Git worktree: https://git-scm.com/docs/git-worktree diff --git a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index 8e1083c..2114df3 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -825,6 +825,7 @@ def test_phase1_local_takeover_run_writes_takeover_packet_and_manifest_reference run_record = manifest["runs"][0] takeover_packet_path = job_root / run_record["takeover_packet_path"] takeover_packet = takeover_packet_path.read_text(encoding="utf-8") + recovery_card = json.loads((job_root / "approvals" / manifest["pause_context"]["related_card_id"] / "approval-card.json").read_text(encoding="utf-8")) artifact_index = json.loads((job_root / run_record["artifact_index_path"]).read_text(encoding="utf-8")) indexed_paths = {entry["path"] for entry in artifact_index["artifacts"]} @@ -835,6 +836,8 @@ def test_phase1_local_takeover_run_writes_takeover_packet_and_manifest_reference assert "takeover/takeover-packet.en.md" in indexed_paths assert_takeover_pause_context(manifest, job_root, "AWAITING_TAKEOVER") assert manifest["pause_context"]["related_card_id"] in takeover_packet + assert recovery_card["timeout_at"] == manifest["pause_context"]["expires_at"] + assert recovery_card["timeout_at"] in takeover_packet @pytest.mark.integration From 5ac26e0a93e424522c2e8374618b5af634634142 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Mon, 13 Apr 2026 18:11:53 +0800 Subject: [PATCH 18/19] fix: fix --- tests/integration/test_phase1_local_flow.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index 2114df3..de3f69d 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -767,8 +767,8 @@ def test_phase1_local_capability_gate_stops_undeclared_or_denied_requests_before job_root = repo_root / "jobs" / "job-phase1-local" manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) - run_root = next(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) - command_log = (run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") + builder_run_root = job_root / next(run["root"] for run in manifest["runs"] if run["run_role"] == "builder") + command_log = (builder_run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") assert [run["run_role"] for run in manifest["runs"]] == ["builder"] assert [run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_POLICY"] @@ -796,8 +796,8 @@ def test_phase1_local_capability_gate_manifest_load_failure_returns_failed_polic job_root = repo_root / "jobs" / "job-phase1-local" manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) - run_root = next(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) - command_log = (run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") + builder_run_root = job_root / next(run["root"] for run in manifest["runs"] if run["run_role"] == "builder") + command_log = (builder_run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") assert [run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_POLICY"] assert manifest["status"] == "AWAITING_OWNER" From 02f7c0ae7c79f4cab408ef520161b8c8534c0b12 Mon Sep 17 00:00:00 2001 From: purplevoid <2990668364@qq.com> Date: Mon, 13 Apr 2026 20:13:38 +0800 Subject: [PATCH 19/19] fix: harden phase1 docker worker boundaries Enforce canonical capability checks, isolate 
job-scoped workspaces, add state write recovery, and wire host-side shell guards. --- adapters/generic-cli/adapter-policy.json | 47 +- adapters/generic-cli/adapter.ts | 68 ++- adapters/generic-cli/capability-gate.ts | 14 +- adapters/generic-cli/docker-runtime.ts | 37 +- .../fixtures/phase1-local-run-envelope.json | 3 +- .../fixtures/phase1-local-task-packet.en.json | 3 +- core/contracts/types.ts | 8 + core/loop/phase1-local-flow.ts | 86 ++- core/loop/state-scope.ts | 163 +++++ core/loop/state-store.ts | 578 ++++++++++-------- core/loop/state-write.ts | 201 ++++++ ops/archive/job-root.ts | 5 + ops/guards/credential-injector.ts | 116 ++++ ops/guards/shell-policy.ts | 131 ++++ tests/integration/test_phase1_local_flow.py | 44 +- 15 files changed, 1200 insertions(+), 304 deletions(-) create mode 100644 core/loop/state-scope.ts create mode 100644 core/loop/state-write.ts create mode 100644 ops/guards/credential-injector.ts create mode 100644 ops/guards/shell-policy.ts diff --git a/adapters/generic-cli/adapter-policy.json b/adapters/generic-cli/adapter-policy.json index d6160d4..22d0c6f 100644 --- a/adapters/generic-cli/adapter-policy.json +++ b/adapters/generic-cli/adapter-policy.json @@ -5,5 +5,50 @@ "retry_limit": 0, "log_redaction": "no-secrets-in-fixture", "scope_policy": "single-approved-story-only", - "failure_policy": "return-standard-run-exit-status" + "failure_policy": "return-standard-run-exit-status", + "shell_policy": { + "blocked_commands": [ + "rm", + "del", + "format", + "diskpart" + ], + "dangerous_patterns": [ + "Authorization\\s*:\\s*Bearer\\s+\\S+", + "cookie\\s*[:=]\\s*\\S+", + "token\\s*[:=]\\s*\\S+" + ], + "never_auto_approve_patterns": [ + "--privileged", + "--pid=host", + "--network=host", + "type=bind,source=/,target=/" + ], + "env_allowlist": [ + "^HOME$", + "^XDG_CACHE_HOME$", + "^BUN_INSTALL_CACHE_DIR$", + "^CODINGCLAW_CRED_[A-Z0-9_]+$" + ] + }, + "credential_injection": { + "supported_modes": [ + "env" + ], + "supported_sources": [ + 
"fixture", + "env" + ], + "fixture_env_prefix": "CODINGCLAW_SECRET_FIXTURE_", + "credential_env_prefix": "CODINGCLAW_CRED_" + }, + "log_redaction_rules": { + "replace_with": "[REDACTED]", + "patterns": [ + "Authorization\\s*:\\s*Bearer\\s+\\S+", + "cookie\\s*[:=]\\s*[^\\r\\n;]+", + "token\\s*[:=]\\s*\\S+", + "password\\s*[:=]\\s*\\S+" + ] + } } diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index ea4df35..18d114e 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -12,7 +12,10 @@ import type { WorkerOutput, } from "../../core/contracts/types.ts"; import { DockerWorkerLauncher, type DockerWorkerLaunchResult, materializeContainerizedRunEnvelope } from "./docker-runtime.ts"; -import { CapabilityGate } from "./capability-gate.ts"; +import { CapabilityGate, type CapabilityGateDecision } from "./capability-gate.ts"; +import { CredentialInjector } from "../../ops/guards/credential-injector.ts"; + +const REQUIRED_LAUNCH_CAPABILITIES = ["container_control"] as const; function workerScriptForRole(rootPath: string, runRole: RunRole): string { if (runRole === "builder") { @@ -289,10 +292,12 @@ async function writeQaFallbackArtifacts( export class GenericCliAdapter { private readonly dockerLauncher: DockerWorkerLauncher; private readonly capabilityGate: CapabilityGate; + private readonly credentialInjector: CredentialInjector; constructor(private readonly repoRoot: string) { this.dockerLauncher = new DockerWorkerLauncher(repoRoot); this.capabilityGate = new CapabilityGate(repoRoot); + this.credentialInjector = new CredentialInjector(repoRoot); } async execute(envelope: RunEnvelope): Promise { @@ -305,12 +310,36 @@ export class GenericCliAdapter { const handoffPath = join(runRoot, "reports", "handoff.en.md"); await ensureHostWritableRunLayout(runRoot); let taskPacketPath = envelope.task_packet_path; + const taskPacket = await readJson(taskPacketPath); + const canonicalRequestedCapabilities = 
uniqueStrings(taskPacket.requested_capabilities); + const envelopeRequestedCapabilities = uniqueStrings(envelope.requested_capabilities); const startedAtDate = new Date(); let launchResult: DockerWorkerLaunchResult; - let capabilityDecision; + let capabilityDecision: CapabilityGateDecision; + const credentialInjection = await this.credentialInjector.resolve(taskPacket, envelope); + if (canonicalRequestedCapabilities.join("\n") !== envelopeRequestedCapabilities.join("\n")) { + launchResult = { + command: ["", ...canonicalRequestedCapabilities], + exitCode: 1, + stdout: "", + stderr: "run envelope requested_capabilities do not match the canonical task packet", + failure_status: "FAILED_POLICY", + }; + capabilityDecision = { + allowed: false, + reason: launchResult.stderr, + status: launchResult.failure_status, + }; + } else { try { - capabilityDecision = await this.capabilityGate.evaluate(envelope.requested_capabilities); + capabilityDecision = await this.capabilityGate.evaluate( + canonicalRequestedCapabilities, + [ + ...REQUIRED_LAUNCH_CAPABILITIES, + ...(taskPacket.credential_injection_requests?.length ? ["secret_injection"] : []), + ], + ); } catch (error) { capabilityDecision = { allowed: false, @@ -318,14 +347,23 @@ export class GenericCliAdapter { status: "FAILED_POLICY", }; } + } if (!capabilityDecision.allowed) { launchResult = { - command: ["", ...envelope.requested_capabilities], + command: ["", ...canonicalRequestedCapabilities], exitCode: 1, stdout: "", stderr: capabilityDecision.reason ?? "capability gate rejected the launch", failure_status: capabilityDecision.status ?? "FAILED_POLICY", }; + } else if (!credentialInjection.allowed) { + launchResult = { + command: [""], + exitCode: 1, + stdout: "", + stderr: credentialInjection.reason ?? "credential injection rejected the launch", + failure_status: credentialInjection.status ?? 
"FAILED_POLICY", + }; } else { try { const materialization = await materializeContainerizedRunEnvelope(envelope); @@ -341,29 +379,40 @@ export class GenericCliAdapter { envelope_path: materialization.runtime.envelope_container_path, runtime: materialization.runtime, time_limits: envelope.time_limits, + environment: credentialInjection.environment, }); } catch (error) { launchResult = unexpectedLaunchResult(error); } } + const redactedLaunchResult: DockerWorkerLaunchResult = { + ...launchResult, + command: credentialInjection.redactor.redactCommand(launchResult.command), + stdout: credentialInjection.redactor.redactText(launchResult.stdout), + stderr: credentialInjection.redactor.redactText(launchResult.stderr), + }; const exitCode = launchResult.exitCode; - const stdout = launchResult.stdout; - const stderr = launchResult.stderr; + const stdout = redactedLaunchResult.stdout; + const stderr = redactedLaunchResult.stderr; const endedAtDate = new Date(); let workerOutput: WorkerOutput; let usedFallbackWorkerOutput = false; if (exitCode === 0) { try { - workerOutput = JSON.parse(stdout) as WorkerOutput; + workerOutput = JSON.parse(launchResult.stdout) as WorkerOutput; } catch { workerOutput = fallbackWorkerOutput("worker output was not valid JSON"); usedFallbackWorkerOutput = true; } } else { - workerOutput = fallbackWorkerOutput(launchFailureText(launchResult), launchResult.failure_status ?? "FAILED_EXECUTION"); + workerOutput = fallbackWorkerOutput( + launchFailureText(redactedLaunchResult), + launchResult.failure_status ?? 
"FAILED_EXECUTION", + ); usedFallbackWorkerOutput = true; } + workerOutput = credentialInjection.redactor.redactWorkerOutput(workerOutput); const durationMs = Math.max(0, endedAtDate.getTime() - startedAtDate.getTime()); const runTimings: RunTimingMetadata = { @@ -379,7 +428,7 @@ export class GenericCliAdapter { duration_s: Math.max(0, Math.round(durationMs / 1000)), }; - await writeText(commandLogPath, renderCommandLog(launchResult.command, exitCode, stdout, stderr)); + await writeText(commandLogPath, renderCommandLog(redactedLaunchResult.command, exitCode, stdout, stderr)); await writeWorkerLog(workerLogPath, { job_id: envelope.job_id, run_id: envelope.run_id, @@ -398,7 +447,6 @@ export class GenericCliAdapter { } if (envelope.run_role === "qa") { workerOutput = withQaFallbackPaths(workerOutput); - const taskPacket = await readJson(envelope.task_packet_path); await writeQaFallbackArtifacts(runRoot, envelope, taskPacket, workerOutput); } } diff --git a/adapters/generic-cli/capability-gate.ts b/adapters/generic-cli/capability-gate.ts index 3fa8cef..ebf836d 100644 --- a/adapters/generic-cli/capability-gate.ts +++ b/adapters/generic-cli/capability-gate.ts @@ -38,9 +38,19 @@ export class CapabilityGate { return this.manifestPromise; } - async evaluate(requestedCapabilities: string[]): Promise { + async evaluate(requestedCapabilities: string[], requiredCapabilities: string[] = []): Promise { const manifest = await this.loadManifest(); - for (const capability of uniqueStrings(requestedCapabilities)) { + const requested = uniqueStrings(requestedCapabilities); + for (const capability of uniqueStrings(requiredCapabilities)) { + if (!requested.includes(capability)) { + return { + allowed: false, + reason: `required capability is missing for profile ${manifest.profile_id}: ${capability}`, + status: "FAILED_POLICY", + }; + } + } + for (const capability of requested) { const entry = manifest.capabilities[capability]; if (entry === undefined) { return { diff --git 
a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts index 0922a4e..259fa73 100644 --- a/adapters/generic-cli/docker-runtime.ts +++ b/adapters/generic-cli/docker-runtime.ts @@ -8,6 +8,7 @@ import type { RunExitStatus, RunRole, } from "../../core/contracts/types.ts"; +import { ShellPolicy } from "../../ops/guards/shell-policy.ts"; export const CONTAINER_PATHS = { repo: "/work/repo", @@ -59,6 +60,7 @@ export interface DockerWorkerLaunchRequest { envelope_path: string; runtime: ContainerRuntimeConfig; time_limits: Record; + environment?: Record; } export interface DockerWorkerLaunchResult { @@ -350,6 +352,15 @@ function dockerUserArgs(): string[] { return ["--user", `${process.getuid()}:${process.getgid()}`]; } +function launchEnvironment(request: DockerWorkerLaunchRequest): Record { + return { + HOME: "/work/runtime-home/home", + XDG_CACHE_HOME: "/work/cache", + BUN_INSTALL_CACHE_DIR: "/work/cache/bun", + ...(request.environment ?? {}), + }; +} + function buildRunCommand(dockerExecutable: string, request: DockerWorkerLaunchRequest): string[] { return [ dockerExecutable, @@ -360,12 +371,7 @@ function buildRunCommand(dockerExecutable: string, request: DockerWorkerLaunchRe ...dockerUserArgs(), "--workdir", request.runtime.workdir, - "--env", - "HOME=/work/runtime-home/home", - "--env", - "XDG_CACHE_HOME=/work/cache", - "--env", - "BUN_INSTALL_CACHE_DIR=/work/cache/bun", + ...Object.entries(launchEnvironment(request)).flatMap(([key, value]) => ["--env", `${key}=${value}`]), ...request.runtime.mounts.flatMap((mount) => ["--mount", mountArg(mount)]), request.image, "bun", @@ -551,12 +557,14 @@ export class DockerWorkerLauncher implements RoleImageResolver { private readonly preparedImages = new Set(); private readonly baseImage: string; private readonly dockerExecutable: string; + private readonly shellPolicy: ShellPolicy; constructor(private readonly repoRoot: string) { const override = process.env[BASE_IMAGE_ENV_VAR]?.trim(); this.baseImage = 
override && override.length > 0 ? override : DEFAULT_BASE_IMAGE; const dockerOverride = process.env[DOCKER_BIN_ENV_VAR]?.trim(); this.dockerExecutable = dockerOverride && dockerOverride.length > 0 ? dockerOverride : "docker"; + this.shellPolicy = new ShellPolicy(repoRoot); } resolve(runRole: RunRole): string { @@ -636,6 +644,23 @@ export class DockerWorkerLauncher implements RoleImageResolver { async launch(request: DockerWorkerLaunchRequest): Promise { const command = buildRunCommand(this.dockerExecutable, request); + const shellDecision = await this.shellPolicy.evaluate({ + executable: this.dockerExecutable, + command, + envNames: Object.keys(launchEnvironment(request)), + dynamicEnvNames: Object.keys(request.environment ?? {}), + mounts: request.runtime.mounts, + runRole: request.run_role, + }); + if (!shellDecision.allowed) { + return { + command, + exitCode: 1, + stdout: "", + stderr: shellDecision.reason ?? "host shell policy blocked launch", + failure_status: shellDecision.status ?? 
"FAILED_POLICY", + }; + } const imagePreparationFailure = await this.ensureRoleImage(request.run_role, request.image); if (imagePreparationFailure) { return { diff --git a/control/fixtures/phase1-local-run-envelope.json b/control/fixtures/phase1-local-run-envelope.json index e84ff3e..9cafb58 100644 --- a/control/fixtures/phase1-local-run-envelope.json +++ b/control/fixtures/phase1-local-run-envelope.json @@ -41,6 +41,7 @@ "requested_capabilities": [ "filesystem_read", "filesystem_write", - "shell_command" + "shell_command", + "container_control" ] } diff --git a/control/fixtures/phase1-local-task-packet.en.json b/control/fixtures/phase1-local-task-packet.en.json index 7a124d4..4f5ab55 100644 --- a/control/fixtures/phase1-local-task-packet.en.json +++ b/control/fixtures/phase1-local-task-packet.en.json @@ -39,7 +39,8 @@ "requested_capabilities": [ "filesystem_read", "filesystem_write", - "shell_command" + "shell_command", + "container_control" ], "story": { "story_id": "STORY-PHASE1-LOCAL-001", diff --git a/core/contracts/types.ts b/core/contracts/types.ts index b8d5143..bd0ad18 100644 --- a/core/contracts/types.ts +++ b/core/contracts/types.ts @@ -57,6 +57,13 @@ export interface StoryContract { escalation_rules: string[]; } +export interface CredentialInjectionRequest { + secret_handle: string; + credential_alias: string; + allowed_host_patterns: string[]; + injection_mode: "env"; +} + export interface TaskPacket { job_id: string; freeze_id: string; @@ -79,6 +86,7 @@ export interface TaskPacket { approval_context: Record; previous_handoff_path: string; requested_capabilities: string[]; + credential_injection_requests?: CredentialInjectionRequest[]; story: StoryContract; } diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index 1a448ea..e41fd60 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -1,4 +1,5 @@ -import { join } from "node:path"; +import { copyFile, readdir, readlink, symlink } from 
"node:fs/promises"; +import { dirname, isAbsolute, join, relative, resolve } from "node:path"; import { GenericCliAdapter } from "../../adapters/generic-cli/adapter.ts"; import { buildArtifactIndex } from "../../ops/archive/artifact-index.ts"; import { writeApprovalArchive } from "../../ops/archive/approvals.ts"; @@ -55,6 +56,14 @@ const BUILDER_EXPECTED_ARTIFACTS = [ "logs/command-log.txt", "logs/worker.log", "metadata/task-packet.en.json", + "metadata/state/active-story.json", + "metadata/state/decisions.en.md", + "metadata/state/handoff.en.md", + "metadata/state/loop-metrics.json", + "metadata/state/progress.en.md", + "metadata/state/risk-register.en.md", + "metadata/state/story-queue.json", + "metadata/state/trace-index.json", "metadata/timings.json", "metadata/run-result.json", "metadata/artifact-index.json", @@ -78,6 +87,14 @@ const QA_EXPECTED_ARTIFACTS = [ "logs/command-log.txt", "logs/worker.log", "metadata/task-packet.en.json", + "metadata/state/active-story.json", + "metadata/state/decisions.en.md", + "metadata/state/handoff.en.md", + "metadata/state/loop-metrics.json", + "metadata/state/progress.en.md", + "metadata/state/risk-register.en.md", + "metadata/state/story-queue.json", + "metadata/state/trace-index.json", "metadata/timings.json", "metadata/run-result.json", "metadata/artifact-index.json", @@ -143,6 +160,47 @@ async function buildDependencySnapshotDigest(repoRoot: string): Promise return "absent"; } +const JOB_REPO_SNAPSHOT_EXCLUDES = new Set([".omx", "jobs", "state"]); + +function assertSnapshotSymlinkTarget(sourceRoot: string, sourcePath: string, targetPath: string): string { + const resolvedTarget = resolve(dirname(sourcePath), targetPath); + const relativeTarget = relative(resolve(sourceRoot), resolvedTarget); + if (relativeTarget === "" || (!relativeTarget.startsWith("..") && !isAbsolute(relativeTarget))) { + return targetPath; + } + throw new Error(`repo snapshot symlink escaped the source repo: ${sourcePath}`); +} + +async function 
copyRepoSnapshot(sourceRoot: string, targetRoot: string, relativePath = ""): Promise { + const sourceDir = relativePath.length === 0 ? sourceRoot : join(sourceRoot, relativePath); + const entries = await readdir(sourceDir, { withFileTypes: true }); + for (const entry of entries) { + if (relativePath.length === 0 && JOB_REPO_SNAPSHOT_EXCLUDES.has(entry.name)) { + continue; + } + const childRelativePath = relativePath.length === 0 ? entry.name : join(relativePath, entry.name); + const sourcePath = join(sourceRoot, childRelativePath); + const targetPath = join(targetRoot, childRelativePath); + if (entry.isDirectory()) { + await ensureDir(targetPath); + await copyRepoSnapshot(sourceRoot, targetRoot, childRelativePath); + continue; + } + if (entry.isSymbolicLink()) { + await ensureDir(dirname(targetPath)); + await symlink(assertSnapshotSymlinkTarget(sourceRoot, sourcePath, await readlink(sourcePath)), targetPath); + continue; + } + await ensureDir(dirname(targetPath)); + await copyFile(sourcePath, targetPath); + } +} + +async function stageJobRepoWorkspace(repoRoot: string, workerRepoRoot: string): Promise { + await ensureDir(workerRepoRoot); + await copyRepoSnapshot(repoRoot, workerRepoRoot); +} + async function assertFreshJobRoot(layout: ReturnType): Promise { if (!(await pathExists(layout.jobRoot))) { return; @@ -850,6 +908,8 @@ export async function runPhase1Local(repoRoot: string): Promise/ for governance, approvals, state, runtime-home, and run bundles.", + "Use one canonical job root under jobs// for governance, an isolated repo workspace, approvals, state, runtime-home, and run bundles.", "Preserve the existing builder to QA worker order and local generic CLI adapter.", - "Mirror the latest canonical state files into state/ for control-shell recovery.", + "Mirror the latest canonical state files into state/ for control-shell recovery, while keeping builder and QA runtime homes role-scoped.", ], milestones: [ "Write the fixed plan and approval archive.", @@ 
-967,11 +1027,11 @@ export async function runPhase1Local(repoRoot: string): Promise string; + resolveLivePath: (relativePath: string) => string; + resolveSessionPath: (sessionId: string, relativePath?: string) => string; + resolveRunPath: (runId: string, relativePath?: string) => string; + resolveReadPaths: (relativePath: string, options?: StateScopeReadOptions) => Promise; +} + +function normalizeInputPath(path: string): string { + const trimmed = path.trim(); + const wslMatch = trimmed.match(WSL_DRIVE_PATH); + if (wslMatch !== null) { + const drive = wslMatch[1].toUpperCase(); + const suffix = (wslMatch[2] ?? "").replaceAll("/", "\\").replace(/^\\/, ""); + return suffix.length > 0 ? `${drive}:\\${suffix}` : `${drive}:\\`; + } + if (/^[A-Za-z]:[\\/]/u.test(trimmed) || trimmed.startsWith("\\\\")) { + return trimmed.replaceAll("/", "\\"); + } + return trimmed; +} + +function normalizeAbsolutePath(path: string): string { + const normalizedInput = normalizeInputPath(path); + const absolutePath = isAbsolute(normalizedInput) ? normalizedInput : resolve(normalizedInput); + return normalize(absolutePath); +} + +function assertWithinRoot(rootPath: string, targetPath: string, label: string): string { + const normalizedRoot = normalizeAbsolutePath(rootPath); + const normalizedTarget = normalizeAbsolutePath(targetPath); + const relativePath = relative(normalizedRoot, normalizedTarget); + if (relativePath === "" || (!relativePath.startsWith("..") && !isAbsolute(relativePath))) { + return normalizedTarget; + } + throw new Error(`${label} escaped its allowed root: ${normalizedTarget}`); +} + +function normalizeRelativeStatePath(relativePath: string): string { + const normalized = relativePath.replaceAll("\\", "/").replace(/^\/+/, ""); + if (normalized.length === 0) { + return ""; + } + const segments = normalized.split("/").filter((segment) => segment.length > 0); + if (segments.length === 0) { + return ""; + } + for (const segment of segments) { + if (segment === "." 
|| segment === "..") { + throw new Error(`state path may not contain traversal segments: ${relativePath}`); + } + } + return join(...segments); +} + +export function validateSessionId(sessionId: string): string { + if (!SAFE_SCOPE_SEGMENT.test(sessionId)) { + throw new Error(`invalid session_id: ${sessionId}`); + } + return sessionId; +} + +export function validateRunId(runId: string): string { + if (!SAFE_SCOPE_SEGMENT.test(runId)) { + throw new Error(`invalid run_id: ${runId}`); + } + return runId; +} + +export function resolveStateScopeRoots( + repoRoot: string, + archiveStateRoot: string, + liveStateRoot: string, +): StateScopeRoots { + const normalizedRepoRoot = normalizeAbsolutePath(repoRoot); + const normalizedRootStateRoot = assertWithinRoot(normalizedRepoRoot, archiveStateRoot, "root state path"); + const normalizedLiveStateRoot = assertWithinRoot(normalizedRepoRoot, liveStateRoot, "live state path"); + const normalizedJobRoot = assertWithinRoot(normalizedRepoRoot, dirname(normalizedRootStateRoot), "job root"); + const runsRoot = assertWithinRoot(normalizedJobRoot, join(normalizedJobRoot, "artifacts", "runs"), "run scope root"); + + return { + repoRoot: normalizedRepoRoot, + jobRoot: normalizedJobRoot, + rootStateRoot: normalizedRootStateRoot, + liveStateRoot: normalizedLiveStateRoot, + sessionsRoot: assertWithinRoot(normalizedRootStateRoot, join(normalizedRootStateRoot, "sessions"), "session scope root"), + currentSessionPath: assertWithinRoot( + normalizedRootStateRoot, + join(normalizedRootStateRoot, "current-session.json"), + "current session pointer", + ), + runsRoot, + }; +} + +async function readCurrentSessionId(currentSessionPath: string): Promise { + if (!(await pathExists(currentSessionPath))) { + return null; + } + const payload = await readJson>(currentSessionPath); + const candidate = payload.session_id; + if (typeof candidate !== "string" || candidate.length === 0) { + return null; + } + return validateSessionId(candidate); +} + +export 
function createStateScopeResolver( + repoRoot: string, + archiveStateRoot: string, + liveStateRoot: string, +): StateScopeResolver { + const roots = resolveStateScopeRoots(repoRoot, archiveStateRoot, liveStateRoot); + + const resolveScopedPath = (basePath: string, relativePath: string, label: string): string => { + const normalizedRelativePath = normalizeRelativeStatePath(relativePath); + const targetPath = normalizedRelativePath.length > 0 ? join(basePath, normalizedRelativePath) : basePath; + return assertWithinRoot(basePath, targetPath, label); + }; + + return { + roots, + resolveRootPath: (relativePath: string) => resolveScopedPath(roots.rootStateRoot, relativePath, "root scope path"), + resolveLivePath: (relativePath: string) => resolveScopedPath(roots.liveStateRoot, relativePath, "live scope path"), + resolveSessionPath: (sessionId: string, relativePath = "") => + resolveScopedPath(join(roots.sessionsRoot, validateSessionId(sessionId)), relativePath, "session scope path"), + resolveRunPath: (runId: string, relativePath = "") => + resolveScopedPath(join(roots.runsRoot, validateRunId(runId), "metadata", "state"), relativePath, "run scope path"), + resolveReadPaths: async (relativePath: string, options: StateScopeReadOptions = {}) => { + const resolvedPaths: string[] = []; + const sessionId = options.sessionId ?? 
(await readCurrentSessionId(roots.currentSessionPath)); + if (sessionId !== null && sessionId !== undefined) { + resolvedPaths.push(resolveScopedPath(join(roots.sessionsRoot, validateSessionId(sessionId)), relativePath, "session read path")); + } + if (options.includeRoot !== false) { + resolvedPaths.push(resolveScopedPath(roots.rootStateRoot, relativePath, "root read path")); + } + return resolvedPaths; + }, + }; +} diff --git a/core/loop/state-store.ts b/core/loop/state-store.ts index c97551e..50b4156 100644 --- a/core/loop/state-store.ts +++ b/core/loop/state-store.ts @@ -1,15 +1,8 @@ import { join } from "node:path"; import { mapRunExitToJobState, nextRequiredActionFromState, runningJobStateForRole } from "../contracts/status.ts"; -import { - ensureDir, - nowIso, - pathExists, - readJson, - readText, - uniqueStrings, - writeJson, - writeText, -} from "./support.ts"; +import { ensureDir, nowIso, pathExists, readText, uniqueStrings } from "./support.ts"; +import { createStateScopeResolver, type StateScopeResolver } from "./state-scope.ts"; +import { atomicWriteBatch, recoverPendingBatch, withWriteLock } from "./state-write.ts"; import type { ActiveStoryFile, AdapterExecutionResult, @@ -192,60 +185,124 @@ export interface StateStorePaths { liveStateRoot?: string; } +const STATE_FILE_ORDER = [ + "progress.en.md", + "story-queue.json", + "active-story.json", + "handoff.en.md", + "risk-register.en.md", + "loop-metrics.json", + "decisions.en.md", + "trace-index.json", +] as const; + +type StateFileName = (typeof STATE_FILE_ORDER)[number]; +type SerializedState = Partial>; + +function serializeJson(value: unknown): string { + return `${JSON.stringify(value, null, 2)}\n`; +} + export class StateStore { private readonly stateRoot: string; private readonly liveStateRoot: string; - private readonly progressPath: string; - private readonly storyQueuePath: string; - private readonly activeStoryPath: string; - private readonly handoffPath: string; - private readonly 
riskRegisterPath: string; - private readonly loopMetricsPath: string; - private readonly decisionsPath: string; + private readonly scopeResolver: StateScopeResolver; private readonly traceIndexPath: string; constructor(repoRoot: string, paths: StateStorePaths = {}) { - this.stateRoot = paths.archiveStateRoot ?? join(repoRoot, "state"); - this.liveStateRoot = paths.liveStateRoot ?? join(repoRoot, "state"); - this.progressPath = join(this.stateRoot, "progress.en.md"); - this.storyQueuePath = join(this.stateRoot, "story-queue.json"); - this.activeStoryPath = join(this.stateRoot, "active-story.json"); - this.handoffPath = join(this.stateRoot, "handoff.en.md"); - this.riskRegisterPath = join(this.stateRoot, "risk-register.en.md"); - this.loopMetricsPath = join(this.stateRoot, "loop-metrics.json"); - this.decisionsPath = join(this.stateRoot, "decisions.en.md"); - this.traceIndexPath = join(this.stateRoot, "trace-index.json"); + this.scopeResolver = createStateScopeResolver( + repoRoot, + paths.archiveStateRoot ?? join(repoRoot, "state"), + paths.liveStateRoot ?? 
join(repoRoot, "state"), + ); + this.stateRoot = this.scopeResolver.roots.rootStateRoot; + this.liveStateRoot = this.scopeResolver.roots.liveStateRoot; + this.traceIndexPath = this.scopeResolver.resolveRootPath("trace-index.json"); + } + + private rootPath(fileName: StateFileName): string { + return this.scopeResolver.resolveRootPath(fileName); } - private mirrorPath(fileName: string): string { - return join(this.liveStateRoot, fileName); + private mirrorPath(fileName: StateFileName): string { + return this.scopeResolver.resolveLivePath(fileName); } - private async writeMirroredText(path: string, fileName: string, value: string): Promise { - await writeText(path, value); - const mirrorPath = this.mirrorPath(fileName); - if (mirrorPath !== path) { - await writeText(mirrorPath, value); + private snapshotPath(runId: string, fileName: StateFileName): string { + return this.scopeResolver.resolveRunPath(runId, fileName); + } + + private async loadCanonicalState(): Promise { + await recoverPendingBatch(this.stateRoot); + const state: SerializedState = {}; + for (const fileName of STATE_FILE_ORDER) { + const path = this.rootPath(fileName); + if (await pathExists(path)) { + state[fileName] = await readText(path); + } } + return state; } - private async writeMirroredJson(path: string, fileName: string, value: unknown): Promise { - await writeJson(path, value); - const mirrorPath = this.mirrorPath(fileName); - if (mirrorPath !== path) { - await writeJson(mirrorPath, value); + private parseJsonState(state: SerializedState, fileName: StateFileName): T { + const serialized = state[fileName]; + if (serialized === undefined) { + throw new Error(`missing canonical state file: ${fileName}`); } + return JSON.parse(serialized) as T; } - getTraceIndexPath(): string { - return this.traceIndexPath; + private readTextState(state: SerializedState, fileName: StateFileName): string { + const serialized = state[fileName]; + if (serialized === undefined) { + throw new Error(`missing canonical 
state file: ${fileName}`); + } + return serialized; } - async bootstrap(taskPacket: TaskPacket): Promise { - await ensureDir(this.stateRoot); - await ensureDir(this.liveStateRoot); + private setJsonState(state: SerializedState, fileName: StateFileName, value: unknown): void { + state[fileName] = serializeJson(value); + } + + private setTextState(state: SerializedState, fileName: StateFileName, value: string): void { + state[fileName] = value; + } + + private async persistState(state: SerializedState, runId: string): Promise { + const entries: Array<{ path: string; content: string }> = []; + const seenPaths = new Set(); + for (const fileName of STATE_FILE_ORDER) { + const content = state[fileName]; + if (content === undefined) { + continue; + } + for (const path of [this.rootPath(fileName), this.mirrorPath(fileName), this.snapshotPath(runId, fileName)]) { + const dedupeKey = path.toLowerCase(); + if (seenPaths.has(dedupeKey)) { + continue; + } + seenPaths.add(dedupeKey); + entries.push({ path, content }); + } + } + await atomicWriteBatch(this.stateRoot, entries); + } + + private async mutateState(runId: string, mutate: (state: SerializedState) => Promise): Promise { + return withWriteLock( + this.stateRoot, + async () => { + const state = await this.loadCanonicalState(); + const result = await mutate(state); + await this.persistState(state, runId); + return result; + }, + { owner: "StateStore" }, + ); + } - if (!(await pathExists(this.storyQueuePath))) { + private bootstrapInitialState(taskPacket: TaskPacket, state: SerializedState): void { + if (state["story-queue.json"] === undefined) { const storyQueue: StoryQueueFile = { job_id: taskPacket.job_id, freeze_version: taskPacket.freeze_version, @@ -260,10 +317,10 @@ export class StateStore { }, ], }; - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); + this.setJsonState(state, "story-queue.json", storyQueue); } - if (!(await pathExists(this.traceIndexPath))) { + if 
(state["trace-index.json"] === undefined) { const traceIndex: TraceIndex = { job_id: taskPacket.job_id, freeze_id: taskPacket.freeze_id, @@ -273,28 +330,28 @@ export class StateStore { [taskPacket.story.story_id]: createStoryTrace(taskPacket), }, }; - await this.writeMirroredJson(this.traceIndexPath, "trace-index.json", traceIndex); + this.setJsonState(state, "trace-index.json", traceIndex); } - if (!(await pathExists(this.loopMetricsPath))) { + if (state["loop-metrics.json"] === undefined) { const loopMetrics: LoopMetricsFile = { job_id: taskPacket.job_id, runs: [], }; - await this.writeMirroredJson(this.loopMetricsPath, "loop-metrics.json", loopMetrics); + this.setJsonState(state, "loop-metrics.json", loopMetrics); } - if (!(await pathExists(this.decisionsPath))) { - await this.writeMirroredText(this.decisionsPath, "decisions.en.md", ["# Decisions", "", approvalDecisionEntry()].join("\n")); + if (state["decisions.en.md"] === undefined) { + this.setTextState(state, "decisions.en.md", ["# Decisions", "", approvalDecisionEntry()].join("\n")); } - if (!(await pathExists(this.riskRegisterPath))) { - await this.writeMirroredText(this.riskRegisterPath, "risk-register.en.md", renderRiskRegister("READY_TO_RUN", [])); + if (state["risk-register.en.md"] === undefined) { + this.setTextState(state, "risk-register.en.md", renderRiskRegister("READY_TO_RUN", [])); } - if (!(await pathExists(this.handoffPath))) { - await this.writeMirroredText( - this.handoffPath, + if (state["handoff.en.md"] === undefined) { + this.setTextState( + state, "handoff.en.md", [ "# Handoff", @@ -343,59 +400,72 @@ export class StateStore { ].join("\n"), ); } + } - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress( - taskPacket, - "READY_TO_RUN", - "", - "builder", - "The fixed local Phase 1 story is approved and ready for the builder run.", - ), - ); + getTraceIndexPath(): string { + return this.traceIndexPath; + } + + async bootstrap(taskPacket: TaskPacket): 
Promise { + await ensureDir(this.stateRoot); + await ensureDir(this.liveStateRoot); + await this.mutateState(taskPacket.run_id, async (state) => { + this.bootstrapInitialState(taskPacket, state); + this.setTextState( + state, + "progress.en.md", + renderProgress( + taskPacket, + "READY_TO_RUN", + "", + "builder", + "The fixed local Phase 1 story is approved and ready for the builder run.", + ), + ); + }); } async prepareRun(taskPacket: TaskPacket): Promise { - const jobState = runningJobStateForRole(taskPacket.run_role); - const storyQueue = await readJson(this.storyQueuePath); - storyQueue.freeze_version = taskPacket.freeze_version; - storyQueue.stories = storyQueue.stories.map((story) => - story.story_id === taskPacket.story.story_id - ? { - ...story, - queue_state: jobState, - last_run_id: taskPacket.run_id, - } - : story, - ); + await this.mutateState(taskPacket.run_id, async (state) => { + const jobState = runningJobStateForRole(taskPacket.run_role); + const storyQueue = this.parseJsonState(state, "story-queue.json"); + storyQueue.freeze_version = taskPacket.freeze_version; + storyQueue.stories = storyQueue.stories.map((story) => + story.story_id === taskPacket.story.story_id + ? 
{ + ...story, + queue_state: jobState, + last_run_id: taskPacket.run_id, + } + : story, + ); - const activeStory: ActiveStoryFile = { - story_id: taskPacket.story.story_id, - freeze_version: taskPacket.freeze_version, - run_id: taskPacket.run_id, - run_role: taskPacket.run_role, - objective: taskPacket.story.story_objective, - acceptance_ids: taskPacket.story.acceptance_ids, - verification_targets: taskPacket.story.verification_targets, - stop_conditions: taskPacket.story.stop_conditions, - expected_artifacts: taskPacket.story.expected_artifacts, - }; - - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); - await this.writeMirroredJson(this.activeStoryPath, "active-story.json", activeStory); - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress( - taskPacket, - jobState, - taskPacket.run_id, - taskPacket.run_role, - `${taskPacket.run_role} is executing the fixed local proof-of-concept story.`, - ), - ); + const activeStory: ActiveStoryFile = { + story_id: taskPacket.story.story_id, + freeze_version: taskPacket.freeze_version, + run_id: taskPacket.run_id, + run_role: taskPacket.run_role, + objective: taskPacket.story.story_objective, + acceptance_ids: taskPacket.story.acceptance_ids, + verification_targets: taskPacket.story.verification_targets, + stop_conditions: taskPacket.story.stop_conditions, + expected_artifacts: taskPacket.story.expected_artifacts, + }; + + this.setJsonState(state, "story-queue.json", storyQueue); + this.setJsonState(state, "active-story.json", activeStory); + this.setTextState( + state, + "progress.en.md", + renderProgress( + taskPacket, + jobState, + taskPacket.run_id, + taskPacket.run_role, + `${taskPacket.run_role} is executing the fixed local proof-of-concept story.`, + ), + ); + }); } async recordRun( @@ -403,135 +473,131 @@ export class StateStore { execution: AdapterExecutionResult, recoveryState: RunRecoveryState | null = null, ): Promise { - const jobState = 
mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); - const loopMetrics = await readJson(this.loopMetricsPath); - const storyQueue = await readJson(this.storyQueuePath); - const traceIndex = await readJson(this.traceIndexPath); const handoffContent = await readText(execution.handoffPath); - - const metricEntry: LoopMetricEntry = { - run_id: execution.runResult.run_id, - story_id: execution.runResult.story_id, - run_role: execution.runResult.run_role, - started_at: execution.runResult.started_at, - ended_at: execution.runResult.ended_at, - duration_s: execution.runResult.duration_s, - estimated_cost: 0, - actual_cost: 0, - retry_index: taskPacket.run_attempt - 1, - run_exit_status: execution.runResult.status, - }; - loopMetrics.runs.push(metricEntry); - - storyQueue.freeze_version = taskPacket.freeze_version; - storyQueue.stories = storyQueue.stories.map((story) => - story.story_id === taskPacket.story.story_id - ? { - ...story, - queue_state: jobState, - last_run_id: execution.runResult.run_id, - } - : story, - ); - - const storyTrace = traceIndex.stories[taskPacket.story.story_id] ?? createStoryTrace(taskPacket); - const newArtifactRefs = execution.workerOutput.evidence_paths.map( - (path) => `artifacts/runs/${execution.runResult.run_id}/${path}`, - ); - - for (const acceptanceId of taskPacket.story.acceptance_ids) { - const previous = storyTrace.acceptance[acceptanceId] ?? 
createTraceEntry(); - storyTrace.acceptance[acceptanceId] = { - status: execution.workerOutput.acceptance_status, - artifacts: uniqueStrings([...previous.artifacts, ...newArtifactRefs]), - updated_at: nowIso(), - latest_run_id: execution.runResult.run_id, + await this.mutateState(execution.runResult.run_id, async (state) => { + const jobState = mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); + const loopMetrics = this.parseJsonState(state, "loop-metrics.json"); + const storyQueue = this.parseJsonState(state, "story-queue.json"); + const traceIndex = this.parseJsonState(state, "trace-index.json"); + + const metricEntry: LoopMetricEntry = { + run_id: execution.runResult.run_id, + story_id: execution.runResult.story_id, + run_role: execution.runResult.run_role, + started_at: execution.runResult.started_at, + ended_at: execution.runResult.ended_at, + duration_s: execution.runResult.duration_s, + estimated_cost: 0, + actual_cost: 0, + retry_index: taskPacket.run_attempt - 1, + run_exit_status: execution.runResult.status, }; - } + loopMetrics.runs.push(metricEntry); + + storyQueue.freeze_version = taskPacket.freeze_version; + storyQueue.stories = storyQueue.stories.map((story) => + story.story_id === taskPacket.story.story_id + ? { + ...story, + queue_state: jobState, + last_run_id: execution.runResult.run_id, + } + : story, + ); - for (const checkName of taskPacket.story.mandatory_checks) { - const previous = storyTrace.mandatory_checks[checkName] ?? createTraceEntry(); - storyTrace.mandatory_checks[checkName] = { - status: execution.workerOutput.mandatory_check_status, - artifacts: uniqueStrings([...previous.artifacts, ...newArtifactRefs]), - updated_at: nowIso(), - latest_run_id: execution.runResult.run_id, - }; - } + const storyTrace = traceIndex.stories[taskPacket.story.story_id] ?? 
createStoryTrace(taskPacket); + const newArtifactRefs = execution.workerOutput.evidence_paths.map( + (path) => `artifacts/runs/${execution.runResult.run_id}/${path}`, + ); - storyTrace.story_id = taskPacket.story.story_id; - storyTrace.latest_run_id = execution.runResult.run_id; - storyTrace.latest_run_role = execution.runResult.run_role; - storyTrace.latest_qa_status = - execution.runResult.run_role === "qa" ? execution.runResult.status : storyTrace.latest_qa_status; - storyTrace.artifact_locations = uniqueStrings([...storyTrace.artifact_locations, ...newArtifactRefs]); - traceIndex.active_story_id = taskPacket.story.story_id; - traceIndex.stories[taskPacket.story.story_id] = storyTrace; - - const blockers = execution.workerOutput.blockers; - const progressSummary = - execution.runResult.run_role === "builder" && execution.runResult.status === "SUCCESS" - ? "The builder completed the local slice and handed off to QA." - : execution.runResult.run_role === "qa" && execution.runResult.status === "SUCCESS" - ? "QA closed the local proof-of-concept story and the job is ready to archive." - : jobState === "AWAITING_OWNER" && recoveryState !== null - ? `${execution.runResult.run_role} ended with status ${execution.runResult.status}. Waiting for owner decision via recovery card ${recoveryState.card_id}.` - : jobState === "AWAITING_TAKEOVER" && recoveryState !== null - ? `${execution.runResult.run_role} ended with status ${execution.runResult.status}. 
Waiting for takeover via recovery card ${recoveryState.card_id}.` - : `${execution.runResult.run_role} ended with status ${execution.runResult.status}.`; - - const existingDecisions = await readText(this.decisionsPath); - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); - await this.writeMirroredJson(this.loopMetricsPath, "loop-metrics.json", loopMetrics); - await this.writeMirroredJson(this.traceIndexPath, "trace-index.json", traceIndex); - await this.writeMirroredText(this.handoffPath, "handoff.en.md", handoffContent); - await this.writeMirroredText( - this.riskRegisterPath, - "risk-register.en.md", - renderRiskRegister(jobState, blockers, recoveryState), - ); - await this.writeMirroredText( - this.decisionsPath, - "decisions.en.md", - `${existingDecisions.trimEnd()}\n\n${runDecisionEntry(execution.runResult.run_id, jobState, recoveryState)}`, - ); - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress(taskPacket, jobState, execution.runResult.run_id, execution.runResult.run_role, progressSummary), - ); + for (const acceptanceId of taskPacket.story.acceptance_ids) { + const previous = storyTrace.acceptance[acceptanceId] ?? createTraceEntry(); + storyTrace.acceptance[acceptanceId] = { + status: execution.workerOutput.acceptance_status, + artifacts: uniqueStrings([...previous.artifacts, ...newArtifactRefs]), + updated_at: nowIso(), + latest_run_id: execution.runResult.run_id, + }; + } + + for (const checkName of taskPacket.story.mandatory_checks) { + const previous = storyTrace.mandatory_checks[checkName] ?? 
createTraceEntry(); + storyTrace.mandatory_checks[checkName] = { + status: execution.workerOutput.mandatory_check_status, + artifacts: uniqueStrings([...previous.artifacts, ...newArtifactRefs]), + updated_at: nowIso(), + latest_run_id: execution.runResult.run_id, + }; + } + + storyTrace.story_id = taskPacket.story.story_id; + storyTrace.latest_run_id = execution.runResult.run_id; + storyTrace.latest_run_role = execution.runResult.run_role; + storyTrace.latest_qa_status = + execution.runResult.run_role === "qa" ? execution.runResult.status : storyTrace.latest_qa_status; + storyTrace.artifact_locations = uniqueStrings([...storyTrace.artifact_locations, ...newArtifactRefs]); + traceIndex.active_story_id = taskPacket.story.story_id; + traceIndex.stories[taskPacket.story.story_id] = storyTrace; + + const blockers = execution.workerOutput.blockers; + const progressSummary = + execution.runResult.run_role === "builder" && execution.runResult.status === "SUCCESS" + ? "The builder completed the local slice and handed off to QA." + : execution.runResult.run_role === "qa" && execution.runResult.status === "SUCCESS" + ? "QA closed the local proof-of-concept story and the job is ready to archive." + : jobState === "AWAITING_OWNER" && recoveryState !== null + ? `${execution.runResult.run_role} ended with status ${execution.runResult.status}. Waiting for owner decision via recovery card ${recoveryState.card_id}.` + : jobState === "AWAITING_TAKEOVER" && recoveryState !== null + ? `${execution.runResult.run_role} ended with status ${execution.runResult.status}. 
Waiting for takeover via recovery card ${recoveryState.card_id}.` + : `${execution.runResult.run_role} ended with status ${execution.runResult.status}.`; + + const existingDecisions = this.readTextState(state, "decisions.en.md"); + this.setJsonState(state, "story-queue.json", storyQueue); + this.setJsonState(state, "loop-metrics.json", loopMetrics); + this.setJsonState(state, "trace-index.json", traceIndex); + this.setTextState(state, "handoff.en.md", handoffContent); + this.setTextState(state, "risk-register.en.md", renderRiskRegister(jobState, blockers, recoveryState)); + this.setTextState( + state, + "decisions.en.md", + `${existingDecisions.trimEnd()}\n\n${runDecisionEntry(execution.runResult.run_id, jobState, recoveryState)}`, + ); + this.setTextState( + state, + "progress.en.md", + renderProgress(taskPacket, jobState, execution.runResult.run_id, execution.runResult.run_role, progressSummary), + ); + }); } async recordIntegrityFailure(taskPacket: TaskPacket, reason: string): Promise { - const storyQueue = await readJson(this.storyQueuePath); - storyQueue.freeze_version = taskPacket.freeze_version; - storyQueue.stories = storyQueue.stories.map((story) => - story.story_id === taskPacket.story.story_id - ? { - ...story, - queue_state: "INTEGRITY_FAILED", - last_run_id: taskPacket.run_id, - } - : story, - ); + await this.mutateState(taskPacket.run_id, async (state) => { + const storyQueue = this.parseJsonState(state, "story-queue.json"); + storyQueue.freeze_version = taskPacket.freeze_version; + storyQueue.stories = storyQueue.stories.map((story) => + story.story_id === taskPacket.story.story_id + ? 
{ + ...story, + queue_state: "INTEGRITY_FAILED", + last_run_id: taskPacket.run_id, + } + : story, + ); - const existingDecisions = await readText(this.decisionsPath); - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); - await this.writeMirroredText( - this.riskRegisterPath, - "risk-register.en.md", - renderRiskRegister("INTEGRITY_FAILED", [reason]), - ); - await this.writeMirroredText( - this.decisionsPath, - "decisions.en.md", - `${existingDecisions.trimEnd()}\n\n${integrityFailureDecisionEntry(taskPacket.run_id, reason)}`, - ); - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress(taskPacket, "INTEGRITY_FAILED", taskPacket.run_id, taskPacket.run_role, reason), - ); + const existingDecisions = this.readTextState(state, "decisions.en.md"); + this.setJsonState(state, "story-queue.json", storyQueue); + this.setTextState(state, "risk-register.en.md", renderRiskRegister("INTEGRITY_FAILED", [reason])); + this.setTextState( + state, + "decisions.en.md", + `${existingDecisions.trimEnd()}\n\n${integrityFailureDecisionEntry(taskPacket.run_id, reason)}`, + ); + this.setTextState( + state, + "progress.en.md", + renderProgress(taskPacket, "INTEGRITY_FAILED", taskPacket.run_id, taskPacket.run_role, reason), + ); + }); } async recordArchiveFinalization( @@ -540,35 +606,37 @@ export class StateStore { latestRunRole: RunRole, finalSummaryPath: string, ): Promise { - const storyQueue = await readJson(this.storyQueuePath); - storyQueue.freeze_version = taskPacket.freeze_version; - storyQueue.stories = storyQueue.stories.map((story) => - story.story_id === taskPacket.story.story_id - ? 
{ - ...story, - queue_state: "COMPLETED", - last_run_id: latestRunId, - } - : story, - ); + await this.mutateState(latestRunId, async (state) => { + const storyQueue = this.parseJsonState(state, "story-queue.json"); + storyQueue.freeze_version = taskPacket.freeze_version; + storyQueue.stories = storyQueue.stories.map((story) => + story.story_id === taskPacket.story.story_id + ? { + ...story, + queue_state: "COMPLETED", + last_run_id: latestRunId, + } + : story, + ); - const existingDecisions = await readText(this.decisionsPath); - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); - await this.writeMirroredText( - this.decisionsPath, - "decisions.en.md", - `${existingDecisions.trimEnd()}\n\n${archiveFinalizationDecisionEntry(latestRunId, finalSummaryPath)}`, - ); - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress( - taskPacket, - "COMPLETED", - latestRunId, - latestRunRole, - "Archive finalization completed and the canonical local job bundle is closed.", - ), - ); + const existingDecisions = this.readTextState(state, "decisions.en.md"); + this.setJsonState(state, "story-queue.json", storyQueue); + this.setTextState( + state, + "decisions.en.md", + `${existingDecisions.trimEnd()}\n\n${archiveFinalizationDecisionEntry(latestRunId, finalSummaryPath)}`, + ); + this.setTextState( + state, + "progress.en.md", + renderProgress( + taskPacket, + "COMPLETED", + latestRunId, + latestRunRole, + "Archive finalization completed and the canonical local job bundle is closed.", + ), + ); + }); } } diff --git a/core/loop/state-write.ts b/core/loop/state-write.ts new file mode 100644 index 0000000..a2641bd --- /dev/null +++ b/core/loop/state-write.ts @@ -0,0 +1,201 @@ +import { randomUUID } from "node:crypto"; +import { open, rename, rm, stat } from "node:fs/promises"; +import { basename, dirname, join } from "node:path"; +import { ensureDir, nowIso } from "./support.ts"; + +const DEFAULT_LOCK_TIMEOUT_MS = 15_000; 
+const DEFAULT_LOCK_POLL_MS = 50; +const DEFAULT_LOCK_STALE_MS = 30_000; +const LOCK_FILE_NAME = ".state-write.lock"; +const TRANSACTION_FILE_NAME = ".state-write.txn.json"; + +export interface StateWriteLockOptions { + lockName?: string; + owner?: string; + pollMs?: number; + staleMs?: number; + timeoutMs?: number; +} + +export interface AtomicWriteEntry { + path: string; + content: string; +} + +interface PendingBatchEntry { + path: string; + temp_path: string; +} + +interface PendingBatchRecord { + created_at: string; + entries: PendingBatchEntry[]; +} + +function delay(ms: number): Promise { + return new Promise((resolvePromise) => { + setTimeout(resolvePromise, ms); + }); +} + +async function isStaleLock(lockPath: string, staleMs: number): Promise { + try { + const metadata = await stat(lockPath); + return Date.now() - metadata.mtimeMs >= staleMs; + } catch { + return false; + } +} + +async function acquireWriteLock(lockRoot: string, options: StateWriteLockOptions): Promise<() => Promise> { + const timeoutMs = options.timeoutMs ?? DEFAULT_LOCK_TIMEOUT_MS; + const pollMs = options.pollMs ?? DEFAULT_LOCK_POLL_MS; + const staleMs = options.staleMs ?? DEFAULT_LOCK_STALE_MS; + const lockName = options.lockName ?? LOCK_FILE_NAME; + const lockPath = join(lockRoot, lockName); + const deadline = Date.now() + timeoutMs; + + await ensureDir(lockRoot); + + while (true) { + try { + const handle = await open(lockPath, "wx"); + try { + await handle.writeFile( + `${JSON.stringify({ pid: process.pid, owner: options.owner ?? "state-store", acquired_at: nowIso() }, null, 2)}\n`, + "utf8", + ); + await handle.sync(); + } finally { + await handle.close(); + } + return async () => { + await rm(lockPath, { force: true }); + }; + } catch (error) { + const code = error instanceof Error && "code" in error ? String((error as { code?: unknown }).code ?? 
"") : ""; + if (code !== "EEXIST") { + throw error; + } + if (await isStaleLock(lockPath, staleMs)) { + await rm(lockPath, { force: true }); + continue; + } + if (Date.now() >= deadline) { + throw new Error(`timed out acquiring state write lock: ${lockPath}`); + } + await delay(pollMs); + } + } +} + +export async function withWriteLock( + lockRoot: string, + callback: () => Promise, + options: StateWriteLockOptions = {}, +): Promise { + const release = await acquireWriteLock(lockRoot, options); + try { + return await callback(); + } finally { + await release(); + } +} + +export async function atomicWriteText(path: string, value: string): Promise { + await ensureDir(dirname(path)); + const tempPath = join(dirname(path), `.${basename(path)}.${process.pid}.${Date.now()}.${randomUUID()}.tmp`); + let handle: Awaited> | null = null; + try { + handle = await open(tempPath, "w"); + await handle.writeFile(value, "utf8"); + await handle.sync(); + await handle.close(); + handle = null; + await rename(tempPath, path); + } catch (error) { + if (handle !== null) { + await handle.close().catch(() => undefined); + } + await rm(tempPath, { force: true }).catch(() => undefined); + throw error; + } +} + +export async function atomicWriteJson(path: string, value: unknown): Promise { + await atomicWriteText(path, `${JSON.stringify(value, null, 2)}\n`); +} + +async function finalizePendingBatch(entries: PendingBatchEntry[]): Promise { + for (const entry of entries) { + try { + await rename(entry.temp_path, entry.path); + } catch (error) { + const code = error instanceof Error && "code" in error ? String((error as { code?: unknown }).code ?? 
"") : ""; + if (code === "ENOENT") { + continue; + } + throw error; + } + } +} + +async function readPendingBatch(transactionPath: string): Promise { + try { + return JSON.parse(await Bun.file(transactionPath).text()) as PendingBatchRecord; + } catch { + return null; + } +} + +export async function recoverPendingBatch(lockRoot: string): Promise { + const transactionPath = join(lockRoot, TRANSACTION_FILE_NAME); + const pendingBatch = await readPendingBatch(transactionPath); + if (pendingBatch === null) { + return; + } + await finalizePendingBatch(pendingBatch.entries); + await rm(transactionPath, { force: true }); +} + +export async function atomicWriteBatch(lockRoot: string, entries: AtomicWriteEntry[]): Promise { + const transactionPath = join(lockRoot, TRANSACTION_FILE_NAME); + await recoverPendingBatch(lockRoot); + + const pendingBatch: PendingBatchRecord = { + created_at: nowIso(), + entries: [], + }; + + for (const entry of entries) { + const tempPath = join(dirname(entry.path), `.${basename(entry.path)}.${process.pid}.${Date.now()}.${randomUUID()}.tmp`); + let handle: Awaited> | null = null; + try { + await ensureDir(dirname(entry.path)); + handle = await open(tempPath, "w"); + await handle.writeFile(entry.content, "utf8"); + await handle.sync(); + await handle.close(); + handle = null; + pendingBatch.entries.push({ + path: entry.path, + temp_path: tempPath, + }); + } catch (error) { + if (handle !== null) { + await handle.close().catch(() => undefined); + } + await rm(tempPath, { force: true }).catch(() => undefined); + throw error; + } + } + + await atomicWriteJson(transactionPath, pendingBatch); + + try { + await finalizePendingBatch(pendingBatch.entries); + await rm(transactionPath, { force: true }); + } catch (error) { + throw error; + } +} diff --git a/ops/archive/job-root.ts b/ops/archive/job-root.ts index 8aba7dc..475b39b 100644 --- a/ops/archive/job-root.ts +++ b/ops/archive/job-root.ts @@ -17,6 +17,7 @@ export interface JobRootLayout { 
environmentPath: string; finalSummaryPath: string; runtimeHomeRoot: string; + runtimeHomeRootForRole: (runRole: "builder" | "qa") => string; planPath: string; freezePath: string; freezeJsonPath: string; @@ -40,6 +41,7 @@ export function resolveJobRootLayout(repoRoot: string, jobId: string): JobRootLa const finalRoot = join(artifactRoot, "final"); const artifactMetadataRoot = join(artifactRoot, "metadata"); const runtimeHomeRoot = join(jobRoot, "runtime-home", "phase1-local"); + const runtimeHomeRootForRole = (runRole: "builder" | "qa") => join(runtimeHomeRoot, runRole); return { repoRoot, @@ -57,6 +59,7 @@ export function resolveJobRootLayout(repoRoot: string, jobId: string): JobRootLa environmentPath: join(artifactMetadataRoot, "environment.json"), finalSummaryPath: join(finalRoot, "final-summary.en.md"), runtimeHomeRoot, + runtimeHomeRootForRole, planPath: join(jobRoot, "DEVELOPMENT_PLAN.en.md"), freezePath: join(jobRoot, "CONTRACT_FREEZE.en.md"), freezeJsonPath: join(jobRoot, "contract-freeze.json"), @@ -80,4 +83,6 @@ export async function ensureJobRootLayout(layout: JobRootLayout): Promise await ensureDir(layout.finalRoot); await ensureDir(layout.artifactMetadataRoot); await ensureDir(layout.runtimeHomeRoot); + await ensureDir(layout.runtimeHomeRootForRole("builder")); + await ensureDir(layout.runtimeHomeRootForRole("qa")); } diff --git a/ops/guards/credential-injector.ts b/ops/guards/credential-injector.ts new file mode 100644 index 0000000..438b594 --- /dev/null +++ b/ops/guards/credential-injector.ts @@ -0,0 +1,116 @@ +import { join } from "node:path"; +import { readJson } from "../../core/loop/support.ts"; +import type { RunEnvelope, RunExitStatus, TaskPacket, WorkerOutput } from "../../core/contracts/types.ts"; + +interface AdapterPolicy { + credential_injection?: { + supported_modes?: string[]; + supported_sources?: string[]; + fixture_env_prefix?: string; + credential_env_prefix?: string; + }; + log_redaction_rules?: { + replace_with?: string; + 
patterns?: string[]; + }; +} + +export interface CredentialInjectionResult { + allowed: boolean; + status: RunExitStatus | null; + reason: string | null; + environment: Record; + redactor: LogRedactor; +} + +export class LogRedactor { + private readonly patterns: RegExp[]; + private readonly replaceWith: string; + + constructor(patterns: RegExp[], replaceWith: string, private readonly secrets: string[]) { + this.patterns = patterns; + this.replaceWith = replaceWith; + } + + redactText(value: string): string { + let redacted = value; + for (const pattern of this.patterns) { + redacted = redacted.replace(pattern, this.replaceWith); + } + for (const secret of this.secrets) { + if (secret.length > 0) { + redacted = redacted.replaceAll(secret, this.replaceWith); + } + } + return redacted; + } + + redactCommand(command: string[]): string[] { + return command.map((value) => this.redactText(value)); + } + + redactWorkerOutput(output: WorkerOutput): WorkerOutput { + const redactList = (values: string[]) => values.map((value) => this.redactText(value)); + return { + ...output, + completed: redactList(output.completed), + open: redactList(output.open), + blockers: redactList(output.blockers), + next_action: this.redactText(output.next_action), + evidence_paths: redactList(output.evidence_paths), + report_paths: redactList(output.report_paths), + test_result_paths: redactList(output.test_result_paths), + fixback_items: redactList(output.fixback_items), + }; + } +} + +function failure(status: RunExitStatus, reason: string, redactor: LogRedactor): CredentialInjectionResult { + return { + allowed: false, + status, + reason, + environment: {}, + redactor, + }; +} + +function success(environment: Record, redactor: LogRedactor): CredentialInjectionResult { + return { + allowed: true, + status: null, + reason: null, + environment, + redactor, + }; +} + +export class CredentialInjector { + private policyPromise: Promise | null = null; + + constructor(private readonly repoRoot: string) 
{} + + private async loadPolicy(): Promise { + if (this.policyPromise === null) { + this.policyPromise = readJson(join(this.repoRoot, "adapters", "generic-cli", "adapter-policy.json")); + } + return this.policyPromise; + } + + async resolve(taskPacket: TaskPacket, envelope: RunEnvelope): Promise { + void envelope; + const policy = await this.loadPolicy(); + const redactionPatterns = (policy.log_redaction_rules?.patterns ?? []).map((value) => new RegExp(value, "giu")); + const replaceWith = policy.log_redaction_rules?.replace_with ?? "[REDACTED]"; + const injectionRequests = taskPacket.credential_injection_requests ?? []; + + if (injectionRequests.length === 0) { + return success({}, new LogRedactor(redactionPatterns, replaceWith, [])); + } + return failure( + "FAILED_POLICY", + "host-boundary credential injection is not available for docker worker launches in this adapter profile", + new LogRedactor(redactionPatterns, replaceWith, []), + ); + } +} diff --git a/ops/guards/shell-policy.ts b/ops/guards/shell-policy.ts new file mode 100644 index 0000000..bae72ea --- /dev/null +++ b/ops/guards/shell-policy.ts @@ -0,0 +1,131 @@ +import { basename, join } from "node:path"; +import { readJson, toPosixPath } from "../../core/loop/support.ts"; +import type { ContainerPathMount, RunExitStatus } from "../../core/contracts/types.ts"; + +interface AdapterPolicy { + shell_policy?: { + blocked_commands?: string[]; + dangerous_patterns?: string[]; + never_auto_approve_patterns?: string[]; + env_allowlist?: string[]; + }; +} + +const DANGEROUS_DYNAMIC_ENV_NAMES = new Set([ + "BASH_ENV", + "DYLD_INSERT_LIBRARIES", + "ENV", + "GIT_CONFIG_GLOBAL", + "GIT_CONFIG_SYSTEM", + "LD_PRELOAD", + "NODE_OPTIONS", + "PATH", + "PROMPT_COMMAND", + "PYTHONPATH", +]); + +export interface ShellPolicyDecision { + allowed: boolean; + reason: string | null; + status: RunExitStatus | null; +} + +export interface ShellPolicyInput { + executable: string; + command: string[]; + envNames: string[]; + 
dynamicEnvNames?: string[]; + mounts: ContainerPathMount[]; + runRole: string; +} + +function deny(reason: string): ShellPolicyDecision { + return { + allowed: false, + reason, + status: "FAILED_POLICY", + }; +} + +function normalizeCommandValue(value: string): string { + return value.trim().toLowerCase(); +} + +function normalizeHostPath(value: string): string { + return toPosixPath(value).replace(/\/+$/u, ""); +} + +export class ShellPolicy { + private policyPromise: Promise | null = null; + + constructor(private readonly repoRoot: string) {} + + private async loadPolicy(): Promise { + if (this.policyPromise === null) { + this.policyPromise = readJson(join(this.repoRoot, "adapters", "generic-cli", "adapter-policy.json")); + } + return this.policyPromise; + } + + async evaluate(input: ShellPolicyInput): Promise { + const policy = await this.loadPolicy(); + const shellPolicy = policy.shell_policy ?? {}; + const executableName = basename(input.executable).toLowerCase(); + const blockedCommands = new Set((shellPolicy.blocked_commands ?? []).map((value) => value.toLowerCase())); + if (blockedCommands.has(executableName)) { + return deny(`host shell policy blocked executable: ${executableName}`); + } + + const envAllowlist = (shellPolicy.env_allowlist ?? []).map((value) => new RegExp(value, "u")); + const dynamicEnvNames = new Set(input.dynamicEnvNames ?? []); + for (const envName of input.envNames) { + const dynamicEnvAllowed = + dynamicEnvNames.has(envName) && + /^[A-Z][A-Z0-9_]*$/u.test(envName) && + !DANGEROUS_DYNAMIC_ENV_NAMES.has(envName); + if (!dynamicEnvAllowed && !envAllowlist.some((pattern) => pattern.test(envName))) { + return deny(`host shell policy blocked environment variable: ${envName}`); + } + } + + const dangerousPatterns = (shellPolicy.dangerous_patterns ?? []).map((value) => new RegExp(value, "iu")); + const neverAutoApprovePatterns = (shellPolicy.never_auto_approve_patterns ?? 
[]).map((value) => new RegExp(value, "iu")); + for (const token of input.command) { + const normalizedToken = normalizeCommandValue(token); + if (dangerousPatterns.some((pattern) => pattern.test(token)) || neverAutoApprovePatterns.some((pattern) => pattern.test(token))) { + return deny(`host shell policy blocked command token: ${token}`); + } + if (normalizedToken === "--network=host" || normalizedToken === "--privileged") { + return deny(`host shell policy blocked command token: ${token}`); + } + } + for (let index = 0; index < input.command.length; index += 1) { + if (normalizeCommandValue(input.command[index]) === "--network") { + const networkMode = input.command[index + 1]?.trim().toLowerCase() ?? ""; + if (networkMode !== "none") { + return deny(`host shell policy requires --network none, got ${networkMode || ""}`); + } + } + } + + const writableMounts = input.mounts.filter((mount) => !mount.read_only); + const allowedWritableMounts = new Set(["repo", "run-artifacts", "repo-run-artifacts", "runtime-home", "cache"]); + for (const mount of writableMounts) { + if (!allowedWritableMounts.has(mount.name)) { + return deny(`host shell policy blocked writable mount: ${mount.name}`); + } + if (mount.name === "repo" && normalizeHostPath(mount.host_path) === normalizeHostPath(this.repoRoot)) { + return deny("repo mount must use the job-scoped workspace, not the control repo root"); + } + if (mount.name === "runtime-home" && /\/runtime-home\/phase1-local$/u.test(normalizeHostPath(mount.host_path))) { + return deny(`runtime home must be role-scoped for ${input.runRole}`); + } + } + + return { + allowed: true, + reason: null, + status: null, + }; + } +} diff --git a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index de3f69d..cf906fd 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -549,6 +549,8 @@ def 
test_phase1_local_success_records_contract_checks_and_acceptance_mapping(tmp qa_run_root = next(path for path in run_roots if path.name.startswith("run-qa-")) builder_check = json.loads((builder_run_root / "evidence" / "test-results" / "builder-check.json").read_text(encoding="utf-8")) qa_verdict = json.loads((qa_run_root / "metadata" / "qa-verdict.json").read_text(encoding="utf-8")) + builder_state_snapshot = json.loads((builder_run_root / "metadata" / "state" / "trace-index.json").read_text(encoding="utf-8")) + qa_state_snapshot = json.loads((qa_run_root / "metadata" / "state" / "trace-index.json").read_text(encoding="utf-8")) assert builder_check["self_checks"]["approval_context"] == "pass" assert builder_check["self_checks"]["artifact_presence"] == "pass" @@ -558,6 +560,8 @@ def test_phase1_local_success_records_contract_checks_and_acceptance_mapping(tmp assert qa_verdict["language_validation"]["status"] == "pass" assert qa_verdict["scope_validation"]["undeclared_builder_artifacts"] == [] assert qa_verdict["mandatory_checks"]["acceptance-closure"]["status"] == "pass" + assert builder_state_snapshot["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "builder" + assert qa_state_snapshot["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "qa" @pytest.mark.integration @@ -583,11 +587,11 @@ def test_phase1_local_builder_container_materialization_uses_read_only_inputs_an mounts = {mount["target"]: mount for mount in builder_capture["mounts"]} job_root = repo_root / "jobs" / "job-phase1-local" - assert builder_task_packet["repo_path"] == repo_root.as_posix() + assert builder_task_packet["repo_path"] == (job_root / "repo").as_posix() assert builder_task_packet["state_path"] == (job_root / "state").as_posix() - assert builder_task_packet["runtime_home"] == (job_root / "runtime-home" / "phase1-local").as_posix() + assert builder_task_packet["runtime_home"] == (job_root / "runtime-home" / "phase1-local" / "builder").as_posix() assert 
builder_task_packet["artifact_path"] == (job_root / "artifacts" / "runs" / builder_envelope["run_id"]).as_posix() - assert "container_control" not in builder_task_packet["requested_capabilities"] + assert "container_control" in builder_task_packet["requested_capabilities"] assert container_paths["repo_path"] == "/work/repo" assert container_paths["state_path"] == "/work/state" assert container_paths["runtime_home"] == "/work/runtime-home" @@ -599,21 +603,20 @@ def test_phase1_local_builder_container_materialization_uses_read_only_inputs_an (repo_root / "adapters" / "generic-cli" / "adapter-capability.json").read_text(encoding="utf-8") ) filesystem_write_scope = set(capability_manifest["capabilities"]["filesystem_write"]["scope"]) - repo_job_root_path = f"/work/repo/jobs/job-phase1-local" - repo_run_root_path = f"{repo_job_root_path}/artifacts/runs/{builder_envelope['run_id']}" task_packet_target = f"{builder_envelope['artifact_path']}/metadata/task-packet.en.json" + mount_sources = {mount["target"]: mount["source"] for mount in builder_capture["mounts"]} + assert mount_sources["/work/repo"] == (job_root / "repo").as_posix() assert mounts["/work/repo"].get("readonly") != "true" - assert mounts[repo_job_root_path]["readonly"] == "true" assert mounts["/work/state"]["readonly"] == "true" assert mounts["/work/artifacts"]["readonly"] == "true" assert mounts[builder_envelope["artifact_path"]].get("readonly") != "true" - assert mounts[repo_run_root_path].get("readonly") != "true" assert mounts[task_packet_target]["readonly"] == "true" - assert mounts[f"{repo_run_root_path}/metadata/task-packet.en.json"]["readonly"] == "true" + assert not any(target.startswith("/work/repo/jobs/") for target in mounts) assert mounts["/work/runtime-home"].get("readonly") != "true" - assert mounts[f"{repo_job_root_path}/runtime-home/phase1-local"].get("readonly") != "true" - assert filesystem_write_scope == {"repo", "run-artifacts", "runtime-home"} + assert mount_sources["/work/runtime-home"] 
== (job_root / "runtime-home" / "phase1-local" / "builder").as_posix() + assert {"run-artifacts", "runtime-home"}.issubset(filesystem_write_scope) + assert all(source != repo_root.as_posix() for source in mount_sources.values()) manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) freeze = json.loads((job_root / "contract-freeze.json").read_text(encoding="utf-8")) @@ -622,7 +625,7 @@ def test_phase1_local_builder_container_materialization_uses_read_only_inputs_an assert expected_task_packet_path == f"artifacts/runs/{builder_envelope['run_id']}/metadata/task-packet.en.json" assert json.loads((job_root / expected_task_packet_path).read_text(encoding="utf-8")) == builder_task_packet - assert not (job_root / "runtime-home" / "phase1-local" / "envelopes" / "container" / "task-packets").exists() + assert not (job_root / "runtime-home" / "phase1-local" / "builder" / "envelopes" / "container" / "task-packets").exists() assert expected_task_packet_path in (job_root / "checksums.txt").read_text(encoding="utf-8") assert freeze["task_packet_digests"][builder_envelope["run_id"]] == builder_task_packet["task_packet_sha256"] @@ -653,15 +656,23 @@ def test_phase1_local_qa_packet_keeps_freeze_base_commit_when_builder_moves_head check=True, ).stdout.strip() job_root = repo_root / "jobs" / "job-phase1-local" + worker_repo_head = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=job_root / "repo", + capture_output=True, + text=True, + check=True, + ).stdout.strip() freeze = json.loads((job_root / "contract-freeze.json").read_text(encoding="utf-8")) manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) qa_capture = load_capture(capture_dir, "qa") - assert current_head != initial_commit + assert current_head == initial_commit + assert worker_repo_head != initial_commit assert freeze["base_commit"] == initial_commit assert manifest["base_commit"] == initial_commit assert qa_capture["task_packet"]["base_commit"] == 
initial_commit - assert qa_capture["task_packet"]["base_commit"] != current_head + assert qa_capture["task_packet"]["base_commit"] != worker_repo_head assert freeze["task_packet_digests"][qa_capture["task_packet"]["run_id"]] == qa_capture["task_packet"]["task_packet_sha256"] @@ -754,7 +765,10 @@ def test_phase1_local_capability_gate_stops_undeclared_or_denied_requests_before repo_root = export_repo(tmp_path) fake_docker = write_fake_docker(tmp_path) capture_dir = tmp_path / "captures" - write_requested_capabilities(repo_root, ["filesystem_read", "filesystem_write", "shell_command", "browser"]) + write_requested_capabilities( + repo_root, + ["filesystem_read", "filesystem_write", "shell_command", "container_control", "browser"], + ) result = run_phase1( repo_root, { @@ -773,7 +787,7 @@ def test_phase1_local_capability_gate_stops_undeclared_or_denied_requests_before assert [run["run_role"] for run in manifest["runs"]] == ["builder"] assert [run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_POLICY"] assert manifest["status"] == "AWAITING_OWNER" - assert " filesystem_read filesystem_write shell_command browser" in command_log + assert " browser container_control filesystem_read filesystem_write shell_command" in command_log assert not capture_dir.exists() or not list(capture_dir.iterdir())