diff --git a/adapters/generic-cli/adapter-capability.json b/adapters/generic-cli/adapter-capability.json index a040fe3..5e12241 100644 --- a/adapters/generic-cli/adapter-capability.json +++ b/adapters/generic-cli/adapter-capability.json @@ -17,8 +17,8 @@ "filesystem_write": { "mode": "allow", "scope": [ - "state", - "artifacts", + "repo", + "run-artifacts", "runtime-home" ], "cost_level": "low", @@ -28,10 +28,10 @@ "shell_command": { "mode": "allow", "scope": [ - "local-worker-command" + "docker-worker-command" ], "cost_level": "low", - "risk_level": "medium", + "risk_level": "high", "approval_requirement": "none" }, "git": { @@ -58,11 +58,13 @@ "approval_requirement": "not-supported" }, "container_control": { - "mode": "deny", - "scope": [], - "cost_level": "none", - "risk_level": "low", - "approval_requirement": "not-supported" + "mode": "allow", + "scope": [ + "single-node-local-worker-containers" + ], + "cost_level": "low", + "risk_level": "high", + "approval_requirement": "none" }, "screenshot": { "mode": "deny", diff --git a/adapters/generic-cli/adapter-policy.json b/adapters/generic-cli/adapter-policy.json index d6160d4..22d0c6f 100644 --- a/adapters/generic-cli/adapter-policy.json +++ b/adapters/generic-cli/adapter-policy.json @@ -5,5 +5,50 @@ "retry_limit": 0, "log_redaction": "no-secrets-in-fixture", "scope_policy": "single-approved-story-only", - "failure_policy": "return-standard-run-exit-status" + "failure_policy": "return-standard-run-exit-status", + "shell_policy": { + "blocked_commands": [ + "rm", + "del", + "format", + "diskpart" + ], + "dangerous_patterns": [ + "Authorization\\s*:\\s*Bearer\\s+\\S+", + "cookie\\s*[:=]\\s*\\S+", + "token\\s*[:=]\\s*\\S+" + ], + "never_auto_approve_patterns": [ + "--privileged", + "--pid=host", + "--network=host", + "type=bind,source=/,target=/" + ], + "env_allowlist": [ + "^HOME$", + "^XDG_CACHE_HOME$", + "^BUN_INSTALL_CACHE_DIR$", + "^CODINGCLAW_CRED_[A-Z0-9_]+$" + ] + }, + "credential_injection": { + "supported_modes": [ + "env" + ], + "supported_sources": [ + "fixture", + "env" + ], + "fixture_env_prefix": "CODINGCLAW_SECRET_FIXTURE_", + "credential_env_prefix": "CODINGCLAW_CRED_" + }, + "log_redaction_rules": { + "replace_with": "[REDACTED]", + "patterns": [ + "Authorization\\s*:\\s*Bearer\\s+\\S+", + "cookie\\s*[:=]\\s*[^\\r\\n;]+", + "token\\s*[:=]\\s*\\S+", + "password\\s*[:=]\\s*\\S+" + ] + } } diff --git a/adapters/generic-cli/adapter.json b/adapters/generic-cli/adapter.json index 2500292..29c6096 100644 --- a/adapters/generic-cli/adapter.json +++ b/adapters/generic-cli/adapter.json @@ -16,7 +16,7 @@ "local-fixture" ], "supported_sandbox_model": [ - "host-process" + "docker-container" ], "supported_artifact_outputs": [ "run-result.json", diff --git a/adapters/generic-cli/adapter.ts b/adapters/generic-cli/adapter.ts index 7e18048..18d114e 100644 --- a/adapters/generic-cli/adapter.ts +++ b/adapters/generic-cli/adapter.ts @@ -1,29 +1,35 @@ -import { dirname, join } from "node:path"; +import { join } from "node:path"; import { buildArtifactIndex } from "../../ops/archive/artifact-index.ts"; import { writeRunTimings, writeWorkerLog } from "../../ops/archive/run-metadata.ts"; -import { ensureDir, relativePosix, writeJson, writeText } from "../../core/loop/support.ts"; +import { ensureDir, readJson, relativePosix, toPosixPath, uniqueStrings, writeJson, writeText } from "../../core/loop/support.ts"; import type { AdapterExecutionResult, RunEnvelope, RunResult, RunRole, RunTimingMetadata, + TaskPacket, WorkerOutput, } from 
"../../core/contracts/types.ts"; +import { DockerWorkerLauncher, type DockerWorkerLaunchResult, materializeContainerizedRunEnvelope } from "./docker-runtime.ts"; +import { CapabilityGate, type CapabilityGateDecision } from "./capability-gate.ts"; +import { CredentialInjector } from "../../ops/guards/credential-injector.ts"; -function workerScriptForRole(repoRoot: string, runRole: RunRole): string { +const REQUIRED_LAUNCH_CAPABILITIES = ["container_control"] as const; + +function workerScriptForRole(rootPath: string, runRole: RunRole): string { if (runRole === "builder") { - return join(repoRoot, "ops", "workers", "builder.ts"); + return toPosixPath(join(rootPath, "ops", "workers", "builder.ts")); } if (runRole === "qa") { - return join(repoRoot, "ops", "workers", "qa.ts"); + return toPosixPath(join(rootPath, "ops", "workers", "qa.ts")); } throw new Error(`unsupported run role: ${runRole}`); } -function fallbackWorkerOutput(errorText: string): WorkerOutput { +function fallbackWorkerOutput(errorText: string, status: WorkerOutput["status"] = "FAILED_EXECUTION"): WorkerOutput { return { - status: "FAILED_EXECUTION", + status, completed: [], open: ["Inspect the worker stderr output."], blockers: errorText ? [errorText.trim()] : ["worker execution failed"], @@ -37,6 +43,60 @@ function fallbackWorkerOutput(errorText: string): WorkerOutput { }; } +function formatErrorText(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +function launchFailureText(launchResult: DockerWorkerLaunchResult): string { + if (launchResult.failure_status === "TIMEOUT") { + return "worker timed out before producing a complete result"; + } + if (launchResult.failure_status === "BUDGET_EXCEEDED") { + return "worker exceeded the configured budget"; + } + const stderr = launchResult.stderr.trim(); + if (stderr) { + return stderr; + } + const stdout = launchResult.stdout.trim(); + if (stdout) { + return stdout; + } + return `launcher exited with code ${launchResult.exitCode}`; +} + +function unexpectedLaunchResult(error: unknown): DockerWorkerLaunchResult { + return { + command: [""], + exitCode: -1, + stdout: "", + stderr: formatErrorText(error), + failure_status: "FAILED_INFRA", + }; +} + +const BUILDER_FALLBACK_REPORT_PATHS = ["reports/implementation-summary.en.md", "reports/self-check.en.md"]; +const BUILDER_FALLBACK_TEST_RESULT_PATHS = ["evidence/test-results/builder-check.json"]; +const BUILDER_FALLBACK_EVIDENCE_PATHS = [ + ...BUILDER_FALLBACK_REPORT_PATHS, + ...BUILDER_FALLBACK_TEST_RESULT_PATHS, +]; +const QA_FALLBACK_REPORT_PATHS = ["reports/qa-report.en.md", "reports/fixback-items.en.md"]; +const QA_FALLBACK_TEST_RESULT_PATHS = ["evidence/test-results/qa-check.json"]; +const QA_FALLBACK_METADATA_PATHS = ["metadata/qa-verdict.json"]; +const QA_FALLBACK_EVIDENCE_PATHS = [ + ...QA_FALLBACK_REPORT_PATHS, + ...QA_FALLBACK_TEST_RESULT_PATHS, + ...QA_FALLBACK_METADATA_PATHS, +]; + +async function ensureHostWritableRunLayout(runRoot: string): Promise { + await ensureDir(join(runRoot, "logs")); + await ensureDir(join(runRoot, "reports")); + await ensureDir(join(runRoot, "metadata")); + await ensureDir(join(runRoot, "evidence", "test-results")); +} + function renderCommandLog(command: string[], exitCode: number, stdout: string, stderr: string): string { return [ `command: ${command.map((value) => value.replaceAll("\\", "/")).join(" ")}`, @@ -108,49 +168,251 @@ function renderHandoff( ].join("\n"); } +function withBuilderFallbackPaths(workerOutput: WorkerOutput): WorkerOutput { + 
return { + ...workerOutput, + evidence_paths: uniqueStrings([...workerOutput.evidence_paths, ...BUILDER_FALLBACK_EVIDENCE_PATHS]), + report_paths: uniqueStrings([...workerOutput.report_paths, ...BUILDER_FALLBACK_REPORT_PATHS]), + test_result_paths: uniqueStrings([...workerOutput.test_result_paths, ...BUILDER_FALLBACK_TEST_RESULT_PATHS]), + }; +} + +function qaFallbackItems(workerOutput: WorkerOutput): string[] { + if (workerOutput.fixback_items.length > 0) { + return workerOutput.fixback_items; + } + if (workerOutput.blockers.length > 0) { + return workerOutput.blockers; + } + return ["QA did not produce its required outputs."]; +} + +function withQaFallbackPaths(workerOutput: WorkerOutput): WorkerOutput { + const fixbackItems = qaFallbackItems(workerOutput); + return { + ...workerOutput, + evidence_paths: uniqueStrings([...workerOutput.evidence_paths, ...QA_FALLBACK_EVIDENCE_PATHS]), + report_paths: uniqueStrings([...workerOutput.report_paths, ...QA_FALLBACK_REPORT_PATHS]), + test_result_paths: uniqueStrings([...workerOutput.test_result_paths, ...QA_FALLBACK_TEST_RESULT_PATHS]), + fixback_items: fixbackItems, + }; +} + +async function writeBuilderFallbackArtifacts( + runRoot: string, + envelope: RunEnvelope, + workerOutput: WorkerOutput, +): Promise { + await writeText( + join(runRoot, "reports", "implementation-summary.en.md"), + [ + "# Implementation Summary", + "", + `- job_id: ${envelope.job_id}`, + `- run_id: ${envelope.run_id}`, + `- story_id: ${envelope.story_id}`, + `- status: ${workerOutput.status}`, + "- completed work:", + "- worker execution did not produce the builder implementation bundle", + "- adapter synthesized the required builder reports for archival completeness", + "", + ].join("\n"), + ); + await writeText( + join(runRoot, "reports", "self-check.en.md"), + [ + "# Self Check", + "", + "- required checks executed:", + "- scope-compliance: blocked", + "- artifact-presence: failed", + "- evidence-completeness: failed", + `- next required action: ${workerOutput.next_action}`, + "", + ].join("\n"), + ); + await writeJson(join(runRoot, "evidence", "test-results", "builder-check.json"), { + run_id: envelope.run_id, + run_role: envelope.run_role, + story_id: envelope.story_id, + status: workerOutput.status, + checked_items: [], + blockers: workerOutput.blockers, + }); +} + +async function writeQaFallbackArtifacts( + runRoot: string, + envelope: RunEnvelope, + taskPacket: TaskPacket, + workerOutput: WorkerOutput, +): Promise { + await writeText( + join(runRoot, "reports", "qa-report.en.md"), + [ + "# QA Report", + "", + `- job_id: ${envelope.job_id}`, + `- run_id: ${envelope.run_id}`, + `- story_id: ${taskPacket.story.story_id}`, + `- QA verdict: ${workerOutput.status}`, + "- checked artifacts:", + "- none", + "", + "- missing artifacts:", + ...workerOutput.fixback_items.map((value) => `- ${value}`), + "", + ].join("\n"), + ); + await writeText( + join(runRoot, "reports", "fixback-items.en.md"), + ["# Fixback Items", "", ...workerOutput.fixback_items.map((value) => `- ${value}`), ""].join("\n"), + ); + await writeJson(join(runRoot, "evidence", "test-results", "qa-check.json"), { + run_id: envelope.run_id, + run_role: envelope.run_role, + story_id: taskPacket.story.story_id, + status: workerOutput.status, + checked_items: [], + blockers: workerOutput.blockers, + }); + await writeJson(join(runRoot, "metadata", "qa-verdict.json"), { + story_id: taskPacket.story.story_id, + status_family: "run_exit", + status: workerOutput.status, + acceptance_closure: { + pass: 0, + fail: 0, + 
blocked: taskPacket.story.acceptance_ids.length, + total: taskPacket.story.acceptance_ids.length, + }, + }); +} + export class GenericCliAdapter { - constructor(private readonly repoRoot: string) {} + private readonly dockerLauncher: DockerWorkerLauncher; + private readonly capabilityGate: CapabilityGate; + private readonly credentialInjector: CredentialInjector; + + constructor(private readonly repoRoot: string) { + this.dockerLauncher = new DockerWorkerLauncher(repoRoot); + this.capabilityGate = new CapabilityGate(repoRoot); + this.credentialInjector = new CredentialInjector(repoRoot); + } async execute(envelope: RunEnvelope): Promise { const runRoot = envelope.artifact_path; - const envelopePath = join(envelope.runtime_home, "envelopes", `${envelope.run_id}.json`); const commandLogPath = join(runRoot, "logs", "command-log.txt"); const workerLogPath = join(runRoot, "logs", "worker.log"); const runResultPath = join(runRoot, "metadata", "run-result.json"); const timingsPath = join(runRoot, "metadata", "timings.json"); const artifactIndexPath = join(runRoot, "metadata", "artifact-index.json"); const handoffPath = join(runRoot, "reports", "handoff.en.md"); - const workerScript = workerScriptForRole(this.repoRoot, envelope.run_role); - - await ensureDir(dirname(envelopePath)); - await writeJson(envelopePath, envelope); + await ensureHostWritableRunLayout(runRoot); + let taskPacketPath = envelope.task_packet_path; + const taskPacket = await readJson(taskPacketPath); + const canonicalRequestedCapabilities = uniqueStrings(taskPacket.requested_capabilities); + const envelopeRequestedCapabilities = uniqueStrings(envelope.requested_capabilities); - const command = [process.execPath, workerScript, envelopePath]; const startedAtDate = new Date(); - const processHandle = Bun.spawn({ - cmd: command, - cwd: this.repoRoot, - stdout: "pipe", - stderr: "pipe", - }); - - const stdoutPromise = new Response(processHandle.stdout).text(); - const stderrPromise = new Response(processHandle.stderr).text(); - const exitCode = await processHandle.exited; - const stdout = await stdoutPromise; - const stderr = await stderrPromise; + let launchResult: DockerWorkerLaunchResult; + let capabilityDecision: CapabilityGateDecision; + const credentialInjection = await this.credentialInjector.resolve(taskPacket, envelope); + if (canonicalRequestedCapabilities.join("\n") !== envelopeRequestedCapabilities.join("\n")) { + launchResult = { + command: ["", ...canonicalRequestedCapabilities], + exitCode: 1, + stdout: "", + stderr: "run envelope requested_capabilities do not match the canonical task packet", + failure_status: "FAILED_POLICY", + }; + capabilityDecision = { + allowed: false, + reason: launchResult.stderr, + status: launchResult.failure_status, + }; + } else { + try { + capabilityDecision = await this.capabilityGate.evaluate( + canonicalRequestedCapabilities, + [ + ...REQUIRED_LAUNCH_CAPABILITIES, + ...(taskPacket.credential_injection_requests?.length ? ["secret_injection"] : []), + ], + ); + } catch (error) { + capabilityDecision = { + allowed: false, + reason: `capability gate could not load adapter policy: ${formatErrorText(error)}`, + status: "FAILED_POLICY", + }; + } + } + if (!capabilityDecision.allowed) { + launchResult = { + command: ["", ...canonicalRequestedCapabilities], + exitCode: 1, + stdout: "", + stderr: capabilityDecision.reason ?? "capability gate rejected the launch", + failure_status: capabilityDecision.status ?? 
"FAILED_POLICY", + }; + } else if (!credentialInjection.allowed) { + launchResult = { + command: [""], + exitCode: 1, + stdout: "", + stderr: credentialInjection.reason ?? "credential injection rejected the launch", + failure_status: credentialInjection.status ?? "FAILED_POLICY", + }; + } else { + try { + const materialization = await materializeContainerizedRunEnvelope(envelope); + taskPacketPath = materialization.canonical_task_packet_path; + const workerScript = workerScriptForRole( + materialization.runtime.container_paths.repo_path, + materialization.container_envelope.run_role, + ); + launchResult = await this.dockerLauncher.launch({ + run_role: materialization.container_envelope.run_role, + image: materialization.runtime.image, + worker_script_path: workerScript, + envelope_path: materialization.runtime.envelope_container_path, + runtime: materialization.runtime, + time_limits: envelope.time_limits, + environment: credentialInjection.environment, + }); + } catch (error) { + launchResult = unexpectedLaunchResult(error); + } + } + const redactedLaunchResult: DockerWorkerLaunchResult = { + ...launchResult, + command: credentialInjection.redactor.redactCommand(launchResult.command), + stdout: credentialInjection.redactor.redactText(launchResult.stdout), + stderr: credentialInjection.redactor.redactText(launchResult.stderr), + }; + const exitCode = launchResult.exitCode; + const stdout = redactedLaunchResult.stdout; + const stderr = redactedLaunchResult.stderr; const endedAtDate = new Date(); let workerOutput: WorkerOutput; + let usedFallbackWorkerOutput = false; if (exitCode === 0) { try { - workerOutput = JSON.parse(stdout) as WorkerOutput; + workerOutput = JSON.parse(launchResult.stdout) as WorkerOutput; } catch { workerOutput = fallbackWorkerOutput("worker output was not valid JSON"); + usedFallbackWorkerOutput = true; } } else { - workerOutput = fallbackWorkerOutput(stderr); + workerOutput = fallbackWorkerOutput( + launchFailureText(redactedLaunchResult), + launchResult.failure_status ?? 
"FAILED_EXECUTION", + ); + usedFallbackWorkerOutput = true; } + workerOutput = credentialInjection.redactor.redactWorkerOutput(workerOutput); const durationMs = Math.max(0, endedAtDate.getTime() - startedAtDate.getTime()); const runTimings: RunTimingMetadata = { @@ -166,7 +428,7 @@ export class GenericCliAdapter { duration_s: Math.max(0, Math.round(durationMs / 1000)), }; - await writeText(commandLogPath, renderCommandLog(command, exitCode, stdout, stderr)); + await writeText(commandLogPath, renderCommandLog(redactedLaunchResult.command, exitCode, stdout, stderr)); await writeWorkerLog(workerLogPath, { job_id: envelope.job_id, run_id: envelope.run_id, @@ -178,6 +440,16 @@ export class GenericCliAdapter { stderr, }); await writeRunTimings(timingsPath, runTimings); + if (usedFallbackWorkerOutput) { + if (envelope.run_role === "builder") { + workerOutput = withBuilderFallbackPaths(workerOutput); + await writeBuilderFallbackArtifacts(runRoot, envelope, workerOutput); + } + if (envelope.run_role === "qa") { + workerOutput = withQaFallbackPaths(workerOutput); + await writeQaFallbackArtifacts(runRoot, envelope, taskPacket, workerOutput); + } + } const runResult: RunResult = { run_id: envelope.run_id, @@ -195,6 +467,10 @@ export class GenericCliAdapter { const archivedHandoffPath = relativePosix(this.repoRoot, handoffPath); await writeJson(runResultPath, runResult); await writeText(handoffPath, renderHandoff(envelope, workerOutput.status, workerOutput, archivedHandoffPath)); + const takeoverPacketPath = + workerOutput.status === "AWAITING_TAKEOVER" + ? join(runRoot, "takeover", "takeover-packet.en.md") + : null; const artifactIndex = await buildArtifactIndex(runRoot, envelope.run_id, envelope.run_role); await writeJson(artifactIndexPath, artifactIndex); @@ -204,11 +480,12 @@ export class GenericCliAdapter { artifactIndex, workerOutput, runRoot, - taskPacketPath: envelope.task_packet_path, + taskPacketPath, runResultPath, artifactIndexPath, commandLogPath, handoffPath, + takeoverPacketPath, }; } } diff --git a/adapters/generic-cli/capability-gate.ts b/adapters/generic-cli/capability-gate.ts new file mode 100644 index 0000000..ebf836d --- /dev/null +++ b/adapters/generic-cli/capability-gate.ts @@ -0,0 +1,83 @@ +import { join } from "node:path"; +import { readJson, uniqueStrings } from "../../core/loop/support.ts"; +import type { RunExitStatus } from "../../core/contracts/types.ts"; + +interface CapabilityEntry { + mode: string; + approval_requirement: string; +} + +interface CapabilityManifest { + profile_id: string; + capabilities: Record; +} + +export interface CapabilityGateDecision { + allowed: boolean; + reason: string | null; + status: RunExitStatus | null; +} + +function deniedCapabilityStatus(entry: CapabilityEntry): RunExitStatus { + return entry.approval_requirement !== "none" && entry.approval_requirement !== "not-supported" + ? 
"AWAITING_APPROVAL" + : "FAILED_POLICY"; +} + +export class CapabilityGate { + private manifestPromise: Promise | null = null; + + constructor(private readonly repoRoot: string) {} + + private async loadManifest(): Promise { + if (this.manifestPromise === null) { + this.manifestPromise = readJson( + join(this.repoRoot, "adapters", "generic-cli", "adapter-capability.json"), + ); + } + return this.manifestPromise; + } + + async evaluate(requestedCapabilities: string[], requiredCapabilities: string[] = []): Promise { + const manifest = await this.loadManifest(); + const requested = uniqueStrings(requestedCapabilities); + for (const capability of uniqueStrings(requiredCapabilities)) { + if (!requested.includes(capability)) { + return { + allowed: false, + reason: `required capability is missing for profile ${manifest.profile_id}: ${capability}`, + status: "FAILED_POLICY", + }; + } + } + for (const capability of requested) { + const entry = manifest.capabilities[capability]; + if (entry === undefined) { + return { + allowed: false, + reason: `requested capability is undeclared for profile ${manifest.profile_id}: ${capability}`, + status: "FAILED_POLICY", + }; + } + if (entry.mode !== "allow") { + return { + allowed: false, + reason: `requested capability is denied for profile ${manifest.profile_id}: ${capability}`, + status: deniedCapabilityStatus(entry), + }; + } + if (entry.approval_requirement !== "none") { + return { + allowed: false, + reason: `requested capability needs explicit approval before launch: ${capability}`, + status: "AWAITING_APPROVAL", + }; + } + } + return { + allowed: true, + reason: null, + status: null, + }; + } +} diff --git a/adapters/generic-cli/docker-runtime.ts b/adapters/generic-cli/docker-runtime.ts new file mode 100644 index 0000000..259fa73 --- /dev/null +++ b/adapters/generic-cli/docker-runtime.ts @@ -0,0 +1,683 @@ +import { basename, dirname, extname, isAbsolute, join, resolve } from "node:path"; +import { ensureDir, readText, sha256Text, toPosixPath, writeJson } from "../../core/loop/support.ts"; +import type { + ContainerPathMap, + ContainerPathMount, + ContainerRuntimeConfig, + RunEnvelope, + RunExitStatus, + RunRole, +} from "../../core/contracts/types.ts"; +import { ShellPolicy } from "../../ops/guards/shell-policy.ts"; + +export const CONTAINER_PATHS = { + repo: "/work/repo", + state: "/work/state", + artifacts: "/work/artifacts", + runtimeHome: "/work/runtime-home", + cache: "/work/cache", +} as const; + +const DEFAULT_BASE_IMAGE = "codingclaw-worker-base:phase1-local"; +const IMAGE_SIGNATURE_LABEL = "io.codingclaw.image-signature"; +const WINDOWS_SCRIPT_TIMEOUT_GRACE_MS = 500; + +const DEFAULT_ROLE_IMAGES: Record<"builder" | "qa", string> = { + builder: "codingclaw-worker-builder:phase1-local", + qa: "codingclaw-worker-qa:phase1-local", +}; + +const ROLE_IMAGE_ENV_VARS: Record<"builder" | "qa", string> = { + builder: "CODINGCLAW_WORKER_BUILDER_IMAGE", + qa: "CODINGCLAW_WORKER_QA_IMAGE", +}; + +const BASE_IMAGE_ENV_VAR = "CODINGCLAW_WORKER_BASE_IMAGE"; +const DOCKER_BIN_ENV_VAR = "CODINGCLAW_DOCKER_BIN"; + +interface NormalizedContainerPathMount extends ContainerPathMount { + normalized_host_path: string; + mount_order: number; +} + +export interface RoleImageResolver { + resolve(runRole: RunRole): string; +} + +export interface ContainerizedRunEnvelopeMaterialization { + host_envelope: RunEnvelope; + container_envelope: RunEnvelope; + runtime: ContainerRuntimeConfig; + host_envelope_path: string; + container_envelope_path: string; + 
canonical_task_packet_path: string;
+}
+
+export interface DockerWorkerLaunchRequest {
+  run_role: RunRole;
+  image: string;
+  worker_script_path: string;
+  envelope_path: string;
+  runtime: ContainerRuntimeConfig;
+  time_limits: Record<string, unknown>;
+  environment?: Record<string, string>;
+}
+
+export interface DockerWorkerLaunchResult {
+  command: string[];
+  exitCode: number;
+  stdout: string;
+  stderr: string;
+  failure_status: RunExitStatus | null;
+}
+
+interface CommandExecutionResult {
+  command: string[];
+  exitCode: number;
+  stdout: string;
+  stderr: string;
+  spawn_error: string | null;
+  timed_out: boolean;
+}
+
+interface DockerMappedPaths {
+  repo_path: string;
+  state_path: string;
+  artifact_path: string;
+  runtime_home: string;
+}
+
+export interface DockerPathMappingRequest extends DockerMappedPaths {
+  run_role?: RunRole;
+}
+
+function normalizeHostPath(value: string): string {
+  return toPosixPath(resolve(value)).replace(/\/+$/u, "");
+}
+
+function artifactRootFromRunRoot(runRoot: string): string {
+  return dirname(dirname(runRoot));
+}
+
+function runRootContainerPath(runRoot: string): string {
+  return `${CONTAINER_PATHS.artifacts}/runs/${basename(normalizeHostPath(runRoot))}`;
+}
+
+function taskPacketPathFromRunRoot(runRoot: string): string {
+  return join(runRoot, "metadata", "task-packet.en.json");
+}
+
+function repoContainerPathForHostPath(repoRoot: string, hostPath: string): string | null {
+  const normalizedRepoRoot = normalizeHostPath(repoRoot);
+  const normalizedHostPath = normalizeHostPath(hostPath);
+  if (normalizedHostPath === normalizedRepoRoot) {
+    return CONTAINER_PATHS.repo;
+  }
+  if (!normalizedHostPath.startsWith(`${normalizedRepoRoot}/`)) {
+    return null;
+  }
+  return `${CONTAINER_PATHS.repo}${normalizedHostPath.slice(normalizedRepoRoot.length)}`;
+}
+
+export function resolveDockerWorkerImage(runRole: RunRole): string {
+  if (runRole !== "builder" && runRole !== "qa") {
+    throw new Error(`unsupported docker worker role: ${runRole}`);
+  }
+  const override = process.env[ROLE_IMAGE_ENV_VARS[runRole]]?.trim();
+  return override && override.length > 0 ?
override : DEFAULT_ROLE_IMAGES[runRole]; +} + +export class DockerPathMapper { + readonly mounts: ContainerPathMount[]; + private readonly normalizedMounts: NormalizedContainerPathMount[]; + + constructor(mounts: ContainerPathMount[]) { + this.mounts = mounts; + this.normalizedMounts = mounts + .map((mount, mountOrder) => ({ + ...mount, + normalized_host_path: normalizeHostPath(mount.host_path), + mount_order: mountOrder, + })) + .sort( + (left, right) => + right.normalized_host_path.length - left.normalized_host_path.length || left.mount_order - right.mount_order, + ); + } + + mapPath(hostPath: string): string { + if (!hostPath || !isAbsolute(hostPath)) { + return hostPath; + } + const normalizedPath = normalizeHostPath(hostPath); + for (const mount of this.normalizedMounts) { + if (normalizedPath === mount.normalized_host_path) { + return mount.container_path; + } + if (normalizedPath.startsWith(`${mount.normalized_host_path}/`)) { + return `${mount.container_path}${normalizedPath.slice(mount.normalized_host_path.length)}`; + } + } + return hostPath; + } + + mapValue(value: unknown): unknown { + if (typeof value === "string") { + return this.mapPath(value); + } + if (Array.isArray(value)) { + return value.map((entry) => this.mapValue(entry)); + } + if (!value || typeof value !== "object") { + return value; + } + return Object.fromEntries(Object.entries(value).map(([key, entry]) => [key, this.mapValue(entry)])); + } +} + +export function buildDockerPathMapping(paths: DockerPathMappingRequest): DockerPathMapper { + const repoReadOnly = paths.run_role === "builder" ? false : true; + const jobRoot = dirname(paths.state_path); + const runRootContainer = runRootContainerPath(paths.artifact_path); + const taskPacketPath = taskPacketPathFromRunRoot(paths.artifact_path); + const repoJobRootPath = repoContainerPathForHostPath(paths.repo_path, jobRoot); + const repoRunRootPath = repoContainerPathForHostPath(paths.repo_path, paths.artifact_path); + const repoTaskPacketPath = repoContainerPathForHostPath(paths.repo_path, taskPacketPath); + const repoRuntimeHomePath = repoContainerPathForHostPath(paths.repo_path, paths.runtime_home); + const mounts: ContainerPathMount[] = [ + { + name: "repo", + host_path: paths.repo_path, + container_path: CONTAINER_PATHS.repo, + read_only: repoReadOnly, + }, + ...(repoJobRootPath === null + ? [] + : [ + { + name: "repo-job-root" as const, + host_path: jobRoot, + container_path: repoJobRootPath, + read_only: true, + }, + ]), + { + name: "state", + host_path: paths.state_path, + container_path: CONTAINER_PATHS.state, + read_only: true, + }, + { + name: "artifacts", + host_path: artifactRootFromRunRoot(paths.artifact_path), + container_path: CONTAINER_PATHS.artifacts, + read_only: true, + }, + { + name: "run-artifacts", + host_path: paths.artifact_path, + container_path: runRootContainer, + read_only: false, + }, + ...(repoRunRootPath === null + ? [] + : [ + { + name: "repo-run-artifacts" as const, + host_path: paths.artifact_path, + container_path: repoRunRootPath, + read_only: false, + }, + ]), + { + name: "task-packet", + host_path: taskPacketPath, + container_path: `${runRootContainer}/metadata/task-packet.en.json`, + read_only: true, + }, + ...(repoTaskPacketPath === null + ? 
[] + : [ + { + name: "repo-task-packet" as const, + host_path: taskPacketPath, + container_path: repoTaskPacketPath, + read_only: true, + }, + ]), + { + name: "runtime-home", + host_path: paths.runtime_home, + container_path: CONTAINER_PATHS.runtimeHome, + read_only: false, + }, + ...(repoRuntimeHomePath === null + ? [] + : [ + { + name: "repo-runtime-home" as const, + host_path: paths.runtime_home, + container_path: repoRuntimeHomePath, + read_only: false, + }, + ]), + { + name: "cache", + host_path: join(paths.runtime_home, "cache"), + container_path: CONTAINER_PATHS.cache, + read_only: false, + }, + ]; + return new DockerPathMapper(mounts); +} + +function buildContainerPathMap(envelope: RunEnvelope, mapper: DockerPathMapper): ContainerPathMap { + return { + repo_path: mapper.mapPath(envelope.repo_path), + state_path: mapper.mapPath(envelope.state_path), + artifact_path: mapper.mapPath(envelope.artifact_path), + runtime_home: mapper.mapPath(envelope.runtime_home), + task_packet_path: mapper.mapPath(envelope.task_packet_path), + previous_handoff_path: mapper.mapPath(envelope.previous_handoff_path), + approval_snapshot_path: mapper.mapPath(envelope.approval_snapshot_path), + trace_context: mapper.mapValue(envelope.trace_context) as Record, + }; +} + +export async function materializeContainerizedRunEnvelope( + envelope: RunEnvelope, +): Promise { + const hostEnvelopePath = join(envelope.runtime_home, "envelopes", `${envelope.run_id}.json`); + const containerEnvelopePath = join(envelope.runtime_home, "envelopes", "container", `${envelope.run_id}.json`); + const cacheHostPath = join(envelope.runtime_home, "cache"); + const mapper = buildDockerPathMapping(envelope); + const runtime: ContainerRuntimeConfig = { + runtime: "docker", + image: resolveDockerWorkerImage(envelope.run_role), + workdir: CONTAINER_PATHS.repo, + envelope_host_path: hostEnvelopePath, + envelope_container_path: mapper.mapPath(containerEnvelopePath), + mounts: mapper.mounts, + container_paths: buildContainerPathMap(envelope, mapper), + }; + const hostEnvelope: RunEnvelope = { + ...envelope, + container_runtime: runtime, + }; + const containerEnvelope: RunEnvelope = { + ...hostEnvelope, + repo_path: runtime.container_paths.repo_path, + state_path: runtime.container_paths.state_path, + artifact_path: runtime.container_paths.artifact_path, + runtime_home: runtime.container_paths.runtime_home, + task_packet_path: runtime.container_paths.task_packet_path, + task_packet_sha256: envelope.task_packet_sha256, + previous_handoff_path: runtime.container_paths.previous_handoff_path, + approval_snapshot_path: runtime.container_paths.approval_snapshot_path, + trace_context: runtime.container_paths.trace_context, + }; + + await ensureDir(dirname(hostEnvelopePath)); + await ensureDir(dirname(containerEnvelopePath)); + await ensureDir(cacheHostPath); + await writeJson(hostEnvelopePath, hostEnvelope); + await writeJson(containerEnvelopePath, containerEnvelope); + + return { + host_envelope: hostEnvelope, + container_envelope: containerEnvelope, + runtime, + host_envelope_path: hostEnvelopePath, + container_envelope_path: containerEnvelopePath, + canonical_task_packet_path: envelope.task_packet_path, + }; +} + +function buildImageDockerfile(repoRoot: string, runRole: "base" | "builder" | "qa"): string { + if (runRole === "base") { + return join(repoRoot, "docker", "worker-base.Dockerfile"); + } + return join(repoRoot, "docker", `worker-${runRole}.Dockerfile`); +} + +function mountArg(mount: ContainerPathMount): string { + const mode = 
mount.read_only ? ",readonly" : "";
+  return `type=bind,source=${mount.host_path},target=${mount.container_path}${mode}`;
+}
+
+function dockerUserArgs(): string[] {
+  if (typeof process.getuid !== "function" || typeof process.getgid !== "function") {
+    return [];
+  }
+  return ["--user", `${process.getuid()}:${process.getgid()}`];
+}
+
+function launchEnvironment(request: DockerWorkerLaunchRequest): Record<string, string> {
+  return {
+    HOME: "/work/runtime-home/home",
+    XDG_CACHE_HOME: "/work/cache",
+    BUN_INSTALL_CACHE_DIR: "/work/cache/bun",
+    ...(request.environment ?? {}),
+  };
+}
+
+function buildRunCommand(dockerExecutable: string, request: DockerWorkerLaunchRequest): string[] {
+  return [
+    dockerExecutable,
+    "run",
+    "--rm",
+    "--network",
+    "none",
+    ...dockerUserArgs(),
+    "--workdir",
+    request.runtime.workdir,
+    ...Object.entries(launchEnvironment(request)).flatMap(([key, value]) => ["--env", `${key}=${value}`]),
+    ...request.runtime.mounts.flatMap((mount) => ["--mount", mountArg(mount)]),
+    request.image,
+    "bun",
+    request.worker_script_path,
+    request.envelope_path,
+  ];
+}
+
+function normalizeImageSignature(value: string): string | null {
+  const normalized = value.trim();
+  if (!normalized || normalized === "") {
+    return null;
+  }
+  return normalized;
+}
+
+async function buildImageSignature(
+  dockerfilePath: string,
+  buildArgs: string[],
+  extraInputs: string[] = [],
+): Promise<string> {
+  return sha256Text(
+    JSON.stringify({
+      dockerfile_path: toPosixPath(dockerfilePath),
+      dockerfile_text: await readText(dockerfilePath),
+      build_args: buildArgs,
+      extra_inputs: extraInputs,
+    }),
+  );
+}
+
+function readNumericLimit(value: unknown): number | null {
+  if (typeof value === "number" && Number.isFinite(value)) {
+    return value;
+  }
+  if (typeof value === "string" && value.trim().length > 0) {
+    const parsed = Number(value);
+    if (Number.isFinite(parsed)) {
+      return parsed;
+    }
+  }
+  return null;
+}
+
+function resolveTimeoutMs(timeLimits: Record<string, unknown>): number | null {
+  const milliseconds = readNumericLimit(timeLimits.milliseconds ?? timeLimits.ms);
+  if (milliseconds !== null) {
+    return Math.max(0, Math.round(milliseconds));
+  }
+  const seconds = readNumericLimit(timeLimits.seconds);
+  if (seconds !== null) {
+    return Math.max(0, Math.round(seconds * 1000));
+  }
+  const minutes = readNumericLimit(timeLimits.minutes);
+  if (minutes !== null) {
+    return Math.max(0, Math.round(minutes * 60_000));
+  }
+  return null;
+}
+
+async function spawnCommand(command: string[], cwd: string, timeoutMs: number | null = null): Promise<CommandExecutionResult> {
+  const spawnedCommand = normalizeSpawnCommand(command);
+  const effectiveTimeoutMs =
+    timeoutMs === null ? null : timeoutMs + timeoutGraceMs(command, spawnedCommand);
+  try {
+    const handle = Bun.spawn({
+      cmd: spawnedCommand,
+      cwd,
+      stdout: "pipe",
+      stderr: "pipe",
+    });
+    const stdoutPromise = new Response(handle.stdout).text();
+    const stderrPromise = new Response(handle.stderr).text();
+    let timedOut = false;
+    const timeoutHandle =
+      effectiveTimeoutMs === null
+        ? null
+        : setTimeout(() => {
+            timedOut = true;
+            handle.kill();
+          }, effectiveTimeoutMs);
+    const exitCode = await handle.exited;
+    if (timeoutHandle !== null) {
+      clearTimeout(timeoutHandle);
+    }
+    return {
+      command,
+      exitCode,
+      stdout: await stdoutPromise,
+      stderr: await stderrPromise,
+      spawn_error: null,
+      timed_out: timedOut,
+    };
+  } catch (error) {
+    const message = error instanceof Error ?
error.message : String(error); + return { + command, + exitCode: -1, + stdout: "", + stderr: message, + spawn_error: message, + timed_out: false, + }; + } +} + +function pythonLauncherCommand(scriptPath: string): string[] | null { + const python = Bun.which("python") ?? Bun.which("py"); + if (python === null) { + return null; + } + const launcher = basename(python).toLowerCase(); + if (launcher === "py" || launcher === "py.exe") { + return [python, "-3", scriptPath]; + } + return [python, scriptPath]; +} + +function normalizeSpawnCommand(command: string[]): string[] { + if (process.platform !== "win32" || command.length === 0) { + return command; + } + const executable = command[0]; + if (!isAbsolute(executable)) { + return command; + } + const extension = extname(executable).toLowerCase(); + if (extension === ".py") { + const launcher = pythonLauncherCommand(executable); + return launcher === null ? command : [...launcher, ...command.slice(1)]; + } + if (extension === ".cmd" || extension === ".bat") { + const shell = process.env.ComSpec?.trim() || "cmd.exe"; + return [shell, "/d", "/s", "/c", executable, ...command.slice(1)]; + } + if (extension === ".ps1") { + return [ + "powershell.exe", + "-NoLogo", + "-NoProfile", + "-ExecutionPolicy", + "Bypass", + "-File", + executable, + ...command.slice(1), + ]; + } + return command; +} + +function timeoutGraceMs(command: string[], spawnedCommand: string[]): number { + if (process.platform !== "win32" || command.length === 0) { + return 0; + } + if (command.length === spawnedCommand.length && command.every((value, index) => value === spawnedCommand[index])) { + return 0; + } + return WINDOWS_SCRIPT_TIMEOUT_GRACE_MS; +} + +function classifyRunFailure(result: CommandExecutionResult): RunExitStatus | null { + if (result.timed_out) { + return "TIMEOUT"; + } + if (result.exitCode === 0) { + return null; + } + if (result.spawn_error) { + return "FAILED_INFRA"; + } + const stderr = result.stderr.trim(); + if ( + /^\s*docker:/iu.test(stderr) || + /cannot connect to the docker daemon/iu.test(stderr) || + /error during connect/iu.test(stderr) + ) { + return "FAILED_INFRA"; + } + return "FAILED_EXECUTION"; +} + +export class DockerWorkerLauncher implements RoleImageResolver { + private readonly preparedImages = new Set(); + private readonly baseImage: string; + private readonly dockerExecutable: string; + private readonly shellPolicy: ShellPolicy; + + constructor(private readonly repoRoot: string) { + const override = process.env[BASE_IMAGE_ENV_VAR]?.trim(); + this.baseImage = override && override.length > 0 ? override : DEFAULT_BASE_IMAGE; + const dockerOverride = process.env[DOCKER_BIN_ENV_VAR]?.trim(); + this.dockerExecutable = dockerOverride && dockerOverride.length > 0 ? 
dockerOverride : "docker";
+    this.shellPolicy = new ShellPolicy(repoRoot);
+  }
+
+  resolve(runRole: RunRole): string {
+    return resolveDockerWorkerImage(runRole);
+  }
+
+  private async ensureImage(
+    image: string,
+    dockerfilePath: string,
+    buildArgs: string[] = [],
+    extraSignatureInputs: string[] = [],
+  ): Promise<CommandExecutionResult | null> {
+    const signature = await buildImageSignature(dockerfilePath, buildArgs, extraSignatureInputs);
+    const preparedKey = `${image}@${signature}`;
+    if (this.preparedImages.has(preparedKey)) {
+      return null;
+    }
+
+    const inspect = await spawnCommand(
+      [
+        this.dockerExecutable,
+        "image",
+        "inspect",
+        image,
+        "--format",
+        `{{index .Config.Labels "${IMAGE_SIGNATURE_LABEL}"}}`,
+      ],
+      this.repoRoot,
+    );
+    if (inspect.spawn_error) {
+      return inspect;
+    }
+    if (inspect.exitCode === 0 && normalizeImageSignature(inspect.stdout) === signature) {
+      this.preparedImages.add(preparedKey);
+      return null;
+    }
+
+    const build = await spawnCommand(
+      [
+        this.dockerExecutable,
+        "build",
+        "--file",
+        dockerfilePath,
+        "--tag",
+        image,
+        "--label",
+        `${IMAGE_SIGNATURE_LABEL}=${signature}`,
+        ...buildArgs,
+        this.repoRoot,
+      ],
+      this.repoRoot,
+    );
+    if (build.spawn_error || build.exitCode !== 0) {
+      return build;
+    }
+    this.preparedImages.add(preparedKey);
+    return null;
+  }
+
+  private async ensureRoleImage(runRole: RunRole, image: string): Promise<CommandExecutionResult | null> {
+    if (runRole !== "builder" && runRole !== "qa") {
+      throw new Error(`unsupported docker worker role: ${runRole}`);
+    }
+    const baseDockerfilePath = buildImageDockerfile(this.repoRoot, "base");
+    const baseSignature = await buildImageSignature(baseDockerfilePath, []);
+    const baseFailure = await this.ensureImage(this.baseImage, baseDockerfilePath);
+    if (baseFailure) {
+      return baseFailure;
+    }
+    return this.ensureImage(
+      image,
+      buildImageDockerfile(this.repoRoot, runRole),
+      ["--build-arg", `CODINGCLAW_BASE_IMAGE=${this.baseImage}`],
+      [this.baseImage, baseSignature],
+    );
+  }
+
+  async launch(request: DockerWorkerLaunchRequest): Promise<DockerWorkerLaunchResult> {
+    const command = buildRunCommand(this.dockerExecutable, request);
+    const shellDecision = await this.shellPolicy.evaluate({
+      executable: this.dockerExecutable,
+      command,
+      envNames: Object.keys(launchEnvironment(request)),
+      dynamicEnvNames: Object.keys(request.environment ?? {}),
+      mounts: request.runtime.mounts,
+      runRole: request.run_role,
+    });
+    if (!shellDecision.allowed) {
+      return {
+        command,
+        exitCode: 1,
+        stdout: "",
+        stderr: shellDecision.reason ?? "host shell policy blocked launch",
+        failure_status: shellDecision.status ??
"FAILED_POLICY", + }; + } + const imagePreparationFailure = await this.ensureRoleImage(request.run_role, request.image); + if (imagePreparationFailure) { + return { + command, + exitCode: imagePreparationFailure.exitCode, + stdout: imagePreparationFailure.stdout, + stderr: imagePreparationFailure.stderr, + failure_status: "FAILED_INFRA", + }; + } + const run = await spawnCommand(command, this.repoRoot, resolveTimeoutMs(request.time_limits)); + return { + command: run.command, + exitCode: run.exitCode, + stdout: run.stdout, + stderr: run.stderr, + failure_status: classifyRunFailure(run), + }; + } +} diff --git a/codingclaw_ironclaw_revision_blueprint_cn.md b/codingclaw_ironclaw_revision_blueprint_cn.md index 49cc5ad..07258af 100644 --- a/codingclaw_ironclaw_revision_blueprint_cn.md +++ b/codingclaw_ironclaw_revision_blueprint_cn.md @@ -12,7 +12,7 @@ CodingClaw 是基于 **IronClaw** 思路进行二次设计与定制化扩展的 - 以冻结合同为范围控制基础 - 以 Builder / QA 双轨机制提升质量 - 以 Docker 隔离和轨迹归档保证复现性 -- 在必要时引入云桌面进行 GUI 特例处理与人工接管 +- 在需要真实图形界面时,允许系统在本地 Ubuntu 图形会话中自动执行 GUI 步骤 的 **7×24 小时自动编码编排系统**。 @@ -87,7 +87,7 @@ v0.2 的实现方式明确为: - **Coding Loop = Execution Kernel** - **Builder / QA Executors = Role-specific Workers** - **Artifact Storage = Audit Plane** -- **Wuying Desktop = GUI Exception Plane** +- **Local Ubuntu GUI Session = GUI Execution Surface** ### 3.3 文件状态优先于长上下文记忆 @@ -162,10 +162,10 @@ v0.2 的实现方式明确为: |- Metadata DB Optional: -[Aliyun Wuying Desktop] - |- GUI Exception Handling - |- Human Takeover - |- Remote Assistance +[Local Ubuntu GUI Session] + |- Headed GUI Automation + |- Local Human Takeover + |- Rendered Evidence Capture ``` ## 4.2 四层模型 @@ -785,7 +785,8 @@ codingclaw/ contracts/ adapters/ - codex/ + claude-code-builder/ + codex-qa/ aider/ generic-cli/ @@ -973,16 +974,16 @@ artifacts/ --- -## 17. 阿里无影云电脑集成策略 +## 17. 本地 Ubuntu 图形执行面策略 ### 17.1 定位 -阿里无影不是主编码执行面,而是: +本地 Ubuntu 图形会话不是旁路特例,而是默认 GUI 执行面: -- GUI 特例平面 -- 人工接管平面 -- 远程协助平面 -- Windows-only / 桌面类工具处理平面 +- Headed Browser / GUI 自动化平面 +- 渲染证据采集平面 +- 本地人工接管平面 +- Linux 桌面类工具执行平面 ### 17.2 推荐使用顺序 @@ -995,9 +996,9 @@ artifacts/ ### 17.3 不推荐方式 -不推荐将无影作为: +不推荐将图形执行面设计成: -- 主 Builder 执行器 +- 云桌面或远程桌面桥接 - 全天候唯一编码环境 - 全链路自动点击执行核心 @@ -1100,7 +1101,7 @@ Phase 1 明确不纳入: - 多通道同时接入 - 多执行器并行接入 -- 无影桥接 +- 云桌面桥接 - Dashboard - 历史任务复用 - 生产级多任务调度 @@ -1120,13 +1121,14 @@ Phase 1 明确不纳入: 目标: -- Codex 适配器 +- Claude Code Builder 适配器 +- Codex QA 适配器 - Aider 适配器 - 通用 CLI 适配器 - role packs - test matrix 支持 -### Phase 4:无影与人工接管 +### Phase 4:本地 GUI 自动化与人工接管 目标: @@ -1184,9 +1186,9 @@ Phase 1 明确不纳入: - `CHECKSUM_POLICY.md` - `JOB_MANIFEST_SCHEMA.md` -### 20.5 无影相关文档 +### 20.5 本地 GUI 相关文档 -- `WUYING_INTEGRATION_PLAN.md` +- `UBUNTU_GUI_RUNTIME_PLAN.md` - `TAKEOVER_FLOW.md` - `TAKEOVER_PACKET_TEMPLATE.en.md` - `GUI_EXCEPTION_POLICY.md` @@ -1198,7 +1200,7 @@ Phase 1 明确不纳入: 以下内容不属于当前版本目标: - 跳过用户审批直接开始编码 -- 让 GUI 云桌面成为主编码执行面 +- 依赖云桌面或远程桌面桥接作为主编码执行面 - 依赖单一长会话连续完成所有任务 - 不做预算、不做熔断地无限循环 - 伪造执行器轨迹代替真实运行痕迹 @@ -1216,13 +1218,13 @@ Phase 1 明确不纳入: 3. 跑通单轮 **Builder / QA**,确保 story traceability 与 QA verdict 成立。 4. 补充归档、checksums、失败恢复、审批卡片与预算约束。 5. 再扩展执行器适配层。 -6. 最后接入无影桥接与高级运营能力。 +6. 最后补齐本地 GUI 自动化与高级运营能力。 --- ## 23. 一句话架构定义 -> CodingClaw 是一个以 IronClaw Fork 为控制外壳、以 coding loop 为执行内核、以冻结合同为范围边界、以 Builder/QA 双轨为质量机制、以 Docker 隔离和归档为审计基础、并在必要时借助阿里无影完成 GUI 特例处理与人工接管的自动编码代理系统。 +> CodingClaw 是一个以 IronClaw Fork 为控制外壳、以 coding loop 为执行内核、以冻结合同为范围边界、以 Builder/QA 双轨为质量机制、以 Docker 隔离和归档为审计基础、并在本地 Ubuntu 图形会话中完成自动 GUI 执行与必要时人工接管的自动编码代理系统。 --- @@ -1478,7 +1480,7 @@ v0.2 不追求一次接完所有执行器。 1. 先冻结 `Executor Adapter Contract` 2. 
先实现一个 `Generic CLI Adapter` -3. 再实现 `Codex Adapter` +3. 再实现 `Claude Code Builder Adapter` 与 `Codex QA Adapter` 4. 再实现 `Aider Adapter` --- diff --git a/control/fixtures/phase1-local-approval-decision.json b/control/fixtures/phase1-local-approval-decision.json new file mode 100644 index 0000000..c976b54 --- /dev/null +++ b/control/fixtures/phase1-local-approval-decision.json @@ -0,0 +1,12 @@ +{ + "job_id": "job-phase1-local", + "card_id": "card-phase1-local-001", + "card_type": "plan-approval", + "story_id": "STORY-PHASE1-LOCAL-001", + "freeze_version": "1", + "decision": "approve", + "actor": "local-owner", + "decided_at": "2026-04-08T00:00:00Z", + "card_state": "DECIDED", + "requested_action": "Approve the fixed local Phase 1 proof-of-concept story and allow the builder-to-QA execution flow." +} diff --git a/control/fixtures/phase1-local-run-envelope.json b/control/fixtures/phase1-local-run-envelope.json index e84ff3e..9cafb58 100644 --- a/control/fixtures/phase1-local-run-envelope.json +++ b/control/fixtures/phase1-local-run-envelope.json @@ -41,6 +41,7 @@ "requested_capabilities": [ "filesystem_read", "filesystem_write", - "shell_command" + "shell_command", + "container_control" ] } diff --git a/control/fixtures/phase1-local-task-packet.en.json b/control/fixtures/phase1-local-task-packet.en.json index fbee9c4..4f5ab55 100644 --- a/control/fixtures/phase1-local-task-packet.en.json +++ b/control/fixtures/phase1-local-task-packet.en.json @@ -39,7 +39,8 @@ "requested_capabilities": [ "filesystem_read", "filesystem_write", - "shell_command" + "shell_command", + "container_control" ], "story": { "story_id": "STORY-PHASE1-LOCAL-001", @@ -57,7 +58,7 @@ "out_of_scope_checklist": [ "Mobile channel input", "Production deployment hardening", - "Wuying integration", + "Cloud desktop vendor integration", "Distributed scheduling", "Dashboard UI" ], diff --git a/core/contracts/types.ts b/core/contracts/types.ts index 2246c50..bd0ad18 100644 --- a/core/contracts/types.ts +++ b/core/contracts/types.ts @@ -57,6 +57,13 @@ export interface StoryContract { escalation_rules: string[]; } +export interface CredentialInjectionRequest { + secret_handle: string; + credential_alias: string; + allowed_host_patterns: string[]; + injection_mode: "env"; +} + export interface TaskPacket { job_id: string; freeze_id: string; @@ -79,9 +86,51 @@ export interface TaskPacket { approval_context: Record; previous_handoff_path: string; requested_capabilities: string[]; + credential_injection_requests?: CredentialInjectionRequest[]; story: StoryContract; } +export type ContainerMountName = + | "repo" + | "repo-job-root" + | "state" + | "artifacts" + | "run-artifacts" + | "repo-run-artifacts" + | "task-packet" + | "repo-task-packet" + | "runtime-home" + | "repo-runtime-home" + | "cache"; + +export interface ContainerPathMount { + name: ContainerMountName; + host_path: string; + container_path: string; + read_only: boolean; +} + +export interface ContainerPathMap { + repo_path: string; + state_path: string; + artifact_path: string; + runtime_home: string; + task_packet_path: string; + previous_handoff_path: string; + approval_snapshot_path: string; + trace_context: Record; +} + +export interface ContainerRuntimeConfig { + runtime: "docker"; + image: string; + workdir: string; + envelope_host_path: string; + envelope_container_path: string; + mounts: ContainerPathMount[]; + container_paths: ContainerPathMap; +} + export interface RunEnvelope { job_id: string; freeze_id: string; @@ -106,6 +155,7 @@ export interface RunEnvelope { 
approval_snapshot_path: string; trace_context: Record; requested_capabilities: string[]; + container_runtime?: ContainerRuntimeConfig | null; } export interface WorkerOutput { @@ -282,6 +332,20 @@ export interface LoopMetricsFile { runs: LoopMetricEntry[]; } +export interface ApprovalRequestSnapshot { + request_id: string; + job_id: string; + story_id: string; + run_id: string; + run_role: RunRole; + action_summary: string; + reason: string; + risk_level: string; + requested_capability: string; + suggested_alternatives: string[]; + timeout_at: string; +} + export interface ApprovalCardSnapshot { job_id: string; card_id: string; @@ -296,6 +360,17 @@ export interface ApprovalCardSnapshot { timeout_at: string; created_at: string; evidence_refs: string[]; + approval_request?: ApprovalRequestSnapshot | null; + recovery_context?: { + last_exit_reason: RunExitStatus; + current_freeze_version: string; + current_story: string; + latest_evidence_path: string; + recommended_next_action: string; + resume_gate: "owner" | "takeover"; + paused_run_id: string; + paused_run_role: RunRole; + } | null; } export interface ApprovalDecisionReceipt { @@ -403,11 +478,12 @@ export interface JobManifestApprovalRecord { card_state: ApprovalCardState; card_type: string; requested_action: string; - decision: string; + decision: string | null; snapshot_path: string; - decision_path: string; + decision_path: string | null; summary_zh_ref: string; - decided_at: string; + decided_at: string | null; + approval_request?: ApprovalRequestSnapshot | null; } export interface JobManifestArtifactRecord { @@ -458,4 +534,5 @@ export interface AdapterExecutionResult { artifactIndexPath: string; commandLogPath: string; handoffPath: string; + takeoverPacketPath: string | null; } diff --git a/core/loop/phase1-local-flow.ts b/core/loop/phase1-local-flow.ts index 3da4789..e41fd60 100644 --- a/core/loop/phase1-local-flow.ts +++ b/core/loop/phase1-local-flow.ts @@ -1,5 +1,7 @@ -import { join } from "node:path"; +import { copyFile, readdir, readlink, symlink } from "node:fs/promises"; +import { dirname, isAbsolute, join, relative, resolve } from "node:path"; import { GenericCliAdapter } from "../../adapters/generic-cli/adapter.ts"; +import { buildArtifactIndex } from "../../ops/archive/artifact-index.ts"; import { writeApprovalArchive } from "../../ops/archive/approvals.ts"; import { buildEnvironmentSnapshotMetadata, @@ -25,6 +27,7 @@ import { readJson, readText, sha256Text, + toPosixPath, uniqueStrings, writeJson, writeText, @@ -33,15 +36,18 @@ import type { AdapterExecutionResult, ApprovalCardSnapshot, ApprovalDecisionReceipt, + ApprovalRequestSnapshot, ChecksumRecord, ContractFreezeMetadata, JobManifest, JobManifestApprovalRecord, + JobManifestPauseContext, JobManifestRunRecord, JobManifestStoryRecord, JobState, RunResult, RunEnvelope, + RunExitStatus, RunRole, TaskPacket, } from "../contracts/types.ts"; @@ -50,6 +56,14 @@ const BUILDER_EXPECTED_ARTIFACTS = [ "logs/command-log.txt", "logs/worker.log", "metadata/task-packet.en.json", + "metadata/state/active-story.json", + "metadata/state/decisions.en.md", + "metadata/state/handoff.en.md", + "metadata/state/loop-metrics.json", + "metadata/state/progress.en.md", + "metadata/state/risk-register.en.md", + "metadata/state/story-queue.json", + "metadata/state/trace-index.json", "metadata/timings.json", "metadata/run-result.json", "metadata/artifact-index.json", @@ -73,6 +87,14 @@ const QA_EXPECTED_ARTIFACTS = [ "logs/command-log.txt", "logs/worker.log", "metadata/task-packet.en.json", + 
"metadata/state/active-story.json", + "metadata/state/decisions.en.md", + "metadata/state/handoff.en.md", + "metadata/state/loop-metrics.json", + "metadata/state/progress.en.md", + "metadata/state/risk-register.en.md", + "metadata/state/story-queue.json", + "metadata/state/trace-index.json", "metadata/timings.json", "metadata/run-result.json", "metadata/artifact-index.json", @@ -107,19 +129,17 @@ function roleArtifacts(runRole: RunRole): { expectedArtifacts: string[]; verific }; } -function buildApprovalDecision(card: ApprovalCardSnapshot): ApprovalDecisionReceipt { - return { - job_id: card.job_id, - card_id: card.card_id, - card_type: card.card_type, - story_id: card.story_id, - freeze_version: card.freeze_version, - decision: "approve", - actor: "local-owner", - decided_at: "2026-04-08T00:00:00Z", - card_state: "DECIDED", - requested_action: card.requested_action, - }; +async function loadApprovalDecision(repoRoot: string, card: ApprovalCardSnapshot): Promise { + const decision = await readJson( + join(repoRoot, "control", "fixtures", "phase1-local-approval-decision.json"), + ); + if (decision.job_id !== card.job_id || decision.card_id !== card.card_id) { + throw new Error("phase1 approval decision fixture does not match the plan approval card"); + } + if (decision.card_state !== "DECIDED") { + throw new Error("phase1 approval decision fixture must be decided before freeze generation"); + } + return decision; } async function loadAdapterInfo(repoRoot: string): Promise<{ adapter_id: string; adapter_version: string }> { @@ -140,6 +160,47 @@ async function buildDependencySnapshotDigest(repoRoot: string): Promise return "absent"; } +const JOB_REPO_SNAPSHOT_EXCLUDES = new Set([".omx", "jobs", "state"]); + +function assertSnapshotSymlinkTarget(sourceRoot: string, sourcePath: string, targetPath: string): string { + const resolvedTarget = resolve(dirname(sourcePath), targetPath); + const relativeTarget = relative(resolve(sourceRoot), resolvedTarget); + if (relativeTarget === "" || (!relativeTarget.startsWith("..") && !isAbsolute(relativeTarget))) { + return targetPath; + } + throw new Error(`repo snapshot symlink escaped the source repo: ${sourcePath}`); +} + +async function copyRepoSnapshot(sourceRoot: string, targetRoot: string, relativePath = ""): Promise { + const sourceDir = relativePath.length === 0 ? sourceRoot : join(sourceRoot, relativePath); + const entries = await readdir(sourceDir, { withFileTypes: true }); + for (const entry of entries) { + if (relativePath.length === 0 && JOB_REPO_SNAPSHOT_EXCLUDES.has(entry.name)) { + continue; + } + const childRelativePath = relativePath.length === 0 ? 
entry.name : join(relativePath, entry.name); + const sourcePath = join(sourceRoot, childRelativePath); + const targetPath = join(targetRoot, childRelativePath); + if (entry.isDirectory()) { + await ensureDir(targetPath); + await copyRepoSnapshot(sourceRoot, targetRoot, childRelativePath); + continue; + } + if (entry.isSymbolicLink()) { + await ensureDir(dirname(targetPath)); + await symlink(assertSnapshotSymlinkTarget(sourceRoot, sourcePath, await readlink(sourcePath)), targetPath); + continue; + } + await ensureDir(dirname(targetPath)); + await copyFile(sourcePath, targetPath); + } +} + +async function stageJobRepoWorkspace(repoRoot: string, workerRepoRoot: string): Promise { + await ensureDir(workerRepoRoot); + await copyRepoSnapshot(repoRoot, workerRepoRoot); +} + async function assertFreshJobRoot(layout: ReturnType): Promise { if (!(await pathExists(layout.jobRoot))) { return; @@ -156,16 +217,22 @@ async function assertFreshJobRoot(layout: ReturnType { const taskPacketPath = join(artifactRoot, "metadata", "task-packet.en.json"); + const normalizedRepoRoot = toPosixPath(repoRoot); + const normalizedStateRoot = toPosixPath(stateRoot); + const normalizedArtifactRoot = toPosixPath(artifactRoot); + const normalizedRuntimeHome = toPosixPath(runtimeHome); + const normalizedPreviousHandoffPath = previousHandoffPath.length > 0 ? toPosixPath(previousHandoffPath) : ""; const { expectedArtifacts, verificationTargets } = roleArtifacts(runRole); - const baseCommit = detectBaseCommit(repoRoot); const packetWithoutChecksum = await materializeJsonTemplate( join(repoRoot, "control", "fixtures", "phase1-local-task-packet.en.json"), @@ -173,24 +240,26 @@ async function buildTaskPacket( __RUN_ROLE__: runRole, __RUN_ATTEMPT__: 1, __RUN_ID__: runId, - __REPO_PATH__: repoRoot, + __REPO_PATH__: normalizedRepoRoot, __BASE_COMMIT__: baseCommit, - __STATE_PATH__: stateRoot, - __ARTIFACT_PATH__: artifactRoot, - __RUNTIME_HOME__: runtimeHome, + __STATE_PATH__: normalizedStateRoot, + __ARTIFACT_PATH__: normalizedArtifactRoot, + __RUNTIME_HOME__: normalizedRuntimeHome, __TASK_PACKET_SHA256__: "", - __PREVIOUS_HANDOFF_PATH__: previousHandoffPath, + __PREVIOUS_HANDOFF_PATH__: normalizedPreviousHandoffPath, __VERIFICATION_TARGETS__: verificationTargets, __EXPECTED_ARTIFACTS__: expectedArtifacts, }, ); - await ensureDir(join(artifactRoot, "metadata")); const finalPacket: TaskPacket = { ...packetWithoutChecksum, task_packet_sha256: taskPacketDigest(packetWithoutChecksum), }; - await writeJson(taskPacketPath, finalPacket); + if (persist) { + await ensureDir(join(artifactRoot, "metadata")); + await writeJson(taskPacketPath, finalPacket); + } return finalPacket; } @@ -202,28 +271,46 @@ function taskPacketDigest(taskPacket: TaskPacket): string { async function buildRunEnvelope( repoRoot: string, taskPacket: TaskPacket, + stateRoot: string, + artifactRoot: string, + runtimeHome: string, previousHandoffPath: string, approvalSnapshotPath: string, traceContext: Record, ): Promise { - return materializeJsonTemplate( + const normalizedRepoRoot = toPosixPath(repoRoot); + const normalizedStateRoot = toPosixPath(stateRoot); + const normalizedArtifactRoot = toPosixPath(artifactRoot); + const normalizedRuntimeHome = toPosixPath(runtimeHome); + const normalizedTaskPacketPath = toPosixPath(join(artifactRoot, "metadata", "task-packet.en.json")); + const normalizedPreviousHandoffPath = previousHandoffPath.length > 0 ? 
toPosixPath(previousHandoffPath) : ""; + const normalizedApprovalSnapshotPath = toPosixPath(approvalSnapshotPath); + const envelope = await materializeJsonTemplate( join(repoRoot, "control", "fixtures", "phase1-local-run-envelope.json"), { __RUN_ID__: taskPacket.run_id, __RUN_ROLE__: taskPacket.run_role, __RUN_ATTEMPT__: taskPacket.run_attempt, - __REPO_PATH__: taskPacket.repo_path, + __REPO_PATH__: normalizedRepoRoot, __BASE_COMMIT__: taskPacket.base_commit, - __STATE_PATH__: taskPacket.state_path, - __ARTIFACT_PATH__: taskPacket.artifact_path, - __RUNTIME_HOME__: taskPacket.runtime_home, - __TASK_PACKET_PATH__: join(taskPacket.artifact_path, "metadata", "task-packet.en.json"), + __STATE_PATH__: normalizedStateRoot, + __ARTIFACT_PATH__: normalizedArtifactRoot, + __RUNTIME_HOME__: normalizedRuntimeHome, + __TASK_PACKET_PATH__: normalizedTaskPacketPath, __TASK_PACKET_SHA256__: taskPacket.task_packet_sha256, - __PREVIOUS_HANDOFF_PATH__: previousHandoffPath, - __APPROVAL_SNAPSHOT_PATH__: approvalSnapshotPath, + __PREVIOUS_HANDOFF_PATH__: normalizedPreviousHandoffPath, + __APPROVAL_SNAPSHOT_PATH__: normalizedApprovalSnapshotPath, __TRACE_CONTEXT__: traceContext, }, ); + + return { + ...envelope, + budget_limits: taskPacket.budget_limits, + time_limits: taskPacket.time_limits, + requested_capabilities: taskPacket.requested_capabilities, + container_runtime: null, + }; } async function normalizeRunArtifacts( @@ -316,6 +403,20 @@ function buildFreezeMetadata( }; } +function approvalRelativePaths( + layout: ReturnType, + approvalRecord: Awaited>, +): string[] { + const relativePaths = [ + layout.relativeToJobRoot(approvalRecord.snapshot_path), + layout.relativeToJobRoot(approvalRecord.summary_path), + ]; + if (approvalRecord.decision_path !== null) { + relativePaths.push(layout.relativeToJobRoot(approvalRecord.decision_path)); + } + return uniqueStrings(relativePaths); +} + function buildManifestApprovalRecord( layout: ReturnType, approvalRecord: Awaited>, @@ -327,9 +428,11 @@ function buildManifestApprovalRecord( requested_action: approvalRecord.requested_action, decision: approvalRecord.decision, snapshot_path: layout.relativeToJobRoot(approvalRecord.snapshot_path), - decision_path: layout.relativeToJobRoot(approvalRecord.decision_path), + decision_path: + approvalRecord.decision_path === null ? null : layout.relativeToJobRoot(approvalRecord.decision_path), summary_zh_ref: layout.relativeToJobRoot(approvalRecord.summary_path), decided_at: approvalRecord.decided_at, + ...(approvalRecord.approval_request === null ? {} : { approval_request: approvalRecord.approval_request }), }; } @@ -347,7 +450,8 @@ function buildManifestRunRecords( run_result_path: layout.relativeToJobRoot(execution.runResultPath), artifact_index_path: layout.relativeToJobRoot(execution.artifactIndexPath), handoff_path: layout.relativeToJobRoot(execution.handoffPath), - takeover_packet_path: null, + takeover_packet_path: + execution.takeoverPacketPath === null ? 
null : layout.relativeToJobRoot(execution.takeoverPacketPath), started_at: execution.runResult.started_at, ended_at: execution.runResult.ended_at, })); @@ -385,6 +489,258 @@ function buildManifestStoryRecord( }; } +function emptyPauseContext(): JobManifestPauseContext { + return { + is_paused: false, + pause_reason: null, + waiting_on: null, + resume_action: null, + paused_at: null, + related_card_id: null, + expires_at: null, + }; +} + +function waitingOnForState(state: JobState): string | null { + if (state === "AWAITING_OWNER") { + return "owner"; + } + if (state === "AWAITING_TAKEOVER") { + return "takeover"; + } + return null; +} + +function latestEvidencePath(execution: AdapterExecutionResult): string { + const relativePath = execution.workerOutput.evidence_paths[0] ?? "reports/handoff.en.md"; + return `artifacts/runs/${execution.runResult.run_id}/${relativePath}`; +} + +function recoveryRiskLevel(status: RunExitStatus): string { + if ( + status === "FAILED_POLICY" || + status === "FAILED_EXECUTION" || + status === "FAILED_INFRA" || + status === "TIMEOUT" || + status === "BUDGET_EXCEEDED" + ) { + return "high"; + } + return "medium"; +} + +function recoveryRequestedAction(status: RunExitStatus, resumeGate: "owner" | "takeover"): string { + if (status === "AWAITING_APPROVAL") { + return "Review the executor approval request before continuing."; + } + if (status === "AWAITING_CREDENTIALS") { + return "Provide the required credential or choose an alternative before continuing."; + } + if (resumeGate === "takeover") { + return "Review the blocked run and trigger takeover before continuing."; + } + return "Review the blocked run and decide how to resume the job."; +} + +function recoveryCandidateActions(status: RunExitStatus, resumeGate: "owner" | "takeover"): string[] { + if (status === "AWAITING_APPROVAL") { + return ["approve requested action", "request revision", "trigger takeover"]; + } + if (status === "AWAITING_CREDENTIALS") { + return ["provide credentials", "request revision", "trigger takeover"]; + } + if (resumeGate === "takeover") { + return ["trigger takeover", "resume job", "request revision"]; + } + return ["resume job", "request revision", "trigger takeover"]; +} + +function approvalRequestReason(execution: AdapterExecutionResult): string { + const blocker = execution.workerOutput.blockers.find((value) => value.trim().length > 0); + if (blocker !== undefined) { + return blocker; + } + if (execution.workerOutput.next_action.trim().length > 0) { + return execution.workerOutput.next_action; + } + return `worker reported ${execution.runResult.status}`; +} + +function buildRecoveryApprovalRequest( + taskPacket: TaskPacket, + execution: AdapterExecutionResult, + timeoutAt: string, + riskLevel: string, +): ApprovalRequestSnapshot | null { + if (execution.runResult.status === "AWAITING_APPROVAL") { + return { + request_id: `approval-request-${execution.runResult.run_id}`, + job_id: taskPacket.job_id, + story_id: taskPacket.story.story_id, + run_id: execution.runResult.run_id, + run_role: execution.runResult.run_role, + action_summary: `${execution.runResult.run_role} requested owner approval before continuing.`, + reason: approvalRequestReason(execution), + risk_level: riskLevel, + requested_capability: "interactive_approval", + suggested_alternatives: ["request revision", "trigger takeover"], + timeout_at: timeoutAt, + }; + } + if (execution.runResult.status === "AWAITING_CREDENTIALS") { + return { + request_id: `approval-request-${execution.runResult.run_id}`, + job_id: 
taskPacket.job_id, + story_id: taskPacket.story.story_id, + run_id: execution.runResult.run_id, + run_role: execution.runResult.run_role, + action_summary: `${execution.runResult.run_role} requested credentials before continuing.`, + reason: approvalRequestReason(execution), + risk_level: riskLevel, + requested_capability: "secret_injection", + suggested_alternatives: ["continue without credentials", "request revision", "trigger takeover"], + timeout_at: timeoutAt, + }; + } + return null; +} + +function buildRecoveryCard(taskPacket: TaskPacket, execution: AdapterExecutionResult): ApprovalCardSnapshot { + const jobState = mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); + const resumeGate = jobState === "AWAITING_TAKEOVER" ? "takeover" : "owner"; + const latestEvidenceRef = latestEvidencePath(execution); + const timeoutAt = new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString(); + const riskLevel = recoveryRiskLevel(execution.runResult.status); + const approvalRequest = buildRecoveryApprovalRequest(taskPacket, execution, timeoutAt, riskLevel); + return { + job_id: taskPacket.job_id, + card_id: `card-recovery-${execution.runResult.run_id}`, + card_state: "PENDING", + card_type: "recovery", + story_id: taskPacket.story.story_id, + freeze_version: taskPacket.freeze_version, + risk_level: riskLevel, + summary_zh: `运行 ${execution.runResult.run_id} 已以 ${execution.runResult.status} 停止,当前故事需要${resumeGate === "takeover" ? "接管" : "人工决策"}后继续。`, + requested_action: recoveryRequestedAction(execution.runResult.status, resumeGate), + candidate_actions: recoveryCandidateActions(execution.runResult.status, resumeGate), + timeout_at: timeoutAt, + created_at: execution.runResult.ended_at, + evidence_refs: uniqueStrings([ + latestEvidenceRef, + `artifacts/runs/${execution.runResult.run_id}/metadata/run-result.json`, + `artifacts/runs/${execution.runResult.run_id}/reports/handoff.en.md`, + ]), + ...(approvalRequest === null ? {} : { approval_request: approvalRequest }), + recovery_context: { + last_exit_reason: execution.runResult.status, + current_freeze_version: taskPacket.freeze_version, + current_story: taskPacket.story.story_id, + latest_evidence_path: latestEvidenceRef, + recommended_next_action: execution.workerOutput.next_action, + resume_gate: resumeGate, + paused_run_id: execution.runResult.run_id, + paused_run_role: execution.runResult.run_role, + }, + }; +} + +async function writeRecoveryTakeoverPacket( + taskPacket: TaskPacket, + execution: AdapterExecutionResult, + approvalRecord: Awaited>, +): Promise { + const takeoverPacketPath = execution.takeoverPacketPath ?? join(execution.runRoot, "takeover", "takeover-packet.en.md"); + execution.takeoverPacketPath = takeoverPacketPath; + await writeText( + takeoverPacketPath, + [ + "# Takeover Packet", + "", + "## Run Identity", + "", + `- job ID: ${taskPacket.job_id}`, + `- run ID: ${execution.runResult.run_id}`, + `- freeze version: ${taskPacket.freeze_version}`, + `- story ID: ${taskPacket.story.story_id}`, + `- triggering run role: ${execution.runResult.run_role}`, + `- triggering exit status: ${execution.runResult.status}`, + "", + "## Blocked Step", + "", + `- exact blocked action: ${execution.workerOutput.open[0] ?? 
execution.workerOutput.next_action}`, + `- reason automation cannot continue: ${approvalRequestReason(execution)}`, + "- current page, tool, or environment when relevant: generic-cli worker container", + "", + "## Required Human Action", + "", + `- concrete human task: ${execution.workerOutput.next_action}`, + "- allowed action boundary: stay inside the active story, freeze, and archived run root", + "- forbidden actions: do not widen scope, rewrite approvals, or bypass evidence capture", + "- expected completion signal: archive the takeover outcome under the same run_id takeover root", + "", + "## Access And Approval Context", + "", + `- approval card ID: ${approvalRecord.card_id}`, + "- approved access method: governed local takeover", + "- credential handling rule: do not place long-lived secrets in takeover artifacts", + `- timeout or expiry condition: ${approvalRecord.timeout_at}`, + "", + "## Expected Result", + "", + `- expected output: ${execution.workerOutput.next_action}`, + `- artifact destination: artifacts/runs/${execution.runResult.run_id}/takeover/result.en.md`, + `- evidence destination: artifacts/runs/${execution.runResult.run_id}/takeover/result.en.md`, + `- resume criteria: update pause context via ${approvalRecord.card_id} and reference the same run_id takeover root in the manifest`, + "", + "## Resume Notes", + "", + `- next loop role: ${execution.runResult.run_role}`, + "- next command or check: review the archived takeover result and decide whether to resume or terminate", + "- rollback instruction if the takeover fails: stop the job and return control to owner review", + "", + ].join("\n"), + ); + const artifactIndex = await buildArtifactIndex(execution.runRoot, execution.runResult.run_id, execution.runResult.run_role); + execution.artifactIndex = artifactIndex; + await writeJson(execution.artifactIndexPath, artifactIndex); +} + +function buildPauseContext( + execution: AdapterExecutionResult | null, + approvalRecord: Awaited> | null, +): JobManifestPauseContext { + if (execution === null) { + return emptyPauseContext(); + } + const jobState = mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); + const waitingOn = waitingOnForState(jobState); + if (waitingOn === null) { + return emptyPauseContext(); + } + return { + is_paused: true, + pause_reason: execution.runResult.status, + waiting_on: approvalRecord?.waiting_on ?? waitingOn, + resume_action: approvalRecord?.resume_action ?? execution.workerOutput.next_action, + paused_at: execution.runResult.ended_at, + related_card_id: approvalRecord?.card_id ?? null, + expires_at: approvalRecord?.timeout_at ?? 
null, + }; +} + +function buildRunRecoveryState( + approvalRecord: Awaited> | null, +): { card_id: string; waiting_on: "owner" | "takeover"; resume_action: string } | null { + if (approvalRecord === null || approvalRecord.waiting_on === null || approvalRecord.resume_action === null) { + return null; + } + return { + card_id: approvalRecord.card_id, + waiting_on: approvalRecord.waiting_on, + resume_action: approvalRecord.resume_action, + }; +} + function buildJobManifest( layout: ReturnType, taskPacket: TaskPacket, @@ -392,13 +748,16 @@ function buildJobManifest( planRecord: Awaited>, freezeRecord: Awaited>, approvalRecord: Awaited>, + approvalRecords: Array>>, executions: AdapterExecutionResult[], adapterId: string, ): JobManifest { const runs = buildManifestRunRecords(layout, executions); const storyRecord = buildManifestStoryRecord(taskPacket, executions); const latestRunId = runs.length > 0 ? runs[runs.length - 1].run_id : ""; - const createdAt = approvalRecord.decided_at; + const latestExecution = executions.length > 0 ? executions[executions.length - 1] : null; + const latestApprovalRecord = approvalRecords.length > 0 ? approvalRecords[approvalRecords.length - 1] : null; + const createdAt = approvalRecord.decided_at ?? nowIso(); const updatedAt = executions.length > 0 ? executions[executions.length - 1].runResult.ended_at : nowIso(); const status = storyRecord.queue_state; @@ -415,15 +774,7 @@ function buildJobManifest( active_story_id: taskPacket.story.story_id, current_run_id: latestRunId, approved_adapter_set: [adapterId], - pause_context: { - is_paused: false, - pause_reason: null, - waiting_on: null, - resume_action: null, - paused_at: null, - related_card_id: null, - expires_at: null, - }, + pause_context: buildPauseContext(latestExecution, latestApprovalRecord), language_policy: taskPacket.language_policy, budget_limits: taskPacket.budget_limits, time_limits: taskPacket.time_limits, @@ -438,7 +789,7 @@ function buildJobManifest( approval_card_id: approvalRecord.card_id, summary_zh_ref: layout.relativeToJobRoot(approvalRecord.summary_path), approval_state: approvalRecord.card_state, - approved_at: approvalRecord.decided_at, + approved_at: approvalRecord.decided_at ?? createdAt, }, freeze: { freeze_id: taskPacket.freeze_id, @@ -448,11 +799,11 @@ function buildJobManifest( checksum_path: layout.relativeToJobRoot(freezeRecord.checksum_path), hash: freezeRecord.hash, approval_card_id: approvalRecord.card_id, - approved_at: approvalRecord.decided_at, + approved_at: approvalRecord.decided_at ?? 
createdAt, }, stories: [storyRecord], runs, - approvals: [buildManifestApprovalRecord(layout, approvalRecord)], + approvals: approvalRecords.map((record) => buildManifestApprovalRecord(layout, record)), artifacts: { runs_root: "artifacts/runs", sessions_root: "artifacts/sessions", @@ -462,9 +813,7 @@ function buildJobManifest( layout.relativeToJobRoot(freezeRecord.path), layout.relativeToJobRoot(freezeRecord.json_path), layout.relativeToJobRoot(freezeRecord.checksum_path), - layout.relativeToJobRoot(approvalRecord.snapshot_path), - layout.relativeToJobRoot(approvalRecord.decision_path), - layout.relativeToJobRoot(approvalRecord.summary_path), + ...approvalRecords.flatMap((record) => approvalRelativePaths(layout, record)), "job-manifest.json", "checksums.txt", ]), @@ -511,7 +860,7 @@ async function buildChecksumRecords( layout: ReturnType, planRecord: Awaited>, freezeRecord: Awaited>, - approvalRecord: Awaited>, + approvalRecords: Array>>, executions: AdapterExecutionResult[], extraRelativePaths: string[] = [], ): Promise { @@ -529,9 +878,7 @@ async function buildChecksumRecords( layout.relativeToJobRoot(freezeRecord.path), layout.relativeToJobRoot(freezeRecord.json_path), layout.relativeToJobRoot(freezeRecord.checksum_path), - layout.relativeToJobRoot(approvalRecord.snapshot_path), - layout.relativeToJobRoot(approvalRecord.decision_path), - layout.relativeToJobRoot(approvalRecord.summary_path), + ...approvalRecords.flatMap((record) => approvalRelativePaths(layout, record)), "job-manifest.json", ...runRelativePaths, ...extraRelativePaths, @@ -544,9 +891,9 @@ export interface Phase1RunSummary { job_id: string; job_root: string; builder_run_id: string; - qa_run_id: string; + qa_run_id: string | null; builder_status: string; - qa_status: string; + qa_status: string | null; manifest_path: string; checksums_path: string; } @@ -558,10 +905,11 @@ export async function runPhase1Local(repoRoot: string): Promise( join(repoRoot, "control", "fixtures", "phase1-local-approval-card.json"), ); - const decision = buildApprovalDecision(approvalCard); const layout = resolveJobRootLayout(repoRoot, approvalCard.job_id); await assertFreshJobRoot(layout); await ensureJobRootLayout(layout); + await stageJobRepoWorkspace(repoRoot, layout.repoArchiveRoot); + const workerRepoRoot = layout.repoArchiveRoot; const builderRunId = createRunId("builder"); const qaRunId = createRunId("qa"); @@ -571,28 +919,29 @@ export async function runPhase1Local(repoRoot: string): Promise/ for governance, approvals, state, runtime-home, and run bundles.", + "Use one canonical job root under jobs// for governance, an isolated repo workspace, approvals, state, runtime-home, and run bundles.", "Preserve the existing builder to QA worker order and local generic CLI adapter.", - "Mirror the latest canonical state files into state/ for control-shell recovery.", + "Mirror the latest canonical state files into state/ for control-shell recovery, while keeping builder and QA runtime homes role-scoped.", ], milestones: [ "Write the fixed plan and approval archive.", @@ -631,6 +980,15 @@ export async function runPhase1Local(repoRoot: string): Promise>> = [approvalRecord]; const freezeRecord = await writeContractFreeze(layout.freezePath, layout.freezeJsonPath, { metadata: buildFreezeMetadata( @@ -640,7 +998,7 @@ export async function runPhase1Local(repoRoot: string): Promise> | null = null; + const builderJobState = mapRunExitToJobState(builderExecution.runResult.status, builderExecution.runResult.run_role); + if (builderJobState === 
"AWAITING_OWNER" || builderJobState === "AWAITING_TAKEOVER") { + const recoveryCard = buildRecoveryCard(builderTaskPacket, builderExecution); + builderRecoveryRecord = await writeApprovalArchive(layout.approvalRoot(recoveryCard.card_id), recoveryCard, null); + if (builderJobState === "AWAITING_TAKEOVER") { + await writeRecoveryTakeoverPacket(builderTaskPacket, builderExecution, builderRecoveryRecord); + } + approvalRecords.push(builderRecoveryRecord); + } + await stateStore.recordRun(builderTaskPacket, builderExecution, buildRunRecoveryState(builderRecoveryRecord)); const builderManifest = buildJobManifest( layout, @@ -691,66 +1062,100 @@ export async function runPhase1Local(repoRoot: string): Promise> | null = null; + const qaJobState = mapRunExitToJobState(qaExecution.runResult.status, qaExecution.runResult.run_role); + if (qaJobState === "AWAITING_OWNER" || qaJobState === "AWAITING_TAKEOVER") { + const recoveryCard = buildRecoveryCard(qaTaskPacket, qaExecution); + qaRecoveryRecord = await writeApprovalArchive(layout.approvalRoot(recoveryCard.card_id), recoveryCard, null); + if (qaJobState === "AWAITING_TAKEOVER") { + await writeRecoveryTakeoverPacket(qaTaskPacket, qaExecution, qaRecoveryRecord); + } + approvalRecords.push(qaRecoveryRecord); + } + await stateStore.recordRun(qaTaskPacket, qaExecution, buildRunRecoveryState(qaRecoveryRecord)); + + const finalManifest = buildJobManifest( layout, - manifest: finalManifest, - environment: buildEnvironmentSnapshotMetadata(layout, finalManifest), - finalSummary: buildFinalSummaryMetadata(layout, qaTaskPacket, finalManifest, executions, completedAt), - }); - manifestForChecksums = finalization.manifest; - extraChecksumPaths.push(finalization.environment_path, finalization.final_summary_path); - await writeJobManifest(layout.manifestPath, manifestForChecksums); - await stateStore.recordArchiveFinalization( qaTaskPacket, - qaExecution.runResult.run_id, - qaExecution.runResult.run_role, - finalization.final_summary_path, + baseBranch, + planRecord, + freezeRecord, + approvalRecord, + approvalRecords, + executions, + adapterInfo.adapter_id, ); + manifestForChecksums = finalManifest; + await writeJobManifest(layout.manifestPath, finalManifest); + + if (qaExecution.runResult.status === "SUCCESS") { + const completedAt = nowIso(); + const finalization = await finalizeArchive({ + layout, + manifest: finalManifest, + environment: buildEnvironmentSnapshotMetadata(layout, finalManifest), + finalSummary: buildFinalSummaryMetadata(layout, qaTaskPacket, finalManifest, executions, completedAt), + }); + manifestForChecksums = finalization.manifest; + extraChecksumPaths.push(finalization.environment_path, finalization.final_summary_path); + await writeJobManifest(layout.manifestPath, manifestForChecksums); + await stateStore.recordArchiveFinalization( + qaTaskPacket, + qaExecution.runResult.run_id, + qaExecution.runResult.run_role, + finalization.final_summary_path, + ); + } + } else { + extraChecksumPaths.push(layout.relativeToJobRoot(join(qaRunRoot, "metadata", "task-packet.en.json"))); } const checksumRecords = await buildChecksumRecords( layout, planRecord, freezeRecord, - approvalRecord, + approvalRecords, executions, extraChecksumPaths, ); @@ -762,7 +1167,7 @@ export async function runPhase1Local(repoRoot: string): Promise string; + resolveLivePath: (relativePath: string) => string; + resolveSessionPath: (sessionId: string, relativePath?: string) => string; + resolveRunPath: (runId: string, relativePath?: string) => string; + resolveReadPaths: 
(relativePath: string, options?: StateScopeReadOptions) => Promise<string[]>;
+}
+
+function normalizeInputPath(path: string): string {
+  const trimmed = path.trim();
+  const wslMatch = trimmed.match(WSL_DRIVE_PATH);
+  if (wslMatch !== null) {
+    const drive = wslMatch[1].toUpperCase();
+    const suffix = (wslMatch[2] ?? "").replaceAll("/", "\\").replace(/^\\/, "");
+    return suffix.length > 0 ? `${drive}:\\${suffix}` : `${drive}:\\`;
+  }
+  if (/^[A-Za-z]:[\\/]/u.test(trimmed) || trimmed.startsWith("\\\\")) {
+    return trimmed.replaceAll("/", "\\");
+  }
+  return trimmed;
+}
+
+function normalizeAbsolutePath(path: string): string {
+  const normalizedInput = normalizeInputPath(path);
+  const absolutePath = isAbsolute(normalizedInput) ? normalizedInput : resolve(normalizedInput);
+  return normalize(absolutePath);
+}
+
+function assertWithinRoot(rootPath: string, targetPath: string, label: string): string {
+  const normalizedRoot = normalizeAbsolutePath(rootPath);
+  const normalizedTarget = normalizeAbsolutePath(targetPath);
+  const relativePath = relative(normalizedRoot, normalizedTarget);
+  if (relativePath === "" || (!relativePath.startsWith("..") && !isAbsolute(relativePath))) {
+    return normalizedTarget;
+  }
+  throw new Error(`${label} escaped its allowed root: ${normalizedTarget}`);
+}
+
+function normalizeRelativeStatePath(relativePath: string): string {
+  const normalized = relativePath.replaceAll("\\", "/").replace(/^\/+/, "");
+  if (normalized.length === 0) {
+    return "";
+  }
+  const segments = normalized.split("/").filter((segment) => segment.length > 0);
+  if (segments.length === 0) {
+    return "";
+  }
+  for (const segment of segments) {
+    if (segment === "." || segment === "..") {
+      throw new Error(`state path may not contain traversal segments: ${relativePath}`);
+    }
+  }
+  return join(...segments);
+}
+
+export function validateSessionId(sessionId: string): string {
+  if (!SAFE_SCOPE_SEGMENT.test(sessionId)) {
+    throw new Error(`invalid session_id: ${sessionId}`);
+  }
+  return sessionId;
+}
+
+export function validateRunId(runId: string): string {
+  if (!SAFE_SCOPE_SEGMENT.test(runId)) {
+    throw new Error(`invalid run_id: ${runId}`);
+  }
+  return runId;
+}
+
+export function resolveStateScopeRoots(
+  repoRoot: string,
+  archiveStateRoot: string,
+  liveStateRoot: string,
+): StateScopeRoots {
+  const normalizedRepoRoot = normalizeAbsolutePath(repoRoot);
+  const normalizedRootStateRoot = assertWithinRoot(normalizedRepoRoot, archiveStateRoot, "root state path");
+  const normalizedLiveStateRoot = assertWithinRoot(normalizedRepoRoot, liveStateRoot, "live state path");
+  const normalizedJobRoot = assertWithinRoot(normalizedRepoRoot, dirname(normalizedRootStateRoot), "job root");
+  const runsRoot = assertWithinRoot(normalizedJobRoot, join(normalizedJobRoot, "artifacts", "runs"), "run scope root");
+
+  return {
+    repoRoot: normalizedRepoRoot,
+    jobRoot: normalizedJobRoot,
+    rootStateRoot: normalizedRootStateRoot,
+    liveStateRoot: normalizedLiveStateRoot,
+    sessionsRoot: assertWithinRoot(normalizedRootStateRoot, join(normalizedRootStateRoot, "sessions"), "session scope root"),
+    currentSessionPath: assertWithinRoot(
+      normalizedRootStateRoot,
+      join(normalizedRootStateRoot, "current-session.json"),
+      "current session pointer",
+    ),
+    runsRoot,
+  };
+}
+
+async function readCurrentSessionId(currentSessionPath: string): Promise<string | null> {
+  if (!(await pathExists(currentSessionPath))) {
+    return null;
+  }
+  const payload = await readJson<Record<string, unknown>>(currentSessionPath);
+  const candidate = payload.session_id;
+  if (typeof candidate !== "string" || candidate.length === 0) {
+    return null;
+  }
+  return validateSessionId(candidate);
+}
+
+export function createStateScopeResolver(
+  repoRoot: string,
+  archiveStateRoot: string,
+  liveStateRoot: string,
+): StateScopeResolver {
+  const roots = resolveStateScopeRoots(repoRoot, archiveStateRoot, liveStateRoot);
+
+  const resolveScopedPath = (basePath: string, relativePath: string, label: string): string => {
+    const normalizedRelativePath = normalizeRelativeStatePath(relativePath);
+    const targetPath = normalizedRelativePath.length > 0 ? join(basePath, normalizedRelativePath) : basePath;
+    return assertWithinRoot(basePath, targetPath, label);
+  };
+
+  return {
+    roots,
+    resolveRootPath: (relativePath: string) => resolveScopedPath(roots.rootStateRoot, relativePath, "root scope path"),
+    resolveLivePath: (relativePath: string) => resolveScopedPath(roots.liveStateRoot, relativePath, "live scope path"),
+    resolveSessionPath: (sessionId: string, relativePath = "") =>
+      resolveScopedPath(join(roots.sessionsRoot, validateSessionId(sessionId)), relativePath, "session scope path"),
+    resolveRunPath: (runId: string, relativePath = "") =>
+      resolveScopedPath(join(roots.runsRoot, validateRunId(runId), "metadata", "state"), relativePath, "run scope path"),
+    resolveReadPaths: async (relativePath: string, options: StateScopeReadOptions = {}) => {
+      const resolvedPaths: string[] = [];
+      const sessionId = options.sessionId ?? (await readCurrentSessionId(roots.currentSessionPath));
+      if (sessionId !== null && sessionId !== undefined) {
+        resolvedPaths.push(resolveScopedPath(join(roots.sessionsRoot, validateSessionId(sessionId)), relativePath, "session read path"));
+      }
+      if (options.includeRoot !== false) {
+        resolvedPaths.push(resolveScopedPath(roots.rootStateRoot, relativePath, "root read path"));
+      }
+      return resolvedPaths;
+    },
+  };
+}
diff --git a/core/loop/state-store.ts b/core/loop/state-store.ts
index 27490da..50b4156 100644
--- a/core/loop/state-store.ts
+++ b/core/loop/state-store.ts
@@ -1,15 +1,8 @@
 import { join } from "node:path";
 import { mapRunExitToJobState, nextRequiredActionFromState, runningJobStateForRole } from "../contracts/status.ts";
-import {
-  ensureDir,
-  nowIso,
-  pathExists,
-  readJson,
-  readText,
-  uniqueStrings,
-  writeJson,
-  writeText,
-} from "./support.ts";
+import { ensureDir, nowIso, pathExists, readText, uniqueStrings } from "./support.ts";
+import { createStateScopeResolver, type StateScopeResolver } from "./state-scope.ts";
+import { atomicWriteBatch, recoverPendingBatch, withWriteLock } from "./state-write.ts";
 import type {
   ActiveStoryFile,
   AdapterExecutionResult,
@@ -47,6 +40,12 @@ function createStoryTrace(taskPacket: TaskPacket): StoryTrace {
   };
 }
 
+export interface RunRecoveryState {
+  card_id: string;
+  waiting_on: "owner" | "takeover";
+  resume_action: string;
+}
+
 function renderProgress(
   taskPacket: TaskPacket,
   jobState: JobState,
@@ -70,7 +69,32 @@ function renderProgress(
   ].join("\n");
 }
 
-function renderRiskRegister(openRisks: string[]): string {
+function renderRiskRegister(
+  jobState: JobState,
+  openRisks: string[],
+  recoveryState: RunRecoveryState | null = null,
+): string {
+  let mitigationStatus: string[];
+  let ownerReviewNeeds: string[];
+  if (jobState === "AWAITING_OWNER") {
+    mitigationStatus = [
+      `- recovery card ${recoveryState?.card_id ?? "n/a"} is archived and waiting for owner decision`,
+      `- resume action: ${recoveryState?.resume_action ?? 
"Wait for owner input before continuing."}`, + ]; + ownerReviewNeeds = ["- owner review is required before continuing"]; + } else if (jobState === "AWAITING_TAKEOVER") { + mitigationStatus = [ + `- recovery card ${recoveryState?.card_id ?? "n/a"} is archived and waiting for takeover`, + `- resume action: ${recoveryState?.resume_action ?? "Wait for takeover before continuing."}`, + ]; + ownerReviewNeeds = ["- takeover is required before continuing"]; + } else if (openRisks.length === 0) { + mitigationStatus = ["- no mitigation is required"]; + ownerReviewNeeds = ["- none"]; + } else { + mitigationStatus = ["- fix the active blockers before the next run"]; + ownerReviewNeeds = ["- owner review is required if fixback cannot stay in scope"]; + } return [ "# Risk Register", "", @@ -80,7 +104,7 @@ function renderRiskRegister(openRisks: string[]): string { "", "## Mitigation Status", "", - ...(openRisks.length === 0 ? ["- no mitigation is required"] : ["- fix the active blockers before the next run"]), + ...mitigationStatus, "", "## Escalated Risks", "", @@ -88,7 +112,7 @@ function renderRiskRegister(openRisks: string[]): string { "", "## Owner Review Needs", "", - ...(openRisks.length === 0 ? ["- none"] : ["- owner review is required if fixback cannot stay in scope"]), + ...ownerReviewNeeds, "", "## Last Updated", "", @@ -110,15 +134,22 @@ function approvalDecisionEntry(): string { ].join("\n"); } -function runDecisionEntry(runId: string, jobState: JobState): string { +function runDecisionEntry(runId: string, jobState: JobState, recoveryState: RunRecoveryState | null = null): string { + const decisionSummary = + jobState === "AWAITING_OWNER" + ? `The control shell archived recovery card ${recoveryState?.card_id ?? "n/a"} and is waiting for owner decision.` + : jobState === "AWAITING_TAKEOVER" + ? `The control shell archived recovery card ${recoveryState?.card_id ?? "n/a"} and is waiting for takeover.` + : "The control shell recorded the latest run outcome."; return [ `## ${nowIso()}`, "", - "- card_id: n/a", + `- card_id: ${recoveryState?.card_id ?? "n/a"}`, `- run_id: ${runId}`, - "- decision summary: The control shell recorded the latest run outcome.", + `- decision summary: ${decisionSummary}`, `- resulting job state: ${jobState}`, `- next required action: ${nextRequiredActionFromState(jobState)}`, + ...(recoveryState === null ? 
[] : [`- recovery resume action: ${recoveryState.resume_action}`]), "", ].join("\n"); } @@ -154,60 +185,124 @@ export interface StateStorePaths { liveStateRoot?: string; } +const STATE_FILE_ORDER = [ + "progress.en.md", + "story-queue.json", + "active-story.json", + "handoff.en.md", + "risk-register.en.md", + "loop-metrics.json", + "decisions.en.md", + "trace-index.json", +] as const; + +type StateFileName = (typeof STATE_FILE_ORDER)[number]; +type SerializedState = Partial>; + +function serializeJson(value: unknown): string { + return `${JSON.stringify(value, null, 2)}\n`; +} + export class StateStore { private readonly stateRoot: string; private readonly liveStateRoot: string; - private readonly progressPath: string; - private readonly storyQueuePath: string; - private readonly activeStoryPath: string; - private readonly handoffPath: string; - private readonly riskRegisterPath: string; - private readonly loopMetricsPath: string; - private readonly decisionsPath: string; + private readonly scopeResolver: StateScopeResolver; private readonly traceIndexPath: string; constructor(repoRoot: string, paths: StateStorePaths = {}) { - this.stateRoot = paths.archiveStateRoot ?? join(repoRoot, "state"); - this.liveStateRoot = paths.liveStateRoot ?? join(repoRoot, "state"); - this.progressPath = join(this.stateRoot, "progress.en.md"); - this.storyQueuePath = join(this.stateRoot, "story-queue.json"); - this.activeStoryPath = join(this.stateRoot, "active-story.json"); - this.handoffPath = join(this.stateRoot, "handoff.en.md"); - this.riskRegisterPath = join(this.stateRoot, "risk-register.en.md"); - this.loopMetricsPath = join(this.stateRoot, "loop-metrics.json"); - this.decisionsPath = join(this.stateRoot, "decisions.en.md"); - this.traceIndexPath = join(this.stateRoot, "trace-index.json"); + this.scopeResolver = createStateScopeResolver( + repoRoot, + paths.archiveStateRoot ?? join(repoRoot, "state"), + paths.liveStateRoot ?? 
join(repoRoot, "state"), + ); + this.stateRoot = this.scopeResolver.roots.rootStateRoot; + this.liveStateRoot = this.scopeResolver.roots.liveStateRoot; + this.traceIndexPath = this.scopeResolver.resolveRootPath("trace-index.json"); } - private mirrorPath(fileName: string): string { - return join(this.liveStateRoot, fileName); + private rootPath(fileName: StateFileName): string { + return this.scopeResolver.resolveRootPath(fileName); } - private async writeMirroredText(path: string, fileName: string, value: string): Promise { - await writeText(path, value); - const mirrorPath = this.mirrorPath(fileName); - if (mirrorPath !== path) { - await writeText(mirrorPath, value); + private mirrorPath(fileName: StateFileName): string { + return this.scopeResolver.resolveLivePath(fileName); + } + + private snapshotPath(runId: string, fileName: StateFileName): string { + return this.scopeResolver.resolveRunPath(runId, fileName); + } + + private async loadCanonicalState(): Promise { + await recoverPendingBatch(this.stateRoot); + const state: SerializedState = {}; + for (const fileName of STATE_FILE_ORDER) { + const path = this.rootPath(fileName); + if (await pathExists(path)) { + state[fileName] = await readText(path); + } } + return state; } - private async writeMirroredJson(path: string, fileName: string, value: unknown): Promise { - await writeJson(path, value); - const mirrorPath = this.mirrorPath(fileName); - if (mirrorPath !== path) { - await writeJson(mirrorPath, value); + private parseJsonState(state: SerializedState, fileName: StateFileName): T { + const serialized = state[fileName]; + if (serialized === undefined) { + throw new Error(`missing canonical state file: ${fileName}`); } + return JSON.parse(serialized) as T; } - getTraceIndexPath(): string { - return this.traceIndexPath; + private readTextState(state: SerializedState, fileName: StateFileName): string { + const serialized = state[fileName]; + if (serialized === undefined) { + throw new Error(`missing canonical state file: ${fileName}`); + } + return serialized; } - async bootstrap(taskPacket: TaskPacket): Promise { - await ensureDir(this.stateRoot); - await ensureDir(this.liveStateRoot); + private setJsonState(state: SerializedState, fileName: StateFileName, value: unknown): void { + state[fileName] = serializeJson(value); + } + + private setTextState(state: SerializedState, fileName: StateFileName, value: string): void { + state[fileName] = value; + } + + private async persistState(state: SerializedState, runId: string): Promise { + const entries: Array<{ path: string; content: string }> = []; + const seenPaths = new Set(); + for (const fileName of STATE_FILE_ORDER) { + const content = state[fileName]; + if (content === undefined) { + continue; + } + for (const path of [this.rootPath(fileName), this.mirrorPath(fileName), this.snapshotPath(runId, fileName)]) { + const dedupeKey = path.toLowerCase(); + if (seenPaths.has(dedupeKey)) { + continue; + } + seenPaths.add(dedupeKey); + entries.push({ path, content }); + } + } + await atomicWriteBatch(this.stateRoot, entries); + } + + private async mutateState(runId: string, mutate: (state: SerializedState) => Promise): Promise { + return withWriteLock( + this.stateRoot, + async () => { + const state = await this.loadCanonicalState(); + const result = await mutate(state); + await this.persistState(state, runId); + return result; + }, + { owner: "StateStore" }, + ); + } - if (!(await pathExists(this.storyQueuePath))) { + private bootstrapInitialState(taskPacket: TaskPacket, state: 
SerializedState): void { + if (state["story-queue.json"] === undefined) { const storyQueue: StoryQueueFile = { job_id: taskPacket.job_id, freeze_version: taskPacket.freeze_version, @@ -222,10 +317,10 @@ export class StateStore { }, ], }; - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); + this.setJsonState(state, "story-queue.json", storyQueue); } - if (!(await pathExists(this.traceIndexPath))) { + if (state["trace-index.json"] === undefined) { const traceIndex: TraceIndex = { job_id: taskPacket.job_id, freeze_id: taskPacket.freeze_id, @@ -235,28 +330,28 @@ export class StateStore { [taskPacket.story.story_id]: createStoryTrace(taskPacket), }, }; - await this.writeMirroredJson(this.traceIndexPath, "trace-index.json", traceIndex); + this.setJsonState(state, "trace-index.json", traceIndex); } - if (!(await pathExists(this.loopMetricsPath))) { + if (state["loop-metrics.json"] === undefined) { const loopMetrics: LoopMetricsFile = { job_id: taskPacket.job_id, runs: [], }; - await this.writeMirroredJson(this.loopMetricsPath, "loop-metrics.json", loopMetrics); + this.setJsonState(state, "loop-metrics.json", loopMetrics); } - if (!(await pathExists(this.decisionsPath))) { - await this.writeMirroredText(this.decisionsPath, "decisions.en.md", ["# Decisions", "", approvalDecisionEntry()].join("\n")); + if (state["decisions.en.md"] === undefined) { + this.setTextState(state, "decisions.en.md", ["# Decisions", "", approvalDecisionEntry()].join("\n")); } - if (!(await pathExists(this.riskRegisterPath))) { - await this.writeMirroredText(this.riskRegisterPath, "risk-register.en.md", renderRiskRegister([])); + if (state["risk-register.en.md"] === undefined) { + this.setTextState(state, "risk-register.en.md", renderRiskRegister("READY_TO_RUN", [])); } - if (!(await pathExists(this.handoffPath))) { - await this.writeMirroredText( - this.handoffPath, + if (state["handoff.en.md"] === undefined) { + this.setTextState( + state, "handoff.en.md", [ "# Handoff", @@ -305,179 +400,204 @@ export class StateStore { ].join("\n"), ); } - - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress( - taskPacket, - "READY_TO_RUN", - "", - "builder", - "The fixed local Phase 1 story is approved and ready for the builder run.", - ), - ); } - async prepareRun(taskPacket: TaskPacket): Promise { - const jobState = runningJobStateForRole(taskPacket.run_role); - const storyQueue = await readJson(this.storyQueuePath); - storyQueue.freeze_version = taskPacket.freeze_version; - storyQueue.stories = storyQueue.stories.map((story) => - story.story_id === taskPacket.story.story_id - ? 
{ - ...story, - queue_state: jobState, - last_run_id: taskPacket.run_id, - } - : story, - ); + getTraceIndexPath(): string { + return this.traceIndexPath; + } - const activeStory: ActiveStoryFile = { - story_id: taskPacket.story.story_id, - freeze_version: taskPacket.freeze_version, - run_id: taskPacket.run_id, - run_role: taskPacket.run_role, - objective: taskPacket.story.story_objective, - acceptance_ids: taskPacket.story.acceptance_ids, - verification_targets: taskPacket.story.verification_targets, - stop_conditions: taskPacket.story.stop_conditions, - expected_artifacts: taskPacket.story.expected_artifacts, - }; - - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); - await this.writeMirroredJson(this.activeStoryPath, "active-story.json", activeStory); - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress( - taskPacket, - jobState, - taskPacket.run_id, - taskPacket.run_role, - `${taskPacket.run_role} is executing the fixed local proof-of-concept story.`, - ), - ); + async bootstrap(taskPacket: TaskPacket): Promise { + await ensureDir(this.stateRoot); + await ensureDir(this.liveStateRoot); + await this.mutateState(taskPacket.run_id, async (state) => { + this.bootstrapInitialState(taskPacket, state); + this.setTextState( + state, + "progress.en.md", + renderProgress( + taskPacket, + "READY_TO_RUN", + "", + "builder", + "The fixed local Phase 1 story is approved and ready for the builder run.", + ), + ); + }); } - async recordRun(taskPacket: TaskPacket, execution: AdapterExecutionResult): Promise { - const jobState = mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); - const loopMetrics = await readJson(this.loopMetricsPath); - const storyQueue = await readJson(this.storyQueuePath); - const traceIndex = await readJson(this.traceIndexPath); - const handoffContent = await readText(execution.handoffPath); + async prepareRun(taskPacket: TaskPacket): Promise { + await this.mutateState(taskPacket.run_id, async (state) => { + const jobState = runningJobStateForRole(taskPacket.run_role); + const storyQueue = this.parseJsonState(state, "story-queue.json"); + storyQueue.freeze_version = taskPacket.freeze_version; + storyQueue.stories = storyQueue.stories.map((story) => + story.story_id === taskPacket.story.story_id + ? { + ...story, + queue_state: jobState, + last_run_id: taskPacket.run_id, + } + : story, + ); - const metricEntry: LoopMetricEntry = { - run_id: execution.runResult.run_id, - story_id: execution.runResult.story_id, - run_role: execution.runResult.run_role, - started_at: execution.runResult.started_at, - ended_at: execution.runResult.ended_at, - duration_s: execution.runResult.duration_s, - estimated_cost: 0, - actual_cost: 0, - retry_index: taskPacket.run_attempt - 1, - run_exit_status: execution.runResult.status, - }; - loopMetrics.runs.push(metricEntry); - - storyQueue.freeze_version = taskPacket.freeze_version; - storyQueue.stories = storyQueue.stories.map((story) => - story.story_id === taskPacket.story.story_id - ? 
{ - ...story, - queue_state: jobState, - last_run_id: execution.runResult.run_id, - } - : story, - ); + const activeStory: ActiveStoryFile = { + story_id: taskPacket.story.story_id, + freeze_version: taskPacket.freeze_version, + run_id: taskPacket.run_id, + run_role: taskPacket.run_role, + objective: taskPacket.story.story_objective, + acceptance_ids: taskPacket.story.acceptance_ids, + verification_targets: taskPacket.story.verification_targets, + stop_conditions: taskPacket.story.stop_conditions, + expected_artifacts: taskPacket.story.expected_artifacts, + }; - const storyTrace = traceIndex.stories[taskPacket.story.story_id] ?? createStoryTrace(taskPacket); - const newArtifactRefs = execution.workerOutput.evidence_paths.map( - (path) => `artifacts/runs/${execution.runResult.run_id}/${path}`, - ); + this.setJsonState(state, "story-queue.json", storyQueue); + this.setJsonState(state, "active-story.json", activeStory); + this.setTextState( + state, + "progress.en.md", + renderProgress( + taskPacket, + jobState, + taskPacket.run_id, + taskPacket.run_role, + `${taskPacket.run_role} is executing the fixed local proof-of-concept story.`, + ), + ); + }); + } - for (const acceptanceId of taskPacket.story.acceptance_ids) { - const previous = storyTrace.acceptance[acceptanceId] ?? createTraceEntry(); - storyTrace.acceptance[acceptanceId] = { - status: execution.workerOutput.acceptance_status, - artifacts: uniqueStrings([...previous.artifacts, ...newArtifactRefs]), - updated_at: nowIso(), - latest_run_id: execution.runResult.run_id, + async recordRun( + taskPacket: TaskPacket, + execution: AdapterExecutionResult, + recoveryState: RunRecoveryState | null = null, + ): Promise { + const handoffContent = await readText(execution.handoffPath); + await this.mutateState(execution.runResult.run_id, async (state) => { + const jobState = mapRunExitToJobState(execution.runResult.status, execution.runResult.run_role); + const loopMetrics = this.parseJsonState(state, "loop-metrics.json"); + const storyQueue = this.parseJsonState(state, "story-queue.json"); + const traceIndex = this.parseJsonState(state, "trace-index.json"); + + const metricEntry: LoopMetricEntry = { + run_id: execution.runResult.run_id, + story_id: execution.runResult.story_id, + run_role: execution.runResult.run_role, + started_at: execution.runResult.started_at, + ended_at: execution.runResult.ended_at, + duration_s: execution.runResult.duration_s, + estimated_cost: 0, + actual_cost: 0, + retry_index: taskPacket.run_attempt - 1, + run_exit_status: execution.runResult.status, }; - } + loopMetrics.runs.push(metricEntry); + + storyQueue.freeze_version = taskPacket.freeze_version; + storyQueue.stories = storyQueue.stories.map((story) => + story.story_id === taskPacket.story.story_id + ? { + ...story, + queue_state: jobState, + last_run_id: execution.runResult.run_id, + } + : story, + ); - for (const checkName of taskPacket.story.mandatory_checks) { - const previous = storyTrace.mandatory_checks[checkName] ?? createTraceEntry(); - storyTrace.mandatory_checks[checkName] = { - status: execution.workerOutput.mandatory_check_status, - artifacts: uniqueStrings([...previous.artifacts, ...newArtifactRefs]), - updated_at: nowIso(), - latest_run_id: execution.runResult.run_id, - }; - } + const storyTrace = traceIndex.stories[taskPacket.story.story_id] ?? 
createStoryTrace(taskPacket); + const newArtifactRefs = execution.workerOutput.evidence_paths.map( + (path) => `artifacts/runs/${execution.runResult.run_id}/${path}`, + ); - storyTrace.story_id = taskPacket.story.story_id; - storyTrace.latest_run_id = execution.runResult.run_id; - storyTrace.latest_run_role = execution.runResult.run_role; - storyTrace.latest_qa_status = - execution.runResult.run_role === "qa" ? execution.runResult.status : storyTrace.latest_qa_status; - storyTrace.artifact_locations = uniqueStrings([...storyTrace.artifact_locations, ...newArtifactRefs]); - traceIndex.active_story_id = taskPacket.story.story_id; - traceIndex.stories[taskPacket.story.story_id] = storyTrace; - - const blockers = execution.workerOutput.blockers; - const progressSummary = - execution.runResult.run_role === "builder" && execution.runResult.status === "SUCCESS" - ? "The builder completed the local slice and handed off to QA." - : execution.runResult.run_role === "qa" && execution.runResult.status === "SUCCESS" - ? "QA closed the local proof-of-concept story and the job is ready to archive." - : `${execution.runResult.run_role} ended with status ${execution.runResult.status}.`; - - const existingDecisions = await readText(this.decisionsPath); - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); - await this.writeMirroredJson(this.loopMetricsPath, "loop-metrics.json", loopMetrics); - await this.writeMirroredJson(this.traceIndexPath, "trace-index.json", traceIndex); - await this.writeMirroredText(this.handoffPath, "handoff.en.md", handoffContent); - await this.writeMirroredText(this.riskRegisterPath, "risk-register.en.md", renderRiskRegister(blockers)); - await this.writeMirroredText( - this.decisionsPath, - "decisions.en.md", - `${existingDecisions.trimEnd()}\n\n${runDecisionEntry(execution.runResult.run_id, jobState)}`, - ); - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress(taskPacket, jobState, execution.runResult.run_id, execution.runResult.run_role, progressSummary), - ); + for (const acceptanceId of taskPacket.story.acceptance_ids) { + const previous = storyTrace.acceptance[acceptanceId] ?? createTraceEntry(); + storyTrace.acceptance[acceptanceId] = { + status: execution.workerOutput.acceptance_status, + artifacts: uniqueStrings([...previous.artifacts, ...newArtifactRefs]), + updated_at: nowIso(), + latest_run_id: execution.runResult.run_id, + }; + } + + for (const checkName of taskPacket.story.mandatory_checks) { + const previous = storyTrace.mandatory_checks[checkName] ?? createTraceEntry(); + storyTrace.mandatory_checks[checkName] = { + status: execution.workerOutput.mandatory_check_status, + artifacts: uniqueStrings([...previous.artifacts, ...newArtifactRefs]), + updated_at: nowIso(), + latest_run_id: execution.runResult.run_id, + }; + } + + storyTrace.story_id = taskPacket.story.story_id; + storyTrace.latest_run_id = execution.runResult.run_id; + storyTrace.latest_run_role = execution.runResult.run_role; + storyTrace.latest_qa_status = + execution.runResult.run_role === "qa" ? 
execution.runResult.status : storyTrace.latest_qa_status; + storyTrace.artifact_locations = uniqueStrings([...storyTrace.artifact_locations, ...newArtifactRefs]); + traceIndex.active_story_id = taskPacket.story.story_id; + traceIndex.stories[taskPacket.story.story_id] = storyTrace; + + const blockers = execution.workerOutput.blockers; + const progressSummary = + execution.runResult.run_role === "builder" && execution.runResult.status === "SUCCESS" + ? "The builder completed the local slice and handed off to QA." + : execution.runResult.run_role === "qa" && execution.runResult.status === "SUCCESS" + ? "QA closed the local proof-of-concept story and the job is ready to archive." + : jobState === "AWAITING_OWNER" && recoveryState !== null + ? `${execution.runResult.run_role} ended with status ${execution.runResult.status}. Waiting for owner decision via recovery card ${recoveryState.card_id}.` + : jobState === "AWAITING_TAKEOVER" && recoveryState !== null + ? `${execution.runResult.run_role} ended with status ${execution.runResult.status}. Waiting for takeover via recovery card ${recoveryState.card_id}.` + : `${execution.runResult.run_role} ended with status ${execution.runResult.status}.`; + + const existingDecisions = this.readTextState(state, "decisions.en.md"); + this.setJsonState(state, "story-queue.json", storyQueue); + this.setJsonState(state, "loop-metrics.json", loopMetrics); + this.setJsonState(state, "trace-index.json", traceIndex); + this.setTextState(state, "handoff.en.md", handoffContent); + this.setTextState(state, "risk-register.en.md", renderRiskRegister(jobState, blockers, recoveryState)); + this.setTextState( + state, + "decisions.en.md", + `${existingDecisions.trimEnd()}\n\n${runDecisionEntry(execution.runResult.run_id, jobState, recoveryState)}`, + ); + this.setTextState( + state, + "progress.en.md", + renderProgress(taskPacket, jobState, execution.runResult.run_id, execution.runResult.run_role, progressSummary), + ); + }); } async recordIntegrityFailure(taskPacket: TaskPacket, reason: string): Promise { - const storyQueue = await readJson(this.storyQueuePath); - storyQueue.freeze_version = taskPacket.freeze_version; - storyQueue.stories = storyQueue.stories.map((story) => - story.story_id === taskPacket.story.story_id - ? { - ...story, - queue_state: "INTEGRITY_FAILED", - last_run_id: taskPacket.run_id, - } - : story, - ); + await this.mutateState(taskPacket.run_id, async (state) => { + const storyQueue = this.parseJsonState(state, "story-queue.json"); + storyQueue.freeze_version = taskPacket.freeze_version; + storyQueue.stories = storyQueue.stories.map((story) => + story.story_id === taskPacket.story.story_id + ? 
{ + ...story, + queue_state: "INTEGRITY_FAILED", + last_run_id: taskPacket.run_id, + } + : story, + ); - const existingDecisions = await readText(this.decisionsPath); - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); - await this.writeMirroredText(this.riskRegisterPath, "risk-register.en.md", renderRiskRegister([reason])); - await this.writeMirroredText( - this.decisionsPath, - "decisions.en.md", - `${existingDecisions.trimEnd()}\n\n${integrityFailureDecisionEntry(taskPacket.run_id, reason)}`, - ); - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress(taskPacket, "INTEGRITY_FAILED", taskPacket.run_id, taskPacket.run_role, reason), - ); + const existingDecisions = this.readTextState(state, "decisions.en.md"); + this.setJsonState(state, "story-queue.json", storyQueue); + this.setTextState(state, "risk-register.en.md", renderRiskRegister("INTEGRITY_FAILED", [reason])); + this.setTextState( + state, + "decisions.en.md", + `${existingDecisions.trimEnd()}\n\n${integrityFailureDecisionEntry(taskPacket.run_id, reason)}`, + ); + this.setTextState( + state, + "progress.en.md", + renderProgress(taskPacket, "INTEGRITY_FAILED", taskPacket.run_id, taskPacket.run_role, reason), + ); + }); } async recordArchiveFinalization( @@ -486,35 +606,37 @@ export class StateStore { latestRunRole: RunRole, finalSummaryPath: string, ): Promise { - const storyQueue = await readJson(this.storyQueuePath); - storyQueue.freeze_version = taskPacket.freeze_version; - storyQueue.stories = storyQueue.stories.map((story) => - story.story_id === taskPacket.story.story_id - ? { - ...story, - queue_state: "COMPLETED", - last_run_id: latestRunId, - } - : story, - ); + await this.mutateState(latestRunId, async (state) => { + const storyQueue = this.parseJsonState(state, "story-queue.json"); + storyQueue.freeze_version = taskPacket.freeze_version; + storyQueue.stories = storyQueue.stories.map((story) => + story.story_id === taskPacket.story.story_id + ? 
{ + ...story, + queue_state: "COMPLETED", + last_run_id: latestRunId, + } + : story, + ); - const existingDecisions = await readText(this.decisionsPath); - await this.writeMirroredJson(this.storyQueuePath, "story-queue.json", storyQueue); - await this.writeMirroredText( - this.decisionsPath, - "decisions.en.md", - `${existingDecisions.trimEnd()}\n\n${archiveFinalizationDecisionEntry(latestRunId, finalSummaryPath)}`, - ); - await this.writeMirroredText( - this.progressPath, - "progress.en.md", - renderProgress( - taskPacket, - "COMPLETED", - latestRunId, - latestRunRole, - "Archive finalization completed and the canonical local job bundle is closed.", - ), - ); + const existingDecisions = this.readTextState(state, "decisions.en.md"); + this.setJsonState(state, "story-queue.json", storyQueue); + this.setTextState( + state, + "decisions.en.md", + `${existingDecisions.trimEnd()}\n\n${archiveFinalizationDecisionEntry(latestRunId, finalSummaryPath)}`, + ); + this.setTextState( + state, + "progress.en.md", + renderProgress( + taskPacket, + "COMPLETED", + latestRunId, + latestRunRole, + "Archive finalization completed and the canonical local job bundle is closed.", + ), + ); + }); } } diff --git a/core/loop/state-write.ts b/core/loop/state-write.ts new file mode 100644 index 0000000..a2641bd --- /dev/null +++ b/core/loop/state-write.ts @@ -0,0 +1,201 @@ +import { randomUUID } from "node:crypto"; +import { open, rename, rm, stat } from "node:fs/promises"; +import { basename, dirname, join } from "node:path"; +import { ensureDir, nowIso } from "./support.ts"; + +const DEFAULT_LOCK_TIMEOUT_MS = 15_000; +const DEFAULT_LOCK_POLL_MS = 50; +const DEFAULT_LOCK_STALE_MS = 30_000; +const LOCK_FILE_NAME = ".state-write.lock"; +const TRANSACTION_FILE_NAME = ".state-write.txn.json"; + +export interface StateWriteLockOptions { + lockName?: string; + owner?: string; + pollMs?: number; + staleMs?: number; + timeoutMs?: number; +} + +export interface AtomicWriteEntry { + path: string; + content: string; +} + +interface PendingBatchEntry { + path: string; + temp_path: string; +} + +interface PendingBatchRecord { + created_at: string; + entries: PendingBatchEntry[]; +} + +function delay(ms: number): Promise<void> { + return new Promise((resolvePromise) => { + setTimeout(resolvePromise, ms); + }); +} + +async function isStaleLock(lockPath: string, staleMs: number): Promise<boolean> { + try { + const metadata = await stat(lockPath); + return Date.now() - metadata.mtimeMs >= staleMs; + } catch { + return false; + } +} + +async function acquireWriteLock(lockRoot: string, options: StateWriteLockOptions): Promise<() => Promise<void>> { + const timeoutMs = options.timeoutMs ?? DEFAULT_LOCK_TIMEOUT_MS; + const pollMs = options.pollMs ?? DEFAULT_LOCK_POLL_MS; + const staleMs = options.staleMs ?? DEFAULT_LOCK_STALE_MS; + const lockName = options.lockName ?? LOCK_FILE_NAME; + const lockPath = join(lockRoot, lockName); + const deadline = Date.now() + timeoutMs; + + await ensureDir(lockRoot); + + while (true) { + try { + const handle = await open(lockPath, "wx"); + try { + await handle.writeFile( + `${JSON.stringify({ pid: process.pid, owner: options.owner ?? "state-store", acquired_at: nowIso() }, null, 2)}\n`, + "utf8", + ); + await handle.sync(); + } finally { + await handle.close(); + } + return async () => { + await rm(lockPath, { force: true }); + }; + } catch (error) { + const code = error instanceof Error && "code" in error ? String((error as { code?: unknown }).code ?? 
"") : ""; + if (code !== "EEXIST") { + throw error; + } + if (await isStaleLock(lockPath, staleMs)) { + await rm(lockPath, { force: true }); + continue; + } + if (Date.now() >= deadline) { + throw new Error(`timed out acquiring state write lock: ${lockPath}`); + } + await delay(pollMs); + } + } +} + +export async function withWriteLock<T>( + lockRoot: string, + callback: () => Promise<T>, + options: StateWriteLockOptions = {}, +): Promise<T> { + const release = await acquireWriteLock(lockRoot, options); + try { + return await callback(); + } finally { + await release(); + } +} + +export async function atomicWriteText(path: string, value: string): Promise<void> { + await ensureDir(dirname(path)); + const tempPath = join(dirname(path), `.${basename(path)}.${process.pid}.${Date.now()}.${randomUUID()}.tmp`); + let handle: Awaited<ReturnType<typeof open>> | null = null; + try { + handle = await open(tempPath, "w"); + await handle.writeFile(value, "utf8"); + await handle.sync(); + await handle.close(); + handle = null; + await rename(tempPath, path); + } catch (error) { + if (handle !== null) { + await handle.close().catch(() => undefined); + } + await rm(tempPath, { force: true }).catch(() => undefined); + throw error; + } +} + +export async function atomicWriteJson(path: string, value: unknown): Promise<void> { + await atomicWriteText(path, `${JSON.stringify(value, null, 2)}\n`); +} + +async function finalizePendingBatch(entries: PendingBatchEntry[]): Promise<void> { + for (const entry of entries) { + try { + await rename(entry.temp_path, entry.path); + } catch (error) { + const code = error instanceof Error && "code" in error ? String((error as { code?: unknown }).code ?? "") : ""; + if (code === "ENOENT") { + continue; + } + throw error; + } + } +} + +async function readPendingBatch(transactionPath: string): Promise<PendingBatchRecord | null> { + try { + return JSON.parse(await Bun.file(transactionPath).text()) as PendingBatchRecord; + } catch { + return null; + } +} + +export async function recoverPendingBatch(lockRoot: string): Promise<void> { + const transactionPath = join(lockRoot, TRANSACTION_FILE_NAME); + const pendingBatch = await readPendingBatch(transactionPath); + if (pendingBatch === null) { + return; + } + await finalizePendingBatch(pendingBatch.entries); + await rm(transactionPath, { force: true }); +} + +export async function atomicWriteBatch(lockRoot: string, entries: AtomicWriteEntry[]): Promise<void> { + const transactionPath = join(lockRoot, TRANSACTION_FILE_NAME); + await recoverPendingBatch(lockRoot); + + const pendingBatch: PendingBatchRecord = { + created_at: nowIso(), + entries: [], + }; + + for (const entry of entries) { + const tempPath = join(dirname(entry.path), `.${basename(entry.path)}.${process.pid}.${Date.now()}.${randomUUID()}.tmp`); + let handle: Awaited<ReturnType<typeof open>> | null = null; + try { + await ensureDir(dirname(entry.path)); + handle = await open(tempPath, "w"); + await handle.writeFile(entry.content, "utf8"); + await handle.sync(); + await handle.close(); + handle = null; + pendingBatch.entries.push({ + path: entry.path, + temp_path: tempPath, + }); + } catch (error) { + if (handle !== null) { + await handle.close().catch(() => undefined); + } + await rm(tempPath, { force: true }).catch(() => undefined); + throw error; + } + } + + await atomicWriteJson(transactionPath, pendingBatch); + + try { + await finalizePendingBatch(pendingBatch.entries); + await rm(transactionPath, { force: true }); + } catch (error) { + throw error; + } +} diff --git a/docker/worker-base.Dockerfile b/docker/worker-base.Dockerfile new file mode 100644 index 0000000..d6fdd8f --- 
/dev/null +++ b/docker/worker-base.Dockerfile @@ -0,0 +1,11 @@ +FROM oven/bun:1 + +WORKDIR /work/repo + +RUN mkdir -p /work/repo /work/state /work/artifacts /work/runtime-home /work/cache + +ENV HOME=/work/runtime-home/home +ENV XDG_CACHE_HOME=/work/cache +ENV BUN_INSTALL_CACHE_DIR=/work/cache/bun + +CMD ["bun", "--version"] diff --git a/docker/worker-builder.Dockerfile b/docker/worker-builder.Dockerfile new file mode 100644 index 0000000..f4db06c --- /dev/null +++ b/docker/worker-builder.Dockerfile @@ -0,0 +1,8 @@ +ARG CODINGCLAW_BASE_IMAGE=codingclaw-worker-base:phase1-local +FROM ${CODINGCLAW_BASE_IMAGE} + +WORKDIR /work/repo + +ENV CODINGCLAW_WORKER_ROLE=builder + +CMD ["bun", "--version"] diff --git a/docker/worker-qa.Dockerfile b/docker/worker-qa.Dockerfile new file mode 100644 index 0000000..3c03d0a --- /dev/null +++ b/docker/worker-qa.Dockerfile @@ -0,0 +1,8 @@ +ARG CODINGCLAW_BASE_IMAGE=codingclaw-worker-base:phase1-local +FROM ${CODINGCLAW_BASE_IMAGE} + +WORKDIR /work/repo + +ENV CODINGCLAW_WORKER_ROLE=qa + +CMD ["bun", "--version"] diff --git a/docs/ARCHITECTURE_OVERVIEW.md b/docs/ARCHITECTURE_OVERVIEW.md index ef80ef6..304a01c 100644 --- a/docs/ARCHITECTURE_OVERVIEW.md +++ b/docs/ARCHITECTURE_OVERVIEW.md @@ -19,11 +19,11 @@ This document describes the runtime architecture of CodingClaw and the boundarie |- Policy Guard |- Budget Guard |- Channel Adapters - |- GUI Exception Gate + |- GUI Runtime Gate | - +------> [GUI Exception Plane] - | |- Wuying Bridge - | |- Takeover Session + +------> [Local GUI Runtime Plane] + | |- Ubuntu Graphical Session + | |- Headed Automation Session | v [Coding Loop Kernel] @@ -34,8 +34,8 @@ This document describes the runtime architecture of CodingClaw and the boundarie | v [Execution Workers] - |- Builder Worker - |- QA Worker + |- Builder Worker (Claude Code) + |- QA Worker (Codex) |- Optional Review Worker | v @@ -84,11 +84,11 @@ This document describes the runtime architecture of CodingClaw and the boundarie - serve as the only long-lived memory source - support recovery after crash, timeout, or handoff -### GUI Exception Plane +### Local GUI Runtime Plane -- handle approved GUI-only interruptions -- bridge human takeover back into the control shell -- remain an exception path rather than a default execution surface +- run approved headed browser or desktop automation on the same Ubuntu host +- capture rendered evidence without a cloud desktop bridge +- allow governed local takeover only when automation cannot proceed ## Runtime Objects @@ -182,11 +182,11 @@ codingclaw/ Phase 1 ships a single-node topology: -- control shell on one host +- control shell on one Ubuntu host with a graphical session - local or same-host Docker workers - one active job at a time - one active story at a time -- GUI exception handling documented as a governed pause-and-takeover path +- local GUI automation available on the same host when the story requires a real browser or desktop surface - local artifact volume plus SQLite or Postgres metadata This keeps the first release small enough to verify end-to-end governance before scaling scheduler complexity. diff --git a/docs/BUILDER_CONTRACT.en.md b/docs/BUILDER_CONTRACT.en.md index 314be29..d51da6b 100644 --- a/docs/BUILDER_CONTRACT.en.md +++ b/docs/BUILDER_CONTRACT.en.md @@ -4,6 +4,8 @@ The Builder implements one approved story within the boundary of the active Contract Freeze. +Phase 1 binds the builder role to the Claude Code execution profile. 
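As an illustration only, this binding can be expressed as a small role-to-profile mapping at the adapter boundary. The sketch below is not part of this contract: the profile identifiers and the `resolveExecutionProfile` helper are assumptions, and `RunRole` is redeclared inline only to keep the snippet self-contained.

```ts
// Sketch only: illustrates the Phase 1 binding of run roles to execution profiles.
// The profile identifiers and this helper are illustrative assumptions, not contract surface.
type RunRole = "builder" | "qa" | "review";
type ExecutionProfile = "claude-code" | "codex";

function resolveExecutionProfile(runRole: RunRole): ExecutionProfile {
  if (runRole === "builder") {
    return "claude-code"; // Phase 1: builder runs through the Claude Code profile.
  }
  if (runRole === "qa") {
    return "codex"; // Phase 1: QA runs through the Codex profile.
  }
  throw new Error(`no Phase 1 execution profile is bound to run role: ${runRole}`);
}

// The worker images set CODINGCLAW_WORKER_ROLE, so a worker entrypoint could
// recover its profile from the environment in the same way.
const workerProfile = resolveExecutionProfile(
  (process.env.CODINGCLAW_WORKER_ROLE ?? "builder") as RunRole,
);
```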
+ ## Inputs The Builder reads: diff --git a/docs/DEPLOYMENT_PLAN.md b/docs/DEPLOYMENT_PLAN.md index c1b7e0b..fcef7b7 100644 --- a/docs/DEPLOYMENT_PLAN.md +++ b/docs/DEPLOYMENT_PLAN.md @@ -2,7 +2,7 @@ ## Goal -This document defines the recommended deployment path for CodingClaw from local development to a stable single-node production baseline. +This document defines the recommended deployment path for CodingClaw from local development to a stable single-node production baseline on one Ubuntu host with a graphical session. Official feasibility references for Docker, SQLite, and PostgreSQL are collected in [OFFICIAL_REFERENCE_NOTES.md](OFFICIAL_REFERENCE_NOTES.md). @@ -24,14 +24,15 @@ The control host runs: - approval queue manager - scheduler - budget and policy guards +- the local graphical session used by headed automation when a story requires a real GUI surface ### Worker Runtime Workers run in Docker with role-specific images: - `worker-base` -- `worker-builder` -- `worker-qa` +- `worker-builder` with the Claude Code builder profile +- `worker-qa` with the Codex QA profile Each worker mounts: @@ -84,16 +85,17 @@ Redis is optional and not required for the first release. ### Stage 3: Adapter Expansion -- Codex adapter +- Claude Code builder adapter +- Codex QA adapter - Aider adapter - richer capability manifests - optional review executor -### Stage 4: GUI Exception Integration +### Stage 4: Local GUI Automation Hardening -- Wuying bridge -- takeover packet flow -- assisted resume path +- host display bootstrap +- headed browser or desktop automation evidence capture +- takeover packet flow for exceptional local recovery only ## Environment Separation @@ -143,6 +145,6 @@ The following items are intentionally deferred beyond Phase 1: - multi-tenant job queues - horizontally scaled schedulers -- browser or GUI automation as a primary surface +- cloud desktop bridges or remote-assistance control planes - dashboard analytics - shared warm workers diff --git a/docs/DEVELOPMENT_PLAN_TEMPLATE.en.md b/docs/DEVELOPMENT_PLAN_TEMPLATE.en.md index 4dc1cd6..71fd766 100644 --- a/docs/DEVELOPMENT_PLAN_TEMPLATE.en.md +++ b/docs/DEVELOPMENT_PLAN_TEMPLATE.en.md @@ -44,7 +44,7 @@ This document defines the required section order for `DEVELOPMENT_PLAN.en.md`. - planned adapters - expected run roles -- any GUI exception expectation +- any local GUI automation expectation ### 8. Architecture Direction diff --git a/docs/EXECUTOR_ADAPTER_CONTRACT.md b/docs/EXECUTOR_ADAPTER_CONTRACT.md index b1d1ab9..a4cee0c 100644 --- a/docs/EXECUTOR_ADAPTER_CONTRACT.md +++ b/docs/EXECUTOR_ADAPTER_CONTRACT.md @@ -91,6 +91,11 @@ The envelope must include: Phase 1 implementations are required to support `builder` and `qa`. `review` remains optional until the review executor is enabled. +Phase 1 role binding is profile-specific: + +- builder should use a Claude Code adapter profile +- qa should use a Codex adapter profile + ## Task Packet Boundary `task-packet.en.json` must follow [TASK_PACKET_TEMPLATE.en.md](TASK_PACKET_TEMPLATE.en.md). @@ -201,4 +206,6 @@ Before a new adapter is accepted for live use, it must pass a consistency check Phase 1 should freeze this contract first, then implement: 1. one generic CLI adapter -2. later adapters such as Codex or Aider +2. one Claude Code builder adapter profile +3. one Codex QA adapter profile +4. 
later adapters such as Aider diff --git a/docs/GUI_EXCEPTION_POLICY.md b/docs/GUI_EXCEPTION_POLICY.md index 78657d8..d876fb9 100644 --- a/docs/GUI_EXCEPTION_POLICY.md +++ b/docs/GUI_EXCEPTION_POLICY.md @@ -2,20 +2,20 @@ ## Purpose -This policy defines when GUI handling is allowed and how it is governed. +This policy defines when local GUI automation is allowed and how it is governed. ## GUI Exception Definition -A GUI exception is a task step that cannot reasonably be completed through approved APIs, CLI tools, or non-interactive automation. +A GUI execution step is a task step that needs a real display, browser window, or desktop application and cannot reasonably be completed through pure CLI or API automation alone. ## Allowed Cases Allowed cases may include: +- headed browser automation on the Ubuntu host +- Linux desktop-only tooling - mandatory interactive login -- desktop-only tooling -- remote approval inside a managed desktop -- visual validation that requires a real GUI surface +- visual validation that requires a real rendered surface ## Disallowed Cases @@ -24,13 +24,15 @@ GUI handling must not be used to: - replace normal builder execution - bypass missing CLI automation that should be implemented - hide unlogged actions +- depend on a cloud desktop bridge for the normal execution path - avoid approval and evidence requirements ## Governance Rules -- GUI entry requires explicit approval -- the main loop must leave its active `RUNNING_*` state and enter `AWAITING_TAKEOVER` before takeover starts -- the human or assisted operator must record what changed +- in-scope local GUI automation may run automatically on the approved Ubuntu host when the active adapter profile declares the required capability +- manual takeover still requires explicit approval +- the main loop must leave its active `RUNNING_*` state and enter `AWAITING_TAKEOVER` before manual takeover starts +- the human operator must record what changed - resulting artifacts must be archived - the resumed loop must reference the takeover output @@ -38,10 +40,10 @@ GUI handling must not be used to: GUI-related interruptions should map to: -- `AWAITING_TAKEOVER` when waiting for takeover +- `AWAITING_TAKEOVER` when waiting for manual takeover - `AWAITING_APPROVAL` when approval is missing -- `FAILED_EXECUTION` or `FAILED_INFRA` when the desktop session fails +- `FAILED_EXECUTION` or `FAILED_INFRA` when the local GUI session fails ## Phase 1 Rule -Phase 1 treats GUI handling as documented policy only. Operational automation is deferred until the core loop is stable. +Phase 1 local execution records GUI and takeover contract artifacts only. Automated local GUI execution remains disabled until the active adapter profile explicitly enables browser and screenshot capabilities. Remote desktop orchestration remains out of scope. diff --git a/docs/LIGHTWEIGHT_RUNTIME_PLAN.md b/docs/LIGHTWEIGHT_RUNTIME_PLAN.md new file mode 100644 index 0000000..7348aeb --- /dev/null +++ b/docs/LIGHTWEIGHT_RUNTIME_PLAN.md @@ -0,0 +1,363 @@ +# Lightweight Runtime Plan + +## Purpose + +This document defines the minimum runtime hardening plan for CodingClaw. + +The target is a lightweight runtime with harder boundaries, not a bridge into another product shell and not a full fork of an external agent host. + +`SYSTEM_BLUEPRINT.md` remains the canonical top-level definition. This document is an implementation plan for the next runtime slice. 
+ +## Decision Summary + +CodingClaw should keep the current Phase 1 control shell and the existing builder to QA loop. + +The next runtime work should harden three boundaries: + +- state scope and atomic persistence +- adapter capability enforcement +- host-side shell and credential guards + +External reuse priority should be: + +- borrow TypeScript state path and planning gate ideas from oh-my-codex +- borrow capability, secret, and shell guard ideas from IronClaw +- do not import either product shell, team runtime, memory system, or orchestration stack + +## Why This Fits The Current Repository + +The repository already points in this direction: + +- [SYSTEM_BLUEPRINT.md](SYSTEM_BLUEPRINT.md) already defines control shell, files over memory, executor-agnostic adapters, and short-lived workers +- [ARCHITECTURE_OVERVIEW.md](ARCHITECTURE_OVERVIEW.md) already preserves the builder to QA loop and the single-node Phase 1 shape +- [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md) already defines a capability manifest boundary +- [SECURITY_POLICY.md](SECURITY_POLICY.md) already requires secret injection, log redaction, and approval interception + +This plan does not replace the current architecture. It tightens the boundaries the current architecture already claims. + +## What Already Exists + +The current codebase already has the right shell, but several boundaries are still declarative instead of enforced: + +- `core/loop/phase1-local-flow.ts` already owns the Phase 1 builder to QA control path +- `core/loop/state-store.ts` already mirrors archived state back into `state/` +- `adapters/generic-cli/adapter-capability.json` already declares capability intent +- `control/fixtures/phase1-local-task-packet.en.json` and `control/fixtures/phase1-local-run-envelope.json` already carry `requested_capabilities` +- `docs/SECURITY_POLICY.md` already states that secrets must not be written into task packets + +Current gaps: + +- `StateStore` writes directly, without an atomic write path or a write lock +- state has one root and one mirror, but no root, session, and run scope resolver +- shell and secret rules still exist mostly as policy text, not as host-side runtime guards +- local GUI automation remains a later profile enablement rather than an active Phase 1 runtime path + +## External Adoption Targets + +### Borrow From Oh My Codex + +Adopt the state boundary ideas, not the product shell: + +- session ID validation +- Windows and WSL path normalization +- working-directory allowlist checks before resolving state paths +- root scope plus session scope plus current-session fallback reads +- atomic file writes through temp file then rename +- write locking around state mutations + +Adopt the planning gate rules, not the planner prompt wording: + +- inspect the repository before asking the user for code facts +- use an adaptive step count instead of a fixed five-step template +- do not execute before explicit user approval + +### Borrow From IronClaw + +Adopt the security boundary ideas, not the Rust host shell: + +- default-deny capability objects +- missing capability sidecar means no permissions +- host-boundary credential injection, where tools never see secret values +- shell policy split into blocked commands, dangerous patterns, never-auto-approve patterns, and safe environment allowlist + +## Target Runtime Shape + +```text +[Control Shell] + | + +--> [Planner Gate] + | + +--> [State Scope Resolver] + | |- root scope + | |- session scope + | `- run scope + | + +--> [Task Packet + Run 
Envelope] + | + `--> [Adapter Capability Gate] + | + +--> [shell-policy] + +--> [credential-injector] + `--> [Generic CLI Worker] + | + +--> Builder + `--> QA +``` + +The lightweight runtime should keep the current archive-first model: + +- canonical job state remains under `jobs//state/` +- latest live mirror remains under repository `state/` +- session-local state becomes optional and isolated +- run-local state becomes archived and replayable + +## State Scope Model + +The new state model should define three scopes: + +### Root Scope + +Canonical job state under `jobs//state/`. + +This remains the only authoritative control-shell state for: + +- `progress.en.md` +- `story-queue.json` +- `active-story.json` +- `handoff.en.md` +- `risk-register.en.md` +- `loop-metrics.json` +- `decisions.en.md` +- `trace-index.json` + +### Session Scope + +Optional adapter session state under `jobs//state/sessions//`. + +This scope should exist only for executor-specific resumability and should never redefine contract scope. + +Read behavior should support: + +- explicit session ID +- current session fallback from `jobs//state/current-session.json` +- fallback to root scope when no session state exists + +### Run Scope + +Archived run-local state under `artifacts/runs//metadata/state/`. + +This scope should hold transient state snapshots needed for replay, recovery, and forensic review. + +### Resolution Rules + +- path resolution must normalize Windows and WSL forms before validation +- resolved paths must stay inside the allowed job root +- session IDs must use a strict safe pattern +- the control shell may promote selected session or run outputs back into root scope +- workers may read scoped state, but they must not rewrite control-shell canonical files directly + +## Required Modules + +The minimum useful implementation should add these modules: + +- `core/loop/state-scope.ts` +- `core/loop/state-write.ts` +- `adapters/generic-cli/capability-gate.ts` +- `ops/guards/shell-policy.ts` +- `ops/guards/credential-injector.ts` + +Responsibilities: + +- `state-scope.ts`: resolve root, session, and run paths, validate session IDs, normalize Windows and WSL paths, and provide read scope order +- `state-write.ts`: provide write lock and atomic write primitives +- `capability-gate.ts`: load adapter capabilities, intersect them with requested capabilities, and reject undeclared actions by default +- `shell-policy.ts`: classify commands and environment variables before execution +- `credential-injector.ts`: translate secret handles and host allowlists into host-side request injection without exposing raw secrets to workers + +## Existing File Changes + +The first implementation pass should update only the following existing files: + +- `core/loop/state-store.ts` +- `core/loop/phase1-local-flow.ts` +- `adapters/generic-cli/adapter.ts` +- `adapters/generic-cli/adapter-capability.json` +- `core/contracts/types.ts` + +The goal is to keep the diff narrow and preserve the current Phase 1 control path. + +## Capability Enforcement Rules + +Capability handling should become executable policy, not metadata decoration. 
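A minimal sketch of that default-deny intersection is shown here, ahead of the concrete rules the gate must enforce below. The type names and the `evaluateRequestedCapabilities` helper are illustrative assumptions; the real inputs are the `adapter-capability.json` allow entries and the `requested_capabilities` carried by the run fixtures.

```ts
// Illustrative sketch of a default-deny capability intersection; not the shipped capability-gate.ts.
type CapabilityMode = "allow" | "deny";

interface CapabilityDeclaration {
  mode: CapabilityMode;
}

type CapabilityManifest = Record<string, CapabilityDeclaration | undefined>;

interface GateDecision {
  allowed: string[];
  denied: string[];
}

function evaluateRequestedCapabilities(
  manifest: CapabilityManifest,
  requestedCapabilities: string[],
): GateDecision {
  const allowed: string[] = [];
  const denied: string[] = [];
  for (const capability of requestedCapabilities) {
    const declaration = manifest[capability];
    // A missing manifest entry and an explicit deny entry are treated the same way: deny.
    if (declaration === undefined || declaration.mode !== "allow") {
      denied.push(capability);
    } else {
      allowed.push(capability);
    }
  }
  return { allowed, denied };
}
```

Under this shape, any denied capability fails the run before the worker is launched instead of degrading silently.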
+ +The adapter gate should enforce: + +- effective capability set equals `adapter-capability.json` allow entries intersected with `requested_capabilities` +- any missing capability entry is deny +- any denied capability request fails before worker launch +- any privileged capability outside the approved profile returns a standard approval or policy failure path + +The first enforced capabilities should be: + +- `shell_command` +- `container_control` +- `secret_injection` +- `interactive_approval` + +## Shell And Secret Guard Rules + +### Shell Policy + +The shell guard should run before any worker command is launched. + +It should evaluate: + +- exact blocked commands +- dangerous substrings or command patterns +- commands that can never be auto-approved +- environment variable allowlist + +Phase 1 does not need a perfect shell sandbox. It needs a host-side deny path for obvious unsafe commands and obvious secret exfiltration attempts. + +### Credential Injection + +Secrets must never enter: + +- `task-packet.en.json` +- run envelopes +- archived reports +- worker stdout or stderr + +The task packet should carry only: + +- secret handle +- credential alias +- allowed host patterns +- injection mode metadata + +The host should resolve the secret and inject it only at the outgoing request boundary. + +## Planner Gate Follow-Up + +Planner hardening is a control-shell follow-up, not the first runtime blocker. + +When the live planner role is implemented, it should inherit three rules: + +- repo inspection before user questions about code facts +- scope-matched step count +- explicit approval before execution handoff + +This is a policy borrow from oh-my-codex, not a prompt import project. + +## Implementation Sequence + +### Step 1 + +Add state scope resolution plus atomic state writes. + +Acceptance: + +- root, session, and run scope paths resolve deterministically +- direct state writes are replaced by atomic write helpers +- concurrent writes cannot truncate or partially overwrite canonical state files + +### Step 2 + +Add default-deny capability enforcement in `GenericCliAdapter`. + +Acceptance: + +- adapter launch fails when a requested capability is undeclared or denied +- capability checks happen before worker start +- failure surfaces through the existing run status vocabulary + +### Step 3 + +Add shell policy and credential injection guards. + +Acceptance: + +- unsafe shell commands are blocked before launch +- environment scrubbing is enforced for worker execution +- secret handles can be resolved and injected without writing secret values into artifacts + +### Step 4 + +Wire planner gate rules into the future live planner implementation. 
+ +Acceptance: + +- planner flow inspects the repo before asking for code facts +- plan length matches actual scope +- no execution starts without owner approval + +## Collaboration Model + +The work can be split into three lanes: + +| Lane | Modules Touched | Depends On | +|------|-----------------|------------| +| A | `core/loop/` | - | +| B | `ops/guards/` | - | +| C | `adapters/generic-cli/`, `core/contracts/` | A, B | + +Recommended execution order: + +- launch Lane A and Lane B in parallel +- merge both +- implement Lane C after the state and guard interfaces are stable + +Conflict notes: + +- Lane C touches adapter launch and will likely need the final guard interfaces from Lane B +- Lane A should avoid changing archive layout semantics beyond scoped state additions + +## Failure Modes To Design For + +| Codepath | Production Failure | Test Need | Error Handling Need | User Outcome | +|----------|--------------------|-----------|---------------------|--------------| +| state write | partial file write on crash | yes | yes | explicit failure, never silent corruption | +| state resolve | path escape or invalid session ID | yes | yes | fail closed | +| capability gate | undeclared capability still executes | yes | yes | fail before worker launch | +| shell policy | dangerous command bypasses deny list | yes | yes | approval or policy failure | +| credential injection | secret leaks into packet or logs | yes | yes | policy failure and redacted evidence | + +Any implementation that leaves a silent secret leak or a silent capability bypass is below the minimum bar. + +## Not In Scope + +This plan explicitly excludes: + +- bridging CodingClaw into oh-my-codex runtime +- forking IronClaw as the new host shell +- tmux, HUD, multi-agent team runtime, or worktree orchestration from oh-my-codex +- WASM tool ecosystems, channels, routines, web gateways, or long-lived memory layers from IronClaw +- multi-job concurrency or multi-adapter parallel execution +- browser QA expansion, large GUI automation expansion, or remote desktop vendor integration + +## Source Basis + +Internal references: + +- [SYSTEM_BLUEPRINT.md](SYSTEM_BLUEPRINT.md) +- [ARCHITECTURE_OVERVIEW.md](ARCHITECTURE_OVERVIEW.md) +- [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md) +- [SECURITY_POLICY.md](SECURITY_POLICY.md) + +External references: + +- oh-my-codex state scope source: https://github.com/Yeachan-Heo/oh-my-codex/blob/main/src/mcp/state-paths.ts +- oh-my-codex atomic state write source: https://github.com/Yeachan-Heo/oh-my-codex/blob/main/src/mcp/state-server.ts +- oh-my-codex planner prompt source: https://github.com/Yeachan-Heo/oh-my-codex/blob/main/prompts/planner.md +- IronClaw capability source: https://github.com/nearai/ironclaw/blob/staging/src/tools/wasm/capabilities.rs +- IronClaw capability loader source: https://github.com/nearai/ironclaw/blob/staging/src/tools/wasm/loader.rs +- IronClaw credential injection source: https://github.com/nearai/ironclaw/blob/staging/src/tools/wasm/credential_injector.rs +- IronClaw shell policy source: https://github.com/nearai/ironclaw/blob/staging/src/tools/builtin/shell.rs + +## Related Documents + +- [README.md](README.md) +- [STATE_STORE_SPEC.md](STATE_STORE_SPEC.md) +- [LOOP_SPEC.md](LOOP_SPEC.md) +- [STATUS_MODEL.md](STATUS_MODEL.md) +- [UBUNTU_GUI_RUNTIME_PLAN.md](UBUNTU_GUI_RUNTIME_PLAN.md) diff --git a/docs/LOOP_SPEC.md b/docs/LOOP_SPEC.md index 954b0d6..42803f0 100644 --- a/docs/LOOP_SPEC.md +++ b/docs/LOOP_SPEC.md @@ -72,7 +72,7 @@ The task packet must: - 
the loop launches only one role at a time - the worker may be builder, QA, or review -- Phase 1 requires builder and QA only. Review remains optional until a later phase enables it. +- Phase 1 requires builder and QA only. Builder is expected to run through Claude Code, QA through Codex. Review remains optional until a later phase enables it. - workers must return standard output objects and a standard exit status - unapproved privileged actions must interrupt execution and return approval-needed status @@ -156,4 +156,5 @@ After each run, the loop must update: - fixback should usually remain within the same story - fixback must not silently expand scope -- Phase 1 should cap fixback at 2 or 3 rounds per story +- the current Phase 1 local slice stops at `FIXBACK_PENDING` and requires an explicit next-step decision before another run is scheduled +- an automated fixback retry ceiling applies only after multi-round fixback scheduling is implemented diff --git a/docs/OFFICIAL_REFERENCE_NOTES.md b/docs/OFFICIAL_REFERENCE_NOTES.md index 589a5f1..bf70595 100644 --- a/docs/OFFICIAL_REFERENCE_NOTES.md +++ b/docs/OFFICIAL_REFERENCE_NOTES.md @@ -13,8 +13,8 @@ Repository documents remain normative. External documents in this file are suppo - Docker volumes fit cache or container-owned persistent data better than additional bind mounts. - SQLite is a reasonable Phase 1 metadata store only for single-node, local metadata with low write concurrency. - PostgreSQL is the correct upgrade path once the system needs multi-process coordination, multi-client access, or higher write concurrency. -- Wuying is feasible as a governed GUI exception and human takeover surface. -- The current Wuying Phase 1 strategy remains correct: document the handoff path first, do not depend on undocumented automation interfaces. +- The supported Phase 1 GUI surface is a single Ubuntu host with a local graphical session. +- The system should not depend on a cloud desktop vendor or remote-assistance bridge for normal automation. - The executor-agnostic adapter contract is feasible, but capability declarations must stay profile-specific rather than assume one universal tool surface. ## Docker Worker Runtime @@ -45,19 +45,11 @@ Repository documents remain normative. External documents in this file are suppo - `DEPLOYMENT_PLAN.md`: PostgreSQL MVCC and advisory locks make it a better fit for scheduler coordination, queue ownership, and other multi-process control-plane workflows. References: [MVCC Introduction](https://www.postgresql.org/docs/current/mvcc-intro.html), [Explicit Locking](https://www.postgresql.org/docs/current/explicit-locking.html) -## Wuying GUI Exception Plane +## Local Ubuntu GUI Runtime Baseline -- `WUYING_INTEGRATION_PLAN.md`, `GUI_EXCEPTION_POLICY.md`: Wuying is positioned by Alibaba Cloud as desktop-as-a-service for end users, not as a generic server control plane. - Reference: [What is Elastic Desktop Service](https://help.aliyun.com/zh/wuying-workspace/product-overview/what-is-elastic-desktop-service) +- `UBUNTU_GUI_RUNTIME_PLAN.md`, `GUI_EXCEPTION_POLICY.md`, `DEPLOYMENT_PLAN.md`: the repository design keeps the control shell, workers, artifacts, and headed GUI surface on one Ubuntu host so audit paths and evidence paths stay local and deterministic. -- `WUYING_INTEGRATION_PLAN.md`, `TAKEOVER_FLOW.md`: remote assistance supports an approval-and-accept flow that matches a governed takeover path better than a silent automation path. 
- Reference: [Use remote assistance and collaboration session](https://help.aliyun.com/zh/wtc/user-guide/use-remote-assitance-and-collaboration-session) - -- `WUYING_INTEGRATION_PLAN.md`, `SECURITY_POLICY.md`: Wuying login and access controls support SSO, MFA, client validation, and organization-scoped access control. - References: [Certification overview](https://help.aliyun.com/zh/wuying-workspace/user-guide/certification-overview), [Web client](https://help.aliyun.com/zh/wtc/user-guide/web-client) - -- `WUYING_INTEGRATION_PLAN.md`: the Web client is a convenient access path but has usage limits such as internet-only access and no local disk mapping, which supports keeping it as an exception surface rather than the default coding environment. - Reference: [Web client](https://help.aliyun.com/zh/wtc/user-guide/web-client) +- `TAKEOVER_FLOW.md`: human takeover remains a fallback path for blocked interactive steps. It is not the normal execution surface and does not require a cloud desktop vendor to exist. ## Executor And Adapter Boundary @@ -73,5 +65,5 @@ Repository documents remain normative. External documents in this file are suppo ## Limits Of External Proof - No official Docker, SQLite, PostgreSQL, Alibaba Cloud, or OpenAI document defines CodingClaw's freeze contract, approval card schema, artifact archive structure, or trace index schema. -- No official Wuying document found in this review directly proves a stable native API for the full `takeover packet -> resume semantics` workflow defined by this repository. +- No official external document in this review defines the full `takeover packet -> resume semantics` workflow or the exact local GUI orchestration rules used by this repository. - These governance and audit objects remain internal platform design decisions. They are feasible, but their correctness must be validated by implementation and integration tests, not by vendor documentation alone. diff --git a/docs/QA_CONTRACT.en.md b/docs/QA_CONTRACT.en.md index 861351a..6ac4d50 100644 --- a/docs/QA_CONTRACT.en.md +++ b/docs/QA_CONTRACT.en.md @@ -4,6 +4,8 @@ The QA executor validates that a builder output is reproducible, inside scope, and supported by evidence. +Phase 1 binds the QA role to the Codex execution profile. 
+ ## Inputs QA reads: diff --git a/docs/README.md b/docs/README.md index ce5d078..236228d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -32,6 +32,7 @@ Start with these documents in order: - [LOOP_SPEC.md](LOOP_SPEC.md) - [STATE_STORE_SPEC.md](STATE_STORE_SPEC.md) +- [LIGHTWEIGHT_RUNTIME_PLAN.md](LIGHTWEIGHT_RUNTIME_PLAN.md) - [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md) - [PLANNER_CONTRACT.en.md](PLANNER_CONTRACT.en.md) - [BUILDER_CONTRACT.en.md](BUILDER_CONTRACT.en.md) @@ -47,9 +48,9 @@ Start with these documents in order: - [CHECKSUM_POLICY.md](CHECKSUM_POLICY.md) - [JOB_MANIFEST_SCHEMA.md](JOB_MANIFEST_SCHEMA.md) -## Takeover And Wuying +## GUI Runtime And Takeover -- [WUYING_INTEGRATION_PLAN.md](WUYING_INTEGRATION_PLAN.md) +- [UBUNTU_GUI_RUNTIME_PLAN.md](UBUNTU_GUI_RUNTIME_PLAN.md) - [TAKEOVER_FLOW.md](TAKEOVER_FLOW.md) - [TAKEOVER_PACKET_TEMPLATE.en.md](TAKEOVER_PACKET_TEMPLATE.en.md) diff --git a/docs/SYSTEM_BLUEPRINT.md b/docs/SYSTEM_BLUEPRINT.md index 438a6b9..0c2ec9e 100644 --- a/docs/SYSTEM_BLUEPRINT.md +++ b/docs/SYSTEM_BLUEPRINT.md @@ -41,7 +41,7 @@ CodingClaw is not: ### Control Shell -The control shell accepts Chinese commands, normalizes requirements, manages approval gates, budgets, policy guards, and dispatches loop work through adapters governed by [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md). +The target control shell accepts Chinese commands, normalizes requirements, manages approval gates, budgets, policy guards, and dispatches loop work through adapters governed by [EXECUTOR_ADAPTER_CONTRACT.md](EXECUTOR_ADAPTER_CONTRACT.md). The current Phase 1 local slice exposes a repository-invoked `bun run phase1` proof command instead of the live intake surface. ### Coding Loop Kernel @@ -55,9 +55,9 @@ Workers run in isolated environments. Builder produces implementation artifacts. This plane stores all long-lived state, reports, logs, evidence, sessions, checksums, and manifests required for replay and audit. -### GUI Exception Plane +### Local GUI Runtime Plane -Aliyun Wuying Desktop is reserved for GUI-only tasks, human takeover, and assisted recovery. It is not a primary coding surface. +A single Ubuntu host with a graphical session is the planned GUI execution surface. The current Phase 1 local slice records takeover contracts and waiting states, but it does not yet enable live headed browser or desktop automation in the active adapter profile. ## Mandatory Lifecycle @@ -71,7 +71,7 @@ INTAKE -> STORY_QUEUE_READY -> BUILD_EXECUTION -> QA_VALIDATION - -> FIXBACK(optional) + -> FIXBACK(optional, manual in the current Phase 1 local slice) -> FINAL_APPROVAL(optional) -> ARCHIVE ``` @@ -122,11 +122,12 @@ At archive finalization: Phase 1 is the minimum working product. 
It includes: -- one Chinese mobile entry channel +- one repository-invoked local control command for the fixed proof slice - one control shell - one generic CLI adapter -- one builder worker -- one QA worker +- one Claude Code builder worker +- one Codex QA worker +- takeover packet and waiting-state support for future local GUI execution, without live browser or desktop automation yet - contract binding to `base_commit` - traceability from story to acceptance to QA verdict - local artifact archival and checksums @@ -138,7 +139,9 @@ Phase 1 excludes: - multi-channel concurrency - multi-adapter parallel execution - mandatory review executor in the live path -- Wuying automation +- live Chinese mobile intake +- live local browser or desktop automation +- cloud desktop bridges or vendor-specific remote desktop orchestration - dashboards - historical job reuse - production-scale multi-tenant scheduling @@ -158,5 +161,5 @@ Phase 1 excludes: - [APPROVAL_CARD_SPEC.md](APPROVAL_CARD_SPEC.md) - [REVIEW_CONTRACT.en.md](REVIEW_CONTRACT.en.md) - [ARTIFACT_LAYOUT_SPEC.md](ARTIFACT_LAYOUT_SPEC.md) -- [WUYING_INTEGRATION_PLAN.md](WUYING_INTEGRATION_PLAN.md) +- [UBUNTU_GUI_RUNTIME_PLAN.md](UBUNTU_GUI_RUNTIME_PLAN.md) - [TAKEOVER_PACKET_TEMPLATE.en.md](TAKEOVER_PACKET_TEMPLATE.en.md) diff --git a/docs/TAKEOVER_FLOW.md b/docs/TAKEOVER_FLOW.md index f5af52a..ebaf06f 100644 --- a/docs/TAKEOVER_FLOW.md +++ b/docs/TAKEOVER_FLOW.md @@ -2,15 +2,14 @@ ## Purpose -This document defines the controlled handoff from automated execution to human takeover. +This document defines the controlled handoff from automated execution to human takeover when local automation on the Ubuntu host cannot proceed. ## Trigger Conditions Takeover may be triggered when: -- a GUI-only step blocks progress +- a local GUI step blocks progress after normal automation has been attempted - credentials require an interactive login -- a Windows-only tool is required - policy demands human confirmation inside a live UI ## Standard Flow @@ -20,8 +19,8 @@ DETECT_GUI_EXCEPTION -> PAUSE_MAIN_LOOP -> PREPARE_TAKEOVER_PACKET -> ISSUE_APPROVAL_CARD - -> OPEN_WUYING_BRIDGE - -> HUMAN_OR_ASSISTED_ACTION + -> OPEN_LOCAL_GUI_SESSION + -> HUMAN_ACTION -> CAPTURE_RESULT -> WRITE_HANDOFF -> RESUME_OR_TERMINATE diff --git a/docs/TAKEOVER_PACKET_TEMPLATE.en.md b/docs/TAKEOVER_PACKET_TEMPLATE.en.md index 51d7cb1..10ada30 100644 --- a/docs/TAKEOVER_PACKET_TEMPLATE.en.md +++ b/docs/TAKEOVER_PACKET_TEMPLATE.en.md @@ -2,7 +2,7 @@ ## Purpose -`takeover-packet.en.md` is the standard human handoff document for a governed GUI or interactive interruption. +`takeover-packet.en.md` is the standard human handoff document for a governed local GUI or interactive interruption. ## Archive Location diff --git a/docs/UBUNTU_GUI_RUNTIME_PLAN.md b/docs/UBUNTU_GUI_RUNTIME_PLAN.md new file mode 100644 index 0000000..ec544da --- /dev/null +++ b/docs/UBUNTU_GUI_RUNTIME_PLAN.md @@ -0,0 +1,70 @@ +# Ubuntu GUI Runtime Plan + +## Purpose + +This document defines how CodingClaw runs on a single Ubuntu host with a local graphical session. + +Supporting feasibility notes for this positioning are collected in [OFFICIAL_REFERENCE_NOTES.md](OFFICIAL_REFERENCE_NOTES.md). + +## Positioning + +The Ubuntu GUI host is the planned graphical execution surface. It exists for: + +- fully automated headed browser workflows +- fully automated Linux desktop tooling +- screenshot and rendered evidence capture +- exceptional local takeover when automation is blocked + +## Preferred Order of Use + +1. 
API or CLI automation +2. local headed browser or desktop automation on the Ubuntu host +3. local human takeover + +## Integration Components + +The integration should define: + +- local display and session bootstrap +- builder launch with the Claude Code profile +- QA launch with the Codex profile +- takeover packet generation following `TAKEOVER_PACKET_TEMPLATE.en.md` +- secure local access handoff when takeover is required +- result collection +- resume signal back into the control shell + +## Control Rules + +- the supported deployment target is one Ubuntu host with a graphical session +- standard in-scope local GUI automation may run automatically only after the active adapter profile declares and enables the required capability +- builder uses Claude Code and QA uses Codex in Phase 1 +- main loop execution must transition into `AWAITING_TAKEOVER` during manual takeover unless the task is explicitly parallel-safe +- all takeover results must be written back under `artifacts/runs//takeover/` and referenced by handoff and manifest records + +## Phase Plan + +### Phase 1 + +- one Ubuntu host with a graphical session may be prepared for later runtime rollout +- takeover packet generation and archive path for blocked GUI work +- no live browser or desktop automation in the active adapter profile yet +- no dependency on a cloud desktop vendor + +### Phase 2 + +- automated builder flow through Claude Code +- automated QA flow through Codex +- local headed browser or GUI execution when a story requires a real rendered surface +- hardened host display bootstrap +- stable resume semantics + +### Phase 3 + +- broader local desktop tooling support +- stronger evidence capture for local GUI flows + +## Non-Goals + +- depending on a cloud desktop bridge +- depending on Windows-only tools for normal execution +- replacing auditable local automation with manual remote sessions diff --git a/docs/WUYING_INTEGRATION_PLAN.md b/docs/WUYING_INTEGRATION_PLAN.md deleted file mode 100644 index 3595896..0000000 --- a/docs/WUYING_INTEGRATION_PLAN.md +++ /dev/null @@ -1,64 +0,0 @@ -# Wuying Integration Plan - -## Purpose - -This document defines how Aliyun Wuying Desktop fits into CodingClaw as a GUI exception and human takeover surface. - -Official feasibility references for this positioning are collected in [OFFICIAL_REFERENCE_NOTES.md](OFFICIAL_REFERENCE_NOTES.md). - -## Positioning - -Wuying is not the primary builder environment. It exists for: - -- GUI-only workflows -- Windows-only tools -- human takeover -- remote assistance - -## Preferred Order of Use - -1. API or management SDK control -2. web bridge access -3. human takeover -4. 
assisted GUI operation - -## Integration Components - -The integration should define: - -- Wuying session provisioning or lookup -- takeover packet generation following `TAKEOVER_PACKET_TEMPLATE.en.md` -- secure access handoff -- result collection -- resume signal back into the control shell - -## Control Rules - -- Wuying access must be explicitly approved -- main loop execution must transition into `AWAITING_TAKEOVER` during takeover unless the task is explicitly parallel-safe -- all takeover results must be written back under `artifacts/runs//takeover/` and referenced by handoff and manifest records - -## Phase Plan - -### Phase 1 - -- no active Wuying automation -- only reserve document hooks and policy definitions -- keep Wuying limited to approved takeover and remote assistance paths -- do not depend on undocumented Wuying-native orchestration for resume control - -### Phase 2 - -- managed takeover packet format and archive path -- stable resume semantics - -### Phase 3 - -- controlled bridge integration -- assisted GUI support - -## Non-Goals - -- using Wuying as the default coding environment -- replacing Docker workers with desktop sessions -- turning GUI automation into the main execution path diff --git a/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.md b/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.md new file mode 100644 index 0000000..a980f32 --- /dev/null +++ b/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.md @@ -0,0 +1,395 @@ +# Team Workflow Migration Plan + +## Purpose + +This document defines the recommended path to move CodingClaw from a prompt2repo-style solo development workflow into a team collaboration workflow without breaking the current Phase 1 kernel. + +The target is not to replace the current control shell and loop model with an external multi-agent runtime. The target is to preserve the current contract-first, artifact-first, short-loop kernel and add a collaboration shell around it for issue intake, worktree isolation, PR review, merge governance, and deployment. + +## Executive Decision + +CodingClaw should adopt a two-layer operating model: + +- keep the current `Development Plan -> Contract Freeze -> one-story loop -> Builder -> QA -> archive` kernel as the execution core +- add a GitHub-centered collaboration shell around the kernel for issue routing, worktree isolation, branch governance, PR review, merge control, and deployment gates +- run two independent CodingClaw deployments when team scale or trust boundaries require it +- allow a `codingclaw-custom` fork for company-specific workflow policy, private adapters, internal agent systems, and deployment rules + +This is the best fit because the current repository explicitly keeps the loop kernel small and auditable, and explicitly avoids importing an external product shell, team runtime, memory layer, or orchestration stack into the core runtime. + +## Current-State Findings + +### Boundaries To Preserve + +- `docs/SYSTEM_BLUEPRINT.md` defines non-negotiable operating rules: plan before code, freeze before execution, one story per loop, files over memory, Builder and QA separation, and English repository-facing deliverables. +- `docs/LOOP_SPEC.md` defines a single-story, short-lived, auditable loop and does not allow one run to merge multiple independent stories. +- `docs/LIGHTWEIGHT_RUNTIME_PLAN.md` explicitly says CodingClaw should borrow ideas from oh-my-codex and IronClaw, but must not import their full shell, team runtime, or orchestration stack into the core runtime. 
+- `docs/DEPLOYMENT_PLAN.md` keeps the supported runtime small: one control host, Docker workers, one Ubuntu GUI surface, and no distributed complexity in Phase 1. + +### Existing Assets To Reuse + +- `.github/ISSUE_TEMPLATE/` already provides structured issue intake. +- `.github/pull_request_template.md` already asks for scope, contract impact, verification, evidence, and risks. +- `.github/workflows/verify.yml` already provides a base required verification workflow. +- `.github/workflows/enforce-repo-gate.yml` and `scripts/github_repo_gate.py` already bootstrap repository protection. +- `docs/REVIEW_CONTRACT.en.md` already defines an independent review role and can be promoted into a real PR review lane. +- `state/`, `task-packet`, `run-result`, `artifact-index`, `handoff`, and checksum conventions already provide the audit substrate required for team work. + +### Gaps To Close + +- repository protection is still closer to solo development than team governance because `scripts/github_repo_gate.py` does not require PR reviews or CODEOWNERS approval +- the live path still ends at QA and does not yet provide a first-class review, merge, or deploy lane +- the current runtime plan still treats atomic state writes, scoped state, capability enforcement, and shell/credential guards as hardening work rather than mandatory team-concurrency controls +- there is no first-class contract for `issue -> branch -> PR -> review -> merge -> deploy` + +## External Research Summary + +### Oh My Codex + +- the public documentation positions `Ralph` as the persistence loop that continues until completion and architect verification +- the public documentation positions `Team` as a conductor-led execution layer with explicit planning, execution, verification, and fix phases +- the public documentation recommends isolated worktree-based team execution and a workflow that handles parallel issues through separate branches and PRs + +Implication: + +- CodingClaw should use `Ralph` as the close-out loop for one approved story +- CodingClaw should use team or worktree-based parallelism outside the kernel, not inside one loop run + +### GitHub + +- rulesets and protected branches are the correct place for branch interaction policy, required checks, review requirements, linear history, and merge restrictions +- CODEOWNERS is the correct path-based ownership model, and owner review only works when the file exists on the PR base branch +- merge queue is the correct solution once multiple PRs contend for the same protected branch, and CI must support `merge_group` +- reusable workflows are the correct abstraction for repeatable multi-job PR, verify, and deploy lanes +- self-hosted runners are the correct boundary when the team needs custom runtime, internal network access, or heavyweight agent systems +- deployment environments are the correct boundary for staged secrets, required reviewers, and promotion gates +- fork PRs are the correct collaboration boundary for a public upstream plus a private company fork, but workflow and secrets changes from forks must be treated as high risk + +Implication: + +- CodingClaw should put team policy into GitHub repository governance instead of trying to encode all collaboration logic inside the loop runtime + +## Recommended Target Operating Model + +## 1. 
Kernel And Shell Split + +Keep the current CodingClaw kernel unchanged in responsibility: + +- intake normalization +- development plan approval +- contract freeze binding to `base_commit` +- one-story execution +- Builder execution +- QA validation +- artifact archival + +Move team collaboration concerns into a separate shell: + +- issue triage +- story slicing +- worktree creation +- branch naming +- PR creation +- review assignment +- merge queue entry +- environment promotion +- upstream or fork synchronization + +This split preserves current repository intent and avoids mixing prompt execution concerns with organization-level workflow governance. + +## 2. Default Work Unit + +The default unit of team work should be: + +- one GitHub issue +- one story queue derived from that issue when the issue is larger than one story +- one approved story per execution branch +- one isolated worktree per active story +- one freeze, one run, and one artifact set per story +- one PR per story by default + +The loop kernel must still execute one story at a time. Team throughput comes from multiple isolated worktrees and PRs in parallel, not from enlarging one loop. If one issue expands into multiple stories, split it before execution and let each story run through its own freeze, run, artifacts, review, and merge path. + +## 3. Recommended Branch Topology + +Use this structure by default in `codingclaw-custom`: + +- `main`: release and archive branch, strongest protection, merge queue enabled +- `dev`: integration branch for team issue PRs, required verification, required review, no direct pushes +- `story/-`: short-lived branch created from `dev` +- `hotfix/-`: emergency fixes, merged back into `main` and `dev` +- `sync/upstream-`: short-lived branch used only to bring upstream changes into `codingclaw-custom` + +Why this topology: + +- `dev` absorbs frequent small-scope issue PRs without destabilizing `main` +- `main` remains the promotion target with the strictest release and deploy gates +- upstream sync remains explicit and reviewable + +Use this structure in the public upstream by default: + +- `main`: the only long-lived branch, strongly protected +- short-lived feature or fix branches only + +Add `dev` to the public upstream only if upstream PR concurrency becomes high enough to justify a dedicated integration branch and merge queue. + +## 4. 
Recommended Daily Workflow + +### Intake And Planning + +- open a GitHub issue using the existing issue forms +- convert the issue into a story queue when necessary +- execute only one approved story per branch and per loop +- produce or update `DEVELOPMENT_PLAN.en.md` +- approve the plan +- generate or update `CONTRACT_FREEZE.en.md` and freeze artifacts + +### Implementation + +- create a dedicated worktree from `dev` for one approved story +- run `Ralph` for that single story scope +- keep all local execution evidence attached to the run artifact set +- require the worktree to stay single-purpose + +### Review + +- open a PR from `story/-` into `dev` +- require `verify` +- require at least one independent review +- require CODEOWNERS approval for touched paths +- run an explicit review lane based on `docs/REVIEW_CONTRACT.en.md` + +### Merge And Promotion + +- merge into `dev` through merge queue +- run integration verification on `dev` +- promote from `dev` to `main` through a release PR +- require deployment environment approval before production release + +### Handoff And Recovery + +- if work pauses mid-issue, checkpoint the run state and preserve the artifact trail +- if takeover is required, use the existing takeover packet and approval card model instead of ad hoc chat handoff + +## 5. Oh My Codex Role Mapping + +Use oh-my-codex outside the kernel in this shape: + +- `ralplan`: convert issue scope into a bounded execution plan when the issue is ambiguous or cross-cutting +- `team`: coordinate multiple parallel issues or multiple bounded lanes when the work naturally splits +- `ralph`: drive one story-scoped branch until implementation, verification, and architect-style sign-off are complete +- `review`: run a pre-landing review pass against the PR diff +- `ship`: push branch and create the PR +- `checkpoint`: preserve and resume team worktrees cleanly during interruptions +- `trace`: inspect multi-agent execution history when a run or review lane becomes hard to explain + +Operating rule: + +- `Ralph` owns completion for one approved story +- `Team` owns parallelism across issues +- GitHub owns merge and deploy governance + +## 6. GitHub Governance Changes + +### Repository Rules + +Replace the current minimal gate with branch rules or rulesets that enforce at least: + +- required status checks +- required conversation resolution +- required linear history +- force-push disabled +- branch deletion disabled +- required pull request before merge +- at least one approving review and required CODEOWNERS review on `dev` +- at least two approvals and required CODEOWNERS review on `main` +- stale review dismissal on new commits +- CODEOWNERS review on owned paths + +`scripts/github_repo_gate.py` can remain as a bootstrap script, but it should no longer be the only policy surface. + +### CODEOWNERS + +Add a real `.github/CODEOWNERS` file and map at least: + +- `core/` to loop kernel owners +- `control/` to control shell owners +- `adapters/` to adapter owners +- `ops/` to runtime and policy owners +- `.github/` to platform owners +- `docs/` to contract owners + +### Merge Queue + +Enable merge queue on `dev` and later on `main` when PR concurrency justifies it. 
+ +Required follow-up: + +- update `.github/workflows/verify.yml` to run on `merge_group` in addition to `push` and `pull_request` + +### Reusable Workflows + +Split the current workflow surface into reusable units: + +- `_verify.yml` +- `_review-lane.yml` +- `_deploy.yml` +- `_repo-governance.yml` + +Then let repository workflows call those modules with pinned references. + +### Deployment Environments + +Create at least: + +- `staging` +- `production` + +Use them for: + +- environment-specific secrets +- required reviewers +- deployment promotion gates +- branch restrictions +- trusted-branch-only production access + +### Runners + +Use two runner classes: + +- GitHub-hosted runners for normal repository verification +- self-hosted or ephemeral runners for heavy backend agent integration, GUI automation, internal systems, or private network dependencies + +Trust boundary rules: + +- fork PRs run GitHub-hosted, read-only, minimum verification only +- fork PRs must not reach self-hosted runners +- fork PRs must not access protected deployment environments +- changes under `.github/workflows/**` must be reviewed and merged into a trusted branch before any private runner or protected environment can execute them +- production environments accept deployments from trusted protected branches only + +Recommended labels: + +- `codingclaw-build` +- `codingclaw-qa` +- `codingclaw-review` +- `codingclaw-agent` + +## 7. Dual Deployment Strategy + +When the team needs both public evolution and private company customization, run two independent CodingClaw deployments: + +### Deployment A: Upstream Or Public Baseline + +- tracks the canonical repository +- stays close to public contracts and generic capabilities +- uses public-safe workflows and public-safe secrets only +- serves as the clean upstream for broadly useful features +- defaults to protected `main` only unless upstream throughput later justifies `dev` + +### Deployment B: Company Collaboration Stack + +- runs against `codingclaw-custom` +- contains company-only adapters, workflow rules, internal backend agent integrations, and deployment policies +- uses self-hosted runners and private secrets +- integrates with internal systems that should never exist in the public baseline + +### Fork Policy + +Use `codingclaw-custom` only for changes that are truly company-specific: + +- private adapters +- private skill or policy wrappers +- internal deployment integrations +- internal approval and audit hooks +- company-specific `.github/` automation + +Push generic improvements upstream whenever possible: + +- contract clarifications +- runtime hardening +- generic review lane logic +- generic runner abstractions +- generic governance improvements + +This minimizes long-term fork drag. + +## 8. Implementation Roadmap + +### Phase 0: Governance Baseline + +- add `.github/CODEOWNERS` +- upgrade repository rules beyond the current `github_repo_gate.py` baseline +- add `merge_group` support to verification +- define ownership for `.github/`, `docs/`, `core/`, `control/`, `ops/`, and `adapters/` + +### Phase 1: Runtime Hardening Baseline + +- finish atomic state writes +- finish scoped state resolution +- enforce capability deny-by-default +- finish shell and credential guards + +This phase is the safety floor before parallel team execution expands. 
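To make the first item above concrete, the state-write primitives added in this change are expected to compose roughly as in the sketch below. The `persistCanonicalState` helper, the import path, and the file contents are placeholders for illustration; `withWriteLock`, `atomicWriteBatch`, and `AtomicWriteEntry` are the real exports of `core/loop/state-write.ts`.

```ts
// Sketch only: shows how the atomic state-write primitives are expected to compose.
// The helper name, import path, and file contents are placeholders, not repository fixtures.
import { join } from "node:path";
import { atomicWriteBatch, withWriteLock, type AtomicWriteEntry } from "./core/loop/state-write.ts";

async function persistCanonicalState(
  jobStateRoot: string,
  progressMarkdown: string,
  storyQueueJson: string,
): Promise<void> {
  const entries: AtomicWriteEntry[] = [
    { path: join(jobStateRoot, "progress.en.md"), content: progressMarkdown },
    { path: join(jobStateRoot, "story-queue.json"), content: storyQueueJson },
  ];
  // Serialize writers on the state root, then land each file via temp-file-then-rename
  // so a crash cannot leave a half-written canonical state file behind.
  await withWriteLock(jobStateRoot, async () => {
    await atomicWriteBatch(jobStateRoot, entries);
  });
}
```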
+ +### Phase 2: Team Shell + +- define the formal mapping from issue to story to branch to PR +- add worktree discipline for every active issue +- standardize branch naming and PR metadata +- define when to use `Ralph` and when to use `Team` + +### Phase 3: Review And Merge Lane + +- turn `docs/REVIEW_CONTRACT.en.md` into a real review lane +- require independent review evidence before merge +- wire release promotion from `dev` to `main` + +### Phase 4: Dual Deployment + +- stand up the private `codingclaw-custom` line if needed +- separate public and private workflows, runners, secrets, and environments +- document upstream sync cadence + +## 9. Non-Goals + +- importing oh-my-codex team runtime directly into the CodingClaw kernel +- turning one loop into a multi-story multi-agent long-running session +- replacing freeze and approval artifacts with PR discussion alone +- adding distributed schedulers, multi-tenant queues, or cloud desktop complexity before governance and audit paths are stable + +## 10. Success Criteria + +- every small-scope issue can move through `issue -> story -> worktree -> branch -> PR -> review -> merge -> deploy` without ad hoc process +- each loop still executes exactly one approved story with its own freeze, run, and artifact set +- `main` is always protected, reviewable, and releasable +- team members can collaborate in parallel without sharing the same working tree +- public and private automation can diverge without corrupting the core kernel +- generic improvements can still flow upstream with manageable fork maintenance cost + +## Source Basis + +### Repository Evidence + +- [SYSTEM_BLUEPRINT.md](../SYSTEM_BLUEPRINT.md) +- [LIGHTWEIGHT_RUNTIME_PLAN.md](../LIGHTWEIGHT_RUNTIME_PLAN.md) +- [LOOP_SPEC.md](../LOOP_SPEC.md) +- [DEPLOYMENT_PLAN.md](../DEPLOYMENT_PLAN.md) +- [REVIEW_CONTRACT.en.md](../REVIEW_CONTRACT.en.md) +- [../.github/pull_request_template.md](../../.github/pull_request_template.md) +- [../.github/workflows/verify.yml](../../.github/workflows/verify.yml) +- [../scripts/github_repo_gate.py](../../scripts/github_repo_gate.py) + +### External References + +- Oh My Codex documentation: https://yeachan-heo.github.io/oh-my-codex-website/docs.html +- Git protected branches: https://docs.github.com/github/administering-a-repository/about-protected-branches +- GitHub rulesets: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-rulesets/about-rulesets +- GitHub CODEOWNERS: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +- GitHub merge queue: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue +- GitHub reusable workflows: https://docs.github.com/en/actions/sharing-automations/reusing-workflows +- GitHub self-hosted runners: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners +- GitHub deployment environments: https://docs.github.com/actions/deployment/targeting-different-environments +- GitHub fork workflow: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo +- GitHub syncing a fork: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork +- Git worktree: https://git-scm.com/docs/git-worktree diff --git a/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.zh-CN.md 
b/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.zh-CN.md new file mode 100644 index 0000000..296614c --- /dev/null +++ b/docs/custom/TEAM_WORKFLOW_MIGRATION_PLAN.zh-CN.md @@ -0,0 +1,526 @@ +# CodingClaw 团队协作工作流迁移方案 + +## 目的 + +本文档定义了 CodingClaw 从 prompt2repo 风格的单人开发,迁移到团队协作开发工作流的推荐路径。 + +目标不是把当前 Phase 1 内核替换成外部多 agent runtime,而是在不破坏当前内核边界的前提下,在外层增加团队协作壳层,用于承接 issue、worktree 隔离、PR 审查、合并治理和部署门禁。 + +## 给负责人的结论 + +- 保留当前内核,不重写。 +- 团队协作能力放在 GitHub 和 oh-my-codex 的外层工作流上,不塞进 loop kernel。 +- 默认工作单元不是“大任务”,而是“一个 issue 拆成一个或多个 story,每个 story 单独 freeze、单独执行、单独 PR”。 +- 如果要支撑多人并行,必须先补 runtime hardening,再扩 team shell。 +- 如果公司内部流程和公开仓库流程差异大,应该拆成两套独立部署,并允许维护 `codingclaw-custom` 私有协作线。 + +一句话版本: + +CodingClaw 的正确演进方向不是“把它改造成一个重型团队 agent 平台”,而是“保留当前可审计的单 story 执行内核,在外层加上团队治理、分支治理、PR 治理和部署治理”。 + +## 核心决策 + +CodingClaw 应采用双层模型: + +- 内层保留现有 `Development Plan -> Contract Freeze -> one-story loop -> Builder -> QA -> archive` 执行内核 +- 外层增加以 GitHub 为中心的团队协作壳层,负责 issue 路由、worktree 隔离、分支治理、PR 审查、合并控制和部署门禁 +- 当团队规模、权限边界或公司内部流程需要时,运行两套独立的 CodingClaw 部署 +- 允许维护 `codingclaw-custom` fork,用于承接公司特有的流程策略、私有 adapter、内部 agent 系统和部署规则 + +这是当前仓库最匹配的路线,因为仓库现有设计已经明确: + +- 内核应保持小而可审计 +- 不应把外部产品壳、team runtime、memory layer、orchestration stack 直接导入核心运行时 + +## 当前仓库中必须保留的边界 + +### 1. 计划先于编码 + +`docs/SYSTEM_BLUEPRINT.md` 已经定义了最重要的制度边界: + +- 先计划再编码 +- 先 freeze 再执行 +- 一次 loop 只做一个 story +- 状态落文件,不依赖会话记忆 +- Builder 和 QA 必须分离 +- 仓库交付物统一英文 + +这些边界不能因为“团队协作”而被弱化。 + +### 2. One Story Per Loop + +`docs/LOOP_SPEC.md` 已明确规定,一次 loop 不允许把多个独立 story 合并执行。 + +因此未来的团队协作模型必须是: + +- `issue -> story queue` +- `story -> freeze` +- `story -> run` +- `story -> artifact` +- `story -> PR` + +而不是: + +- 一个 issue 下多人同时往一个大 branch 里堆改动 +- 一个 loop 同时处理多个 story + +### 3. 内核只借鉴,不吞并外部 runtime + +`docs/LIGHTWEIGHT_RUNTIME_PLAN.md` 已明确说明: + +- 可以借鉴 oh-my-codex 的状态、规划和 guard 思想 +- 可以借鉴 IronClaw 的 capability 和安全边界思想 +- 但不能把它们的 product shell、team runtime、memory system、orchestration stack 直接导入内核 + +所以团队化改造必须走“外层壳”路线,而不是“整体替换内核”路线。 + +### 4. 归档与证据链优先 + +现有仓库已经围绕这些对象建立了审计体系: + +- `task-packet` +- `run-result` +- `artifact-index` +- `handoff` +- `checksums` +- 各类 reports + +团队协作不能退化成“靠聊天记录和 PR 对话驱动”,而必须继续保留这套文件化证据链。 + +## 当前仓库已经具备的可复用资产 + +- `.github/ISSUE_TEMPLATE/` 已有结构化 issue 入口 +- `.github/pull_request_template.md` 已包含 scope、contract impact、verification、evidence、risks +- `.github/workflows/verify.yml` 已有基础验证 workflow +- `.github/workflows/enforce-repo-gate.yml` 与 `scripts/github_repo_gate.py` 已有最小仓库门禁基础 +- `docs/REVIEW_CONTRACT.en.md` 已具备独立 review lane 的雏形 +- `state/`、`task-packet`、`run-result`、`artifact-index`、`handoff`、checksum 体系已经可以承接团队协作的审计面 + +这意味着 CodingClaw 不是“完全没有团队化基础”,而是“缺少外层协作治理层”。 + +## 当前最明显的缺口 + +### 1. 仓库保护还偏单人模式 + +当前 `scripts/github_repo_gate.py` 主要还是: + +- required status check +- linear history +- conversation resolution +- 禁止 force push +- 禁止删除分支 + +但还没有真正进入团队 PR 治理所需的配置,例如: + +- required reviews +- CODEOWNERS review +- stale review dismissal +- merge queue + +### 2. Live path 仍停在 QA + +当前 live path 仍然主要是: + +- Builder +- QA + +独立 review、merge、deploy 还没有真正进入第一方执行路径。 + +### 3. Team 并发前的基础硬化还没完成 + +仓库当前仍把以下能力视为“下一阶段硬化项”: + +- atomic state writes +- scoped state resolution +- capability deny-by-default +- shell guard +- credential guard + +如果在这些没收口之前就把多人并发、worktree 并发、team shell 大规模铺开,会把团队流程建在未封口的状态和权限模型之上。 + +## 外部调研结论 + +## 1. 
Oh My Codex 的启发 + +公开文档显示: + +- `Ralph` 适合承担“持续推进直到完成和验证通过”的闭环角色 +- `Team` 适合承担“并行分工执行”的外层协调角色 +- 推荐 workflow 本身就强调 worktree 隔离、并行 issue、独立 PR 和最后收口 + +对 CodingClaw 的直接启发是: + +- `Ralph` 负责一个 story 的完成闭环 +- `Team` 负责多个 story 或多个 issue 的并行协调 +- 并行性放在外层,不放进单个 loop 内核 + +## 2. GitHub 的启发 + +官方文档足够支撑以下协作治理组件: + +- rulesets / protected branches +- CODEOWNERS +- merge queue +- reusable workflows +- self-hosted runners +- deployment environments +- fork PR + +对 CodingClaw 的直接启发是: + +- 团队治理主要应当落在 GitHub 的仓库规则、分支规则、审核规则、runner 边界和 deployment environment 上 +- 不应把这些组织级治理逻辑硬编码进 loop runtime + +## 推荐目标模型 + +## 1. 内核与协作壳分离 + +内核继续负责: + +- requirement normalization +- development plan approval +- contract freeze 绑定 `base_commit` +- one-story execution +- Builder +- QA +- artifact archive + +协作壳负责: + +- issue triage +- story slicing +- worktree creation +- branch naming +- PR creation +- review assignment +- merge queue +- environment promotion +- upstream / fork sync + +这个分层是整个方案最关键的设计原则。 + +## 2. 默认工作单元 + +团队默认工作单元应定义为: + +- 一个 GitHub issue +- 当 issue 超过一个 story 时,先拆成 story queue +- 每个 branch 只承载一个已批准 story +- 每个 active story 使用一个独立 worktree +- 每个 story 有自己独立的 freeze、run、artifact set +- 默认每个 story 对应一个 PR + +必须明确: + +- 一个 issue 可以拆成多个 story +- 但一个 loop 只能执行一个 story +- 一个 PR 默认也不应打包多个 story + +## 3. 推荐分支拓扑 + +### 在 `codingclaw-custom` 中默认采用 + +- `main`: 发布与归档主分支,保护最强 +- `dev`: 团队集成分支,小 scope issue 先合到这里 +- `story/-`: 单 story 短期分支 +- `hotfix/-`: 紧急修复分支 +- `sync/upstream-`: 从上游同步时使用的短期分支 + +这样做的好处: + +- `dev` 用于承接团队高频并行提交 +- `main` 用于承接强门禁发布 +- upstream sync 保持显式且可审查 + +### 在公开 upstream 中默认采用 + +- 只保留一个长期受保护的 `main` +- 其他仅使用短期 feature / fix 分支 + +只有当上游自身 PR 并发量足够大时,才考虑引入 `dev` 和 merge queue。 + +这样可以避免 upstream 和 custom fork 同时维护双长期分支,降低 fork drag。 + +## 4. 推荐日常工作流 + +### Intake And Planning + +- 用现有 issue forms 创建 issue +- 如有必要,将 issue 拆成 story queue +- 每次只选择一个已批准 story 进入执行 +- 产出或更新 `DEVELOPMENT_PLAN.en.md` +- 完成 owner approval +- 产出或更新 `CONTRACT_FREEZE.en.md` 及相关 freeze artifacts + +### Implementation + +- 从 `dev` 拉出一个 story 专属 worktree +- 使用 `Ralph` 推进该单 story 的实现闭环 +- 所有本地执行证据继续归档到 run artifact set +- worktree 必须保持单一用途 + +### Review + +- 从 `story/-` 向 `dev` 发 PR +- 要求 `verify` +- 要求至少一个独立 reviewer +- 要求 CODEOWNERS review +- 运行基于 `docs/REVIEW_CONTRACT.en.md` 的 review lane + +### Merge And Promotion + +- 先通过 merge queue 合入 `dev` +- 在 `dev` 上做集成验证 +- 再通过 release PR 从 `dev` 提升到 `main` +- 生产发布必须经过 deployment environment approval + +### Handoff And Recovery + +- 中断时用 checkpoint 保留状态 +- takeover 场景继续使用现有 takeover packet 和 approval card 模型 +- 不使用临时聊天记录替代正式 handoff + +## 5. Oh My Codex 角色映射 + +建议在外层这样使用 oh-my-codex: + +- `ralplan`: 当 issue 跨模块或不够清晰时,先把它收敛成可执行计划 +- `team`: 当多个 issue 或多个 story 可以并行时,负责分工协调 +- `ralph`: 负责一个已批准 story 的完成闭环 +- `review`: 用于 PR 落地前的审查 +- `ship`: 用于推分支和发 PR +- `checkpoint`: 用于中断后的恢复 +- `trace`: 用于回看多 agent 执行轨迹 + +一句话职责划分: + +- `Ralph` 负责“一个 story 做到底” +- `Team` 负责“多个 story 并行推进” +- GitHub 负责“谁能合、何时合、能否发” + +## 6. 
GitHub 治理改造建议 + +### Repository Rules + +在当前最小 gate 之上,补齐这些规则: + +- required status checks +- required conversation resolution +- required linear history +- 禁止 force push +- 禁止删除分支 +- 强制走 pull request +- `dev` 至少 1 个 approval 且要求 CODEOWNERS review +- `main` 至少 2 个 approvals 且要求 CODEOWNERS review +- 新 commit 进入后自动 dismiss stale reviews + +这里要注意,规则必须写成可落地的 AND 语义,不能写成模糊的 OR 语义。 + +### CODEOWNERS + +建议新增 `.github/CODEOWNERS`,至少覆盖: + +- `core/` +- `control/` +- `adapters/` +- `ops/` +- `.github/` +- `docs/` + +### Merge Queue + +建议在 `dev` 先启用 merge queue,后续再视情况扩到 `main`。 + +必要前提: + +- `.github/workflows/verify.yml` 需要支持 `merge_group` + +### Reusable Workflows + +建议把 workflow 拆成可复用模块: + +- `_verify.yml` +- `_review-lane.yml` +- `_deploy.yml` +- `_repo-governance.yml` + +### Deployment Environments + +至少建立: + +- `staging` +- `production` + +用于承接: + +- environment-specific secrets +- required reviewers +- deployment promotion gates +- branch restrictions +- 仅允许 trusted branches 访问 production + +### Runners + +建议使用两类 runner: + +- GitHub-hosted runners:普通仓库验证 +- self-hosted 或 ephemeral runners:重型后端 agent、GUI automation、内网系统、私有依赖 + +建议 runner labels: + +- `codingclaw-build` +- `codingclaw-qa` +- `codingclaw-review` +- `codingclaw-agent` + +### 信任边界必须写死 + +- fork PR 只跑 GitHub-hosted 的只读最小验证 +- fork PR 不允许触达 self-hosted runners +- fork PR 不允许访问受保护 deployment environments +- `.github/workflows/**` 改动必须先进入 trusted branch,之后才能触发私有 runner 或受保护 environment +- production environment 只接受受信保护分支 + +## 7. 双部署策略 + +当你既要维护公开方向,又要维护公司内协作流时,建议运行两套独立 CodingClaw: + +### Deployment A: Upstream / Public Baseline + +- 跟随 canonical repository +- 尽量保持公共 contract 和通用能力 +- 只使用 public-safe workflows 与 public-safe secrets +- 默认只维护受保护的 `main` + +### Deployment B: Company Collaboration Stack + +- 跑在 `codingclaw-custom` +- 承载公司私有 adapter、workflow rules、内部 backend agent 集成和部署策略 +- 使用 self-hosted runners 和 private secrets +- 对接公司内部系统 + +### Fork Policy + +`codingclaw-custom` 只承载真正公司特有的东西: + +- private adapters +- private skill / policy wrappers +- internal deployment integrations +- internal approval hooks +- company-specific `.github/` automation + +而以下内容应尽量回推 upstream: + +- contract clarifications +- runtime hardening +- generic review lane logic +- generic runner abstractions +- generic governance improvements + +这样才能控制长期 fork 维护成本。 + +## 8. 实施路线 + +### Phase 0: Governance Baseline + +- 增加 `.github/CODEOWNERS` +- 升级仓库规则,不再只依赖当前 `github_repo_gate.py` +- 给 `verify` 加上 `merge_group` 支持 +- 明确 `.github/`、`docs/`、`core/`、`control/`、`ops/`、`adapters/` 的 ownership + +### Phase 1: Runtime Hardening Baseline + +- 完成 atomic state writes +- 完成 scoped state resolution +- 完成 capability deny-by-default +- 完成 shell and credential guards + +这是团队并发扩张前的安全地板,不能后置。 + +### Phase 2: Team Shell + +- 正式定义 `issue -> story -> branch -> PR` 契约 +- 为每个 active story 建立 worktree discipline +- 固化 branch naming 和 PR metadata +- 明确什么场景用 `Ralph`,什么场景用 `Team` + +### Phase 3: Review And Merge Lane + +- 把 `docs/REVIEW_CONTRACT.en.md` 升级成真实 review lane +- 合并前必须有独立 review evidence +- 打通 `dev -> main` 的 release promotion + +### Phase 4: Dual Deployment + +- 如有需要,搭建 `codingclaw-custom` +- 分离 public/private workflows、runners、secrets、environments +- 固化 upstream sync cadence + +## 9. 非目标 + +以下方向当前都不应作为主路线: + +- 把 oh-my-codex team runtime 直接塞进 CodingClaw kernel +- 把一个 loop 改造成多 story、多 agent 的长运行 session +- 用 PR 对话代替 freeze、approval 和 artifact 体系 +- 在治理和审计面没稳定前,先上分布式调度、多租户队列、远程桌面复杂度 + +## 10. 
成功标准 + +- 每个小 scope issue 都能稳定经过 `issue -> story -> worktree -> branch -> PR -> review -> merge -> deploy` +- 每个 loop 仍然只执行一个已批准 story,并保留独立 freeze、run、artifact set +- `main` 始终处于可审查、可发布、可追溯状态 +- 团队成员可以并行协作而不共享同一个工作目录 +- 公共能力和公司私有能力可以分层演进,而不破坏内核边界 +- generic improvements 仍能以可控成本回推 upstream + +## 建议你作为负责人优先推动的 5 件事 + +### 第一优先级 + +- 先补 `.github/CODEOWNERS` +- 先把 `verify` 补上 `merge_group` +- 先把仓库规则升到 team 级别 + +### 第二优先级 + +- 先补 runtime hardening,不要抢先上多人并发协作壳 + +### 第三优先级 + +- 确认 `codingclaw-custom` 是否真的需要独立存在 +- 如果需要,就从第一天开始把“哪些必须回推 upstream”定清楚 + +### 第四优先级 + +- 固化 `story/-` 分支命名 +- 固化“一 story 一 PR”纪律 + +### 第五优先级 + +- 再去扩展 review lane、deploy lane、internal agent lane + +## 参考基础 + +### 仓库内部依据 + +- [SYSTEM_BLUEPRINT.md](../SYSTEM_BLUEPRINT.md) +- [LIGHTWEIGHT_RUNTIME_PLAN.md](../LIGHTWEIGHT_RUNTIME_PLAN.md) +- [LOOP_SPEC.md](../LOOP_SPEC.md) +- [DEPLOYMENT_PLAN.md](../DEPLOYMENT_PLAN.md) +- [REVIEW_CONTRACT.en.md](../REVIEW_CONTRACT.en.md) +- [TEAM_WORKFLOW_MIGRATION_PLAN.md](./TEAM_WORKFLOW_MIGRATION_PLAN.md) + +### 外部资料 + +- Oh My Codex documentation: https://yeachan-heo.github.io/oh-my-codex-website/docs.html +- Git protected branches: https://docs.github.com/github/administering-a-repository/about-protected-branches +- GitHub rulesets: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-rulesets/about-rulesets +- GitHub CODEOWNERS: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +- GitHub merge queue: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue +- GitHub reusable workflows: https://docs.github.com/en/actions/sharing-automations/reusing-workflows +- GitHub self-hosted runners: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners +- GitHub deployment environments: https://docs.github.com/actions/deployment/targeting-different-environments +- GitHub fork workflow: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo +- GitHub syncing a fork: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork +- Git worktree: https://git-scm.com/docs/git-worktree diff --git a/docs/progress/prompt_phase1_non_success_containerization.md b/docs/progress/prompt_phase1_non_success_containerization.md new file mode 100644 index 0000000..c504e06 --- /dev/null +++ b/docs/progress/prompt_phase1_non_success_containerization.md @@ -0,0 +1,73 @@ +You are implementing one bounded Phase 1 task for CodingClaw. + +Current behavior +- Dockerized Phase 1 now keeps exactly one task packet per run at `artifacts/runs//metadata/task-packet.en.json`. +- Container execution reads that canonical packet through the mapped `RunEnvelope.task_packet_path`; path translation lives in `RunEnvelope.container_runtime.container_paths`. +- `runtime-home/.../envelopes/container/task-packets/` transport copies are no longer generated, and the task packet is no longer rewritten with containerized paths or injected `container_control`. +- `contract-freeze.json.task_packet_digests`, `job-manifest.json.runs[].task_packet_path`, and `checksums.txt` bind only the canonical run-root packet. 
+- Builder or QA non-success exits still stop the loop before unsupported downstream work, archive the required run bundle, and skip `artifacts/final/final-summary.en.md` unless QA reaches `SUCCESS`. + +Read these documents first: +- docs/SYSTEM_BLUEPRINT.md +- docs/ARCHITECTURE_OVERVIEW.md +- docs/DEPLOYMENT_PLAN.md +- docs/STATE_STORE_SPEC.md +- docs/EXECUTOR_ADAPTER_CONTRACT.md +- docs/ARTIFACT_LAYOUT_SPEC.md +- docs/STATUS_MODEL.md +- docs/LOOP_SPEC.md +- docs/QA_CONTRACT.en.md + +Task +Extend the Phase 1 local dockerized loop so the current success-only implementation also closes correctly for `FIXBACK_REQUIRED`, `FAILED_INFRA`, and `TIMEOUT`. + +Goal +Build the smallest working implementation that preserves the current real-Docker happy path while making the Phase 1 loop stop, archive, and mirror state correctly for the allowed non-success run exits. + +In scope +- `core/loop/phase1-local-flow.ts` +- `adapters/generic-cli/adapter.ts` +- `adapters/generic-cli/docker-runtime.ts` +- `core/loop/state-store.ts` +- focused verification in `tests/integration/` and any directly related targeted tests +- canonical outputs under `jobs//artifacts/runs//...` +- canonical outputs under `jobs//state/...` +- canonical outputs under `state/...` +- `job-manifest.json`, `checksums.txt`, and `contract-freeze.sha256` behavior for non-success exits + +Out of scope +- implementing a real fixback executor or multi-round fixback workflow +- new run roles or review executor behavior +- change-request, approval-resume, credential-resume, or takeover expansions beyond preserving current behavior +- new status vocabulary +- unrelated refactors + +Constraints +- Preserve the single-node Phase 1 design. +- Keep builder and QA launched through the existing Docker worker path. +- Stop the loop after the first non-success exit instead of continuing into an unsupported downstream run. +- Reuse the existing run exit to job state mapping from `STATUS_MODEL.md`. +- Keep artifacts and state externalized at the canonical host paths. +- Do not write `artifacts/final/final-summary.en.md` for jobs that do not reach final archive state. +- Do not add new dependencies. +- English repo-facing artifacts only. + +Deliverables +- updated loop branching that handles builder and QA non-success exits without breaking checksum or manifest closure +- adapter runtime handling that classifies Docker timeout and container launch failure into the correct standard run exit status +- any minimal state-store adjustments required so mirrored `state/` files match the archived run outcome +- focused verification that covers at least one builder-side early stop and one QA-side non-success closeout + +Acceptance criteria +- If builder exits `FIXBACK_REQUIRED`, `FAILED_INFRA`, or `TIMEOUT`, the loop does not dispatch QA, still writes the canonical builder run bundle, updates `job-manifest.json` and mirrored `state/` to the mapped job state, and keeps checksum verification passing. +- If QA exits `FIXBACK_REQUIRED`, `FAILED_INFRA`, or `TIMEOUT`, the loop does not run archive finalization, leaves `artifacts/final/final-summary.en.md` absent, and keeps the manifest plus mirrored state aligned with `STATUS_MODEL.md`. +- Docker launch failures are reported as `FAILED_INFRA`, and enforced runtime timeout exits are reported as `TIMEOUT`, without inventing new machine-readable statuses. +- The command logs still record the `docker run` invocation and the archived outputs remain under the canonical host paths. 
+- The existing real-Docker success path is not regressed. +- The result matches the existing docs instead of redefining them. + +Execution instructions +1. Explore the relevant docs and current code first, especially `core/loop/phase1-local-flow.ts`, `adapters/generic-cli/adapter.ts`, `adapters/generic-cli/docker-runtime.ts`, and `core/loop/state-store.ts`. +2. Implement the minimum viable vertical slice for these three non-success statuses only. +3. Run focused verification, including real Docker where practical. +4. Report changed files, verification performed, and any unresolved risks. diff --git a/docs/progress/prompt_phase1_recovery_cards.md b/docs/progress/prompt_phase1_recovery_cards.md new file mode 100644 index 0000000..d2816d9 --- /dev/null +++ b/docs/progress/prompt_phase1_recovery_cards.md @@ -0,0 +1,78 @@ +You are implementing one bounded Phase 1 task for CodingClaw. + +Current behavior +- Recovery cards are archived under `approvals//` before archived `state/` and mirrored `state/` files are updated. +- Pending recovery archive records now expose `waiting_on`, `resume_action`, and `paused_run_id` so loop state can consume them directly. +- `job-manifest.json.pause_context` is populated from the archived recovery card when the latest run leaves the job in `AWAITING_OWNER` or `AWAITING_TAKEOVER`. +- `state/decisions.en.md`, `state/progress.en.md`, and `state/risk-register.en.md` now mirror the recovery gate with the archived `card_id` and waiting target instead of reusing fixback wording. +- Waiting-owner states describe owner recovery review, and waiting-takeover states describe takeover gating. + +Read these documents first: +- docs/SYSTEM_BLUEPRINT.md +- docs/ARCHITECTURE_OVERVIEW.md +- docs/DEPLOYMENT_PLAN.md +- docs/STATE_STORE_SPEC.md +- docs/EXECUTOR_ADAPTER_CONTRACT.md +- docs/ARTIFACT_LAYOUT_SPEC.md +- docs/STATUS_MODEL.md +- docs/APPROVAL_CARD_SPEC.md +- docs/JOB_MANIFEST_SCHEMA.md +- docs/LANGUAGE_BOUNDARY_POLICY.md + +Task +Implement Phase 1 recovery-card archival and `pause_context` population for suspended run exits in the local loop. + +Goal +Build the smallest working implementation that preserves the current local Phase 1 builder-to-QA flow while adding the missing control-plane recovery objects and manifest/state recovery context required by the docs. + +In scope +- `core/loop/phase1-local-flow.ts` +- `core/loop/state-store.ts` +- `ops/archive/approvals.ts` +- minimal new helpers under `ops/recovery/` if they reduce branching in the loop +- `core/contracts/types.ts` only for minimal type additions required by recovery-card payloads or archive records +- focused verification in `tests/integration/` and any directly related harness fixtures or smoke tests +- recovery control-plane artifacts under `jobs//approvals//...` +- `job-manifest.json` `approvals[]` and `pause_context` +- mirrored `jobs//state/...` and `state/...` recovery-facing files + +Out of scope +- mobile channel delivery or webhook integration +- owner decision intake, pause/resume commands, or actual resume execution +- takeover packet generation or local GUI session orchestration +- review executor behavior +- fixback or change-request workflow redesign +- policy-engine expansion for `FAILED_POLICY` +- unrelated refactors + +Constraints +- Preserve the single-node Phase 1 design. +- Reuse the existing job-state and run-exit vocabularies from `STATUS_MODEL.md`. +- Generate recovery cards only when the latest run leaves the job in a waiting state that needs owner or takeover recovery context. 
+- Keep recovery cards as control-plane artifacts under `approvals/`, not under `artifacts/runs//`. +- Keep repository-facing outputs in English; Chinese is allowed only for owner-facing recovery summaries in control-plane artifacts. +- Do not overwrite the existing approved plan/freeze approval archives when creating a recovery card. +- If a pending recovery card does not yet have a real owner decision, do not fabricate a fake resolved decision just to satisfy the current helper shape; adapt the archive flow minimally and honestly. +- Do not add new dependencies. +- Do not regress the current success path or the recently fixed non-success run bundle behavior. + +Deliverables +- minimal recovery-card creation and archival for suspended Phase 1 runs +- any minimal approval-archive support required so pending recovery cards can be stored without breaking existing decided approval archives +- `job-manifest.json` updates so `pause_context` explains suspended jobs and references the related recovery card +- mirrored `state/progress.en.md`, `state/decisions.en.md`, and `state/risk-register.en.md` updates that reflect the recovery gate +- focused verification for at least one waiting-owner recovery case and one waiting-takeover or approval-interrupt recovery case + +Acceptance criteria +- When builder or QA exits `FAILED_EXECUTION`, `FAILED_INFRA`, `TIMEOUT`, or `BUDGET_EXCEEDED`, the job maps to `AWAITING_OWNER`, archives a recovery card under `approvals//`, records the new approval entry in `job-manifest.json`, and populates `pause_context` with a non-empty reason, waiting target, resume action, paused timestamp, and related card ID. +- When builder or QA exits `AWAITING_APPROVAL`, `AWAITING_CREDENTIALS`, or `AWAITING_TAKEOVER`, the loop still stops without dispatching downstream work, and the mapped waiting state plus `pause_context` align with `STATUS_MODEL.md`. +- The archived recovery card contains the required recovery context from `APPROVAL_CARD_SPEC.md`: last exit reason, current freeze version, current story, latest evidence path, recommended next action, and resume gate. +- Recovery card artifacts stay outside per-run artifact indexes and remain referenced through canonical job-root-relative paths in `job-manifest.json`. +- Existing approved plan/freeze approval archives still work, and the current successful builder-plus-QA path is not regressed. +- The result matches the existing docs instead of redefining them. + +Execution instructions +1. Explore the relevant docs and current code first, especially `core/loop/phase1-local-flow.ts`, `core/loop/state-store.ts`, `ops/archive/approvals.ts`, and the manifest/pause-context rules in `docs/APPROVAL_CARD_SPEC.md` plus `docs/JOB_MANIFEST_SCHEMA.md`. +2. Implement the minimum viable vertical slice for recovery-card archival and `pause_context` only. +3. Run focused verification for suspended-run recovery cases and the existing happy path. +4. Report changed files, verification performed, and any unresolved risks. 
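
For orientation before reading the diffs below, here is a minimal sketch of the suspended-exit to `pause_context` mapping this prompt asks for. The `buildPauseContext` helper, the `PauseContextSketch` type, and the literal resume-action strings are assumptions for illustration; only the required field names and the run-exit vocabulary come from the repository documents, and the grouping of waiting exits mirrors the test-harness fixtures further down in this diff rather than a normative spec.

```ts
// Illustrative sketch only: derive a manifest pause_context entry from a suspended
// run exit and its archived recovery card. Names below are not existing repo API.
type WaitingTarget = "owner" | "takeover";

interface PauseContextSketch {
  is_paused: boolean;
  pause_reason: string | null;
  waiting_on: WaitingTarget | null;
  resume_action: string | null;
  paused_at: string | null;
  related_card_id: string | null;
}

// Exits that the acceptance criteria and harness fixtures treat as owner-facing recovery.
const OWNER_RECOVERY_EXITS = new Set([
  "FAILED_EXECUTION",
  "FAILED_INFRA",
  "TIMEOUT",
  "BUDGET_EXCEEDED",
  "AWAITING_APPROVAL",
  "AWAITING_CREDENTIALS",
]);

function buildPauseContext(runExitStatus: string, recoveryCardId: string, pausedAt: string): PauseContextSketch {
  const waitingOn: WaitingTarget | null = OWNER_RECOVERY_EXITS.has(runExitStatus)
    ? "owner"
    : runExitStatus === "AWAITING_TAKEOVER"
      ? "takeover"
      : null;
  if (waitingOn === null) {
    // Non-suspended exits (for example SUCCESS) leave the job unpaused.
    return { is_paused: false, pause_reason: null, waiting_on: null, resume_action: null, paused_at: null, related_card_id: null };
  }
  return {
    is_paused: true,
    pause_reason: runExitStatus,
    waiting_on: waitingOn,
    resume_action: waitingOn === "owner"
      ? "owner reviews the recovery card and resumes the run"
      : "takeover operator completes the takeover gate and resumes the run",
    paused_at: pausedAt,
    related_card_id: recoveryCardId,
  };
}
```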
diff --git a/ops/archive/approvals.ts b/ops/archive/approvals.ts index 6622a17..1b874e1 100644 --- a/ops/archive/approvals.ts +++ b/ops/archive/approvals.ts @@ -1,22 +1,53 @@ -import type { ApprovalCardSnapshot, ApprovalDecisionReceipt } from "../../core/contracts/types.ts"; +import type { ApprovalCardSnapshot, ApprovalDecisionReceipt, ApprovalRequestSnapshot } from "../../core/contracts/types.ts"; import { sha256Text, writeJson, writeText } from "../../core/loop/support.ts"; export interface ApprovalArchiveRecord { card_id: string; snapshot_path: string; - decision_path: string; + decision_path: string | null; summary_path: string; snapshot_checksum: string; - decision_checksum: string; + decision_checksum: string | null; summary_checksum: string; card_state: ApprovalCardSnapshot["card_state"]; card_type: string; requested_action: string; - decision: string; - decided_at: string; + decision: string | null; + decided_at: string | null; + timeout_at: string; + waiting_on: "owner" | "takeover" | null; + resume_action: string | null; + paused_run_id: string | null; + approval_request: ApprovalRequestSnapshot | null; } -function renderApprovalSummary(card: ApprovalCardSnapshot, decision: ApprovalDecisionReceipt): string { +function renderApprovalSummary(card: ApprovalCardSnapshot, decision: ApprovalDecisionReceipt | null): string { + if (decision === null) { + const latestEvidencePath = card.recovery_context?.latest_evidence_path ?? (card.evidence_refs[0] ?? "none"); + const recommendedNextAction = card.recovery_context?.recommended_next_action ?? card.requested_action; + const approvalRequestLines = + card.approval_request === null || card.approval_request === undefined + ? [] + : [ + `- 请求能力: ${card.approval_request.requested_capability}`, + `- 请求原因: ${card.approval_request.reason}`, + `- 替代方案: ${card.approval_request.suggested_alternatives.join(", ")}`, + ]; + return [ + "# 审批摘要", + "", + `- 作业ID: ${card.job_id}`, + `- 卡片ID: ${card.card_id}`, + `- 请求动作: ${card.requested_action}`, + `- 风险级别: ${card.risk_level}`, + `- 卡片状态: ${card.card_state}`, + `- 当前摘要: ${card.summary_zh}`, + `- 最新证据: ${latestEvidencePath}`, + `- 下一步: ${recommendedNextAction}`, + ...approvalRequestLines, + "", + ].join("\n"); + } return [ "# 审批摘要", "", @@ -34,17 +65,18 @@ function renderApprovalSummary(card: ApprovalCardSnapshot, decision: ApprovalDec export async function writeApprovalArchive( approvalRoot: string, card: ApprovalCardSnapshot, - decision: ApprovalDecisionReceipt, + decision: ApprovalDecisionReceipt | null, ): Promise { const snapshotPath = `${approvalRoot}/approval-card.json`; - const decisionPath = `${approvalRoot}/decision.json`; + const decisionPath = decision === null ? null : `${approvalRoot}/decision.json`; const summaryPath = `${approvalRoot}/summary.zh.md`; const summaryText = renderApprovalSummary(card, decision); const snapshotText = `${JSON.stringify(card, null, 2)}\n`; - const decisionText = `${JSON.stringify(decision, null, 2)}\n`; await writeJson(snapshotPath, card); - await writeJson(decisionPath, decision); + if (decisionPath !== null) { + await writeJson(decisionPath, decision); + } await writeText(summaryPath, summaryText); return { @@ -53,12 +85,17 @@ export async function writeApprovalArchive( decision_path: decisionPath, summary_path: summaryPath, snapshot_checksum: sha256Text(snapshotText), - decision_checksum: sha256Text(decisionText), + decision_checksum: decision === null ? 
null : sha256Text(`${JSON.stringify(decision, null, 2)}\n`), summary_checksum: sha256Text(summaryText), card_state: card.card_state, card_type: card.card_type, requested_action: card.requested_action, - decision: decision.decision, - decided_at: decision.decided_at, + decision: decision?.decision ?? null, + decided_at: decision?.decided_at ?? null, + timeout_at: card.timeout_at, + waiting_on: card.recovery_context?.resume_gate ?? null, + resume_action: card.recovery_context?.recommended_next_action ?? null, + paused_run_id: card.recovery_context?.paused_run_id ?? null, + approval_request: card.approval_request ?? null, }; } diff --git a/ops/archive/job-root.ts b/ops/archive/job-root.ts index 8aba7dc..475b39b 100644 --- a/ops/archive/job-root.ts +++ b/ops/archive/job-root.ts @@ -17,6 +17,7 @@ export interface JobRootLayout { environmentPath: string; finalSummaryPath: string; runtimeHomeRoot: string; + runtimeHomeRootForRole: (runRole: "builder" | "qa") => string; planPath: string; freezePath: string; freezeJsonPath: string; @@ -40,6 +41,7 @@ export function resolveJobRootLayout(repoRoot: string, jobId: string): JobRootLa const finalRoot = join(artifactRoot, "final"); const artifactMetadataRoot = join(artifactRoot, "metadata"); const runtimeHomeRoot = join(jobRoot, "runtime-home", "phase1-local"); + const runtimeHomeRootForRole = (runRole: "builder" | "qa") => join(runtimeHomeRoot, runRole); return { repoRoot, @@ -57,6 +59,7 @@ export function resolveJobRootLayout(repoRoot: string, jobId: string): JobRootLa environmentPath: join(artifactMetadataRoot, "environment.json"), finalSummaryPath: join(finalRoot, "final-summary.en.md"), runtimeHomeRoot, + runtimeHomeRootForRole, planPath: join(jobRoot, "DEVELOPMENT_PLAN.en.md"), freezePath: join(jobRoot, "CONTRACT_FREEZE.en.md"), freezeJsonPath: join(jobRoot, "contract-freeze.json"), @@ -80,4 +83,6 @@ export async function ensureJobRootLayout(layout: JobRootLayout): Promise await ensureDir(layout.finalRoot); await ensureDir(layout.artifactMetadataRoot); await ensureDir(layout.runtimeHomeRoot); + await ensureDir(layout.runtimeHomeRootForRole("builder")); + await ensureDir(layout.runtimeHomeRootForRole("qa")); } diff --git a/ops/guards/credential-injector.ts b/ops/guards/credential-injector.ts new file mode 100644 index 0000000..438b594 --- /dev/null +++ b/ops/guards/credential-injector.ts @@ -0,0 +1,116 @@ +import { join } from "node:path"; +import { readJson } from "../../core/loop/support.ts"; +import type { RunEnvelope, RunExitStatus, TaskPacket, WorkerOutput } from "../../core/contracts/types.ts"; + +interface AdapterPolicy { + credential_injection?: { + supported_modes?: string[]; + supported_sources?: string[]; + fixture_env_prefix?: string; + credential_env_prefix?: string; + }; + log_redaction_rules?: { + replace_with?: string; + patterns?: string[]; + }; +} + +export interface CredentialInjectionResult { + allowed: boolean; + status: RunExitStatus | null; + reason: string | null; + environment: Record; + redactor: LogRedactor; +} + +export class LogRedactor { + private readonly patterns: RegExp[]; + private readonly replaceWith: string; + + constructor(patterns: RegExp[], replaceWith: string, private readonly secrets: string[]) { + this.patterns = patterns; + this.replaceWith = replaceWith; + } + + redactText(value: string): string { + let redacted = value; + for (const pattern of this.patterns) { + redacted = redacted.replace(pattern, this.replaceWith); + } + for (const secret of this.secrets) { + if (secret.length > 0) { + redacted = 
redacted.replaceAll(secret, this.replaceWith); + } + } + return redacted; + } + + redactCommand(command: string[]): string[] { + return command.map((value) => this.redactText(value)); + } + + redactWorkerOutput(output: WorkerOutput): WorkerOutput { + const redactList = (values: string[]) => values.map((value) => this.redactText(value)); + return { + ...output, + completed: redactList(output.completed), + open: redactList(output.open), + blockers: redactList(output.blockers), + next_action: this.redactText(output.next_action), + evidence_paths: redactList(output.evidence_paths), + report_paths: redactList(output.report_paths), + test_result_paths: redactList(output.test_result_paths), + fixback_items: redactList(output.fixback_items), + }; + } +} + +function failure(status: RunExitStatus, reason: string, redactor: LogRedactor): CredentialInjectionResult { + return { + allowed: false, + status, + reason, + environment: {}, + redactor, + }; +} + +function success(environment: Record, redactor: LogRedactor): CredentialInjectionResult { + return { + allowed: true, + status: null, + reason: null, + environment, + redactor, + }; +} + +export class CredentialInjector { + private policyPromise: Promise | null = null; + + constructor(private readonly repoRoot: string) {} + + private async loadPolicy(): Promise { + if (this.policyPromise === null) { + this.policyPromise = readJson(join(this.repoRoot, "adapters", "generic-cli", "adapter-policy.json")); + } + return this.policyPromise; + } + + async resolve(taskPacket: TaskPacket, envelope: RunEnvelope): Promise { + void envelope; + const policy = await this.loadPolicy(); + const redactionPatterns = (policy.log_redaction_rules?.patterns ?? []).map((value) => new RegExp(value, "giu")); + const replaceWith = policy.log_redaction_rules?.replace_with ?? "[REDACTED]"; + const injectionRequests = taskPacket.credential_injection_requests ?? 
[]; + + if (injectionRequests.length === 0) { + return success({}, new LogRedactor(redactionPatterns, replaceWith, [])); + } + return failure( + "FAILED_POLICY", + "host-boundary credential injection is not available for docker worker launches in this adapter profile", + new LogRedactor(redactionPatterns, replaceWith, []), + ); + } +} diff --git a/ops/guards/shell-policy.ts b/ops/guards/shell-policy.ts new file mode 100644 index 0000000..bae72ea --- /dev/null +++ b/ops/guards/shell-policy.ts @@ -0,0 +1,131 @@ +import { basename, join } from "node:path"; +import { readJson, toPosixPath } from "../../core/loop/support.ts"; +import type { ContainerPathMount, RunExitStatus } from "../../core/contracts/types.ts"; + +interface AdapterPolicy { + shell_policy?: { + blocked_commands?: string[]; + dangerous_patterns?: string[]; + never_auto_approve_patterns?: string[]; + env_allowlist?: string[]; + }; +} + +const DANGEROUS_DYNAMIC_ENV_NAMES = new Set([ + "BASH_ENV", + "DYLD_INSERT_LIBRARIES", + "ENV", + "GIT_CONFIG_GLOBAL", + "GIT_CONFIG_SYSTEM", + "LD_PRELOAD", + "NODE_OPTIONS", + "PATH", + "PROMPT_COMMAND", + "PYTHONPATH", +]); + +export interface ShellPolicyDecision { + allowed: boolean; + reason: string | null; + status: RunExitStatus | null; +} + +export interface ShellPolicyInput { + executable: string; + command: string[]; + envNames: string[]; + dynamicEnvNames?: string[]; + mounts: ContainerPathMount[]; + runRole: string; +} + +function deny(reason: string): ShellPolicyDecision { + return { + allowed: false, + reason, + status: "FAILED_POLICY", + }; +} + +function normalizeCommandValue(value: string): string { + return value.trim().toLowerCase(); +} + +function normalizeHostPath(value: string): string { + return toPosixPath(value).replace(/\/+$/u, ""); +} + +export class ShellPolicy { + private policyPromise: Promise | null = null; + + constructor(private readonly repoRoot: string) {} + + private async loadPolicy(): Promise { + if (this.policyPromise === null) { + this.policyPromise = readJson(join(this.repoRoot, "adapters", "generic-cli", "adapter-policy.json")); + } + return this.policyPromise; + } + + async evaluate(input: ShellPolicyInput): Promise { + const policy = await this.loadPolicy(); + const shellPolicy = policy.shell_policy ?? {}; + const executableName = basename(input.executable).toLowerCase(); + const blockedCommands = new Set((shellPolicy.blocked_commands ?? []).map((value) => value.toLowerCase())); + if (blockedCommands.has(executableName)) { + return deny(`host shell policy blocked executable: ${executableName}`); + } + + const envAllowlist = (shellPolicy.env_allowlist ?? []).map((value) => new RegExp(value, "u")); + const dynamicEnvNames = new Set(input.dynamicEnvNames ?? []); + for (const envName of input.envNames) { + const dynamicEnvAllowed = + dynamicEnvNames.has(envName) && + /^[A-Z][A-Z0-9_]*$/u.test(envName) && + !DANGEROUS_DYNAMIC_ENV_NAMES.has(envName); + if (!dynamicEnvAllowed && !envAllowlist.some((pattern) => pattern.test(envName))) { + return deny(`host shell policy blocked environment variable: ${envName}`); + } + } + + const dangerousPatterns = (shellPolicy.dangerous_patterns ?? []).map((value) => new RegExp(value, "iu")); + const neverAutoApprovePatterns = (shellPolicy.never_auto_approve_patterns ?? 
[]).map((value) => new RegExp(value, "iu")); + for (const token of input.command) { + const normalizedToken = normalizeCommandValue(token); + if (dangerousPatterns.some((pattern) => pattern.test(token)) || neverAutoApprovePatterns.some((pattern) => pattern.test(token))) { + return deny(`host shell policy blocked command token: ${token}`); + } + if (normalizedToken === "--network=host" || normalizedToken === "--privileged") { + return deny(`host shell policy blocked command token: ${token}`); + } + } + for (let index = 0; index < input.command.length; index += 1) { + if (normalizeCommandValue(input.command[index]) === "--network") { + const networkMode = input.command[index + 1]?.trim().toLowerCase() ?? ""; + if (networkMode !== "none") { + return deny(`host shell policy requires --network none, got ${networkMode || ""}`); + } + } + } + + const writableMounts = input.mounts.filter((mount) => !mount.read_only); + const allowedWritableMounts = new Set(["repo", "run-artifacts", "repo-run-artifacts", "runtime-home", "cache"]); + for (const mount of writableMounts) { + if (!allowedWritableMounts.has(mount.name)) { + return deny(`host shell policy blocked writable mount: ${mount.name}`); + } + if (mount.name === "repo" && normalizeHostPath(mount.host_path) === normalizeHostPath(this.repoRoot)) { + return deny("repo mount must use the job-scoped workspace, not the control repo root"); + } + if (mount.name === "runtime-home" && /\/runtime-home\/phase1-local$/u.test(normalizeHostPath(mount.host_path))) { + return deny(`runtime home must be role-scoped for ${input.runRole}`); + } + } + + return { + allowed: true, + reason: null, + status: null, + }; + } +} diff --git a/ops/workers/builder.ts b/ops/workers/builder.ts index 58e7997..c9bfcc6 100644 --- a/ops/workers/builder.ts +++ b/ops/workers/builder.ts @@ -1,62 +1,103 @@ -import { writeJson, writeText } from "../../core/loop/support.ts"; +import { pathExists, writeJson, writeText } from "../../core/loop/support.ts"; import type { WorkerOutput } from "../../core/contracts/types.ts"; import { emitWorkerOutput, loadWorkerContext } from "./common.ts"; +const CHINESE_TEXT = /[\u4e00-\u9fff]/u; + async function main(): Promise { const { envelope, taskPacket, runRoot } = await loadWorkerContext(); const implementationSummaryPath = `${runRoot}/reports/implementation-summary.en.md`; const selfCheckPath = `${runRoot}/reports/self-check.en.md`; const builderCheckPath = `${runRoot}/evidence/test-results/builder-check.json`; + const implementationSummaryText = [ + "# Implementation Summary", + "", + `- job_id: ${envelope.job_id}`, + `- run_id: ${envelope.run_id}`, + `- story_id: ${taskPacket.story.story_id}`, + `- objective: ${taskPacket.story.story_objective}`, + `- freeze_version: ${taskPacket.freeze_version}`, + `- base_commit: ${taskPacket.base_commit}`, + `- requested_capabilities: ${taskPacket.requested_capabilities.join(", ")}`, + `- expected_artifacts: ${taskPacket.story.expected_artifacts.join(", ")}`, + "", + ].join("\n"); + const selfCheckText = [ + "# Self Check", + "", + "- required checks executed:", + "- approval-context", + "- language-boundary", + "- artifact-presence", + "- evidence-completeness", + "- next required action: run QA against the same story", + "", + ].join("\n"); - await writeText( - implementationSummaryPath, - [ - "# Implementation Summary", - "", - `- job_id: ${envelope.job_id}`, - `- run_id: ${envelope.run_id}`, - `- story_id: ${taskPacket.story.story_id}`, - `- objective: ${taskPacket.story.story_objective}`, - "- completed 
work:", - "- materialized the fixed local Phase 1 builder slice", - "- wrote the required builder reports and evidence under the run root", - "", - ].join("\n"), - ); + await writeText(implementationSummaryPath, implementationSummaryText); + await writeText(selfCheckPath, selfCheckText); - await writeText( - selfCheckPath, - [ - "# Self Check", - "", - "- required checks executed:", - "- scope-compliance", - "- artifact-presence", - "- evidence-completeness", - "- next required action: run QA against the same story", - "", - ].join("\n"), - ); + const producedArtifacts = [ + "reports/implementation-summary.en.md", + "reports/self-check.en.md", + ]; + const missingArtifacts: string[] = []; + for (const relativePath of producedArtifacts) { + if (!(await pathExists(`${runRoot}/${relativePath}`))) { + missingArtifacts.push(relativePath); + } + } + const languageViolations = [ + ["reports/implementation-summary.en.md", implementationSummaryText], + ["reports/self-check.en.md", selfCheckText], + ] + .filter(([, content]) => CHINESE_TEXT.test(content)) + .map(([relativePath]) => relativePath); + const approvalState = String(taskPacket.approval_context.approval_state ?? ""); + const approvalCardId = String(taskPacket.approval_context.approval_card_id ?? ""); + const blockers = [ + ...(approvalState === "DECIDED" ? [] : [`approval context is not decided: ${approvalState || "missing"}`]), + ...missingArtifacts.map((relativePath) => `required builder artifact missing after self-check: ${relativePath}`), + ...languageViolations.map((relativePath) => `non-English repository-facing content: ${relativePath}`), + ]; + const status: WorkerOutput["status"] = blockers.length === 0 ? "SUCCESS" : "FAILED_POLICY"; await writeJson(builderCheckPath, { run_id: envelope.run_id, run_role: envelope.run_role, story_id: taskPacket.story.story_id, - status: "prepared-for-qa", + status: status === "SUCCESS" ? "prepared-for-qa" : "blocked", + approval_context: { + approval_card_id: approvalCardId, + approval_state: approvalState || "missing", + }, + self_checks: { + approval_context: approvalState === "DECIDED" ? "pass" : "fail", + language_boundary: languageViolations.length === 0 ? "pass" : "fail", + artifact_presence: missingArtifacts.length === 0 ? "pass" : "fail", + evidence_completeness: missingArtifacts.length === 0 ? "pass" : "fail", + }, + verification_targets: taskPacket.story.verification_targets, + produced_artifacts: [...producedArtifacts, "evidence/test-results/builder-check.json"], + missing_artifacts: missingArtifacts, + language_violations: languageViolations, checked_items: taskPacket.story.mandatory_checks, }); const output: WorkerOutput = { - status: "SUCCESS", - completed: [ - "Read the fixed local task packet.", - "Wrote the builder reports and local verification evidence.", - ], - open: ["Run QA against the same story."], - blockers: [], - next_action: "run QA", + status, + completed: + status === "SUCCESS" + ? [ + "Read the fixed local task packet and approval context.", + "Wrote the builder reports and local verification evidence.", + ] + : ["Stopped after the builder self-check found contract or policy violations."], + open: status === "SUCCESS" ? ["Run QA against the same story."] : ["Resolve the builder blockers before re-running QA."], + blockers, + next_action: status === "SUCCESS" ? "run QA" : "stop and review builder blockers", acceptance_status: "blocked", - mandatory_check_status: "blocked", + mandatory_check_status: status === "SUCCESS" ? 
"pass" : "fail", evidence_paths: [ "reports/implementation-summary.en.md", "reports/self-check.en.md", diff --git a/ops/workers/qa.ts b/ops/workers/qa.ts index d13789d..5cb0e96 100644 --- a/ops/workers/qa.ts +++ b/ops/workers/qa.ts @@ -1,13 +1,39 @@ -import { readJson, pathExists, writeJson, writeText } from "../../core/loop/support.ts"; -import type { RunResult, WorkerOutput } from "../../core/contracts/types.ts"; +import { collectRelativeFiles, pathExists, readJson, readText, writeJson, writeText } from "../../core/loop/support.ts"; +import type { ArtifactIndex, RunResult, TaskPacket, WorkerOutput } from "../../core/contracts/types.ts"; import { emitWorkerOutput, loadWorkerContext } from "./common.ts"; +const CHINESE_TEXT = /[\u4e00-\u9fff]/u; + +function countAcceptanceStatuses(verdicts: Record): { + pass: number; + fail: number; + blocked: number; + total: number; +} { + const counts = { pass: 0, fail: 0, blocked: 0, total: 0 }; + for (const verdict of Object.values(verdicts)) { + counts.total += 1; + counts[verdict.status] += 1; + } + return counts; +} + async function main(): Promise { const { envelope, taskPacket, runRoot } = await loadWorkerContext(); const builderRunRoot = String(envelope.trace_context.builder_run_root ?? ""); const builderRunResultPath = String(envelope.trace_context.builder_run_result_path ?? ""); + const builderTaskPacketPath = `${builderRunRoot}/metadata/task-packet.en.json`; + const builderArtifactIndexPath = `${builderRunRoot}/metadata/artifact-index.json`; - const requiredBuilderArtifacts = [ + const qaReportPath = `${runRoot}/reports/qa-report.en.md`; + const qaCheckPath = `${runRoot}/evidence/test-results/qa-check.json`; + const qaVerdictPath = `${runRoot}/metadata/qa-verdict.json`; + const fixbackItemsPath = `${runRoot}/reports/fixback-items.en.md`; + const builderTaskPacket = + builderTaskPacketPath && (await pathExists(builderTaskPacketPath)) + ? await readJson(builderTaskPacketPath) + : null; + const requiredBuilderArtifacts = builderTaskPacket?.story.expected_artifacts ?? [ "metadata/task-packet.en.json", "metadata/timings.json", "metadata/run-result.json", @@ -19,71 +45,192 @@ async function main(): Promise { "reports/self-check.en.md", "evidence/test-results/builder-check.json", ]; - - const missingArtifacts: string[] = []; - for (const relativePath of requiredBuilderArtifacts) { - const absolutePath = `${builderRunRoot}/${relativePath}`; - if (!(await pathExists(absolutePath))) { - missingArtifacts.push(relativePath); - } - } - + const builderProducedArtifacts = builderRunRoot && (await pathExists(builderRunRoot)) ? await collectRelativeFiles(builderRunRoot) : []; + const missingArtifacts = requiredBuilderArtifacts.filter((relativePath) => !builderProducedArtifacts.includes(relativePath)); + const builderArtifactIndex = + builderArtifactIndexPath && (await pathExists(builderArtifactIndexPath)) + ? await readJson(builderArtifactIndexPath) + : null; + const indexedArtifacts = new Set((builderArtifactIndex?.artifacts ?? 
[]).map((entry) => entry.path)); + const unindexedArtifacts = builderProducedArtifacts.filter((relativePath) => !indexedArtifacts.has(relativePath)); + const undeclaredArtifacts = builderProducedArtifacts.filter((relativePath) => !requiredBuilderArtifacts.includes(relativePath)); let builderStatus = "UNKNOWN"; + let builderRunResult: RunResult | null = null; if (builderRunResultPath && (await pathExists(builderRunResultPath))) { - const builderResult = await readJson(builderRunResultPath); - builderStatus = builderResult.status; + builderRunResult = await readJson(builderRunResultPath); + builderStatus = builderRunResult.status; } else { missingArtifacts.push("metadata/run-result.json"); } + const reproducibilityIssues = [ + ...(builderTaskPacket === null ? ["missing builder task packet"] : []), + ...(builderTaskPacket !== null && builderTaskPacket.story.story_id !== taskPacket.story.story_id + ? ["builder task packet story does not match QA story"] + : []), + ...(builderTaskPacket !== null && builderTaskPacket.freeze_version !== taskPacket.freeze_version + ? ["builder task packet freeze version does not match QA freeze"] + : []), + ...(builderTaskPacket !== null && builderTaskPacket.base_commit !== taskPacket.base_commit + ? ["builder task packet base commit does not match QA base commit"] + : []), + ...(builderRunResult !== null && builderRunResult.story_id !== taskPacket.story.story_id + ? ["builder run result story does not match QA story"] + : []), + ]; + const textSurfaces = ( + await Promise.all( + ["reports/handoff.en.md", "reports/implementation-summary.en.md", "reports/self-check.en.md"].map(async (relativePath) => { + const absolutePath = `${builderRunRoot}/${relativePath}`; + if (!(await pathExists(absolutePath))) { + return null; + } + return { + relativePath, + content: await readText(absolutePath), + }; + }), + ) + ).filter((value): value is { relativePath: string; content: string } => value !== null); + const languageViolations = textSurfaces + .filter((surface) => CHINESE_TEXT.test(surface.content)) + .map((surface) => surface.relativePath); + const fixbackItems = [ + ...missingArtifacts.map((relativePath) => `Restore builder artifact: ${relativePath}`), + ...unindexedArtifacts.map((relativePath) => `Add artifact-index entry for builder output: ${relativePath}`), + ...undeclaredArtifacts.map((relativePath) => `Move or remove out-of-scope builder artifact: ${relativePath}`), + ...reproducibilityIssues.map((issue) => `Restore reproducibility contract: ${issue}`), + ...languageViolations.map((relativePath) => `Rewrite repository-facing output in English: ${relativePath}`), + ...(builderStatus === "SUCCESS" ? [] : [`Builder did not finish successfully: ${builderStatus}`]), + ]; + const status: WorkerOutput["status"] = + undeclaredArtifacts.length > 0 + ? "CHANGE_REQUEST_REQUIRED" + : fixbackItems.length === 0 + ? "SUCCESS" + : "FIXBACK_REQUIRED"; + const acceptanceVerdicts = Object.fromEntries( + taskPacket.story.acceptance_ids.map((acceptanceId, index) => { + const passEvidence = + index === 0 + ? ["reports/implementation-summary.en.md", "evidence/test-results/builder-check.json"] + : ["reports/qa-report.en.md", "metadata/qa-verdict.json", "evidence/test-results/qa-check.json"]; + const failEvidence = status === "CHANGE_REQUEST_REQUIRED" ? ["reports/fixback-items.en.md", "reports/qa-report.en.md"] : passEvidence; + return [ + acceptanceId, + { + status: status === "SUCCESS" ? "pass" : status === "CHANGE_REQUEST_REQUIRED" ? 
"blocked" : "fail", + evidence_paths: status === "SUCCESS" ? passEvidence : failEvidence, + }, + ]; + }), + ) as Record; + const mandatoryChecks = { + "scope-compliance": { + status: undeclaredArtifacts.length === 0 ? "pass" : "fail", + evidence_paths: + undeclaredArtifacts.length === 0 ? ["metadata/qa-verdict.json"] : ["reports/fixback-items.en.md", "reports/qa-report.en.md"], + }, + "artifact-presence": { + status: missingArtifacts.length === 0 ? "pass" : "fail", + evidence_paths: + missingArtifacts.length === 0 + ? ["reports/qa-report.en.md", "metadata/qa-verdict.json"] + : ["reports/fixback-items.en.md", "reports/qa-report.en.md"], + }, + "evidence-completeness": { + status: unindexedArtifacts.length === 0 ? "pass" : "fail", + evidence_paths: + unindexedArtifacts.length === 0 + ? ["evidence/test-results/qa-check.json", "metadata/qa-verdict.json"] + : ["reports/fixback-items.en.md", "reports/qa-report.en.md"], + }, + "acceptance-closure": { + status: Object.values(acceptanceVerdicts).every((entry) => entry.status === "pass") ? "pass" : "fail", + evidence_paths: ["metadata/qa-verdict.json", "reports/qa-report.en.md"], + }, + }; + const acceptanceClosure = countAcceptanceStatuses(acceptanceVerdicts); + const qaReportText = [ + "# QA Report", + "", + `- job_id: ${envelope.job_id}`, + `- run_id: ${envelope.run_id}`, + `- story_id: ${taskPacket.story.story_id}`, + `- verified builder run root: ${builderRunRoot.replaceAll("\\", "/")}`, + `- builder run status: ${builderStatus}`, + `- QA verdict: ${status}`, + "", + "## Contract Checks", + "", + `- scope-compliance: ${mandatoryChecks["scope-compliance"].status}`, + `- build-or-install reproducibility: ${reproducibilityIssues.length === 0 ? "pass" : "fail"}`, + `- language boundary compliance: ${languageViolations.length === 0 ? "pass" : "fail"}`, + `- evidence completeness: ${mandatoryChecks["evidence-completeness"].status}`, + `- artifact presence: ${mandatoryChecks["artifact-presence"].status}`, + `- acceptance closure: ${mandatoryChecks["acceptance-closure"].status}`, + "", + "- checked artifacts:", + ...requiredBuilderArtifacts.map((value) => `- ${value}`), + "", + "- missing artifacts:", + ...(missingArtifacts.length === 0 ? ["- none"] : missingArtifacts.map((value) => `- ${value}`)), + "", + "- undeclared builder artifacts:", + ...(undeclaredArtifacts.length === 0 ? ["- none"] : undeclaredArtifacts.map((value) => `- ${value}`)), + "", + "- language violations:", + ...(languageViolations.length === 0 ? ["- none"] : languageViolations.map((value) => `- ${value}`)), + "", + "- reproducibility issues:", + ...(reproducibilityIssues.length === 0 ? ["- none"] : reproducibilityIssues.map((value) => `- ${value}`)), + "", + ].join("\n"); - const status = missingArtifacts.length === 0 && builderStatus === "SUCCESS" ? 
"SUCCESS" : "FIXBACK_REQUIRED"; - const qaReportPath = `${runRoot}/reports/qa-report.en.md`; - const qaCheckPath = `${runRoot}/evidence/test-results/qa-check.json`; - const qaVerdictPath = `${runRoot}/metadata/qa-verdict.json`; - const fixbackItemsPath = `${runRoot}/reports/fixback-items.en.md`; - - await writeText( - qaReportPath, - [ - "# QA Report", - "", - `- job_id: ${envelope.job_id}`, - `- run_id: ${envelope.run_id}`, - `- story_id: ${taskPacket.story.story_id}`, - `- verified builder run root: ${builderRunRoot.replaceAll("\\", "/")}`, - `- builder run status: ${builderStatus}`, - `- QA verdict: ${status}`, - "- checked artifacts:", - ...requiredBuilderArtifacts.map((value) => `- ${value}`), - "", - "- missing artifacts:", - ...(missingArtifacts.length === 0 ? ["- none"] : missingArtifacts.map((value) => `- ${value}`)), - "", - ].join("\n"), - ); + await writeText(qaReportPath, qaReportText); await writeJson(qaCheckPath, { run_id: envelope.run_id, run_role: envelope.run_role, story_id: taskPacket.story.story_id, + scope_validation: { + undeclared_builder_artifacts: undeclaredArtifacts, + unindexed_builder_artifacts: unindexedArtifacts, + }, + reproducibility: { + issues: reproducibilityIssues, + builder_task_packet_path: builderTaskPacketPath, + builder_run_result_path: builderRunResultPath, + }, + language_validation: { + violations: languageViolations, + checked_surfaces: textSurfaces.map((surface) => surface.relativePath), + }, verified_builder_artifacts: requiredBuilderArtifacts, missing_builder_artifacts: missingArtifacts, builder_status: builderStatus, }); - const acceptanceClosure = - status === "SUCCESS" - ? { pass: taskPacket.story.acceptance_ids.length, fail: 0, blocked: 0, total: taskPacket.story.acceptance_ids.length } - : { pass: 0, fail: taskPacket.story.acceptance_ids.length, blocked: 0, total: taskPacket.story.acceptance_ids.length }; - await writeJson(qaVerdictPath, { story_id: taskPacket.story.story_id, status_family: "run_exit", status, acceptance_closure: acceptanceClosure, + acceptance_verdicts: acceptanceVerdicts, + mandatory_checks: mandatoryChecks, + scope_validation: { + undeclared_builder_artifacts: undeclaredArtifacts, + unindexed_builder_artifacts: unindexedArtifacts, + }, + reproducibility: { + status: reproducibilityIssues.length === 0 ? "pass" : "fail", + issues: reproducibilityIssues, + }, + language_validation: { + status: languageViolations.length === 0 ? "pass" : "fail", + violations: languageViolations, + }, }); - const fixbackItems = missingArtifacts.map((value) => `Restore builder artifact: ${value}`); if (fixbackItems.length > 0) { await writeText( fixbackItemsPath, @@ -96,14 +243,21 @@ async function main(): Promise { completed: status === "SUCCESS" ? [ - "Verified the builder artifact bundle against the same approved story.", - "Closed acceptance and mandatory checks with QA evidence.", + "Verified the builder artifact bundle against the same approved story and freeze.", + "Closed acceptance, scope, reproducibility, language, and evidence checks with QA evidence.", ] - : ["Ran QA against the builder bundle and found missing required artifacts."], - open: status === "SUCCESS" ? ["Archive the local proof-of-concept story."] : ["Run an in-scope fixback for the missing builder artifacts."], + : status === "CHANGE_REQUEST_REQUIRED" + ? ["Ran QA against the builder bundle and found scope drift."] + : ["Ran QA against the builder bundle and found fixback work inside the active scope."], + open: + status === "SUCCESS" + ? 
["Archive the local proof-of-concept story."] + : status === "CHANGE_REQUEST_REQUIRED" + ? ["Create a change request before continuing."] + : ["Run an in-scope fixback for the active QA findings."], blockers: fixbackItems, - next_action: status === "SUCCESS" ? "archive" : "enter fixback", - acceptance_status: status === "SUCCESS" ? "pass" : "fail", + next_action: status === "SUCCESS" ? "archive" : status === "CHANGE_REQUEST_REQUIRED" ? "request change" : "enter fixback", + acceptance_status: status === "SUCCESS" ? "pass" : status === "CHANGE_REQUEST_REQUIRED" ? "blocked" : "fail", mandatory_check_status: status === "SUCCESS" ? "pass" : "fail", evidence_paths: [ "reports/qa-report.en.md", diff --git a/prompt_template.md b/prompt_template.md index 4cf31d2..7b0ecd1 100644 --- a/prompt_template.md +++ b/prompt_template.md @@ -22,7 +22,7 @@ In scope Out of scope - mobile channel integration -- Wuying automation +- cloud desktop or remote desktop vendor integration - review executor - multi-tenant scheduling - dashboards diff --git a/tests/harness/fixtures/samples.py b/tests/harness/fixtures/samples.py index 5d697d6..8f392a4 100644 --- a/tests/harness/fixtures/samples.py +++ b/tests/harness/fixtures/samples.py @@ -269,14 +269,23 @@ def build_job_manifest( approval_records: list[dict], ) -> dict: job_state = map_run_exit_to_job_state(run_result["status"]) + paused = job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"} + waiting_on = "owner" if job_state == "AWAITING_OWNER" else ("takeover" if job_state == "AWAITING_TAKEOVER" else None) + latest_approval = approval_records[-1] if paused and approval_records else None pause_context = { - "is_paused": job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"}, - "pause_reason": run_result["status"] if job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"} else "", - "waiting_on": "owner" if job_state == "AWAITING_OWNER" else ("takeover" if job_state == "AWAITING_TAKEOVER" else ""), - "resume_action": "resume-run" if job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"} else "", - "paused_at": "2026-04-08T00:05:00Z" if job_state in {"AWAITING_OWNER", "AWAITING_TAKEOVER"} else "", - "related_card_id": approval_records[-1]["card_id"] if approval_records else "", - "expires_at": "", + "is_paused": paused, + "pause_reason": run_result["status"] if paused else None, + "waiting_on": (latest_approval or {}).get("waiting_on", waiting_on) if paused else None, + "resume_action": ( + (latest_approval or {}).get("resume_action") + or (latest_approval or {}).get("requested_action") + or ("Wait for owner input before continuing." 
if waiting_on == "owner" else "Wait for takeover before continuing.") + ) + if paused + else None, + "paused_at": "2026-04-08T00:05:00Z" if paused else None, + "related_card_id": latest_approval["card_id"] if latest_approval else None, + "expires_at": (latest_approval or {}).get("timeout_at") if paused else None, } return { "job_id": job_id, diff --git a/tests/harness/runners/scenario.py b/tests/harness/runners/scenario.py index 78df314..97098d3 100644 --- a/tests/harness/runners/scenario.py +++ b/tests/harness/runners/scenario.py @@ -81,7 +81,7 @@ def _default_mandatory_statuses(exit_status: str) -> dict[str, str]: def _expected_artifacts(scenario: Scenario) -> list[str]: artifacts = [ - "task-packet.json", + "metadata/task-packet.en.json", "logs/command-log.txt", "metadata/run-result.json", "metadata/trace-index.json", @@ -178,7 +178,7 @@ def materialize_scenario(root: Path, scenario: Scenario) -> dict: freeze_json["base_commit"] = "tampered-commit" _write_json(freeze_json_path, freeze_json) - _write_json(run_root / "task-packet.json", task_packet) + _write_json(run_root / "metadata" / "task-packet.en.json", task_packet) acceptance_statuses = scenario.acceptance_statuses or _default_acceptance_statuses(story, scenario.exit_status) mandatory_check_statuses = scenario.mandatory_check_statuses or _default_mandatory_statuses(scenario.exit_status) @@ -241,11 +241,14 @@ def materialize_scenario(root: Path, scenario: Scenario) -> dict: "card_state": approval_card["status"], "card_type": "approval", "requested_action": approval_card["requested_action"], - "decision": "", + "decision": None, "snapshot_path": f"artifacts/runs/{scenario.run_id}/approvals/{scenario.approval_card_id}/approval-card.json", - "decision_path": "", + "decision_path": None, "summary_zh_ref": f"artifacts/runs/{scenario.run_id}/approvals/{scenario.approval_card_id}/summary.zh.md", - "decided_at": "", + "decided_at": None, + "waiting_on": "owner" if scenario.exit_status != "AWAITING_TAKEOVER" else "takeover", + "resume_action": approval_card["requested_action"], + "timeout_at": "2026-04-09T00:00:00Z", } if scenario.approval_decided: decision_payload = build_approval_card( @@ -303,7 +306,7 @@ def materialize_scenario(root: Path, scenario: Scenario) -> dict: freeze_checksum=sha256_file(freeze_path), run_result=run_result, run_root=f"artifacts/runs/{scenario.run_id}", - task_packet_path=f"artifacts/runs/{scenario.run_id}/task-packet.json", + task_packet_path=f"artifacts/runs/{scenario.run_id}/metadata/task-packet.en.json", artifact_index_path=f"artifacts/runs/{scenario.run_id}/metadata/artifact-index.json", handoff_path=f"artifacts/runs/{scenario.run_id}/reports/handoff.en.md", active_story=story, @@ -334,7 +337,7 @@ def materialize_scenario(root: Path, scenario: Scenario) -> dict: "freeze_path": freeze_path, "freeze_json_path": freeze_json_path, "freeze_checksum_path": freeze_checksum_path, - "task_packet_path": run_root / "task-packet.json", + "task_packet_path": run_root / "metadata" / "task-packet.en.json", "run_result_path": run_root / "metadata" / "run-result.json", "trace_index_path": run_root / "metadata" / "trace-index.json", "artifact_index_path": run_root / "metadata" / "artifact-index.json", diff --git a/tests/integration/test_phase1_local_flow.py b/tests/integration/test_phase1_local_flow.py index 7de262f..cf906fd 100644 --- a/tests/integration/test_phase1_local_flow.py +++ b/tests/integration/test_phase1_local_flow.py @@ -2,8 +2,10 @@ import hashlib import json +import os import shutil import subprocess +import 
textwrap from pathlib import Path import pytest @@ -17,13 +19,15 @@ def export_repo(tmp_path: Path) -> Path: repo_root = tmp_path / "repo" repo_root.mkdir() tracked_files = subprocess.run( - ["git", "ls-files"], + ["git", "ls-files", "--cached", "--others", "--exclude-standard"], cwd=REPO_ROOT, capture_output=True, text=True, check=True, ).stdout.splitlines() for relative_path in tracked_files: + if relative_path.startswith(".omx/"): + continue source = REPO_ROOT / relative_path target = repo_root / relative_path target.parent.mkdir(parents=True, exist_ok=True) @@ -31,12 +35,16 @@ def export_repo(tmp_path: Path) -> Path: return repo_root -def run_phase1(repo_root: Path) -> subprocess.CompletedProcess[str]: +def run_phase1(repo_root: Path, extra_env: dict[str, str] | None = None) -> subprocess.CompletedProcess[str]: + env = os.environ.copy() + if extra_env: + env.update(extra_env) return subprocess.run( ["bun", "run", "phase1"], cwd=repo_root, capture_output=True, text=True, + env=env, ) @@ -51,6 +59,418 @@ def dependency_snapshot_digest(repo_root: Path) -> str: return hashlib.sha256("\n".join(inputs).encode("utf-8")).hexdigest() +def init_git_repo(repo_root: Path) -> str: + subprocess.run(["git", "init", "-b", "main"], cwd=repo_root, capture_output=True, text=True, check=True) + subprocess.run(["git", "config", "user.email", "tests@example.com"], cwd=repo_root, capture_output=True, text=True, check=True) + subprocess.run(["git", "config", "user.name", "Tests"], cwd=repo_root, capture_output=True, text=True, check=True) + subprocess.run(["git", "add", "."], cwd=repo_root, capture_output=True, text=True, check=True) + subprocess.run(["git", "commit", "-m", "initial"], cwd=repo_root, capture_output=True, text=True, check=True) + return subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo_root, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + + +def write_fake_docker(tmp_path: Path) -> Path: + script_path = tmp_path / "fake-docker.py" + script_path.write_text( + textwrap.dedent( + """\ + #!/usr/bin/env python3 + import json + import os + import subprocess + import sys + import time + from pathlib import Path + + + def parse_mount(raw: str) -> dict[str, str]: + entry: dict[str, str] = {} + for item in raw.split(","): + if "=" in item: + key, value = item.split("=", 1) + entry[key] = value + else: + entry[item] = "true" + return entry + + + def build_mounts(argv: list[str]) -> list[dict[str, str]]: + mounts: list[dict[str, str]] = [] + index = 0 + while index < len(argv): + if argv[index] == "--mount": + mounts.append(parse_mount(argv[index + 1])) + index += 2 + continue + index += 1 + return mounts + + + def map_path(container_path: str, mounts: list[dict[str, str]]) -> str: + for mount in sorted(mounts, key=lambda item: len(item["target"]), reverse=True): + target = mount["target"].rstrip("/") + if container_path == target: + return mount["source"] + if container_path.startswith(f"{target}/"): + return f"{mount['source']}{container_path[len(target):]}" + return container_path + + + def load_json(path: str) -> dict: + return json.loads(Path(path).read_text(encoding="utf-8")) + + + def write_text(path: Path, value: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(value, encoding="utf-8") + + + def write_json(path: Path, value: dict) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(value, indent=2) + "\\n", encoding="utf-8") + + + def maybe_capture(envelope: dict, task_packet: dict, mounts: 
list[dict[str, str]]) -> None: + capture_dir = os.environ.get("CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR", "").strip() + if not capture_dir: + return + capture_path = Path(capture_dir) / f"{envelope['run_role']}-{envelope['run_id']}.json" + write_json( + capture_path, + { + "envelope": envelope, + "task_packet": task_packet, + "mounts": mounts, + }, + ) + + + def task_packet_for(envelope: dict, mounts: list[dict[str, str]]) -> dict: + task_packet_path = map_path(envelope["task_packet_path"], mounts) + return load_json(task_packet_path) + + + def mutate_head(repo_path: str) -> None: + subprocess.run(["git", "config", "user.email", "tests@example.com"], cwd=repo_path, capture_output=True, text=True, check=True) + subprocess.run(["git", "config", "user.name", "Tests"], cwd=repo_path, capture_output=True, text=True, check=True) + marker = Path(repo_path) / ".phase1-head-shift" + marker.write_text("shift\\n", encoding="utf-8") + subprocess.run(["git", "add", ".phase1-head-shift"], cwd=repo_path, capture_output=True, text=True, check=True) + subprocess.run(["git", "commit", "-m", "builder-shift"], cwd=repo_path, capture_output=True, text=True, check=True) + + + def create_builder_outputs(envelope: dict, task_packet: dict, artifact_root: Path) -> dict: + write_text( + artifact_root / "reports" / "implementation-summary.en.md", + "\\n".join( + [ + "# Implementation Summary", + "", + f"- job_id: {envelope['job_id']}", + f"- run_id: {envelope['run_id']}", + f"- story_id: {task_packet['story']['story_id']}", + "", + ] + ) + + "\\n", + ) + write_text( + artifact_root / "reports" / "self-check.en.md", + "\\n".join( + [ + "# Self Check", + "", + "- required checks executed:", + "- scope-compliance", + "- artifact-presence", + "", + ] + ) + + "\\n", + ) + write_json( + artifact_root / "evidence" / "test-results" / "builder-check.json", + { + "run_id": envelope["run_id"], + "run_role": envelope["run_role"], + "story_id": task_packet["story"]["story_id"], + "status": "prepared-for-qa", + "checked_items": task_packet["story"]["mandatory_checks"], + }, + ) + return { + "status": "SUCCESS", + "completed": ["builder completed"], + "open": ["run QA"], + "blockers": [], + "next_action": "run QA", + "acceptance_status": "blocked", + "mandatory_check_status": "blocked", + "evidence_paths": [ + "reports/implementation-summary.en.md", + "reports/self-check.en.md", + "evidence/test-results/builder-check.json", + ], + "report_paths": [ + "reports/implementation-summary.en.md", + "reports/self-check.en.md", + ], + "test_result_paths": ["evidence/test-results/builder-check.json"], + "fixback_items": [], + } + + + def create_qa_outputs(envelope: dict, task_packet: dict, artifact_root: Path, status: str) -> dict: + write_text( + artifact_root / "reports" / "qa-report.en.md", + "\\n".join( + [ + "# QA Report", + "", + f"- job_id: {envelope['job_id']}", + f"- run_id: {envelope['run_id']}", + f"- story_id: {task_packet['story']['story_id']}", + f"- QA verdict: {status}", + "", + ] + ) + + "\\n", + ) + write_json( + artifact_root / "evidence" / "test-results" / "qa-check.json", + { + "run_id": envelope["run_id"], + "run_role": envelope["run_role"], + "story_id": task_packet["story"]["story_id"], + "status": status, + }, + ) + write_json( + artifact_root / "metadata" / "qa-verdict.json", + { + "story_id": task_packet["story"]["story_id"], + "status_family": "run_exit", + "status": status, + }, + ) + if status == "FIXBACK_REQUIRED": + write_text( + artifact_root / "reports" / "fixback-items.en.md", + "# Fixback Items\\n\\n- Restore 
builder artifact: reports/self-check.en.md\\n", + ) + return { + "status": status, + "completed": ["qa completed"], + "open": ["archive" if status == "SUCCESS" else "enter fixback"], + "blockers": [] if status == "SUCCESS" else ["Restore builder artifact: reports/self-check.en.md"], + "next_action": "archive" if status == "SUCCESS" else "enter fixback", + "acceptance_status": "pass" if status == "SUCCESS" else "fail", + "mandatory_check_status": "pass" if status == "SUCCESS" else "fail", + "evidence_paths": [ + "reports/qa-report.en.md", + "metadata/qa-verdict.json", + "evidence/test-results/qa-check.json", + *([] if status == "SUCCESS" else ["reports/fixback-items.en.md"]), + ], + "report_paths": [ + "reports/qa-report.en.md", + *([] if status == "SUCCESS" else ["reports/fixback-items.en.md"]), + ], + "test_result_paths": ["evidence/test-results/qa-check.json"], + "fixback_items": [] if status == "SUCCESS" else ["Restore builder artifact: reports/self-check.en.md"], + } + + + def main() -> int: + argv = sys.argv[1:] + if not argv: + return 1 + if argv[0] == "image" and len(argv) > 1 and argv[1] == "inspect": + return 1 + if argv[0] == "build": + return 0 + if argv[0] != "run": + return 1 + + mounts = build_mounts(argv) + envelope = load_json(map_path(argv[-1], mounts)) + task_packet = task_packet_for(envelope, mounts) + artifact_root = Path(map_path(envelope["artifact_path"], mounts)) + mode = os.environ.get("CODINGCLAW_FAKE_DOCKER_MODE", "success") + if mode == "slow_success": + time.sleep(float(os.environ.get("CODINGCLAW_FAKE_DOCKER_SLEEP", "1"))) + if mode == "qa_timeout" and envelope["run_role"] == "qa": + time.sleep(float(os.environ.get("CODINGCLAW_FAKE_DOCKER_SLEEP", "1"))) + if mode == "real_worker": + worker_script = map_path(argv[-2], mounts) + container_envelope_path = map_path(argv[-1], mounts) + host_envelope_path = envelope.get("container_runtime", {}).get( + "envelope_host_path", + container_envelope_path, + ) + result = subprocess.run(["bun", worker_script, host_envelope_path], capture_output=True, text=True) + sys.stdout.write(result.stdout) + sys.stderr.write(result.stderr) + return result.returncode + + if envelope["run_role"] == "builder": + output = create_builder_outputs(envelope, task_packet, artifact_root) + if mode == "builder_awaiting_approval": + output["status"] = "AWAITING_APPROVAL" + output["open"] = ["owner approval required"] + output["blockers"] = ["Privileged action requires owner approval"] + output["next_action"] = "approve requested action or choose an alternative" + if mode == "builder_awaiting_credentials": + output["status"] = "AWAITING_CREDENTIALS" + output["open"] = ["credentials required"] + output["blockers"] = ["A credential is required before continuing"] + output["next_action"] = "provide the requested credential or choose an alternative" + if mode == "builder_awaiting_takeover": + output["status"] = "AWAITING_TAKEOVER" + output["open"] = ["manual takeover required"] + output["blockers"] = ["A local GUI step requires governed takeover"] + output["next_action"] = "open the takeover session and archive the result" + if mode == "builder_head_shift": + mutate_head(map_path(envelope["repo_path"], mounts)) + else: + if mode == "qa_failed_infra": + sys.stderr.write("docker: qa container failed before worker start\\n") + return 125 + qa_status = "FIXBACK_REQUIRED" if mode == "qa_fixback" else "SUCCESS" + output = create_qa_outputs(envelope, task_packet, artifact_root, qa_status) + + maybe_capture(envelope, task_packet, mounts) + 
sys.stdout.write(json.dumps(output)) + return 0 + + + raise SystemExit(main()) + """ + ), + encoding="utf-8", + ) + script_path.chmod(0o755) + return script_path + + +def write_time_limit_minutes(repo_root: Path, minutes: float) -> None: + for relative_path in [ + "control/fixtures/phase1-local-run-envelope.json", + "control/fixtures/phase1-local-task-packet.en.json", + ]: + fixture_path = repo_root / relative_path + fixture = json.loads(fixture_path.read_text(encoding="utf-8")) + fixture["time_limits"]["minutes"] = minutes + fixture_path.write_text(json.dumps(fixture, indent=2) + "\n", encoding="utf-8") + + +def write_requested_capabilities(repo_root: Path, requested_capabilities: list[str]) -> None: + for relative_path in [ + "control/fixtures/phase1-local-run-envelope.json", + "control/fixtures/phase1-local-task-packet.en.json", + ]: + fixture_path = repo_root / relative_path + fixture = json.loads(fixture_path.read_text(encoding="utf-8")) + fixture["requested_capabilities"] = requested_capabilities + fixture_path.write_text(json.dumps(fixture, indent=2) + "\n", encoding="utf-8") + + +def remove_capability_manifest(repo_root: Path) -> None: + (repo_root / "adapters" / "generic-cli" / "adapter-capability.json").unlink() + + +def write_plan_approval_decision(repo_root: Path, decision: str) -> None: + decision_path = repo_root / "control" / "fixtures" / "phase1-local-approval-decision.json" + payload = json.loads(decision_path.read_text(encoding="utf-8")) + payload["decision"] = decision + decision_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + + +def load_capture(capture_dir: Path, run_role: str) -> dict: + matches = sorted(capture_dir.glob(f"{run_role}-*.json")) + assert matches + return json.loads(matches[0].read_text(encoding="utf-8")) + + +def assert_builder_failure_bundle(job_root: Path) -> None: + run_roots = sorted(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + builder_run_root = next(path for path in run_roots if path.name.startswith("run-builder-")) + artifact_index = json.loads((builder_run_root / "metadata" / "artifact-index.json").read_text(encoding="utf-8")) + indexed_paths = {entry["path"] for entry in artifact_index["artifacts"]} + + for relative_path in [ + "reports/implementation-summary.en.md", + "reports/self-check.en.md", + "evidence/test-results/builder-check.json", + ]: + assert (builder_run_root / relative_path).exists() + assert relative_path in indexed_paths + + +def assert_recovery_pause_context(manifest: dict, job_root: Path, expected_status: str) -> None: + pause_context = manifest["pause_context"] + assert pause_context["is_paused"] is True + assert pause_context["pause_reason"] == expected_status + assert pause_context["waiting_on"] == "owner" + assert pause_context["resume_action"] + assert pause_context["paused_at"] + assert pause_context["related_card_id"] + assert pause_context["expires_at"] + + recovery_record = manifest["approvals"][-1] + assert recovery_record["card_id"] == pause_context["related_card_id"] + assert recovery_record["card_type"] == "recovery" + assert recovery_record["card_state"] == "PENDING" + assert recovery_record["decision"] is None + assert recovery_record["decision_path"] is None + + recovery_root = job_root / "approvals" / recovery_record["card_id"] + recovery_card = json.loads((recovery_root / "approval-card.json").read_text(encoding="utf-8")) + + assert (recovery_root / "summary.zh.md").exists() + assert not (recovery_root / "decision.json").exists() + assert 
recovery_card["recovery_context"]["last_exit_reason"] == expected_status + assert recovery_card["recovery_context"]["latest_evidence_path"] + assert recovery_card["recovery_context"]["recommended_next_action"] + assert recovery_card["recovery_context"]["resume_gate"] == "owner" + assert pause_context["resume_action"] == recovery_card["recovery_context"]["recommended_next_action"] + + +def assert_recovery_state_mirror(job_root: Path, recovery_card_id: str) -> None: + decisions = (job_root / "state" / "decisions.en.md").read_text(encoding="utf-8") + progress = (job_root / "state" / "progress.en.md").read_text(encoding="utf-8") + risk_register = (job_root / "state" / "risk-register.en.md").read_text(encoding="utf-8") + + assert f"- card_id: {recovery_card_id}" in decisions + assert "Wait for owner input before continuing." in decisions + assert "Wait for owner input before continuing." in progress + assert "fixback" not in risk_register.lower() + assert "owner review is required" in risk_register.lower() + + +def assert_takeover_pause_context(manifest: dict, job_root: Path, expected_status: str) -> None: + pause_context = manifest["pause_context"] + assert pause_context["is_paused"] is True + assert pause_context["pause_reason"] == expected_status + assert pause_context["waiting_on"] == "takeover" + assert pause_context["resume_action"] + assert pause_context["paused_at"] + assert pause_context["related_card_id"] + assert pause_context["expires_at"] + + recovery_record = manifest["approvals"][-1] + assert recovery_record["card_id"] == pause_context["related_card_id"] + assert recovery_record["card_type"] == "recovery" + assert recovery_record["card_state"] == "PENDING" + recovery_card = json.loads((job_root / "approvals" / recovery_record["card_id"] / "approval-card.json").read_text(encoding="utf-8")) + assert recovery_card["recovery_context"]["resume_gate"] == "takeover" + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_rerun_rejects_mutating_existing_archive(tmp_path): @@ -76,6 +496,25 @@ def test_phase1_local_rerun_rejects_mutating_existing_archive(tmp_path): assert run_dirs_before == sorted(path.name for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_requires_approved_plan_decision_before_freeze_and_execution(tmp_path): + repo_root = export_repo(tmp_path) + write_plan_approval_decision(repo_root, "reject") + result = run_phase1(repo_root) + combined_output = "\n".join(part for part in [result.stdout, result.stderr] if part) + + assert result.returncode != 0 + assert "requires an approved Development Plan" in combined_output + + job_root = repo_root / "jobs" / "job-phase1-local" + assert (job_root / "DEVELOPMENT_PLAN.en.md").exists() + assert (job_root / "approvals" / "card-phase1-local-001" / "approval-card.json").exists() + assert (job_root / "approvals" / "card-phase1-local-001" / "decision.json").exists() + assert not (job_root / "contract-freeze.json").exists() + assert not (job_root / "job-manifest.json").exists() + + @pytest.mark.integration @pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") def test_phase1_local_freeze_digest_captures_repo_dependency_inputs(tmp_path): @@ -87,3 +526,462 @@ def test_phase1_local_freeze_digest_captures_repo_dependency_inputs(tmp_path): freeze = json.loads((repo_root / "jobs" / "job-phase1-local" / 
"contract-freeze.json").read_text(encoding="utf-8")) assert freeze["dependency_snapshot_digest"] == dependency_snapshot_digest(repo_root) + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_success_records_contract_checks_and_acceptance_mapping(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": "real_worker", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + run_roots = sorted(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + builder_run_root = next(path for path in run_roots if path.name.startswith("run-builder-")) + qa_run_root = next(path for path in run_roots if path.name.startswith("run-qa-")) + builder_check = json.loads((builder_run_root / "evidence" / "test-results" / "builder-check.json").read_text(encoding="utf-8")) + qa_verdict = json.loads((qa_run_root / "metadata" / "qa-verdict.json").read_text(encoding="utf-8")) + builder_state_snapshot = json.loads((builder_run_root / "metadata" / "state" / "trace-index.json").read_text(encoding="utf-8")) + qa_state_snapshot = json.loads((qa_run_root / "metadata" / "state" / "trace-index.json").read_text(encoding="utf-8")) + + assert builder_check["self_checks"]["approval_context"] == "pass" + assert builder_check["self_checks"]["artifact_presence"] == "pass" + assert builder_check["language_violations"] == [] + assert sorted(qa_verdict["acceptance_verdicts"]) == ["ACC-PHASE1-BUILDER", "ACC-PHASE1-QA"] + assert qa_verdict["reproducibility"]["status"] == "pass" + assert qa_verdict["language_validation"]["status"] == "pass" + assert qa_verdict["scope_validation"]["undeclared_builder_artifacts"] == [] + assert qa_verdict["mandatory_checks"]["acceptance-closure"]["status"] == "pass" + assert builder_state_snapshot["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "builder" + assert qa_state_snapshot["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "qa" + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_builder_container_materialization_uses_read_only_inputs_and_container_paths(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + capture_dir = tmp_path / "captures" + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR": str(capture_dir), + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + builder_capture = load_capture(capture_dir, "builder") + builder_task_packet = builder_capture["task_packet"] + builder_envelope = builder_capture["envelope"] + container_paths = builder_envelope["container_runtime"]["container_paths"] + mounts = {mount["target"]: mount for mount in builder_capture["mounts"]} + job_root = repo_root / "jobs" / "job-phase1-local" + + assert builder_task_packet["repo_path"] == (job_root / "repo").as_posix() + assert builder_task_packet["state_path"] == (job_root / "state").as_posix() + assert builder_task_packet["runtime_home"] == (job_root / "runtime-home" / "phase1-local" / "builder").as_posix() + assert builder_task_packet["artifact_path"] == (job_root / "artifacts" / "runs" / builder_envelope["run_id"]).as_posix() + assert "container_control" in 
builder_task_packet["requested_capabilities"] + assert container_paths["repo_path"] == "/work/repo" + assert container_paths["state_path"] == "/work/state" + assert container_paths["runtime_home"] == "/work/runtime-home" + assert container_paths["artifact_path"] == builder_envelope["artifact_path"] + assert container_paths["task_packet_path"] == builder_envelope["task_packet_path"] + assert builder_envelope["task_packet_path"].endswith(f"/artifacts/runs/{builder_envelope['run_id']}/metadata/task-packet.en.json") + + capability_manifest = json.loads( + (repo_root / "adapters" / "generic-cli" / "adapter-capability.json").read_text(encoding="utf-8") + ) + filesystem_write_scope = set(capability_manifest["capabilities"]["filesystem_write"]["scope"]) + task_packet_target = f"{builder_envelope['artifact_path']}/metadata/task-packet.en.json" + mount_sources = {mount["target"]: mount["source"] for mount in builder_capture["mounts"]} + + assert mount_sources["/work/repo"] == (job_root / "repo").as_posix() + assert mounts["/work/repo"].get("readonly") != "true" + assert mounts["/work/state"]["readonly"] == "true" + assert mounts["/work/artifacts"]["readonly"] == "true" + assert mounts[builder_envelope["artifact_path"]].get("readonly") != "true" + assert mounts[task_packet_target]["readonly"] == "true" + assert not any(target.startswith("/work/repo/jobs/") for target in mounts) + assert mounts["/work/runtime-home"].get("readonly") != "true" + assert mount_sources["/work/runtime-home"] == (job_root / "runtime-home" / "phase1-local" / "builder").as_posix() + assert {"run-artifacts", "runtime-home"}.issubset(filesystem_write_scope) + assert all(source != repo_root.as_posix() for source in mount_sources.values()) + + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + freeze = json.loads((job_root / "contract-freeze.json").read_text(encoding="utf-8")) + builder_run = next(run for run in manifest["runs"] if run["run_role"] == "builder") + expected_task_packet_path = builder_run["task_packet_path"] + + assert expected_task_packet_path == f"artifacts/runs/{builder_envelope['run_id']}/metadata/task-packet.en.json" + assert json.loads((job_root / expected_task_packet_path).read_text(encoding="utf-8")) == builder_task_packet + assert not (job_root / "runtime-home" / "phase1-local" / "builder" / "envelopes" / "container" / "task-packets").exists() + assert expected_task_packet_path in (job_root / "checksums.txt").read_text(encoding="utf-8") + assert freeze["task_packet_digests"][builder_envelope["run_id"]] == builder_task_packet["task_packet_sha256"] + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_qa_packet_keeps_freeze_base_commit_when_builder_moves_head(tmp_path): + repo_root = export_repo(tmp_path) + initial_commit = init_git_repo(repo_root) + fake_docker = write_fake_docker(tmp_path) + capture_dir = tmp_path / "captures" + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": "builder_head_shift", + "CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR": str(capture_dir), + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + current_head = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo_root, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + job_root = repo_root / "jobs" / "job-phase1-local" + worker_repo_head = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=job_root / "repo", + 
capture_output=True, + text=True, + check=True, + ).stdout.strip() + freeze = json.loads((job_root / "contract-freeze.json").read_text(encoding="utf-8")) + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + qa_capture = load_capture(capture_dir, "qa") + + assert current_head == initial_commit + assert worker_repo_head != initial_commit + assert freeze["base_commit"] == initial_commit + assert manifest["base_commit"] == initial_commit + assert qa_capture["task_packet"]["base_commit"] == initial_commit + assert qa_capture["task_packet"]["base_commit"] != worker_repo_head + assert freeze["task_packet_digests"][qa_capture["task_packet"]["run_id"]] == qa_capture["task_packet"]["task_packet_sha256"] + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_builder_failed_infra_stops_before_qa(tmp_path): + repo_root = export_repo(tmp_path) + result = run_phase1(repo_root, {"CODINGCLAW_DOCKER_BIN": "does-not-exist"}) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + freeze = json.loads((job_root / "contract-freeze.json").read_text(encoding="utf-8")) + archived_trace = json.loads((job_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + live_trace = json.loads((repo_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + qa_run_id = next(run_id for run_id in freeze["task_packet_digests"] if run_id.startswith("run-qa-")) + qa_task_packet_path = job_root / "artifacts" / "runs" / qa_run_id / "metadata" / "task-packet.en.json" + qa_task_packet = json.loads(qa_task_packet_path.read_text(encoding="utf-8")) + + assert [run["run_role"] for run in manifest["runs"]] == ["builder"] + assert [run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_INFRA"] + assert manifest["current_run_id"] == manifest["runs"][0]["run_id"] + assert manifest["stories"][0]["latest_run_role"] == "builder" + assert manifest["stories"][0]["latest_run_status"] == "FAILED_INFRA" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "builder" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == "PENDING" + assert live_trace == archived_trace + assert qa_task_packet["task_packet_sha256"] == freeze["task_packet_digests"][qa_run_id] + assert f"artifacts/runs/{qa_run_id}/metadata/task-packet.en.json" in (job_root / "checksums.txt").read_text(encoding="utf-8") + assert sorted(path.relative_to(job_root / "artifacts" / "runs" / qa_run_id).as_posix() for path in qa_task_packet_path.parent.rglob("*") if path.is_file()) == [ + "metadata/task-packet.en.json" + ] + assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() + assert_builder_failure_bundle(job_root) + assert_recovery_pause_context(manifest, job_root, "FAILED_INFRA") + assert_recovery_state_mirror(job_root, manifest["pause_context"]["related_card_id"]) + + +@pytest.mark.integration +@pytest.mark.parametrize( + ("mode", "expected_status", "expected_capability"), + [ + ("builder_awaiting_approval", "AWAITING_APPROVAL", "interactive_approval"), + ("builder_awaiting_credentials", "AWAITING_CREDENTIALS", "secret_injection"), + ], +) +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_waiting_recovery_card_preserves_approval_request_details( + tmp_path, + mode, + 
expected_status, + expected_capability, +): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": mode, + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + recovery_record = manifest["approvals"][-1] + recovery_card = json.loads( + (job_root / "approvals" / recovery_record["card_id"] / "approval-card.json").read_text(encoding="utf-8") + ) + approval_request = recovery_card["approval_request"] + + assert [run["run_role"] for run in manifest["runs"]] == ["builder"] + assert [run["run_exit_status"] for run in manifest["runs"]] == [expected_status] + assert manifest["status"] == "AWAITING_OWNER" + assert manifest["pause_context"]["waiting_on"] == "owner" + assert approval_request["run_id"] == manifest["runs"][0]["run_id"] + assert approval_request["run_role"] == "builder" + assert approval_request["requested_capability"] == expected_capability + assert approval_request["reason"] + assert approval_request["suggested_alternatives"] + assert approval_request == recovery_record["approval_request"] + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_capability_gate_stops_undeclared_or_denied_requests_before_worker_launch(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + capture_dir = tmp_path / "captures" + write_requested_capabilities( + repo_root, + ["filesystem_read", "filesystem_write", "shell_command", "container_control", "browser"], + ) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR": str(capture_dir), + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + builder_run_root = job_root / next(run["root"] for run in manifest["runs"] if run["run_role"] == "builder") + command_log = (builder_run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") + + assert [run["run_role"] for run in manifest["runs"]] == ["builder"] + assert [run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_POLICY"] + assert manifest["status"] == "AWAITING_OWNER" + assert " browser container_control filesystem_read filesystem_write shell_command" in command_log + assert not capture_dir.exists() or not list(capture_dir.iterdir()) + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_capability_gate_manifest_load_failure_returns_failed_policy(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + capture_dir = tmp_path / "captures" + remove_capability_manifest(repo_root) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_CAPTURE_DIR": str(capture_dir), + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + builder_run_root = job_root / next(run["root"] for run in manifest["runs"] if run["run_role"] == "builder") + 
command_log = (builder_run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") + + assert [run["run_exit_status"] for run in manifest["runs"]] == ["FAILED_POLICY"] + assert manifest["status"] == "AWAITING_OWNER" + assert "capability gate could not load adapter policy" in command_log + assert not capture_dir.exists() or not list(capture_dir.iterdir()) + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_takeover_run_writes_takeover_packet_and_manifest_reference(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": "builder_awaiting_takeover", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + run_record = manifest["runs"][0] + takeover_packet_path = job_root / run_record["takeover_packet_path"] + takeover_packet = takeover_packet_path.read_text(encoding="utf-8") + recovery_card = json.loads((job_root / "approvals" / manifest["pause_context"]["related_card_id"] / "approval-card.json").read_text(encoding="utf-8")) + artifact_index = json.loads((job_root / run_record["artifact_index_path"]).read_text(encoding="utf-8")) + indexed_paths = {entry["path"] for entry in artifact_index["artifacts"]} + + assert [run["run_exit_status"] for run in manifest["runs"]] == ["AWAITING_TAKEOVER"] + assert manifest["status"] == "AWAITING_TAKEOVER" + assert takeover_packet_path.exists() + assert run_record["takeover_packet_path"].endswith("takeover/takeover-packet.en.md") + assert "takeover/takeover-packet.en.md" in indexed_paths + assert_takeover_pause_context(manifest, job_root, "AWAITING_TAKEOVER") + assert manifest["pause_context"]["related_card_id"] in takeover_packet + assert recovery_card["timeout_at"] == manifest["pause_context"]["expires_at"] + assert recovery_card["timeout_at"] in takeover_packet + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_builder_failed_infra_command_log_records_docker_run(tmp_path): + repo_root = export_repo(tmp_path) + result = run_phase1(repo_root, {"CODINGCLAW_DOCKER_BIN": "does-not-exist"}) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + run_roots = sorted(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + run_root = next(path for path in run_roots if path.name.startswith("run-builder-")) + command_log = (run_root / "logs" / "command-log.txt").read_text(encoding="utf-8") + + assert "command: does-not-exist run --rm --network none" in command_log + assert " image inspect " not in command_log + assert " build --file " not in command_log + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_qa_fixback_closes_without_final_summary(tmp_path): + repo_root = export_repo(tmp_path) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": "qa_fixback", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / 
"job-manifest.json").read_text(encoding="utf-8")) + archived_trace = json.loads((job_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + live_trace = json.loads((repo_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + + assert [run["run_role"] for run in manifest["runs"]] == ["builder", "qa"] + assert [run["run_exit_status"] for run in manifest["runs"]] == ["SUCCESS", "FIXBACK_REQUIRED"] + assert manifest["status"] == "FIXBACK_PENDING" + assert manifest["stories"][0]["latest_run_role"] == "qa" + assert manifest["stories"][0]["latest_run_status"] == "FIXBACK_REQUIRED" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "qa" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == "FIXBACK_REQUIRED" + assert live_trace == archived_trace + assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() + + +@pytest.mark.integration +@pytest.mark.parametrize( + ("mode", "expected_status"), + [("qa_failed_infra", "FAILED_INFRA"), ("qa_timeout", "TIMEOUT")], +) +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_qa_non_success_writes_required_qa_bundle(tmp_path, mode, expected_status): + repo_root = export_repo(tmp_path) + if mode == "qa_timeout": + write_time_limit_minutes(repo_root, 0.001) + fake_docker = write_fake_docker(tmp_path) + result = run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": mode, + "CODINGCLAW_FAKE_DOCKER_SLEEP": "1", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + archived_trace = json.loads((job_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + run_roots = sorted(path for path in (job_root / "artifacts" / "runs").iterdir() if path.is_dir()) + qa_run_root = run_roots[-1] + artifact_index = json.loads((qa_run_root / "metadata" / "artifact-index.json").read_text(encoding="utf-8")) + indexed_paths = {entry["path"] for entry in artifact_index["artifacts"]} + + assert [run["run_role"] for run in manifest["runs"]] == ["builder", "qa"] + assert [run["run_exit_status"] for run in manifest["runs"]] == ["SUCCESS", expected_status] + assert manifest["status"] == "AWAITING_OWNER" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "qa" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == expected_status + assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() + + for relative_path in [ + "reports/qa-report.en.md", + "reports/fixback-items.en.md", + "metadata/qa-verdict.json", + "evidence/test-results/qa-check.json", + ]: + assert (qa_run_root / relative_path).exists() + assert relative_path in indexed_paths + + qa_verdict = json.loads((qa_run_root / "metadata" / "qa-verdict.json").read_text(encoding="utf-8")) + assert qa_verdict["status"] == expected_status + assert_recovery_pause_context(manifest, job_root, expected_status) + assert_recovery_state_mirror(job_root, manifest["pause_context"]["related_card_id"]) + + +@pytest.mark.integration +@pytest.mark.skipif(shutil.which("bun") is None, reason="bun is required") +def test_phase1_local_builder_timeout_stops_before_qa(tmp_path): + repo_root = export_repo(tmp_path) + write_time_limit_minutes(repo_root, 0) + fake_docker = write_fake_docker(tmp_path) + result = 
run_phase1( + repo_root, + { + "CODINGCLAW_DOCKER_BIN": str(fake_docker), + "CODINGCLAW_FAKE_DOCKER_MODE": "slow_success", + "CODINGCLAW_FAKE_DOCKER_SLEEP": "1", + }, + ) + + assert result.returncode == 0, result.stderr or result.stdout + + job_root = repo_root / "jobs" / "job-phase1-local" + manifest = json.loads((job_root / "job-manifest.json").read_text(encoding="utf-8")) + archived_trace = json.loads((job_root / "state" / "trace-index.json").read_text(encoding="utf-8")) + + assert [run["run_role"] for run in manifest["runs"]] == ["builder"] + assert [run["run_exit_status"] for run in manifest["runs"]] == ["TIMEOUT"] + assert manifest["status"] == "AWAITING_OWNER" + assert manifest["stories"][0]["latest_run_role"] == "builder" + assert manifest["stories"][0]["latest_run_status"] == "TIMEOUT" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_run_role"] == "builder" + assert archived_trace["stories"]["STORY-PHASE1-LOCAL-001"]["latest_qa_status"] == "PENDING" + assert not (job_root / "artifacts" / "final" / "final-summary.en.md").exists() + assert_builder_failure_bundle(job_root)