From 7b240af1021e2e79ce37ba1eb580d128da4e83d5 Mon Sep 17 00:00:00 2001 From: jasonli0226 Date: Tue, 2 Jun 2026 01:39:45 +0800 Subject: [PATCH] feat: add subagent run limits, history visibility, and WS origin checks - engine: enforce policy maxSubagentRunMs and reap stale runs; add history-visibility filtering so hidden session messages are excluded from model history - api: share CORS allowlist between HTTP and the WebSocket gateway via getAllowedOrigins(); validate WS Origin on connect - db: persist hiddenInHistory on session messages and add the policy/max-subagent-run-ms migration - web: add bound-agents view, chat input/thread improvements, and channels-tab updates - docs/infra: clarify Postgres env vars in .env.example, mount nest-cli.json in dev compose --- .env.example | 8 +- docker-compose.dev.yml | 1 + packages/api/nest-cli.json | 2 + .../migration.sql | 2 + .../migration.sql | 2 + packages/api/prisma/schema.prisma | 12 +- packages/api/prisma/seed.ts | 6 + packages/api/src/bootstrap.ts | 60 ++++++++ .../web/__tests__/web.gateway.test.ts | 20 ++- packages/api/src/channels/web/web.gateway.ts | 26 ++++ .../chat/__tests__/chat.controller.test.ts | 21 ++- packages/api/src/chat/chat.controller.ts | 8 +- packages/api/src/common/security.config.ts | 19 ++- .../db/__tests__/agent-run.repository.test.ts | 27 ++++ packages/api/src/db/agent-run.repository.ts | 9 ++ .../__tests__/agent-runner.service.test.ts | 2 + .../__tests__/history-visibility.test.ts | 55 ++++++++ .../session-message-store.test.ts | 24 +++- .../__tests__/session-manager.service.test.ts | 43 ++++++ .../src/engine/__tests__/spawn-tool.test.ts | 1 + .../stale-run-reaper.service.test.ts | 32 ++++- .../__tests__/task-executor.service.test.ts | 132 ++++++++++++++++++ .../api/src/engine/agent-runner.service.ts | 12 +- packages/api/src/engine/agent-runner.types.ts | 8 +- packages/api/src/engine/history-visibility.ts | 39 ++++++ .../src/engine/message-store/message-store.ts | 15 +- .../message-store/session-message-store.ts | 9 +- .../__tests__/anthropic-provider.test.ts | 83 ++++++++--- .../engine/providers/anthropic-provider.ts | 12 +- .../api/src/engine/session-manager.service.ts | 3 + .../src/engine/stale-run-reaper.service.ts | 28 +++- .../api/src/engine/task-executor.service.ts | 116 ++++++++++++++- packages/api/src/engine/tools/spawn.ts | 9 ++ packages/web/next.config.ts | 8 ++ .../__tests__/bound-agents.test.ts | 27 ++++ .../agents/user-agents/bound-agents.ts | 23 +++ .../(dashboard)/agents/user-agents/page.tsx | 8 +- .../__tests__/chat-input.test.tsx | 98 +++++++++++++ .../__tests__/chat-thread.test.ts | 59 ++++++++ .../(dashboard)/conversations/chat-input.tsx | 39 +++++- .../(dashboard)/conversations/chat-thread.tsx | 81 ++++++++++- .../app/(dashboard)/conversations/use-chat.ts | 7 +- .../settings/__tests__/channels-tab.test.ts | 39 ++++++ .../app/(dashboard)/settings/channels-tab.tsx | 8 +- .../src/components/ui/vanta-background.tsx | 22 ++- packages/web/src/lib/__tests__/api.test.ts | 44 ++++++ packages/web/src/lib/api.ts | 4 +- skills/builtin/projector-creator/SKILL.md | 12 +- 48 files changed, 1247 insertions(+), 78 deletions(-) create mode 100644 packages/api/prisma/migrations/20260529000000_add_policy_max_subagent_run_ms/migration.sql create mode 100644 packages/api/prisma/migrations/20260530054338_add_session_message_hidden_in_history/migration.sql create mode 100644 packages/api/src/engine/__tests__/history-visibility.test.ts create mode 100644 packages/api/src/engine/history-visibility.ts create mode 100644 packages/web/src/app/(dashboard)/agents/user-agents/__tests__/bound-agents.test.ts create mode 100644 packages/web/src/app/(dashboard)/agents/user-agents/bound-agents.ts create mode 100644 packages/web/src/app/(dashboard)/conversations/__tests__/chat-input.test.tsx create mode 100644 packages/web/src/app/(dashboard)/conversations/__tests__/chat-thread.test.ts create mode 100644 packages/web/src/app/(dashboard)/settings/__tests__/channels-tab.test.ts create mode 100644 packages/web/src/lib/__tests__/api.test.ts diff --git a/.env.example b/.env.example index 4d3c677..eac4c11 100644 --- a/.env.example +++ b/.env.example @@ -5,7 +5,13 @@ # Database DATABASE_URL="postgresql://clawix:clawix_dev@localhost:5433/clawix?schema=public" -# Production: set POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB env vars +# Production (docker-compose.prod.yml): these are interpolated at compose-parse +# time, so they must live in .env (not just env_file). POSTGRES_PASSWORD is +# required — `docker compose -f docker-compose.prod.yml build` fails without it. +# POSTGRES_USER and POSTGRES_DB default to "clawix" if unset. +# POSTGRES_USER=clawix +# POSTGRES_PASSWORD=change-me-strong-secret +# POSTGRES_DB=clawix # Redis REDIS_URL="redis://localhost:6379" diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 851b5c0..4bbadf2 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -51,6 +51,7 @@ services: - ./packages/api/prisma.config.ts:/app/packages/api/prisma.config.ts - ./packages/api/package.json:/app/packages/api/package.json - ./packages/api/tsconfig.json:/app/packages/api/tsconfig.json + - ./packages/api/nest-cli.json:/app/packages/api/nest-cli.json - ./packages/shared/src:/app/packages/shared/src - ./packages/shared/package.json:/app/packages/shared/package.json - ./packages/shared/tsconfig.json:/app/packages/shared/tsconfig.json diff --git a/packages/api/nest-cli.json b/packages/api/nest-cli.json index 97c557e..be97735 100644 --- a/packages/api/nest-cli.json +++ b/packages/api/nest-cli.json @@ -4,6 +4,8 @@ "sourceRoot": "src", "compilerOptions": { "deleteOutDir": true, + "assets": ["**/*.md"], + "watchAssets": true, "tsConfigPath": "tsconfig.json" } } diff --git a/packages/api/prisma/migrations/20260529000000_add_policy_max_subagent_run_ms/migration.sql b/packages/api/prisma/migrations/20260529000000_add_policy_max_subagent_run_ms/migration.sql new file mode 100644 index 0000000..d895273 --- /dev/null +++ b/packages/api/prisma/migrations/20260529000000_add_policy_max_subagent_run_ms/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "Policy" ADD COLUMN "maxSubAgentRunMs" INTEGER NOT NULL DEFAULT 300000; diff --git a/packages/api/prisma/migrations/20260530054338_add_session_message_hidden_in_history/migration.sql b/packages/api/prisma/migrations/20260530054338_add_session_message_hidden_in_history/migration.sql new file mode 100644 index 0000000..1100b69 --- /dev/null +++ b/packages/api/prisma/migrations/20260530054338_add_session_message_hidden_in_history/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "SessionMessage" ADD COLUMN "hiddenInHistory" BOOLEAN NOT NULL DEFAULT false; diff --git a/packages/api/prisma/schema.prisma b/packages/api/prisma/schema.prisma index d2e5947..3ba96cb 100644 --- a/packages/api/prisma/schema.prisma +++ b/packages/api/prisma/schema.prisma @@ -43,6 +43,7 @@ model Policy { maxPythonTimeoutSecs Int @default(60) maxPythonCpuCores Int @default(1) maxConcurrentPythonRuns Int @default(2) + maxSubAgentRunMs Int @default(300000) // wall-clock cap per spawned sub-agent run (ms) isActive Boolean @default(true) createdAt DateTime @default(now()) updatedAt DateTime @updatedAt @@ -347,6 +348,11 @@ model SessionMessage { ordering Int createdAt DateTime @default(now()) archivedAt DateTime? + // When true, this message is omitted from the chat-history display endpoint. + // Set on intermediate reasoning steps of a non-streamed run so reopened + // history mirrors the single combined reply the user saw live. The row is + // still persisted in full (needed to reconstruct conversation context). + hiddenInHistory Boolean @default(false) session Session @relation(fields: [sessionId], references: [id], onDelete: Cascade) @@ -517,9 +523,9 @@ model WikiShare { revokedAt DateTime? isRevoked Boolean @default(false) - page WikiPage @relation(fields: [pageId], references: [id], onDelete: Cascade) - group Group? @relation(fields: [groupId], references: [id], onDelete: Cascade) - sharedByUser User? @relation("WikiSharedBy", fields: [sharedBy], references: [id], onDelete: SetNull) + page WikiPage @relation(fields: [pageId], references: [id], onDelete: Cascade) + group Group? @relation(fields: [groupId], references: [id], onDelete: Cascade) + sharedByUser User? @relation("WikiSharedBy", fields: [sharedBy], references: [id], onDelete: SetNull) @@index([pageId, isRevoked]) @@index([groupId, isRevoked]) diff --git a/packages/api/prisma/seed.ts b/packages/api/prisma/seed.ts index 4b7e807..698631c 100644 --- a/packages/api/prisma/seed.ts +++ b/packages/api/prisma/seed.ts @@ -131,6 +131,7 @@ async function main(): Promise { maxPythonTimeoutSecs: 60, maxPythonCpuCores: 1, maxConcurrentPythonRuns: 2, + maxSubAgentRunMs: 300000, // 5 min }, create: { name: 'Standard', @@ -149,6 +150,7 @@ async function main(): Promise { maxPythonTimeoutSecs: 60, maxPythonCpuCores: 1, maxConcurrentPythonRuns: 2, + maxSubAgentRunMs: 300000, // 5 min }, }); console.log(` Policy: ${standardPolicy.name}`); @@ -163,6 +165,7 @@ async function main(): Promise { maxPythonTimeoutSecs: 300, maxPythonCpuCores: 2, maxConcurrentPythonRuns: 3, + maxSubAgentRunMs: 480000, // 8 min }, create: { name: 'Extended', @@ -181,6 +184,7 @@ async function main(): Promise { maxPythonTimeoutSecs: 300, maxPythonCpuCores: 2, maxConcurrentPythonRuns: 3, + maxSubAgentRunMs: 480000, // 8 min }, }); console.log(` Policy: ${extendedPolicy.name}`); @@ -195,6 +199,7 @@ async function main(): Promise { maxPythonTimeoutSecs: 600, maxPythonCpuCores: 4, maxConcurrentPythonRuns: 5, + maxSubAgentRunMs: 540000, // 9 min (kept under the 10-min stale-run reaper) }, create: { name: 'Unrestricted', @@ -213,6 +218,7 @@ async function main(): Promise { maxPythonTimeoutSecs: 600, maxPythonCpuCores: 4, maxConcurrentPythonRuns: 5, + maxSubAgentRunMs: 540000, // 9 min (kept under the 10-min stale-run reaper) }, }); console.log(` Policy: ${unrestrictedPolicy.name}`); diff --git a/packages/api/src/bootstrap.ts b/packages/api/src/bootstrap.ts index 7c00682..ec40ee1 100644 --- a/packages/api/src/bootstrap.ts +++ b/packages/api/src/bootstrap.ts @@ -242,6 +242,66 @@ async function main(): Promise { })); console.log(`[bootstrap] Primary agent: ${primaryAgent.name}`); + // --- Named workers (coder, researcher) — created only if missing --- + // These mirror the dev `seed.ts` workers so that production deployments + // (which run bootstrap, not the seed) can spawn named sub-agents. Skills + // such as projector-creator spawn `agent_name="coder"`; without this row + // the named spawn fails and silently falls back to the anonymous + // default-worker (losing the worker's specialized system prompt). + const namedWorkers = [ + { + name: 'coder', + description: 'Writes, reviews, and tests code — optimized for code generation', + systemPrompt: + 'You are a skilled software engineer. Write clean, complete, functional code. Never use placeholders or TODO comments. Always verify your output is complete. Use the tools available to read, write, and execute code in the workspace.', + maxTokensPerRun: 100000, + containerConfig: { + image: process.env['AGENT_CONTAINER_IMAGE'] ?? 'clawix-agent:latest', + cpuLimit: '1', + memoryLimit: '512m', + timeoutSeconds: 300, + readOnlyRootfs: false, + allowedMounts: [], + }, + }, + { + name: 'researcher', + description: 'Searches the web and summarizes findings', + systemPrompt: + 'You are a research specialist. Search the web for information, analyze sources, and provide clear, well-organized summaries with citations.', + maxTokensPerRun: 50000, + containerConfig: { + image: process.env['AGENT_CONTAINER_IMAGE'] ?? 'clawix-agent:latest', + cpuLimit: '0.5', + memoryLimit: '256m', + timeoutSeconds: 120, + readOnlyRootfs: true, + allowedMounts: [], + }, + }, + ]; + for (const worker of namedWorkers) { + const existingWorker = await prisma.agentDefinition.findFirst({ + where: { name: worker.name, role: 'worker' }, + }); + if (!existingWorker) { + await prisma.agentDefinition.create({ + data: { + name: worker.name, + description: worker.description, + systemPrompt: worker.systemPrompt, + role: 'worker', + provider: defaultProvider, + model: defaultModel, + maxTokensPerRun: worker.maxTokensPerRun, + containerConfig: worker.containerConfig, + isActive: true, + }, + }); + console.log(`[bootstrap] Worker seeded: ${worker.name}`); + } + } + // --- Default worker (only if none exists) --- const existingDefaultWorker = await prisma.agentDefinition.findFirst({ where: { name: 'default-worker', role: 'worker' }, diff --git a/packages/api/src/channels/web/__tests__/web.gateway.test.ts b/packages/api/src/channels/web/__tests__/web.gateway.test.ts index 74d13b7..4a72696 100644 --- a/packages/api/src/channels/web/__tests__/web.gateway.test.ts +++ b/packages/api/src/channels/web/__tests__/web.gateway.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import type { IncomingMessage } from 'node:http'; -import { WebChatGateway } from '../web.gateway.js'; +import { WebChatGateway, isWsOriginAllowed } from '../web.gateway.js'; // Mock logger vi.mock('@clawix/shared', async (importOriginal) => { @@ -62,6 +62,24 @@ describe('WebChatGateway', () => { gateway.setAdapter(mockAdapter as never); }); + describe('isWsOriginAllowed', () => { + const allowed = ['http://localhost:3000', 'https://app.example.com']; + + it('allows an origin present in the allowlist', () => { + expect(isWsOriginAllowed('http://localhost:3000', allowed)).toBe(true); + expect(isWsOriginAllowed('https://app.example.com', allowed)).toBe(true); + }); + + it('rejects an origin not in the allowlist (cross-site)', () => { + expect(isWsOriginAllowed('https://evil.example.com', allowed)).toBe(false); + }); + + it('allows a missing or empty Origin header (non-browser clients)', () => { + expect(isWsOriginAllowed(undefined, allowed)).toBe(true); + expect(isWsOriginAllowed('', allowed)).toBe(true); + }); + }); + describe('handleConnection — valid JWT', () => { it('calls adapter.addConnection and sends connection.ack', async () => { const payload = { sub: 'user-1', email: 'test@example.com', role: 'developer' }; diff --git a/packages/api/src/channels/web/web.gateway.ts b/packages/api/src/channels/web/web.gateway.ts index 5f7fe06..31e47db 100644 --- a/packages/api/src/channels/web/web.gateway.ts +++ b/packages/api/src/channels/web/web.gateway.ts @@ -9,11 +9,22 @@ import { createLogger } from '@clawix/shared'; import type { WebAdapterExtended } from './web.adapter.js'; import { serializeServerMessage } from './web.protocol.js'; +import { getAllowedOrigins } from '../../common/security.config.js'; const logger = createLogger('channels:web:gateway'); const HEARTBEAT_INTERVAL = 30_000; +/** + * Validate a WebSocket upgrade's `Origin` header against the allowlist to block + * cross-site WebSocket hijacking. Non-browser clients omit `Origin`; allow + * those (browsers always send it, so a malicious page is still rejected). + */ +export function isWsOriginAllowed(origin: string | undefined, allowed: string[]): boolean { + if (origin === undefined || origin === '') return true; + return allowed.includes(origin); +} + interface SocketWithAlive extends WebSocket { isAlive?: boolean; heartbeatInterval?: ReturnType; @@ -34,6 +45,7 @@ interface JwtPayload { export class WebChatGateway implements OnModuleInit, OnModuleDestroy { private adapter: WebAdapterExtended | null = null; private wss: WebSocketServer | null = null; + private allowedOrigins: string[] = []; constructor( private readonly jwtService: JwtService, @@ -43,6 +55,9 @@ export class WebChatGateway implements OnModuleInit, OnModuleDestroy { onModuleInit(): void { const server = this.httpAdapterHost.httpAdapter.getHttpServer(); + // Resolve the Origin allowlist once at startup (throws on a misconfigured + // wildcard, surfacing the error early) — shared with the HTTP CORS layer. + this.allowedOrigins = getAllowedOrigins(); // noServer mode: we manually route only matching paths so that other // WebSocketServers (e.g. /ws/notifications) can coexist on the same // HTTP server without one tearing down the other's upgrade. @@ -55,6 +70,17 @@ export class WebChatGateway implements OnModuleInit, OnModuleDestroy { server.on('upgrade', (req: IncomingMessage, socket: Duplex, head: Buffer) => { const url = new URL(req.url ?? '/', 'http://localhost'); if (url.pathname !== '/ws/chat') return; + // Block cross-site WebSocket hijacking: reject upgrades whose Origin is + // not allowlisted before completing the handshake. + if (!isWsOriginAllowed(req.headers.origin, this.allowedOrigins)) { + logger.warn( + { origin: req.headers.origin }, + 'WebSocket upgrade rejected — origin not allowed', + ); + socket.write('HTTP/1.1 403 Forbidden\r\n\r\n'); + socket.destroy(); + return; + } this.wss?.handleUpgrade(req, socket, head, (ws) => { this.wss?.emit('connection', ws, req); }); diff --git a/packages/api/src/chat/__tests__/chat.controller.test.ts b/packages/api/src/chat/__tests__/chat.controller.test.ts index c73ec0f..61fd3e3 100644 --- a/packages/api/src/chat/__tests__/chat.controller.test.ts +++ b/packages/api/src/chat/__tests__/chat.controller.test.ts @@ -140,13 +140,30 @@ describe('ChatController', () => { meta: { total: 2, page: 1, limit: 50 }, }); expect(mockPrisma.sessionMessage.findMany).toHaveBeenCalledWith({ - where: { sessionId: 'sess-1', archivedAt: null }, + where: { sessionId: 'sess-1', archivedAt: null, hiddenInHistory: false }, orderBy: { ordering: 'desc' }, skip: 0, take: 50, }); }); + it('excludes hiddenInHistory rows from both the page query and the total count', async () => { + mockSessionRepo.findById.mockResolvedValue({ id: 'sess-1', userId: 'user-1' }); + mockPrisma.sessionMessage.findMany.mockResolvedValue([]); + mockPrisma.sessionMessage.count.mockResolvedValue(0); + + const controller = createController(); + const req = { user: { sub: 'user-1' } }; + await controller.listMessages(req as never, 'sess-1', { page: 1, limit: 50 }); + + const expectedWhere = { sessionId: 'sess-1', archivedAt: null, hiddenInHistory: false }; + expect(mockPrisma.sessionMessage.findMany).toHaveBeenCalledWith( + expect.objectContaining({ where: expectedWhere }), + ); + // Count must apply the same filter or pagination math drifts. + expect(mockPrisma.sessionMessage.count).toHaveBeenCalledWith({ where: expectedWhere }); + }); + it('throws NotFoundException when session belongs to another user', async () => { mockSessionRepo.findById.mockResolvedValue({ id: 'sess-1', userId: 'other-user' }); @@ -168,7 +185,7 @@ describe('ChatController', () => { await controller.listMessages(req as never, 'sess-1', {}); expect(mockPrisma.sessionMessage.findMany).toHaveBeenCalledWith({ - where: { sessionId: 'sess-1', archivedAt: null }, + where: { sessionId: 'sess-1', archivedAt: null, hiddenInHistory: false }, orderBy: { ordering: 'desc' }, skip: 0, take: 50, diff --git a/packages/api/src/chat/chat.controller.ts b/packages/api/src/chat/chat.controller.ts index b274c38..10463a6 100644 --- a/packages/api/src/chat/chat.controller.ts +++ b/packages/api/src/chat/chat.controller.ts @@ -205,14 +205,18 @@ export class ChatController { const limit = Math.min(Number(query.limit) || 50, 100); const skip = (page - 1) * limit; + // `hiddenInHistory` rows are intermediate reasoning steps of a non-streamed + // run — excluded so reopened history mirrors the single combined reply the + // user saw live. Both queries filter identically to keep pagination correct. + const where = { sessionId, archivedAt: null, hiddenInHistory: false }; const [data, total] = await Promise.all([ this.prisma.sessionMessage.findMany({ - where: { sessionId, archivedAt: null }, + where, orderBy: { ordering: 'desc' }, skip, take: limit, }), - this.prisma.sessionMessage.count({ where: { sessionId, archivedAt: null } }), + this.prisma.sessionMessage.count({ where }), ]); return { diff --git a/packages/api/src/common/security.config.ts b/packages/api/src/common/security.config.ts index 062f7b6..3a2c69e 100644 --- a/packages/api/src/common/security.config.ts +++ b/packages/api/src/common/security.config.ts @@ -46,11 +46,11 @@ export function buildHelmetOptions(): FastifyHelmetOptions { } /** - * Build CORS options from environment. - * Reads CORS_ALLOWED_ORIGINS (comma-separated). - * Rejects wildcard '*' when credentials are enabled. + * Parse CORS_ALLOWED_ORIGINS (comma-separated) into a trimmed, non-empty list. + * Rejects wildcard '*' since credentials are enabled. Shared by the HTTP CORS + * layer and the WebSocket gateway's Origin check so both use one allowlist. */ -export function buildCorsOptions() { +export function getAllowedOrigins(): string[] { const raw = process.env['CORS_ALLOWED_ORIGINS'] ?? 'http://localhost:3000'; const origins = raw .split(',') @@ -61,6 +61,17 @@ export function buildCorsOptions() { throw new Error("CORS_ALLOWED_ORIGINS must not contain '*' when credentials are enabled"); } + return origins; +} + +/** + * Build CORS options from environment. + * Reads CORS_ALLOWED_ORIGINS (comma-separated). + * Rejects wildcard '*' when credentials are enabled. + */ +export function buildCorsOptions() { + const origins = getAllowedOrigins(); + return { origin: origins, methods: ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'OPTIONS'], diff --git a/packages/api/src/db/__tests__/agent-run.repository.test.ts b/packages/api/src/db/__tests__/agent-run.repository.test.ts index 576f32e..1cb66b6 100644 --- a/packages/api/src/db/__tests__/agent-run.repository.test.ts +++ b/packages/api/src/db/__tests__/agent-run.repository.test.ts @@ -244,6 +244,33 @@ describe('AgentRunRepository', () => { NotFoundError, ); }); + + it('should reset startedAt when transitioning to running so the execution clock matches the watchdog', async () => { + // The pending row is created with startedAt at creation time; a sub-agent + // may then wait in the executor queue. Anchoring startedAt to the + // running transition keeps the stale-run reaper's clock aligned with the + // executor watchdog and reasoning-loop timeout (all execution-anchored), + // so the watchdog fires before the reaper instead of the reverse. + mockPrisma.agentRun.update.mockResolvedValue({ ...mockAgentRun, status: 'running' }); + + await repository.update('run-1', { status: 'running' }); + + expect(mockPrisma.agentRun.update).toHaveBeenCalledWith({ + where: { id: 'run-1' }, + data: { status: 'running', startedAt: expect.any(Date) }, + }); + }); + + it('should NOT touch startedAt for non-running transitions', async () => { + mockPrisma.agentRun.update.mockResolvedValue({ ...mockAgentRun, status: 'completed' }); + + await repository.update('run-1', { status: 'completed', output: 'done' }); + + expect(mockPrisma.agentRun.update).toHaveBeenCalledWith({ + where: { id: 'run-1' }, + data: { status: 'completed', output: 'done' }, + }); + }); }); describe('delete', () => { diff --git a/packages/api/src/db/agent-run.repository.ts b/packages/api/src/db/agent-run.repository.ts index 8bbb410..3f8c3d4 100644 --- a/packages/api/src/db/agent-run.repository.ts +++ b/packages/api/src/db/agent-run.repository.ts @@ -161,6 +161,15 @@ export class AgentRunRepository { where: { id }, data: { ...(data.status ? { status: data.status } : {}), + // Anchor the execution clock to the pending→running transition. + // `startedAt` defaults to row-creation time, but a spawned sub-agent + // can sit in the executor queue before it runs. The stale-run reaper + // measures staleness from `startedAt`, while the executor watchdog + // and reasoning-loop timeout are anchored at execution start — so a + // queued run could be reaped (10 min from creation) before its own + // watchdog (timeout + 30s from execution) fired. Resetting here keeps + // all three layers on the same clock so the watchdog wins. + ...(data.status === 'running' ? { startedAt: new Date() } : {}), ...(data.sessionId !== undefined ? { sessionId: data.sessionId } : {}), ...(data.output !== undefined ? { output: data.output } : {}), ...(data.error !== undefined ? { error: data.error } : {}), diff --git a/packages/api/src/engine/__tests__/agent-runner.service.test.ts b/packages/api/src/engine/__tests__/agent-runner.service.test.ts index 68e827f..621d55c 100644 --- a/packages/api/src/engine/__tests__/agent-runner.service.test.ts +++ b/packages/api/src/engine/__tests__/agent-runner.service.test.ts @@ -198,6 +198,7 @@ const mockPolicy = { maxTokensPerCronRun: null, allowBrowserCdp: false, maxConcurrentBrowserSessions: 2, + maxSubAgentRunMs: 300000, isActive: true, createdAt: new Date(), updatedAt: new Date(), @@ -950,6 +951,7 @@ describe('AgentRunnerService', () => { 'run-1', // parentAgentRunId (the current run's ID) 'user-1', // userId undefined, // budgetTracker — none in this test (no tokenBudget passed) + 300000, // subAgentTimeoutMs from policy.maxSubAgentRunMs ); }); diff --git a/packages/api/src/engine/__tests__/history-visibility.test.ts b/packages/api/src/engine/__tests__/history-visibility.test.ts new file mode 100644 index 0000000..41fbbf3 --- /dev/null +++ b/packages/api/src/engine/__tests__/history-visibility.test.ts @@ -0,0 +1,55 @@ +import { describe, it, expect } from 'vitest'; +import type { ChatMessage } from '@clawix/shared'; +import { computeHiddenInHistory } from '../history-visibility.js'; + +const asst = (content: string, toolCalls?: ChatMessage['toolCalls']): ChatMessage => ({ + role: 'assistant', + content, + ...(toolCalls ? { toolCalls } : {}), +}); +const tool = (content: string): ChatMessage => ({ role: 'tool', content, toolCallId: 't1' }); + +describe('computeHiddenInHistory', () => { + it('hides nothing for a streamed run (every step was shown live)', () => { + const messages: ChatMessage[] = [ + asst('Let me check.', [{ id: 't1', name: 'search', arguments: {} }]), + tool('result'), + asst('Here is the answer.'), + ]; + expect(computeHiddenInHistory(messages, true)).toEqual([false, false, false]); + }); + + it('hides intermediate steps for a non-streamed run, keeping only the final assistant message', () => { + const messages: ChatMessage[] = [ + asst('Let me check.', [{ id: 't1', name: 'search', arguments: {} }]), + tool('result'), + asst('Here is the answer.'), + ]; + // Only the last assistant message (the final reply the user saw live) stays visible. + expect(computeHiddenInHistory(messages, false)).toEqual([true, true, false]); + }); + + it('hides nothing for a single-message non-streamed run', () => { + const messages: ChatMessage[] = [asst('Direct answer, no tools.')]; + expect(computeHiddenInHistory(messages, false)).toEqual([false]); + }); + + it('keeps the LAST assistant visible even if a trailing non-assistant message follows', () => { + const messages: ChatMessage[] = [ + asst('Working...', [{ id: 't1', name: 'search', arguments: {} }]), + asst('Final reply.'), + tool('late tool row'), + ]; + expect(computeHiddenInHistory(messages, false)).toEqual([true, false, true]); + }); + + it('defensively hides nothing when there is no assistant message', () => { + const messages: ChatMessage[] = [tool('orphan result')]; + expect(computeHiddenInHistory(messages, false)).toEqual([false]); + }); + + it('returns an empty array for empty input', () => { + expect(computeHiddenInHistory([], false)).toEqual([]); + expect(computeHiddenInHistory([], true)).toEqual([]); + }); +}); diff --git a/packages/api/src/engine/__tests__/message-store/session-message-store.test.ts b/packages/api/src/engine/__tests__/message-store/session-message-store.test.ts index 71207aa..c73b3e4 100644 --- a/packages/api/src/engine/__tests__/message-store/session-message-store.test.ts +++ b/packages/api/src/engine/__tests__/message-store/session-message-store.test.ts @@ -20,9 +20,27 @@ describe('SessionMessageStore', () => { }; const store = new SessionMessageStore(sessionManager as never, 'sess-1'); const ids = await store.saveMessages([{ role: 'assistant', content: 'ok' }]); - expect(sessionManager.saveMessages).toHaveBeenCalledWith('sess-1', [ - { role: 'assistant', content: 'ok' }, - ]); + expect(sessionManager.saveMessages).toHaveBeenCalledWith( + 'sess-1', + [{ role: 'assistant', content: 'ok' }], + undefined, + ); expect(ids).toEqual(['id-1']); }); + + it('saveMessages forwards the SaveMessagesOptions through to sessionManager', async () => { + const sessionManager = { + loadMessages: vi.fn(), + saveMessages: vi.fn().mockResolvedValue(['id-1']), + }; + const store = new SessionMessageStore(sessionManager as never, 'sess-1'); + await store.saveMessages([{ role: 'assistant', content: 'ok' }], { + hiddenInHistory: [true], + }); + expect(sessionManager.saveMessages).toHaveBeenCalledWith( + 'sess-1', + [{ role: 'assistant', content: 'ok' }], + { hiddenInHistory: [true] }, + ); + }); }); diff --git a/packages/api/src/engine/__tests__/session-manager.service.test.ts b/packages/api/src/engine/__tests__/session-manager.service.test.ts index 436c7f0..f11ce02 100644 --- a/packages/api/src/engine/__tests__/session-manager.service.test.ts +++ b/packages/api/src/engine/__tests__/session-manager.service.test.ts @@ -427,6 +427,49 @@ describe('SessionManagerService', () => { }); }); + it('persists per-message hiddenInHistory from opts (aligned by index)', async () => { + mockPrisma.sessionMessage.count.mockResolvedValue(0); + const mockCreate = vi + .fn() + .mockResolvedValueOnce({ id: 'id-1' }) + .mockResolvedValueOnce({ id: 'id-2' }); + mockPrisma.$transaction.mockImplementation(async (fn: (tx: unknown) => Promise) => { + const fakeTx = { sessionMessage: { create: mockCreate } }; + return fn(fakeTx); + }); + + await service.saveMessages( + 'session-1', + [ + { role: 'assistant', content: 'intermediate step' }, + { role: 'assistant', content: 'final reply' }, + ], + { hiddenInHistory: [true, false] }, + ); + + expect(mockCreate).toHaveBeenNthCalledWith(1, { + data: expect.objectContaining({ content: 'intermediate step', hiddenInHistory: true }), + }); + expect(mockCreate).toHaveBeenNthCalledWith(2, { + data: expect.objectContaining({ content: 'final reply', hiddenInHistory: false }), + }); + }); + + it('defaults hiddenInHistory to false when opts omitted', async () => { + mockPrisma.sessionMessage.count.mockResolvedValue(0); + const mockCreate = vi.fn().mockResolvedValueOnce({ id: 'id-1' }); + mockPrisma.$transaction.mockImplementation(async (fn: (tx: unknown) => Promise) => { + const fakeTx = { sessionMessage: { create: mockCreate } }; + return fn(fakeTx); + }); + + await service.saveMessages('session-1', [{ role: 'user', content: 'Hello' }]); + + expect(mockCreate).toHaveBeenCalledWith({ + data: expect.objectContaining({ hiddenInHistory: false }), + }); + }); + it('sets senderId to undefined when not provided', async () => { mockPrisma.sessionMessage.count.mockResolvedValue(0); const mockCreate = vi.fn().mockResolvedValueOnce({ id: 'id-1' }); diff --git a/packages/api/src/engine/__tests__/spawn-tool.test.ts b/packages/api/src/engine/__tests__/spawn-tool.test.ts index c8cc65c..0b44132 100644 --- a/packages/api/src/engine/__tests__/spawn-tool.test.ts +++ b/packages/api/src/engine/__tests__/spawn-tool.test.ts @@ -194,6 +194,7 @@ describe('spawn tool — task executor integration', () => { input: 'Summarize this', userId: 'user-1', sessionId: 'session-abc', + displayName: 'summarizer', }); }); diff --git a/packages/api/src/engine/__tests__/stale-run-reaper.service.test.ts b/packages/api/src/engine/__tests__/stale-run-reaper.service.test.ts index 9cf1f86..e88d298 100644 --- a/packages/api/src/engine/__tests__/stale-run-reaper.service.test.ts +++ b/packages/api/src/engine/__tests__/stale-run-reaper.service.test.ts @@ -1,22 +1,33 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; import { StaleRunReaperService } from '../stale-run-reaper.service.js'; +import type { AgentRunRegistry } from '../agent-run-registry.service.js'; const mockPrisma = { agentRun: { + findMany: vi.fn().mockResolvedValue([]), updateMany: vi.fn().mockResolvedValue({ count: 0 }), }, }; +const mockRegistry = { + abort: vi.fn().mockReturnValue(true), +}; + describe('StaleRunReaperService', () => { let reaper: StaleRunReaperService; beforeEach(() => { vi.clearAllMocks(); - reaper = new StaleRunReaperService(mockPrisma as any); + mockPrisma.agentRun.findMany.mockResolvedValue([]); + reaper = new StaleRunReaperService( + mockPrisma as any, + mockRegistry as unknown as AgentRunRegistry, + ); }); it('marks runs older than threshold as failed', async () => { + mockPrisma.agentRun.findMany.mockResolvedValue([{ id: 'run-1' }, { id: 'run-2' }]); mockPrisma.agentRun.updateMany.mockResolvedValue({ count: 2 }); const result = await reaper.reapStaleRuns(); @@ -35,9 +46,24 @@ describe('StaleRunReaperService', () => { }); }); - it('returns 0 when no stale runs exist', async () => { - mockPrisma.agentRun.updateMany.mockResolvedValue({ count: 0 }); + it('aborts the in-process controller for each stale run before flipping the DB', async () => { + mockPrisma.agentRun.findMany.mockResolvedValue([{ id: 'run-1' }, { id: 'run-2' }]); + mockPrisma.agentRun.updateMany.mockResolvedValue({ count: 2 }); + + await reaper.reapStaleRuns(); + + expect(mockRegistry.abort).toHaveBeenCalledTimes(2); + expect(mockRegistry.abort).toHaveBeenCalledWith('run-1', 'stale_timeout'); + expect(mockRegistry.abort).toHaveBeenCalledWith('run-2', 'stale_timeout'); + }); + + it('returns 0 and does not abort or update when no stale runs exist', async () => { + mockPrisma.agentRun.findMany.mockResolvedValue([]); + const result = await reaper.reapStaleRuns(); + expect(result).toBe(0); + expect(mockRegistry.abort).not.toHaveBeenCalled(); + expect(mockPrisma.agentRun.updateMany).not.toHaveBeenCalled(); }); }); diff --git a/packages/api/src/engine/__tests__/task-executor.service.test.ts b/packages/api/src/engine/__tests__/task-executor.service.test.ts index 3d97e37..8471cba 100644 --- a/packages/api/src/engine/__tests__/task-executor.service.test.ts +++ b/packages/api/src/engine/__tests__/task-executor.service.test.ts @@ -117,6 +117,7 @@ describe('TaskExecutorService', () => { subscribe: ReturnType; }; let mockAgentDefRepo: { findById: ReturnType }; + let mockAgentRunRegistry: { abort: ReturnType }; beforeEach(() => { vi.clearAllMocks(); @@ -158,6 +159,8 @@ describe('TaskExecutorService', () => { findById: vi.fn().mockResolvedValue({ id: 'agent-def-1', name: 'test-agent' }), }; + mockAgentRunRegistry = { abort: vi.fn().mockReturnValue(true) }; + service = new TaskExecutorService( mockAgentRunner as unknown as AgentRunnerService, mockAgentRunRepo as unknown as AgentRunRepository, @@ -165,6 +168,7 @@ describe('TaskExecutorService', () => { mockRedis as unknown as RedisService, mockPubsub as unknown as RedisPubSubService, mockAgentDefRepo as unknown as AgentDefinitionRepository, + mockAgentRunRegistry as unknown as import('../agent-run-registry.service.js').AgentRunRegistry, ); }); @@ -416,6 +420,133 @@ describe('TaskExecutorService', () => { expect(service.activeCount).toBe(0); }); + + it('passes the policy-resolved timeoutMs through to agentRunner.run', async () => { + service.submit('run-1', { + agentDefinitionId: 'agent-def-1', + input: 'Hello!', + userId: 'user-1', + sessionId: 'sess-1', + timeoutMs: 12345, + }); + + await Promise.resolve(); + + expect(mockAgentRunner.run).toHaveBeenCalledWith( + expect.objectContaining({ agentRunId: 'run-1', timeoutMs: 12345 }), + ); + }); + + it('falls back to the default sub-agent timeout when none is supplied', async () => { + service.submit('run-1', { + agentDefinitionId: 'agent-def-1', + input: 'Hello!', + userId: 'user-1', + sessionId: 'sess-1', + }); + + await Promise.resolve(); + + expect(mockAgentRunner.run).toHaveBeenCalledWith( + expect.objectContaining({ agentRunId: 'run-1', timeoutMs: 300000 }), + ); + }); + }); + + // ---------------------------------------------------------------- // + // describe('watchdog') // + // ---------------------------------------------------------------- // + + describe('watchdog', () => { + it('aborts a stuck sub-agent, reports failure to the parent, and frees the slot', async () => { + vi.useFakeTimers(); + try { + const childRun = { + ...mockAgentRun, + id: 'run-stuck', + parentAgentRunId: 'run-parent', + agentDefinitionId: 'agent-def-1', + }; + const parentRun = { + ...mockAgentRun, + id: 'run-parent', + sessionId: 'sess-parent', + parentAgentRunId: null, + }; + mockAgentRunRepo.findById.mockResolvedValueOnce(childRun).mockResolvedValueOnce(parentRun); + // Run never settles — simulates a tool that ignores the abort signal. + mockAgentRunner.run.mockReturnValueOnce(new Promise(() => {})); + + service.submit('run-stuck', { + agentDefinitionId: 'agent-def-1', + input: 'build an app', + userId: 'user-1', + sessionId: 'sess-parent', + timeoutMs: 1000, + }); + + expect(service.activeCount).toBe(1); + + // Advance past timeoutMs (1000) + watchdog grace (30000). + await vi.advanceTimersByTimeAsync(31_001); + + expect(mockAgentRunRegistry.abort).toHaveBeenCalledWith('run-stuck', 'watchdog_timeout'); + expect(mockAgentRunRepo.update).toHaveBeenCalledWith( + 'run-stuck', + expect.objectContaining({ status: 'failed' }), + ); + + const queueKey = `${KEY_PREFIXES.agentResults}sess-parent`; + expect(mockRedis.lpush).toHaveBeenCalledWith(queueKey, expect.any(String)); + const published = JSON.parse(mockRedis.lpush.mock.calls[0]![1] as string) as { + status: string; + }; + expect(published.status).toBe('failed'); + + expect(service.activeCount).toBe(0); + } finally { + vi.useRealTimers(); + } + }); + }); + + // ---------------------------------------------------------------- // + // describe('progress forwarding') // + // ---------------------------------------------------------------- // + + describe('progress forwarding', () => { + it('forwards throttled progress to the parent channel', async () => { + // Run never settles so the captured onProgress stays callable. + mockAgentRunner.run.mockReturnValueOnce(new Promise(() => {})); + + service.submit('run-1', { + agentDefinitionId: 'agent-def-1', + input: 'do work', + userId: 'user-1', + sessionId: 'sess-parent', + displayName: 'coder', + }); + + await Promise.resolve(); + + const call = mockAgentRunner.run.mock.calls[0]![0] as { + onProgress?: (hint: string) => void; + }; + expect(typeof call.onProgress).toBe('function'); + + call.onProgress!('shell(npm install)'); + call.onProgress!('shell(ls)'); // throttled — same tick + await Promise.resolve(); + + const progressPublishes = mockPubsub.publish.mock.calls.filter( + (c: unknown[]) => c[0] === PUBSUB_CHANNELS.channelResponseReady, + ); + expect(progressPublishes).toHaveLength(1); + expect(progressPublishes[0]![1]).toEqual({ + sessionId: 'sess-parent', + output: expect.stringContaining('coder is working'), + }); + }); }); // ---------------------------------------------------------------- // @@ -733,6 +864,7 @@ describe('TaskExecutorService', () => { mockRedis as unknown as RedisService, mockPubsub as unknown as RedisPubSubService, mockAgentDefRepo as unknown as AgentDefinitionRepository, + mockAgentRunRegistry as unknown as import('../agent-run-registry.service.js').AgentRunRegistry, ); mockAgentRunRepo.findById.mockResolvedValue({ diff --git a/packages/api/src/engine/agent-runner.service.ts b/packages/api/src/engine/agent-runner.service.ts index 2f23ab5..76d5f12 100644 --- a/packages/api/src/engine/agent-runner.service.ts +++ b/packages/api/src/engine/agent-runner.service.ts @@ -67,6 +67,7 @@ import { TaskRunMessageRepository } from '../db/task-run-message.repository.js'; import type { RunOptions, RunResult } from './agent-runner.types.js'; import { SessionMessageStore } from './message-store/session-message-store.js'; import type { MessageStore } from './message-store/message-store.js'; +import { computeHiddenInHistory } from './history-visibility.js'; import type { Session } from '../generated/prisma/client.js'; import { ProviderConfigService } from '../provider-config/provider-config.service.js'; import { createProvider } from './providers/provider-factory.js'; @@ -485,6 +486,7 @@ export class AgentRunnerService { agentRun.id, userId, budgetTracker, + policy.maxSubAgentRunMs, ), ); } @@ -591,7 +593,9 @@ export class AgentRunnerService { }); // Step 15: Run loop - // No default wall-clock timeout — let the model finish. The stale run reaper (10 min) is the safety net. + // Sub-agents always carry a policy-resolved timeout (supplied by the + // TaskExecutorService); primary runs have none by default and let the + // model finish, with the stale-run reaper (10 min) as their backstop. const timeoutMs = options.timeoutMs; // Wire the streaming event sink only for primary runs of agents that @@ -644,7 +648,11 @@ export class AgentRunnerService { if (!isSubAgent) { const loopMessages = loopResult.messages.slice(initialMessages.length); if (loopMessages.length > 0) { - const savedIds = await store.saveMessages(loopMessages); + // Non-streamed runs only showed the user one combined final reply, so + // hide the intermediate reasoning steps from history to match. Streamed + // runs surfaced every step live, so all stay visible. + const hiddenInHistory = computeHiddenInHistory(loopMessages, streamingUsed); + const savedIds = await store.saveMessages(loopMessages, { hiddenInHistory }); // Find the ID of the last assistant message for WebSocket delivery for (let i = loopMessages.length - 1; i >= 0; i--) { if (loopMessages[i]!.role === 'assistant') { diff --git a/packages/api/src/engine/agent-runner.types.ts b/packages/api/src/engine/agent-runner.types.ts index 74abb43..20f3e3b 100644 --- a/packages/api/src/engine/agent-runner.types.ts +++ b/packages/api/src/engine/agent-runner.types.ts @@ -47,7 +47,13 @@ export interface RunOptions { * caps total cost across primary + sub-agents. */ readonly budgetTracker?: BudgetTracker; - /** Wall-clock timeout for the entire agent run in milliseconds. Default: 300000 (5 min). */ + /** + * Wall-clock timeout for the entire agent run in milliseconds. When the + * reasoning loop exceeds this, it aborts in-flight work and returns with + * `hitTimeout`. Omitted for primary runs (no default — the stale-run reaper + * is their backstop); for sub-agents the TaskExecutorService always supplies + * a value resolved from the user's policy (`Policy.maxSubAgentRunMs`). + */ readonly timeoutMs?: number; /** Caller-supplied persistence backend. When provided, agent-runner does NOT * create or resume a Session — all transcript persistence flows through the store. */ diff --git a/packages/api/src/engine/history-visibility.ts b/packages/api/src/engine/history-visibility.ts new file mode 100644 index 0000000..b8ccd4d --- /dev/null +++ b/packages/api/src/engine/history-visibility.ts @@ -0,0 +1,39 @@ +import type { ChatMessage } from '@clawix/shared'; + +/** + * Decide which of a run's persisted loop messages should be HIDDEN from the + * chat-history display so that history mirrors what the user saw live. + * + * A streamed run surfaced every reasoning step to the user as its own chat + * bubble, so all steps stay visible in history. A non-streamed run only ever + * showed the user a single combined final reply — so the intermediate assistant + * prose and tool-call/result steps are hidden from history, leaving just the + * final assistant message (which equals the live `result.output`). + * + * The rows are still persisted in full (the reasoning loop's tool-call/result + * pairs are required to reconstruct conversation context on the next turn); only + * their visibility in the history display endpoint changes. + * + * @param messages - The loop-generated messages being persisted, in order. + * @param streamingUsed - Whether the run actually streamed steps to the user. + * @returns A boolean array aligned to `messages` (`true` = hide from history). + * All-false when streamed, or when no assistant message exists (defensive — + * never hide every message). + */ +export function computeHiddenInHistory( + messages: readonly ChatMessage[], + streamingUsed: boolean, +): boolean[] { + if (streamingUsed) return messages.map(() => false); + + let finalAssistantIdx = -1; + for (let i = messages.length - 1; i >= 0; i--) { + if (messages[i]!.role === 'assistant') { + finalAssistantIdx = i; + break; + } + } + if (finalAssistantIdx === -1) return messages.map(() => false); + + return messages.map((_, i) => i !== finalAssistantIdx); +} diff --git a/packages/api/src/engine/message-store/message-store.ts b/packages/api/src/engine/message-store/message-store.ts index 2e29344..6392c34 100644 --- a/packages/api/src/engine/message-store/message-store.ts +++ b/packages/api/src/engine/message-store/message-store.ts @@ -1,5 +1,15 @@ import type { ChatMessage } from '@clawix/shared'; +/** Options for persisting a batch of messages. */ +export interface SaveMessagesOptions { + /** + * Per-message flag, aligned to the `messages` array. `true` hides the row + * from the chat-history display endpoint (used for the intermediate steps of + * a non-streamed run). Stores that lack a history surface may ignore it. + */ + readonly hiddenInHistory?: readonly boolean[]; +} + /** * Persistence abstraction for agent run transcripts. * @@ -9,5 +19,8 @@ import type { ChatMessage } from '@clawix/shared'; */ export interface MessageStore { loadMessages(): Promise; - saveMessages(messages: readonly ChatMessage[]): Promise; + saveMessages( + messages: readonly ChatMessage[], + opts?: SaveMessagesOptions, + ): Promise; } diff --git a/packages/api/src/engine/message-store/session-message-store.ts b/packages/api/src/engine/message-store/session-message-store.ts index 2739c9e..646c2fa 100644 --- a/packages/api/src/engine/message-store/session-message-store.ts +++ b/packages/api/src/engine/message-store/session-message-store.ts @@ -1,6 +1,6 @@ import type { ChatMessage } from '@clawix/shared'; import type { SessionManagerService } from '../session-manager.service.js'; -import type { MessageStore } from './message-store.js'; +import type { MessageStore, SaveMessagesOptions } from './message-store.js'; export class SessionMessageStore implements MessageStore { constructor( @@ -12,7 +12,10 @@ export class SessionMessageStore implements MessageStore { return this.sessionManager.loadMessages(this.sessionId); } - saveMessages(messages: readonly ChatMessage[]): Promise { - return this.sessionManager.saveMessages(this.sessionId, messages); + saveMessages( + messages: readonly ChatMessage[], + opts?: SaveMessagesOptions, + ): Promise { + return this.sessionManager.saveMessages(this.sessionId, messages, opts); } } diff --git a/packages/api/src/engine/providers/__tests__/anthropic-provider.test.ts b/packages/api/src/engine/providers/__tests__/anthropic-provider.test.ts index 7a2e32e..d1eaf7a 100644 --- a/packages/api/src/engine/providers/__tests__/anthropic-provider.test.ts +++ b/packages/api/src/engine/providers/__tests__/anthropic-provider.test.ts @@ -1,12 +1,24 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; const mockCreate = vi.fn(); +const mockStream = vi.fn(); vi.mock('@anthropic-ai/sdk', () => ({ default: vi.fn().mockImplementation(() => ({ - messages: { create: mockCreate }, + messages: { create: mockCreate, stream: mockStream }, })), })); +/** + * Set up `messages.stream()` to return a MessageStream whose `finalMessage()` + * resolves to `message`. Mirrors the SDK's streaming helper: the request is + * sent as SSE and `finalMessage()` resolves once the stream completes. + */ +function streamResolving(message: unknown): void { + mockStream.mockReturnValueOnce({ + finalMessage: vi.fn().mockResolvedValue(message), + }); +} + vi.mock('@clawix/shared', async (importOriginal) => { const actual = await importOriginal(); return { @@ -25,6 +37,7 @@ import { AnthropicProvider } from '../anthropic-provider.js'; describe('AnthropicProvider', () => { beforeEach(() => { mockCreate.mockReset(); + mockStream.mockReset(); }); it('has name "anthropic"', () => { @@ -32,8 +45,41 @@ describe('AnthropicProvider', () => { expect(provider.name).toBe('anthropic'); }); + it('uses the streaming API so the request is incremental, not a blocking non-streaming call', async () => { + streamResolving({ + content: [{ type: 'text', text: 'streamed' }], + stop_reason: 'end_turn', + usage: { input_tokens: 3, output_tokens: 2 }, + }); + + const provider = new AnthropicProvider('test-key'); + const result = await provider.chat([{ role: 'user', content: 'Hi' }]); + + // The blocking `messages.create` path must not be used — it holds the + // socket with zero bytes until the whole completion lands, which is what + // makes long turns look hung. + expect(mockStream).toHaveBeenCalledTimes(1); + expect(mockCreate).not.toHaveBeenCalled(); + expect(result.content).toBe('streamed'); + }); + + it('forwards the abort signal to the streaming request', async () => { + streamResolving({ + content: [{ type: 'text', text: 'ok' }], + stop_reason: 'end_turn', + usage: { input_tokens: 1, output_tokens: 1 }, + }); + const controller = new AbortController(); + + const provider = new AnthropicProvider('test-key'); + await provider.chat([{ role: 'user', content: 'Hi' }], { abortSignal: controller.signal }); + + const requestOptions = mockStream.mock.calls[0]![1]; + expect(requestOptions?.signal).toBe(controller.signal); + }); + it('sends a basic chat and returns normalized LLMResponse', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'Hello!' }], stop_reason: 'end_turn', usage: { input_tokens: 10, output_tokens: 5 }, @@ -50,7 +96,7 @@ describe('AnthropicProvider', () => { }); it('maps tool_use stop reason and extracts tool calls', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [ { type: 'tool_use', @@ -83,7 +129,7 @@ describe('AnthropicProvider', () => { }); it('extracts system message and passes as top-level param', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'Response' }], stop_reason: 'end_turn', usage: { input_tokens: 30, output_tokens: 10 }, @@ -95,7 +141,7 @@ describe('AnthropicProvider', () => { { role: 'user', content: 'Hello' }, ]); - const callArgs = mockCreate.mock.calls[0]![0]; + const callArgs = mockStream.mock.calls[0]![0]; // With caching enabled (default), system is a content-block array expect(callArgs.system).toEqual([ { type: 'text', text: 'You are helpful.', cache_control: { type: 'ephemeral' } }, @@ -104,7 +150,7 @@ describe('AnthropicProvider', () => { }); it('maps max_tokens stop reason to max_tokens finish reason', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'Truncated...' }], stop_reason: 'max_tokens', usage: { input_tokens: 10, output_tokens: 4096 }, @@ -116,7 +162,7 @@ describe('AnthropicProvider', () => { }); it('surfaces cache token fields from the response', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'cached response' }], stop_reason: 'end_turn', usage: { @@ -136,7 +182,7 @@ describe('AnthropicProvider', () => { }); it('omits cache token fields when the SDK does not return them', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'no cache response' }], stop_reason: 'end_turn', usage: { input_tokens: 10, output_tokens: 5 }, @@ -154,10 +200,11 @@ describe('AnthropicProvider', () => { describe('AnthropicProvider — prompt caching', () => { beforeEach(() => { mockCreate.mockReset(); + mockStream.mockReset(); }); it('marks the system block with cache_control by default', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'ok' }], stop_reason: 'end_turn', usage: { input_tokens: 5, output_tokens: 5 }, @@ -169,7 +216,7 @@ describe('AnthropicProvider — prompt caching', () => { { role: 'user', content: 'Hi' }, ]); - const args = mockCreate.mock.calls[0]![0]; + const args = mockStream.mock.calls[0]![0]; expect(args.system).toEqual([ { type: 'text', @@ -180,7 +227,7 @@ describe('AnthropicProvider — prompt caching', () => { }); it('marks the last tool with cache_control by default', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'ok' }], stop_reason: 'end_turn', usage: { input_tokens: 5, output_tokens: 5 }, @@ -194,7 +241,7 @@ describe('AnthropicProvider — prompt caching', () => { ], }); - const args = mockCreate.mock.calls[0]![0]; + const args = mockStream.mock.calls[0]![0]; expect(args.tools).toHaveLength(2); expect(args.tools[0]).not.toHaveProperty('cache_control'); expect(args.tools[1]).toMatchObject({ @@ -204,7 +251,7 @@ describe('AnthropicProvider — prompt caching', () => { }); it('does not mark system or tools when enableCaching=false', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'ok' }], stop_reason: 'end_turn', usage: { input_tokens: 5, output_tokens: 5 }, @@ -221,14 +268,14 @@ describe('AnthropicProvider — prompt caching', () => { }, ); - const args = mockCreate.mock.calls[0]![0]; + const args = mockStream.mock.calls[0]![0]; // System is sent as a plain string (no content blocks) when caching is off expect(args.system).toBe('You are helpful.'); expect(args.tools[0]).not.toHaveProperty('cache_control'); }); it('does not send cache_control on the user message', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'ok' }], stop_reason: 'end_turn', usage: { input_tokens: 5, output_tokens: 5 }, @@ -240,13 +287,13 @@ describe('AnthropicProvider — prompt caching', () => { { role: 'user', content: 'Timestamp 123: please respond' }, ]); - const args = mockCreate.mock.calls[0]![0]; + const args = mockStream.mock.calls[0]![0]; expect(args.messages[0]).toEqual({ role: 'user', content: 'Timestamp 123: please respond' }); expect(JSON.stringify(args.messages)).not.toContain('cache_control'); }); it('omits system content blocks entirely when there is no system message', async () => { - mockCreate.mockResolvedValueOnce({ + streamResolving({ content: [{ type: 'text', text: 'ok' }], stop_reason: 'end_turn', usage: { input_tokens: 5, output_tokens: 5 }, @@ -255,7 +302,7 @@ describe('AnthropicProvider — prompt caching', () => { const provider = new AnthropicProvider('test-key'); await provider.chat([{ role: 'user', content: 'Hi' }]); - const args = mockCreate.mock.calls[0]![0]; + const args = mockStream.mock.calls[0]![0]; expect(args.system).toBeUndefined(); }); }); diff --git a/packages/api/src/engine/providers/anthropic-provider.ts b/packages/api/src/engine/providers/anthropic-provider.ts index 964b948..864d8a2 100644 --- a/packages/api/src/engine/providers/anthropic-provider.ts +++ b/packages/api/src/engine/providers/anthropic-provider.ts @@ -140,7 +140,7 @@ export class AnthropicProvider implements LLMProvider { ) : baseTools; - const requestParams: Anthropic.MessageCreateParamsNonStreaming = { + const requestParams: Anthropic.MessageStreamParams = { model, max_tokens: maxTokens, messages: nonSystemMessages.map(toAnthropicMessage), @@ -157,10 +157,18 @@ export class AnthropicProvider implements LLMProvider { ...(toolsForRequest ? { tools: toolsForRequest } : {}), }; - const response = await this.client.messages.create( + // Use the streaming API even though we return a single assembled response. + // A non-streaming `messages.create` holds the HTTP connection with zero + // bytes until the entire completion is generated — for slow models or + // large outputs the turn can take minutes, during which the run looks + // hung (no tokens, no progress) and may be killed by the stale-run reaper. + // Streaming keeps the SSE socket flowing, so the request stays responsive + // and aborts promptly when the signal fires. + const stream = this.client.messages.stream( requestParams, options?.abortSignal ? { signal: options.abortSignal } : undefined, ); + const response = await stream.finalMessage(); let textContent = ''; const toolCalls: ToolCallRequest[] = []; diff --git a/packages/api/src/engine/session-manager.service.ts b/packages/api/src/engine/session-manager.service.ts index fcb9efc..8e7d36f 100644 --- a/packages/api/src/engine/session-manager.service.ts +++ b/packages/api/src/engine/session-manager.service.ts @@ -5,6 +5,7 @@ import type { ChatMessage } from '@clawix/shared'; import type { Session } from '../generated/prisma/client.js'; import { SessionRepository } from '../db/session.repository.js'; import { PrismaService } from '../prisma/prisma.service.js'; +import type { SaveMessagesOptions } from './message-store/message-store.js'; const logger = createLogger('engine:session-manager'); @@ -120,6 +121,7 @@ export class SessionManagerService { async saveMessages( sessionId: string, messages: readonly ChatMessage[], + opts?: SaveMessagesOptions, ): Promise { const currentCount = await this.prisma.sessionMessage.count({ where: { sessionId, archivedAt: null }, @@ -140,6 +142,7 @@ export class SessionManagerService { toolCallId: msg.toolCallId, toolCalls: msg.toolCalls ? JSON.parse(JSON.stringify(msg.toolCalls)) : undefined, ordering: currentCount + i, + hiddenInHistory: opts?.hiddenInHistory?.[i] ?? false, }, }); createdIds.push(created.id); diff --git a/packages/api/src/engine/stale-run-reaper.service.ts b/packages/api/src/engine/stale-run-reaper.service.ts index 0e6107f..7605a55 100644 --- a/packages/api/src/engine/stale-run-reaper.service.ts +++ b/packages/api/src/engine/stale-run-reaper.service.ts @@ -3,6 +3,7 @@ import { Injectable, type OnModuleInit, type OnModuleDestroy } from '@nestjs/com import { createLogger } from '@clawix/shared'; import { PrismaService } from '../prisma/prisma.service.js'; +import { AgentRunRegistry } from './agent-run-registry.service.js'; const logger = createLogger('engine:stale-run-reaper'); @@ -16,7 +17,10 @@ const SWEEP_INTERVAL_MS = 60 * 1000; // every 60 seconds export class StaleRunReaperService implements OnModuleInit, OnModuleDestroy { private intervalHandle: ReturnType | null = null; - constructor(private readonly prisma: PrismaService) {} + constructor( + private readonly prisma: PrismaService, + private readonly agentRunRegistry: AgentRunRegistry, + ) {} onModuleInit(): void { this.intervalHandle = setInterval(() => { @@ -38,11 +42,25 @@ export class StaleRunReaperService implements OnModuleInit, OnModuleDestroy { async reapStaleRuns(): Promise { const cutoff = new Date(Date.now() - STALE_THRESHOLD_MS); + // Collect the ids first so we can abort their in-process controllers. A + // bare updateMany only flips the DB row: the awaited run keeps executing, + // its slot stays occupied, and the parent is never re-invoked. Aborting + // the controller actually stops the run and lets it report back. + const stale = await this.prisma.agentRun.findMany({ + where: { status: 'running', startedAt: { lt: cutoff } }, + select: { id: true }, + }); + + if (stale.length === 0) return 0; + + for (const { id } of stale) { + // Best-effort: a no-op if the controller is gone (e.g. another process, + // or lost on restart) — the DB update below still records the timeout. + this.agentRunRegistry.abort(id, 'stale_timeout'); + } + const result = await this.prisma.agentRun.updateMany({ - where: { - status: 'running', - startedAt: { lt: cutoff }, - }, + where: { status: 'running', startedAt: { lt: cutoff } }, data: { status: 'failed', error: 'Agent run timed out (stale run reaper)', diff --git a/packages/api/src/engine/task-executor.service.ts b/packages/api/src/engine/task-executor.service.ts index 0b006c0..61c5e21 100644 --- a/packages/api/src/engine/task-executor.service.ts +++ b/packages/api/src/engine/task-executor.service.ts @@ -27,9 +27,31 @@ import { SCAN_BATCH_SIZE, } from '../cache/cache.constants.js'; import { AgentDefinitionRepository } from '../db/agent-definition.repository.js'; +import { AgentRunRegistry } from './agent-run-registry.service.js'; const logger = createLogger('engine:task-executor'); +/** + * Fallback wall-clock cap (ms) for a sub-agent run when the submit options + * carry no policy-resolved timeout (e.g. an orphan recovered after a crash, + * which has no parent/policy context). Overridable via SUBAGENT_TIMEOUT_MS. + */ +const DEFAULT_SUBAGENT_TIMEOUT_MS = (() => { + const raw = parseInt(process.env['SUBAGENT_TIMEOUT_MS'] ?? '300000', 10); + return Number.isFinite(raw) && raw > 0 ? raw : 300000; +})(); + +/** + * Grace added on top of the run's own timeout before the executor's hard + * watchdog fires. The reasoning loop should abort cleanly at `timeoutMs`; the + * watchdog only fires if it cannot (e.g. a tool that ignores the abort signal), + * forcibly freeing the slot and reporting failure to the parent. + */ +const SUBAGENT_WATCHDOG_GRACE_MS = 30_000; + +/** Minimum spacing between progress heartbeats forwarded to the parent channel. */ +const SUBAGENT_PROGRESS_THROTTLE_MS = 30_000; + // ------------------------------------------------------------------ // // Types // // ------------------------------------------------------------------ // @@ -51,6 +73,14 @@ interface SubmitOptions { * In-memory only — the recovery path (orphan runs) has no parent signal. */ readonly abortSignal?: AbortSignal; + /** + * Wall-clock cap (ms) for the run, resolved from the spawning user's policy + * (`Policy.maxSubAgentRunMs`). Drives both the reasoning loop's own timeout + * and the executor's hard watchdog. Absent on the orphan-recovery path. + */ + readonly timeoutMs?: number; + /** Human-readable agent label, used to tag progress heartbeats to the parent. */ + readonly displayName?: string; } interface QueueItem { @@ -96,6 +126,7 @@ export class TaskExecutorService implements OnModuleInit { private readonly redis: RedisService, private readonly pubsub: RedisPubSubService, private readonly agentDefRepo: AgentDefinitionRepository, + private readonly agentRunRegistry: AgentRunRegistry, ) { this.maxConcurrent = parseInt(process.env['MAX_CONCURRENT_AGENTS'] ?? '10', 10); this.maxPending = parseInt(process.env['MAX_PENDING_AGENTS'] ?? '100', 10); @@ -257,33 +288,106 @@ export class TaskExecutorService implements OnModuleInit { /** * Execute a single task via AgentRunnerService. * - * Decrements the active counter and drains the queue when done. - * On completion (success or failure), publishes result to Redis if this is a child agent. + * Guarantees the parent always hears back and the concurrency slot is always + * freed — exactly once — via a `settled` latch shared by three racing exits: + * 1. the run resolves → publish result, + * 2. the run rejects → publish failure, + * 3. the hard watchdog fires → abort the in-flight run, publish failure. + * + * The watchdog (3) is the backstop for a run that cannot honor its own + * timeout (e.g. a tool that ignores the abort signal): without it the awaited + * `agentRunner.run()` would never settle, leaking the slot forever and + * leaving the parent waiting indefinitely with no feedback. */ private async executeTask(item: QueueItem): Promise { const { agentRunId, options } = item; + const timeoutMs = options.timeoutMs ?? DEFAULT_SUBAGENT_TIMEOUT_MS; + + logger.debug({ agentRunId, timeoutMs }, 'Starting task execution'); - logger.debug({ agentRunId }, 'Starting task execution'); + let settled = false; + const finalize = (): void => { + this.activeCount_--; + this.drain(); + }; + + // Backstop: if the run hasn't settled within its timeout + grace, the loop + // failed to abort itself. Forcibly abort, report failure to the parent, and + // free the slot — the orphaned promise (if any) becomes a no-op on settle. + const watchdog = setTimeout(() => { + if (settled) return; + settled = true; + logger.error( + { agentRunId, timeoutMs }, + 'Sub-agent watchdog fired — run did not settle; aborting and failing', + ); + this.agentRunRegistry.abort(agentRunId, 'watchdog_timeout'); + this.agentRunRepo + .update(agentRunId, { + status: 'failed', + error: 'Sub-agent run timed out (watchdog)', + completedAt: new Date(), + }) + .catch((err: unknown) => { + logger.error({ agentRunId, err }, 'Failed to mark watchdog-timed-out run as failed'); + }); + void this.publishFailureIfChild(agentRunId, new Error('Sub-agent run timed out')); + finalize(); + }, timeoutMs + SUBAGENT_WATCHDOG_GRACE_MS); + if (typeof watchdog === 'object' && watchdog !== null && typeof watchdog.unref === 'function') { + watchdog.unref(); + } try { const result = await this.agentRunner.run({ ...options, isSubAgent: true, agentRunId, + timeoutMs, + onProgress: this.buildProgressForwarder(options), }); + if (settled) return; // watchdog already handled it + settled = true; + clearTimeout(watchdog); logger.info({ agentRunId }, 'Task completed successfully'); await this.publishResultIfChild(agentRunId, result); + finalize(); } catch (err: unknown) { + if (settled) return; // watchdog already handled it + settled = true; + clearTimeout(watchdog); const error = err instanceof Error ? err : new Error(String(err)); logger.error({ agentRunId, error: error.message }, 'Task execution failed'); await this.publishFailureIfChild(agentRunId, error); - } finally { - this.activeCount_--; - this.drain(); + finalize(); } } + /** + * Build a throttled progress callback that forwards a sub-agent's tool + * activity to the parent's channel, so long-running sub-agents are visibly + * alive instead of looking hung. Transient: delivered as a channel message, + * not persisted to session history. + */ + private buildProgressForwarder(options: SubmitOptions): (hint: string) => void { + const label = options.displayName ?? 'sub-agent'; + let lastEmit = 0; + return (hint: string): void => { + const now = Date.now(); + if (now - lastEmit < SUBAGENT_PROGRESS_THROTTLE_MS) return; + lastEmit = now; + void this.pubsub + .publish(PUBSUB_CHANNELS.channelResponseReady, { + sessionId: options.sessionId, + output: `_${label} is working: ${hint}_`, + }) + .catch((err: unknown) => { + logger.debug({ err }, 'Failed to forward sub-agent progress'); + }); + }; + } + // ---------------------------------------------------------------- // // Result publishing // // ---------------------------------------------------------------- // diff --git a/packages/api/src/engine/tools/spawn.ts b/packages/api/src/engine/tools/spawn.ts index 760a574..20c3010 100644 --- a/packages/api/src/engine/tools/spawn.ts +++ b/packages/api/src/engine/tools/spawn.ts @@ -27,6 +27,10 @@ interface TaskSubmitter { readonly budgetTracker?: BudgetTracker; /** Parent abort signal forwarded for cancellation cascade. */ readonly abortSignal?: AbortSignal; + /** Wall-clock cap for the sub-agent run (ms), resolved from the user's policy. */ + readonly timeoutMs?: number; + /** Human-readable agent label, used for progress messages to the parent. */ + readonly displayName?: string; }, ): void; } @@ -40,6 +44,8 @@ interface TaskSubmitter { * @param parentSessionId - The session ID of the calling agent. * @param parentAgentRunId - The AgentRun ID of the parent agent (used to deliver results back). * @param userId - The ID of the user initiating the spawn. + * @param budgetTracker - Optional shared budget tracker inherited by the sub-agent. + * @param subAgentTimeoutMs - Wall-clock cap (ms) for the spawned run, from the user's policy. */ export function createSpawnTool( agentDefRepo: AgentDefinitionRepository, @@ -49,6 +55,7 @@ export function createSpawnTool( parentAgentRunId: string, userId: string, budgetTracker?: BudgetTracker, + subAgentTimeoutMs?: number, ): Tool { return { name: 'spawn', @@ -136,8 +143,10 @@ export function createSpawnTool( input: prompt, userId, sessionId: parentSessionId, + displayName, ...(budgetTracker ? { budgetTracker } : {}), ...(ctx?.abortSignal ? { abortSignal: ctx.abortSignal } : {}), + ...(subAgentTimeoutMs != null ? { timeoutMs: subAgentTimeoutMs } : {}), }); } diff --git a/packages/web/next.config.ts b/packages/web/next.config.ts index f445a7e..a23fe9d 100644 --- a/packages/web/next.config.ts +++ b/packages/web/next.config.ts @@ -2,6 +2,14 @@ import type { NextConfig } from 'next'; const nextConfig: NextConfig = { transpilePackages: ['@clawix/shared'], + experimental: { + // Rewrite the unified `radix-ui` barrel into direct deep imports so only + // the primitives a page uses get compiled. The barrel is imported by ~19 + // ui/ components, so without this every route's first dev compile drags in + // all ~30 Radix primitives. (lucide-react and recharts are already in + // Next's default optimizePackageImports list; `radix-ui` is not.) + optimizePackageImports: ['radix-ui'], + }, // Enable standalone output for Docker production builds output: process.env.NODE_ENV === 'production' ? 'standalone' : undefined, }; diff --git a/packages/web/src/app/(dashboard)/agents/user-agents/__tests__/bound-agents.test.ts b/packages/web/src/app/(dashboard)/agents/user-agents/__tests__/bound-agents.test.ts new file mode 100644 index 0000000..9188ea5 --- /dev/null +++ b/packages/web/src/app/(dashboard)/agents/user-agents/__tests__/bound-agents.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, it } from 'vitest'; +import { selectBoundAgentIds, type UserAgentBinding } from '../bound-agents'; + +describe('selectBoundAgentIds', () => { + const bindings: UserAgentBinding[] = [ + { agentDefinitionId: 'primary-admin', userId: 'admin' }, + { agentDefinitionId: 'primary-dev', userId: 'dev' }, + { agentDefinitionId: 'sub-admin', userId: 'admin' }, + ]; + + it("excludes other users' bindings (admin gets ALL bindings from the API)", () => { + // Regression: the /agents/user-agents endpoint returns every user's + // bindings to admins. Without filtering, an admin would mark dev's primary + // as Active too. + const result = selectBoundAgentIds(bindings, 'admin'); + expect(result).toEqual(new Set(['primary-admin', 'sub-admin'])); + expect(result.has('primary-dev')).toBe(false); + }); + + it('keeps a non-admin user to their own bindings', () => { + expect(selectBoundAgentIds(bindings, 'dev')).toEqual(new Set(['primary-dev'])); + }); + + it('returns an empty set when the current user is unknown', () => { + expect(selectBoundAgentIds(bindings, undefined)).toEqual(new Set()); + }); +}); diff --git a/packages/web/src/app/(dashboard)/agents/user-agents/bound-agents.ts b/packages/web/src/app/(dashboard)/agents/user-agents/bound-agents.ts new file mode 100644 index 0000000..155e035 --- /dev/null +++ b/packages/web/src/app/(dashboard)/agents/user-agents/bound-agents.ts @@ -0,0 +1,23 @@ +/** A UserAgent binding as returned by `GET /api/v1/agents/user-agents`. */ +export interface UserAgentBinding { + agentDefinitionId: string; + userId: string; +} + +/** + * Build the set of `AgentDefinition` ids assigned to a single user. + * + * The `/agents/user-agents` endpoint returns ALL users' bindings when the + * caller is an admin, so callers MUST filter to the current user — otherwise an + * admin would see every user's primary agent marked "Active" on the agents + * page instead of only their own assigned primary. + */ +export function selectBoundAgentIds( + bindings: readonly UserAgentBinding[], + currentUserId: string | undefined, +): Set { + if (!currentUserId) return new Set(); + return new Set( + bindings.filter((b) => b.userId === currentUserId).map((b) => b.agentDefinitionId), + ); +} diff --git a/packages/web/src/app/(dashboard)/agents/user-agents/page.tsx b/packages/web/src/app/(dashboard)/agents/user-agents/page.tsx index d9501f9..c56a8ee 100644 --- a/packages/web/src/app/(dashboard)/agents/user-agents/page.tsx +++ b/packages/web/src/app/(dashboard)/agents/user-agents/page.tsx @@ -27,6 +27,7 @@ import { Input } from '@/components/ui/input'; import { Label } from '@/components/ui/label'; import { Switch } from '@/components/ui/switch'; import { ProviderModelFields, agentFormInput, useProviders } from '../agent-form-fields'; +import { selectBoundAgentIds, type UserAgentBinding } from './bound-agents'; import { Table, TableBody, @@ -1209,11 +1210,14 @@ export default function UserAgentsPage() { const [agentsRes, userAgentsRes] = await Promise.all([ authFetch('/api/v1/agents?limit=100&includeCreatedBy=true'), // Endpoint returns the array directly, not wrapped in { data }. - authFetch<{ agentDefinitionId: string }[]>('/api/v1/agents/user-agents').catch(() => []), + // For admins it returns ALL users' bindings, so selectBoundAgentIds + // filters to the current user — boundAgentIds must only reflect *my* + // assigned primary. + authFetch('/api/v1/agents/user-agents').catch(() => []), ]); const all = Array.isArray(agentsRes.data) ? agentsRes.data : []; const bindings = Array.isArray(userAgentsRes) ? userAgentsRes : []; - setBoundAgentIds(new Set(bindings.map((b) => b.agentDefinitionId))); + setBoundAgentIds(selectBoundAgentIds(bindings, currentUserId)); // Official agents (primary first, then workers) setOfficialAgents( diff --git a/packages/web/src/app/(dashboard)/conversations/__tests__/chat-input.test.tsx b/packages/web/src/app/(dashboard)/conversations/__tests__/chat-input.test.tsx new file mode 100644 index 0000000..3e2fc89 --- /dev/null +++ b/packages/web/src/app/(dashboard)/conversations/__tests__/chat-input.test.tsx @@ -0,0 +1,98 @@ +import { createElement } from 'react'; +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { cleanup, fireEvent, render, screen } from '@testing-library/react'; +import { ChatInput } from '../chat-input'; + +// ChatInput fetches /api/v1/skills on mount; stub it so no real network runs. +vi.mock('@/lib/auth', () => ({ + authFetch: vi.fn().mockResolvedValue({ data: [] }), +})); + +afterEach(() => { + cleanup(); + vi.clearAllMocks(); +}); + +/** + * History nav (#152) must gate on the absolute caret edge, not logical-line + * detection. A long sentence that soft-wraps via CSS contains no '\n', so the + * old `!value.slice(...).includes('\n')` check treated every wrapped row as the + * first/last line and hijacked ArrowUp/Down for history (#157). These tests + * exercise the gate purely through caret offset, which jsdom supports. + */ +describe('ChatInput history navigation — caret-edge gating (#157)', () => { + const history = ['most recent message', 'older message']; + + function setup() { + render( + createElement(ChatInput, { + onSend: () => true, + disabled: false, + isConnected: true, + userMessages: history, + }), + ); + return screen.getByLabelText('Chat message') as HTMLTextAreaElement; + } + + // A long single-line draft with no newline — exactly what CSS soft-wrap produces. + const wrapped = 'a'.repeat(200); + + it('does NOT recall history on ArrowUp when caret is mid-draft (wrapped line, no newline)', () => { + const ta = setup(); + fireEvent.change(ta, { target: { value: wrapped } }); + ta.selectionStart = ta.selectionEnd = 100; // caret in the middle of the wrapped line + fireEvent.keyDown(ta, { key: 'ArrowUp' }); + expect(ta.value).toBe(wrapped); // caret moves natively; history untouched + }); + + it('recalls the most recent message on ArrowUp only when caret is at position 0', () => { + const ta = setup(); + fireEvent.change(ta, { target: { value: wrapped } }); + ta.selectionStart = ta.selectionEnd = 0; + fireEvent.keyDown(ta, { key: 'ArrowUp' }); + expect(ta.value).toBe(history[0]); + }); + + it('does NOT recall history on ArrowUp with a non-collapsed selection at the start', () => { + const ta = setup(); + fireEvent.change(ta, { target: { value: wrapped } }); + ta.selectionStart = 0; + ta.selectionEnd = 10; // selection, not a collapsed caret + fireEvent.keyDown(ta, { key: 'ArrowUp' }); + expect(ta.value).toBe(wrapped); + }); + + // Helper: press ArrowUp with the caret pinned at the start (the setTimeout + // caret-reset in the component is async and won't fire under fireEvent). + function arrowUpFromStart(ta: HTMLTextAreaElement) { + ta.selectionStart = ta.selectionEnd = 0; + fireEvent.keyDown(ta, { key: 'ArrowUp' }); + } + + it('does NOT recall newer history on ArrowDown when caret is mid-draft', () => { + const ta = setup(); + // First climb into history so historyIndexRef >= 0. + fireEvent.change(ta, { target: { value: '' } }); + arrowUpFromStart(ta); + arrowUpFromStart(ta); + expect(ta.value).toBe(history[1]); // now on the older entry + + // Caret mid-text → ArrowDown must move the caret, not walk history. + ta.selectionStart = ta.selectionEnd = 2; + fireEvent.keyDown(ta, { key: 'ArrowDown' }); + expect(ta.value).toBe(history[1]); + }); + + it('walks back down history on ArrowDown only when caret is at the end of the text', () => { + const ta = setup(); + fireEvent.change(ta, { target: { value: '' } }); + arrowUpFromStart(ta); + arrowUpFromStart(ta); + expect(ta.value).toBe(history[1]); + + ta.selectionStart = ta.selectionEnd = ta.value.length; + fireEvent.keyDown(ta, { key: 'ArrowDown' }); + expect(ta.value).toBe(history[0]); + }); +}); diff --git a/packages/web/src/app/(dashboard)/conversations/__tests__/chat-thread.test.ts b/packages/web/src/app/(dashboard)/conversations/__tests__/chat-thread.test.ts new file mode 100644 index 0000000..5840f1d --- /dev/null +++ b/packages/web/src/app/(dashboard)/conversations/__tests__/chat-thread.test.ts @@ -0,0 +1,59 @@ +import { createElement } from 'react'; +import { describe, expect, it } from 'vitest'; +import { dedentCodeBlocks, reactNodeToText } from '../chat-thread'; + +describe('dedentCodeBlocks', () => { + it('leaves a code fence nested in a numbered list untouched', () => { + const md = [ + '2. Or call the login API:', + ' ```bash', + ' curl -X POST https://x/api/auth/login \\', + ' -H "Content-Type: application/json"', + ' ```', + '', + '3. Copy the `accessToken` from the response', + ].join('\n'); + + // Nested fence markers carry list-indent meaning — dedenting would desync + // the fence from its body and break the list parse. Must be a no-op. + expect(dedentCodeBlocks(md)).toBe(md); + }); + + it('dedents a uniformly over-indented top-level fence', () => { + const md = ['```bash', ' curl -X POST https://x', ' echo done', '```'].join('\n'); + const expected = ['```bash', 'curl -X POST https://x', 'echo done', '```'].join('\n'); + expect(dedentCodeBlocks(md)).toBe(expected); + }); + + it('preserves relative indentation within a top-level fence', () => { + const md = ['```python', ' def f():', ' return 1', '```'].join('\n'); + const expected = ['```python', 'def f():', ' return 1', '```'].join('\n'); + expect(dedentCodeBlocks(md)).toBe(expected); + }); + + it('leaves an already-flush top-level fence unchanged', () => { + const md = ['```ts', 'const x = 1;', '```'].join('\n'); + expect(dedentCodeBlocks(md)).toBe(md); + }); +}); + +describe('reactNodeToText', () => { + it('extracts text from a rendered
 tree (ReactMarkdown shape)', () => {
+    const tree = createElement(
+      'code',
+      { className: 'language-bash' },
+      'curl -X POST https://x\necho done\n',
+    );
+    expect(reactNodeToText(tree)).toBe('curl -X POST https://x\necho done\n');
+  });
+
+  it('joins mixed string and element children', () => {
+    const tree = ['const x = ', createElement('span', null, '1'), ';'];
+    expect(reactNodeToText(tree)).toBe('const x = 1;');
+  });
+
+  it('returns empty string for null/undefined', () => {
+    expect(reactNodeToText(null)).toBe('');
+    expect(reactNodeToText(undefined)).toBe('');
+  });
+});
diff --git a/packages/web/src/app/(dashboard)/conversations/chat-input.tsx b/packages/web/src/app/(dashboard)/conversations/chat-input.tsx
index 96a3f74..63eb351 100644
--- a/packages/web/src/app/(dashboard)/conversations/chat-input.tsx
+++ b/packages/web/src/app/(dashboard)/conversations/chat-input.tsx
@@ -299,12 +299,25 @@ export function ChatInput({
                   return;
                 }
               }
-              // Input history: ArrowUp/Down when not in slash menu.
-              // After programmatically restoring a history entry, schedule an
-              // autoResize on the next tick so the textarea grows/shrinks to
-              // match — onChange does not fire for setValue() and stale heights
-              // truncate long entries.
+              // Input history: ArrowUp/Down when not in slash menu. Only
+              // hijack the arrow when the caret sits at the absolute editable
+              // edge — start of the text for ArrowUp, end for ArrowDown — with a
+              // collapsed selection, so any in-draft edit (including soft-wrapped
+              // long lines that contain no '\n') moves the caret between rows as
+              // normal. We gate on caret *offset*, not logical-line detection:
+              // a CSS-wrapped line has no newline, so a '\n' scan would treat
+              // every wrapped row as the first/last line and wrongly recall
+              // history (#157). After restoring an entry we move the caret to
+              // that same edge (start for ArrowUp, end for ArrowDown) so repeated
+              // presses keep chaining through history, and schedule an autoResize
+              // on the next tick so the textarea grows/shrinks to match — onChange
+              // does not fire for setValue() and stale heights truncate long
+              // entries.
               if (e.key === 'ArrowUp' && !showCommands && inputHistory.length > 0) {
+                const el = e.currentTarget;
+                const caretAtStart =
+                  el.selectionStart === el.selectionEnd && el.selectionStart === 0;
+                if (!caretAtStart) return;
                 if (historyIndexRef.current === -1) {
                   savedInputRef.current = value;
                 }
@@ -312,12 +325,20 @@ export function ChatInput({
                 if (nextIndex !== historyIndexRef.current || historyIndexRef.current === -1) {
                   historyIndexRef.current = nextIndex;
                   setValue(inputHistory[nextIndex]!);
-                  setTimeout(autoResize, 0);
+                  setTimeout(() => {
+                    autoResize();
+                    const ta = textareaRef.current;
+                    if (ta) ta.selectionStart = ta.selectionEnd = 0;
+                  }, 0);
                   e.preventDefault();
                 }
                 return;
               }
               if (e.key === 'ArrowDown' && !showCommands && historyIndexRef.current >= 0) {
+                const el = e.currentTarget;
+                const caretAtEnd =
+                  el.selectionStart === el.selectionEnd && el.selectionStart === el.value.length;
+                if (!caretAtEnd) return;
                 e.preventDefault();
                 const nextIndex = historyIndexRef.current - 1;
                 historyIndexRef.current = nextIndex;
@@ -326,7 +347,11 @@ export function ChatInput({
                 } else {
                   setValue(inputHistory[nextIndex]!);
                 }
-                setTimeout(autoResize, 0);
+                setTimeout(() => {
+                  autoResize();
+                  const ta = textareaRef.current;
+                  if (ta) ta.selectionStart = ta.selectionEnd = ta.value.length;
+                }, 0);
                 return;
               }
               if (e.key === 'Enter' && !e.shiftKey) {
diff --git a/packages/web/src/app/(dashboard)/conversations/chat-thread.tsx b/packages/web/src/app/(dashboard)/conversations/chat-thread.tsx
index 693b51e..ae16b58 100644
--- a/packages/web/src/app/(dashboard)/conversations/chat-thread.tsx
+++ b/packages/web/src/app/(dashboard)/conversations/chat-thread.tsx
@@ -1,6 +1,15 @@
 'use client';
 
-import { useCallback, useEffect, useRef, useState } from 'react';
+import {
+  Children,
+  isValidElement,
+  useCallback,
+  useEffect,
+  useRef,
+  useState,
+  type ComponentPropsWithoutRef,
+  type ReactNode,
+} from 'react';
 import { ArrowDown, Bot, Check, Copy, Loader2, RotateCcw } from 'lucide-react';
 import ReactMarkdown from 'react-markdown';
 import remarkGfm from 'remark-gfm';
@@ -89,10 +98,14 @@ function UserMessage({
 }
 
 /** Dedent code blocks in raw markdown — strips common leading whitespace
- *  from the content inside fenced code blocks (``` ... ```). */
-function dedentCodeBlocks(md: string): string {
+ *  from the content inside top-level fenced code blocks (``` ... ```).
+ *  Only fences whose opening and closing markers sit at column 0 are touched:
+ *  fences nested in a list item or blockquote carry meaningful leading
+ *  indentation that the markdown parser uses for nesting, so dedenting their
+ *  bodies (without the markers) would desync the fence and corrupt the parse. */
+export function dedentCodeBlocks(md: string): string {
   return md.replace(
-    /(```\w*\n)([\s\S]*?)(```)/g,
+    /^(```\w*\n)([\s\S]*?)^(```)$/gm,
     (_match, open: string, body: string, close: string) => {
       const lines = body.split('\n');
       const nonEmpty = lines.filter((l) => l.trim().length > 0);
@@ -105,6 +118,64 @@ function dedentCodeBlocks(md: string): string {
   );
 }
 
+/** Recursively collect the plain-text content of a React node tree — used to
+ *  recover a fenced code block's raw text from ReactMarkdown's rendered
+ *  `
` so it can be copied to the clipboard. */ +export function reactNodeToText(node: ReactNode): string { + return Children.toArray(node) + .map((child) => { + if (typeof child === 'string') return child; + if (typeof child === 'number') return String(child); + if (isValidElement(child)) { + return reactNodeToText((child.props as { children?: ReactNode }).children); + } + return ''; + }) + .join(''); +} + +/** Copy button overlaid on a fenced code block; reveals on hover/focus. */ +function CodeCopyButton({ content }: { content: string }) { + const [copied, setCopied] = useState(false); + return ( + + ); +} + +/** ReactMarkdown `pre` override: wraps the code block so a copy button can be + * overlaid outside the `
`'s horizontal scroll area. */
+function CodeBlock({ children, ...props }: ComponentPropsWithoutRef<'pre'>) {
+  const code = reactNodeToText(children).replace(/\n$/, '');
+  return (
+    
+
{children}
+ {code.length > 0 && } +
+ ); +} + function AgentMessage({ content, createdAt }: { content: string; createdAt: string }) { return (
@@ -113,7 +184,7 @@ function AgentMessage({ content, createdAt }: { content: string; createdAt: stri
- + {dedentCodeBlocks(content)}
diff --git a/packages/web/src/app/(dashboard)/conversations/use-chat.ts b/packages/web/src/app/(dashboard)/conversations/use-chat.ts index faf81f7..d2bf0c5 100644 --- a/packages/web/src/app/(dashboard)/conversations/use-chat.ts +++ b/packages/web/src/app/(dashboard)/conversations/use-chat.ts @@ -6,7 +6,7 @@ import { toast } from 'sonner'; import type { ToolCallRequest, ToolProgressMode } from '@clawix/shared'; import { resolveToolProgressMode } from '@clawix/shared'; -import { authFetch, getAccessToken } from '@/lib/auth'; +import { authFetch, clearTokens, getAccessToken } from '@/lib/auth'; import { uuidv4 } from '@/lib/utils'; /** @@ -441,9 +441,12 @@ export function useChat() { setIsConnected(false); if (pingIntervalRef.current) clearInterval(pingIntervalRef.current); - // Auth failure — don't reconnect, redirect to login + // Auth failure — clear the stale tokens and redirect to login instead of + // leaving the user stranded on a half-broken dashboard. if (event.code === 4001) { setError('Session expired. Please log in again.'); + clearTokens(); + window.location.href = '/login'; return; } diff --git a/packages/web/src/app/(dashboard)/settings/__tests__/channels-tab.test.ts b/packages/web/src/app/(dashboard)/settings/__tests__/channels-tab.test.ts new file mode 100644 index 0000000..5cdb164 --- /dev/null +++ b/packages/web/src/app/(dashboard)/settings/__tests__/channels-tab.test.ts @@ -0,0 +1,39 @@ +import { describe, it, expect } from 'vitest'; +import { buildConfig } from '../channels-tab'; + +function fd(entries: Record): FormData { + const form = new FormData(); + for (const [key, value] of Object.entries(entries)) form.append(key, value); + return form; +} + +describe('buildConfig — telegram', () => { + it('includes webhook_url and webhook_secret in webhook mode (#109)', () => { + const config = buildConfig( + 'telegram', + fd({ + bot_token: 'tok', + mode: 'webhook', + webhook_url: 'https://example.com/hook', + webhook_secret: 'sekret', + }), + ); + + expect(config).toMatchObject({ + bot_token: 'tok', + mode: 'webhook', + webhook_url: 'https://example.com/hook', + webhook_secret: 'sekret', + }); + }); + + it('omits blank webhook fields and preserves an existing secret', () => { + const config = buildConfig('telegram', fd({ bot_token: 'tok', mode: 'polling' }), { + webhook_secret: 'existing-secret', + }); + + expect(config['webhook_url']).toBeUndefined(); + expect(config['webhook_secret']).toBe('existing-secret'); + expect(config['mode']).toBe('polling'); + }); +}); diff --git a/packages/web/src/app/(dashboard)/settings/channels-tab.tsx b/packages/web/src/app/(dashboard)/settings/channels-tab.tsx index dc1d13e..99c4ea1 100644 --- a/packages/web/src/app/(dashboard)/settings/channels-tab.tsx +++ b/packages/web/src/app/(dashboard)/settings/channels-tab.tsx @@ -82,7 +82,7 @@ function ChannelIcon({ type }: { type: string }) { * Build a config object from form data, merging with existing config. * Blank sensitive fields (e.g. bot token) are omitted to preserve existing values. */ -function buildConfig( +export function buildConfig( type: string, form: FormData, existing: Record = {}, @@ -92,8 +92,14 @@ function buildConfig( if (type === 'telegram') { const botToken = formString(form, 'bot_token'); const mode = formString(form, 'mode'); + const webhookUrl = formString(form, 'webhook_url'); + const webhookSecret = formString(form, 'webhook_secret'); if (botToken) config['bot_token'] = botToken; if (mode) config['mode'] = mode; + // Webhook fields render only in webhook mode; omit when blank so a blank + // secret preserves the existing one (same convention as bot_token). + if (webhookUrl) config['webhook_url'] = webhookUrl; + if (webhookSecret) config['webhook_secret'] = webhookSecret; } if (type === 'web') { diff --git a/packages/web/src/components/ui/vanta-background.tsx b/packages/web/src/components/ui/vanta-background.tsx index 73d9b3f..fce5782 100644 --- a/packages/web/src/components/ui/vanta-background.tsx +++ b/packages/web/src/components/ui/vanta-background.tsx @@ -130,11 +130,31 @@ export function VantaBackground({ effect, children, className }: VantaBackground }, 250); } - void createEffect(); + // Respect reduced-motion: skip the WebGL/p5 background entirely. + if (window.matchMedia('(prefers-reduced-motion: reduce)').matches) return; + + // Defer the heavy three.js / p5 dynamic import + WebGL init until the + // browser is idle so this decorative work never competes with first paint, + // hydration, or the route entrance animation. In dev this also pushes + // Next's on-demand compile of three/p5 off the critical first-load path. + let idleHandle: number | undefined; + let usedIdleCallback = false; + const scheduleInit = () => void createEffect(); + if (typeof window.requestIdleCallback === 'function') { + usedIdleCallback = true; + idleHandle = window.requestIdleCallback(scheduleInit, { timeout: 2000 }); + } else { + idleHandle = window.setTimeout(scheduleInit, 200); + } + window.addEventListener('resize', handleResize); return () => { cancelled = true; + if (idleHandle !== undefined) { + if (usedIdleCallback) window.cancelIdleCallback(idleHandle); + else clearTimeout(idleHandle); + } if (resizeTimer) clearTimeout(resizeTimer); window.removeEventListener('resize', handleResize); destroyEffect(); diff --git a/packages/web/src/lib/__tests__/api.test.ts b/packages/web/src/lib/__tests__/api.test.ts new file mode 100644 index 0000000..2a66d01 --- /dev/null +++ b/packages/web/src/lib/__tests__/api.test.ts @@ -0,0 +1,44 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { apiFetch } from '../api'; + +function okResponse(): Response { + return { + ok: true, + status: 200, + headers: new Headers({ 'content-type': 'application/json', 'content-length': '2' }), + json: async () => ({}), + } as unknown as Response; +} + +function headersOf(call: unknown): Record { + const init = (call as [string, RequestInit])[1]; + return init.headers as Record; +} + +describe('apiFetch — Content-Type handling', () => { + const fetchMock = vi.fn(); + + beforeEach(() => { + fetchMock.mockReset().mockResolvedValue(okResponse()); + vi.stubGlobal('fetch', fetchMock); + }); + + afterEach(() => { + vi.unstubAllGlobals(); + }); + + it('does NOT set application/json for a FormData body (browser adds the boundary)', async () => { + const body = new FormData(); + body.append('file', new Blob(['x']), 'f.txt'); + + await apiFetch('/upload', { method: 'POST', body }); + + expect(headersOf(fetchMock.mock.calls[0])['Content-Type']).toBeUndefined(); + }); + + it('sets application/json for a non-FormData body', async () => { + await apiFetch('/items', { method: 'POST', body: JSON.stringify({ a: 1 }) }); + + expect(headersOf(fetchMock.mock.calls[0])['Content-Type']).toBe('application/json'); + }); +}); diff --git a/packages/web/src/lib/api.ts b/packages/web/src/lib/api.ts index 230bbd5..cf3b319 100644 --- a/packages/web/src/lib/api.ts +++ b/packages/web/src/lib/api.ts @@ -50,7 +50,9 @@ export async function apiFetch( credentials: 'include', signal: controller.signal, headers: { - ...(body ? { 'Content-Type': 'application/json' } : {}), + // Let the browser set multipart Content-Type (with boundary) for + // FormData bodies; only force JSON for other non-empty bodies. + ...(body && !(body instanceof FormData) ? { 'Content-Type': 'application/json' } : {}), ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : {}), ...(headers as Record), }, diff --git a/skills/builtin/projector-creator/SKILL.md b/skills/builtin/projector-creator/SKILL.md index d225bb2..ff626dc 100644 --- a/skills/builtin/projector-creator/SKILL.md +++ b/skills/builtin/projector-creator/SKILL.md @@ -23,7 +23,15 @@ If the tool needs live data (exchange rates, etc.), YOU must fetch it first with You are a COORDINATOR for projector tasks. **NEVER create or modify projector items yourself.** When the user asks to create, modify, or fix a projector item: -Split into exactly 3 sequential spawns. Run ALL automatically — NEVER ask the user for permission between steps. +Split into exactly 3 spawns that run **STRICTLY ONE AT A TIME — they are dependent, NOT parallel.** + +> **Emit only ONE `spawn` call per turn, then WAIT for it to return successfully before emitting the next.** +> Step #2 edits the file Step #1 writes, and Step #3 verifies the file Step #2 edits. If you batch +> two or three `spawn` calls into a single turn, Step #2 starts before the HTML file exists, finds no +> `// JAVASCRIPT GOES HERE` placeholder to edit, and spins until it hits the sub-agent wall-clock timeout. +> NEVER put more than one `spawn` in the same turn. + +Run all three automatically, in order — NEVER ask the user for permission between steps. **Spawn #1 (HTML + CSS):** @@ -34,7 +42,7 @@ spawn(agent_name="coder", prompt="read_file(\"/skills/builtin/projector-creator/ **Spawn #2 (JavaScript):** ``` -spawn(agent_name="coder", prompt="FIRST: read_file(\"/skills/builtin/projector-creator/references/js-patterns.md\") — this contains ready-to-use JS code blocks for all common features. THEN: read_file /workspace/projector//index.html to see the HTML element IDs. THEN: use edit_file to replace '// JAVASCRIPT GOES HERE' with COMPLETE JavaScript. Copy the patterns from the skill and adapt element IDs to match the HTML. Every function must be real working code — no stubs, no TODOs. Verify after.") +spawn(agent_name="coder", prompt="PRECONDITION: read_file /workspace/projector//index.html. If the file does not exist or does not contain '// JAVASCRIPT GOES HERE', STOP immediately and report 'Step 1 (HTML) is not complete' — do NOT wait or retry. Otherwise: FIRST read_file(\"/skills/builtin/projector-creator/references/js-patterns.md\") — this contains ready-to-use JS code blocks for all common features. THEN use edit_file to replace '// JAVASCRIPT GOES HERE' with COMPLETE JavaScript. Copy the patterns from the skill and adapt element IDs to match the HTML. Every function must be real working code — no stubs, no TODOs. Verify after.") ``` **Spawn #3 (Review):**