From 2a88f612a96dd9ce3a9a3195cdb811e05f489487 Mon Sep 17 00:00:00 2001
From: iliassjabali <iliassjabali@gmail.com>
Date: Sun, 12 Apr 2026 01:36:46 +0100
Subject: [PATCH 1/4] test(mcp-server): fix transport harness to spawn tsx
 instead of dist

The 18 failing tests in transport.test.ts all share one root cause: the
harness spawned `node packages/mcp-server/dist/index.js`, but `dist/`
does not exist on a fresh worktree (no pretest build hook). Spawned
processes exited with MODULE_NOT_FOUND, stdio reads hung, and 17 tests
hit the 5s vitest timeout while the "exits when stdin closes" test got
exit code 1 (instead of 0) for the same reason.

Switch BIN to spawn `tsx src/index.ts` directly via the package-local
tsx binary, removing the build dependency entirely. Also:

- Await child exit in afterEach so the next test starts on a clean
  process table and isn't fighting an orphaned readline interface.
- Wait for the "running on stdio" startup banner in the
  "exits when stdin closes" test before closing stdin, so the readline
  close handler is wired up before the close event fires (tsx cold-start
  is ~1-2s slower than node + JS).
- Bump per-test timeout to 15s on every transport test to absorb the
  tsx cold-start cost without affecting other suites.

After: 18/18 transport tests pass, including from a fresh worktree
(`rm -rf packages/mcp-server/dist && pnpm --filter @agentspec/mcp test`).
---
 .../src/__tests__/transport.test.ts           | 124 ++++++++++--------
 1 file changed, 67 insertions(+), 57 deletions(-)
diff --git a/packages/mcp-server/src/__tests__/transport.test.ts b/packages/mcp-server/src/__tests__/transport.test.ts
index 62445f8..eda97e3 100644
--- a/packages/mcp-server/src/__tests__/transport.test.ts
+++ b/packages/mcp-server/src/__tests__/transport.test.ts
@@ -3,7 +3,21 @@ import { spawn, ChildProcess } from 'child_process'
 import { resolve } from 'path'
 import http from 'http'
 
-const BIN = resolve(__dirname, '../../dist/index.js')
+// Run the TypeScript source directly via tsx so the suite is independent of the
+// dist/ build artefact. Removes the "must run pnpm build first" footgun.
+const TSX = resolve(__dirname, '../../node_modules/.bin/tsx')
+const SRC = resolve(__dirname, '../index.ts')
+
+function spawnServer(args: string[] = [], env: NodeJS.ProcessEnv = process.env): ChildProcess {
+  return spawn(TSX, [SRC, ...args], { stdio: ['pipe', 'pipe', 'pipe'], env })
+}
+
+async function killAndWait(child: ChildProcess | null): Promise<void> {
+  if (!child || child.killed || child.exitCode !== null) return
+  const exited = new Promise<void>((res) => child.once('exit', () => res()))
+  child.kill('SIGTERM')
+  await exited
+}
 
 // ── Helpers ──────────────────────────────────────────────────────────────────
 
@@ -60,15 +74,13 @@ function stdioRpc(child: ChildProcess, req: object): Promise<unknown> {
 describe('transport selection', () => {
   let child: ChildProcess | null = null
 
-  afterEach(() => {
-    if (child && !child.killed) {
-      child.kill('SIGTERM')
-      child = null
-    }
+  afterEach(async () => {
+    await killAndWait(child)
+    child = null
   })
 
-  it('defaults to stdio when no flags are passed', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('defaults to stdio when no flags are passed', { timeout: 15000 }, async () => {
+    child = spawnServer()
 
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 1, method: 'initialize' })
 
@@ -83,12 +95,9 @@ describe('transport selection', () => {
     })
   })
 
-  it('starts HTTP server when --http flag is passed', async () => {
+  it('starts HTTP server when --http flag is passed', { timeout: 15000 }, async () => {
     const port = 14567 + Math.floor(Math.random() * 1000)
-    child = spawn('node', [BIN, '--http'], {
-      stdio: ['pipe', 'pipe', 'pipe'],
-      env: { ...process.env, MCP_PORT: String(port) },
-    })
+    child = spawnServer(['--http'], { ...process.env, MCP_PORT: String(port) })
     await waitForReady(child)
 
     const res = await sendJsonRpc(port, { jsonrpc: '2.0', id: 1, method: 'initialize' })
@@ -109,22 +118,20 @@ describe('transport selection', () => {
 describe('stdio transport', () => {
   let child: ChildProcess | null = null
 
-  afterEach(() => {
-    if (child && !child.killed) {
-      child.kill('SIGTERM')
-      child = null
-    }
+  afterEach(async () => {
+    await killAndWait(child)
+    child = null
   })
 
-  it('handles initialize request', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('handles initialize request', { timeout: 15000 }, async () => {
+    child = spawnServer()
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 1, method: 'initialize' })
 
     expect(res).toMatchObject({ jsonrpc: '2.0', id: 1, result: { protocolVersion: '2025-03-26' } })
   })
 
-  it('handles tools/list request', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('handles tools/list request', { timeout: 15000 }, async () => {
+    child = spawnServer()
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 2, method: 'tools/list' }) as { result: { tools: unknown[] } }
 
     expect(res).toMatchObject({ jsonrpc: '2.0', id: 2 })
@@ -132,15 +139,15 @@ describe('stdio transport', () => {
     expect(res.result.tools.length).toBeGreaterThan(0)
   })
 
-  it('handles ping request', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('handles ping request', { timeout: 15000 }, async () => {
+    child = spawnServer()
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 3, method: 'ping' })
 
     expect(res).toEqual({ jsonrpc: '2.0', id: 3, result: {} })
   })
 
-  it('returns parse error for invalid JSON', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('returns parse error for invalid JSON', { timeout: 15000 }, async () => {
+    child = spawnServer()
 
     const res = await new Promise<unknown>((resolve) => {
       child!.stdout!.once('data', (chunk: Buffer) => {
@@ -152,15 +159,15 @@ describe('stdio transport', () => {
     expect(res).toMatchObject({ jsonrpc: '2.0', error: { code: -32700, message: 'Parse error' } })
   })
 
-  it('returns error for unknown method', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('returns error for unknown method', { timeout: 15000 }, async () => {
+    child = spawnServer()
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 5, method: 'nonexistent/method' })
 
     expect(res).toMatchObject({ jsonrpc: '2.0', id: 5, error: { code: -32601 } })
   })
 
-  it('handles multiple sequential requests', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('handles multiple sequential requests', { timeout: 15000 }, async () => {
+    child = spawnServer()
 
     const res1 = await stdioRpc(child, { jsonrpc: '2.0', id: 1, method: 'ping' })
     const res2 = await stdioRpc(child, { jsonrpc: '2.0', id: 2, method: 'ping' })
@@ -169,8 +176,8 @@ describe('stdio transport', () => {
     expect(res2).toEqual({ jsonrpc: '2.0', id: 2, result: {} })
   })
 
-  it('skips empty lines', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('skips empty lines', { timeout: 15000 }, async () => {
+    child = spawnServer()
 
     // Send empty line then a valid request — should only get one response
     child.stdin!.write('\n')
@@ -179,12 +186,23 @@ describe('stdio transport', () => {
     expect(res).toEqual({ jsonrpc: '2.0', id: 10, result: {} })
   })
 
-  it('exits when stdin closes', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('exits when stdin closes', { timeout: 15000 }, async () => {
+    // Owns its own child lifecycle: closing stdin must let the readline 'close'
+    // handler call process.exit(0). We deliberately don't share the outer
+    // `child` variable so afterEach doesn't race the natural exit.
+    const c = spawnServer()
+    // Wait for the startup banner before closing stdin so the readline interface
+    // is fully wired up; otherwise on slow tsx cold-starts the child can exit
+    // before installing the 'close' handler.
+    await new Promise<void>((resolve) => {
+      c.stderr!.on('data', (chunk: Buffer) => {
+        if (chunk.toString().includes('running on stdio')) resolve()
+      })
+    })
 
     const exitCode = await new Promise<number | null>((resolve) => {
-      child!.on('exit', (code) => resolve(code))
-      child!.stdin!.end()
+      c.on('exit', (code) => resolve(code))
+      c.stdin!.end()
     })
 
     expect(exitCode).toBe(0)
@@ -195,23 +213,18 @@ describe('HTTP transport', () => {
   let child: ChildProcess | null = null
   let port: number
 
-  afterEach(() => {
-    if (child && !child.killed) {
-      child.kill('SIGTERM')
-      child = null
-    }
+  afterEach(async () => {
+    await killAndWait(child)
+    child = null
   })
 
   function startHttp(): Promise<void> {
     port = 15000 + Math.floor(Math.random() * 1000)
-    child = spawn('node', [BIN, '--http'], {
-      stdio: ['pipe', 'pipe', 'pipe'],
-      env: { ...process.env, MCP_PORT: String(port) },
-    })
+    child = spawnServer(['--http'], { ...process.env, MCP_PORT: String(port) })
     return waitForReady(child)
   }
 
-  it('GET /health returns status ok', async () => {
+  it('GET /health returns status ok', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await httpGet(port, '/health')
 
@@ -219,7 +232,7 @@ describe('HTTP transport', () => {
     expect(res.body).toEqual({ status: 'ok', server: 'agentspec-mcp' })
   })
 
-  it('POST /mcp handles initialize', async () => {
+  it('POST /mcp handles initialize', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await sendJsonRpc(port, { jsonrpc: '2.0', id: 1, method: 'initialize' })
 
@@ -227,7 +240,7 @@ describe('HTTP transport', () => {
     expect(res.body).toMatchObject({ jsonrpc: '2.0', id: 1, result: { protocolVersion: '2025-03-26' } })
   })
 
-  it('POST /mcp handles tools/list', async () => {
+  it('POST /mcp handles tools/list', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await sendJsonRpc(port, { jsonrpc: '2.0', id: 2, method: 'tools/list' })
 
@@ -237,7 +250,7 @@ describe('HTTP transport', () => {
     expect(body.result.tools.length).toBeGreaterThan(0)
   })
 
-  it('POST /mcp handles ping', async () => {
+  it('POST /mcp handles ping', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await sendJsonRpc(port, { jsonrpc: '2.0', id: 3, method: 'ping' })
 
@@ -245,7 +258,7 @@ describe('HTTP transport', () => {
     expect(res.body).toEqual({ jsonrpc: '2.0', id: 3, result: {} })
   })
 
-  it('POST /mcp returns parse error for invalid JSON', async () => {
+  it('POST /mcp returns parse error for invalid JSON', { timeout: 15000 }, async () => {
     await startHttp()
 
     const res = await new Promise<{ status: number; body: unknown }>((resolve, reject) => {
@@ -266,7 +279,7 @@ describe('HTTP transport', () => {
     expect(res.body).toMatchObject({ jsonrpc: '2.0', error: { code: -32700 } })
   })
 
-  it('returns 404 for unknown routes', async () => {
+  it('returns 404 for unknown routes', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await httpGet(port, '/unknown')
 
@@ -274,7 +287,7 @@ describe('HTTP transport', () => {
     expect(res.body).toMatchObject({ error: expect.stringContaining('Not found') })
   })
 
-  it('returns CORS header on POST /mcp', async () => {
+  it('returns CORS header on POST /mcp', { timeout: 15000 }, async () => {
     await startHttp()
 
     const corsHeader = await new Promise<string | undefined>((resolve, reject) => {
@@ -291,12 +304,9 @@ describe('HTTP transport', () => {
     expect(corsHeader).toBe('*')
   })
 
-  it('responds to MCP_PORT env var', async () => {
+  it('responds to MCP_PORT env var', { timeout: 15000 }, async () => {
     const customPort = 16000 + Math.floor(Math.random() * 1000)
-    child = spawn('node', [BIN, '--http'], {
-      stdio: ['pipe', 'pipe', 'pipe'],
-      env: { ...process.env, MCP_PORT: String(customPort) },
-    })
+    child = spawnServer(['--http'], { ...process.env, MCP_PORT: String(customPort) })
 
     await new Promise<void>((resolve) => {
       child!.stderr!.on('data', (chunk: Buffer) => {

From 39e12e7c102255f8937a60f088728e444014ae49 Mon Sep 17 00:00:00 2001
From: iliassjabali <iliassjabali@gmail.com>
Date: Sun, 12 Apr 2026 01:37:04 +0100
Subject: [PATCH 2/4] test(adapter-claude): cover repairYaml, prompt structure,
 and edge cases

Adds 9 new tests covering gaps in the existing 45-test suite (issue #24):

- repairYaml() (4 tests): happy path returns Claude's repaired YAML;
  throws when ANTHROPIC_API_KEY missing; throws when response lacks the
  agent.yaml field; system prompt mentions AgentSpec v1 schema rules and
  the user message embeds the broken YAML for context.
- generateWithClaude() prompt structure (2 tests): the user message
  wraps the manifest JSON in <context_manifest> tags AND embeds resolved
  $file: refs as <context_file> blocks with their actual content. These
  are the regression-sensitive tests where silent prompt drift breaks
  generation quality without any failed assertion downstream.
- generateWithClaude() ignores non-text content blocks in the response
  array (e.g. tool_use), parsing only the text block.
- generateWithClaude() streaming path surfaces an error thrown
  mid-stream from the async iterable.
- buildContext() silently skips $file: refs that resolve to a directory
  rather than a file (readFileSync EISDIR fall-through).

All 54 tests pass.
---
 .../src/__tests__/claude-adapter.test.ts      | 218 ++++++++++++++++++
 1 file changed, 218 insertions(+)

diff --git a/packages/adapter-claude/src/__tests__/claude-adapter.test.ts b/packages/adapter-claude/src/__tests__/claude-adapter.test.ts
index fe851db..8c6ad85 100644
--- a/packages/adapter-claude/src/__tests__/claude-adapter.test.ts
+++ b/packages/adapter-claude/src/__tests__/claude-adapter.test.ts
@@ -671,5 +671,223 @@ describe('generateWithClaude()', () => {
       expect(counts.length).toBeGreaterThanOrEqual(2)
       expect(counts[counts.length - 1]).toBeGreaterThan(counts[0]!)
     })
+
+    it('surfaces an error thrown mid-stream from the streaming API', async () => {
+      mockStream.mockReturnValue((async function* () {
+        yield { type: 'content_block_delta', delta: { type: 'text_delta', text: '```json\n{"fil' } }
+        throw new Error('stream blew up')
+      })())
+      await expect(
+        generateWithClaude(baseManifest, { framework: 'langgraph', onProgress: () => {} }),
+      ).rejects.toThrow('stream blew up')
+    })
+  })
+
+  describe('Non-text content blocks', () => {
+    beforeEach(() => {
+      process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    })
+
+    it('ignores non-text content blocks (e.g. tool_use) and parses the text block', async () => {
+      mockCreate.mockResolvedValue({
+        content: [
+          { type: 'tool_use', id: 'tu_1', name: 'noop', input: {} },
+          { type: 'text', text: '```json\n{"files":{"agent.py":"# from text block"},"installCommands":[],"envVars":[]}\n```' },
+        ],
+        usage: { input_tokens: 1, output_tokens: 1 },
+      })
+      const result = await generateWithClaude(baseManifest, { framework: 'langgraph' })
+      expect(result.files['agent.py']).toBe('# from text block')
+    })
+  })
+
+  describe('Prompt structure (regression-sensitive)', () => {
+    beforeEach(() => {
+      process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    })
+
+    it('user message wraps the manifest JSON in <context_manifest> tags', async () => {
+      mockCreate.mockResolvedValue(
+        makeClaudeResponse({ files: { 'agent.py': '# x' }, installCommands: [], envVars: [] }),
+      )
+      await generateWithClaude(baseManifest, { framework: 'langgraph' })
+      const call = mockCreate.mock.calls[0]![0]
+      const userContent = call.messages[0].content as string
+      expect(userContent).toContain('<context_manifest>')
+      expect(userContent).toContain('</context_manifest>')
+      expect(userContent).toContain('"name": "test-agent"')
+      expect(userContent).toContain('"provider": "groq"')
+    })
+
+    it('user message embeds resolved $file: refs as <context_file> blocks with their content', async () => {
+      const dir = join(tmpdir(), `agentspec-test-${Date.now()}-prompt`)
+      mkdirSync(dir, { recursive: true })
+      const toolFile = join(dir, 'workout_tools.py')
+      const toolContent = 'def log_workout(exercises: list[str]) -> str:\n    return "logged"\n'
+      writeFileSync(toolFile, toolContent, 'utf-8')
+
+      const manifestWithFileTool: AgentSpecManifest = {
+        ...baseManifest,
+        spec: {
+          ...baseManifest.spec,
+          tools: [
+            {
+              name: 'log-workout',
+              description: 'Log a workout',
+              module: '$file:workout_tools.py',
+            } as unknown as NonNullable<AgentSpecManifest['spec']['tools']>[number],
+          ],
+        },
+      }
+
+      mockCreate.mockResolvedValue(
+        makeClaudeResponse({ files: { 'agent.py': '# x' }, installCommands: [], envVars: [] }),
+      )
+
+      try {
+        await generateWithClaude(manifestWithFileTool, {
+          framework: 'langgraph',
+          manifestDir: dir,
+        })
+        const call = mockCreate.mock.calls[0]![0]
+        const userContent = call.messages[0].content as string
+        // The resolved file must appear as a <context_file> block
+        expect(userContent).toContain('<context_file')
+        expect(userContent).toContain('</context_file>')
+        // …with the actual file content embedded (not just the path)
+        expect(userContent).toContain('def log_workout')
+        // …and the file path in the path attribute
+        expect(userContent).toContain('workout_tools.py')
+      } finally {
+        rmSync(dir, { recursive: true, force: true })
+      }
+    })
+  })
+
+  describe('buildContext() $file: edge cases', () => {
+    let buildContext: (opts: { manifest: AgentSpecManifest; contextFiles?: string[]; manifestDir?: string }) => string
+
+    beforeEach(async () => {
+      const mod = await import('../context-builder.js')
+      buildContext = mod.buildContext
+    })
+
+    it('silently skips $file: refs that resolve to a directory (not a file)', () => {
+      const dir = join(tmpdir(), `agentspec-test-${Date.now()}-dirref`)
+      mkdirSync(dir, { recursive: true })
+      // Create a SUBDIRECTORY that the manifest will point at via $file:
+      const subdir = join(dir, 'tools_dir')
+      mkdirSync(subdir, { recursive: true })
+
+      const manifestPointingAtDir: AgentSpecManifest = {
+        ...baseManifest,
+        spec: {
+          ...baseManifest.spec,
+          tools: [
+            {
+              name: 'dir-ref',
+              description: 'Points at a directory',
+              module: '$file:tools_dir',
+            } as unknown as NonNullable<AgentSpecManifest['spec']['tools']>[number],
+          ],
+        },
+      }
+
+      try {
+        const ctx = buildContext({ manifest: manifestPointingAtDir, manifestDir: dir })
+        // readFileSync on a directory throws EISDIR — buildContext must catch and skip
+        expect(ctx).not.toContain('<context_file')
+      } finally {
+        rmSync(dir, { recursive: true, force: true })
+      }
+    })
+  })
+})
+
+// ── repairYaml() tests ────────────────────────────────────────────────────────
+
+describe('repairYaml()', () => {
+  let repairYaml: (
+    yamlStr: string,
+    validationErrors: string,
+    options?: { model?: string },
+  ) => Promise<string>
+
+  const savedKey = process.env['ANTHROPIC_API_KEY']
+
+  beforeEach(async () => {
+    vi.clearAllMocks()
+    const mod = await import('../index.js')
+    repairYaml = mod.repairYaml
+  })
+
+  afterEach(() => {
+    if (savedKey === undefined) {
+      delete process.env['ANTHROPIC_API_KEY']
+    } else {
+      process.env['ANTHROPIC_API_KEY'] = savedKey
+    }
+  })
+
+  it('returns the repaired YAML string from the Claude response', async () => {
+    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    const repairedYaml =
+      'apiVersion: agentspec.io/v1\nkind: AgentSpec\nmetadata:\n  name: fixed-agent\n  version: 1.0.0\n'
+    mockCreate.mockResolvedValue(
+      makeClaudeResponse({
+        files: { 'agent.yaml': repairedYaml },
+        installCommands: [],
+        envVars: [],
+      }),
+    )
+    const result = await repairYaml(
+      'apiVersion: bad\nkind: AgentSpec\n',
+      'metadata.name: Required',
+    )
+    expect(result).toBe(repairedYaml)
+    expect(result).toContain('apiVersion: agentspec.io/v1')
+    expect(result).toContain('name: fixed-agent')
+  })
+
+  it('throws when ANTHROPIC_API_KEY is not set', async () => {
+    delete process.env['ANTHROPIC_API_KEY']
+    await expect(
+      repairYaml('apiVersion: bad\n', 'errors here'),
+    ).rejects.toThrow('ANTHROPIC_API_KEY')
+  })
+
+  it('throws when Claude response is missing the agent.yaml field', async () => {
+    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    mockCreate.mockResolvedValue(
+      makeClaudeResponse({
+        // valid shape (`files` present) but missing the agent.yaml key
+        files: { 'README.md': 'oops' },
+        installCommands: [],
+        envVars: [],
+      }),
+    )
+    await expect(
+      repairYaml('apiVersion: bad\n', 'metadata.name: Required'),
+    ).rejects.toThrow('agent.yaml')
+  })
+
+  it('uses REPAIR_SYSTEM_PROMPT (mentions AgentSpec v1 schema rules)', async () => {
+    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    mockCreate.mockResolvedValue(
+      makeClaudeResponse({
+        files: { 'agent.yaml': 'apiVersion: agentspec.io/v1\nkind: AgentSpec\n' },
+        installCommands: [],
+        envVars: [],
+      }),
+    )
+    await repairYaml('apiVersion: bad\n', 'errors')
+    const call = mockCreate.mock.calls[0]![0]
+    // System prompt should reference AgentSpec v1 rules so Claude knows how to repair
+    expect(call.system).toContain('AgentSpec v1')
+    expect(call.system).toContain('schema')
+    // The current (broken) YAML must appear in the user message so Claude has something to fix
+    const userContent = call.messages[0].content as string
+    expect(userContent).toContain('apiVersion: bad')
+    expect(userContent).toContain('errors')
   })
 })

From 2d1432ce611df0af629abca1510b7e5b69ae1172 Mon Sep 17 00:00:00 2001
From: iliassjabali <iliassjabali@gmail.com>
Date: Sun, 12 Apr 2026 01:37:35 +0100
Subject: [PATCH 3/4] test(mcp-server): add scan, generate, and tool-registry
 snapshot tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the largest gap from issue #24: two registered MCP tools
(agentspec_scan, agentspec_generate) had zero direct test files. Also
adds a tool-registry snapshot test to catch schema drift, which is the
most common silent break for MCP clients (Claude Desktop, Cursor, etc.)
when a tool's input schema changes without anyone noticing.

New test files:

- scan.test.ts (3 tests): mocks spawnCli, asserts CLI args
  ['scan', '--dir', dir, '--dry-run'], output trimming, error
  propagation.
- generate.test.ts (4 tests): asserts CLI args with and without --out,
  JSON-wrapped success response, error propagation.
- index.test.ts (11 tests):
  - TOOLS registry snapshot guard — locks the names + input schemas of
    all 9 registered tools via toMatchInlineSnapshot. Free-form
    descriptions are stripped from the snapshot so wording tweaks
    don't churn the diff; only the contract (name, properties, required)
    is locked.
  - Per-tool invariants: object schema, properties present, required
    is an array, description non-empty.
  - handleRpc dispatch tests covering initialize, tools/list, ping,
    method-not-found (-32601), invalid-request (-32600), missing tool
    name (-32602), and tool error wrapping as isError content blocks.

To enable importing the entry-point module from a unit test, src/index.ts
now guards the startStdio/startHttp call behind an isMainEntry() check
(standard ESM "if executed as main" pattern). Without this, importing
{ TOOLS, handleRpc } in index.test.ts would spawn a readline interface
and call process.exit(0) when vitest's stdin closed, surfacing as an
unhandled error.

123 mcp-server tests pass.
---
 .../mcp-server/src/__tests__/generate.test.ts |  48 +++
 .../mcp-server/src/__tests__/index.test.ts    | 302 ++++++++++++++++++
 .../mcp-server/src/__tests__/scan.test.ts     |  33 ++
 packages/mcp-server/src/index.ts              |  27 +-
 4 files changed, 405 insertions(+), 5 deletions(-)
 create mode 100644 packages/mcp-server/src/__tests__/generate.test.ts
 create mode 100644 packages/mcp-server/src/__tests__/index.test.ts
 create mode 100644 packages/mcp-server/src/__tests__/scan.test.ts

diff --git a/packages/mcp-server/src/__tests__/generate.test.ts b/packages/mcp-server/src/__tests__/generate.test.ts
new file mode 100644
index 0000000..e20a2a1
--- /dev/null
+++ b/packages/mcp-server/src/__tests__/generate.test.ts
@@ -0,0 +1,48 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+
+vi.mock('../cli-runner.js', () => ({
+  spawnCli: vi.fn(),
+}))
+
+import { spawnCli } from '../cli-runner.js'
+import { generate } from '../tools/generate.js'
+
+const spawnCliMock = vi.mocked(spawnCli)
+
+describe('generate()', () => {
+  beforeEach(() => {
+    spawnCliMock.mockReset()
+  })
+
+  it('invokes spawnCli with generate, file, and --framework when no out dir', async () => {
+    spawnCliMock.mockResolvedValue('Generated 3 files')
+    await generate('agent.yaml', 'langgraph')
+    expect(spawnCliMock).toHaveBeenCalledWith(['generate', 'agent.yaml', '--framework', 'langgraph'])
+  })
+
+  it('appends --out when an output directory is provided', async () => {
+    spawnCliMock.mockResolvedValue('Generated 3 files')
+    await generate('agent.yaml', 'crewai', '/tmp/out')
+    expect(spawnCliMock).toHaveBeenCalledWith([
+      'generate',
+      'agent.yaml',
+      '--framework',
+      'crewai',
+      '--out',
+      '/tmp/out',
+    ])
+  })
+
+  it('returns a JSON string with success and trimmed output', async () => {
+    spawnCliMock.mockResolvedValue('  Generated 3 files\n\n')
+    const result = await generate('agent.yaml', 'mastra')
+    const parsed = JSON.parse(result)
+    expect(parsed.success).toBe(true)
+    expect(parsed.output).toBe('Generated 3 files')
+  })
+
+  it('propagates errors from the CLI', async () => {
+    spawnCliMock.mockRejectedValue(new Error('Unknown framework: foo'))
+    await expect(generate('agent.yaml', 'foo')).rejects.toThrow('Unknown framework: foo')
+  })
+})
diff --git a/packages/mcp-server/src/__tests__/index.test.ts b/packages/mcp-server/src/__tests__/index.test.ts
new file mode 100644
index 0000000..e8695dd
--- /dev/null
+++ b/packages/mcp-server/src/__tests__/index.test.ts
@@ -0,0 +1,302 @@
+import { describe, it, expect } from 'vitest'
+import { TOOLS, handleRpc } from '../index.js'
+
+// ── Tool registry snapshot ────────────────────────────────────────────────────
+//
+// The MCP server's contract with clients is the tools/list response. Any
+// schema drift here is a breaking change for every consumer (Claude Desktop,
+// Cursor, custom clients), so we snapshot the full registry. If this snapshot
+// changes, the diff is the breaking change — review it deliberately.
+//
+// To intentionally update: run `pnpm --filter @agentspec/mcp test -u`.
+
+describe('TOOLS registry — schema drift guard', () => {
+  it('exposes the expected list of tool names', () => {
+    const names = TOOLS.map((t) => t.name).sort()
+    expect(names).toEqual([
+      'agentspec_audit',
+      'agentspec_diff',
+      'agentspec_gap',
+      'agentspec_generate',
+      'agentspec_health',
+      'agentspec_list_agents',
+      'agentspec_proof',
+      'agentspec_scan',
+      'agentspec_validate',
+    ])
+  })
+
+  it('every tool declares an object input schema with properties and required arrays', () => {
+    for (const tool of TOOLS) {
+      expect(tool.inputSchema.type, `${tool.name}.inputSchema.type`).toBe('object')
+      expect(tool.inputSchema.properties, `${tool.name}.inputSchema.properties`).toBeTypeOf('object')
+      expect(Array.isArray(tool.inputSchema.required), `${tool.name}.inputSchema.required`).toBe(true)
+    }
+  })
+
+  it('every tool has a non-empty description', () => {
+    for (const tool of TOOLS) {
+      expect(tool.description.length, `${tool.name}.description`).toBeGreaterThan(10)
+    }
+  })
+
+  it('matches the full registry snapshot (catches accidental schema drift)', () => {
+    // Normalise: strip free-form description text so wording tweaks don't churn
+    // the snapshot — only the contract (name, properties, required) is locked.
+    const contract = TOOLS.map((t) => ({
+      name: t.name,
+      inputSchema: {
+        type: t.inputSchema.type,
+        properties: Object.fromEntries(
+          Object.entries(t.inputSchema.properties).map(([k, v]) => [k, { type: v.type }]),
+        ),
+        required: [...t.inputSchema.required].sort(),
+      },
+    })).sort((a, b) => a.name.localeCompare(b.name))
+
+    expect(contract).toMatchInlineSnapshot(`
+      [
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "agentName": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "file": {
+                "type": "string",
+              },
+              "pack": {
+                "type": "string",
+              },
+              "sidecarUrl": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "file",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_audit",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "from": {
+                "type": "string",
+              },
+              "to": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "from",
+              "to",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_diff",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "agentName": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "sidecarUrl": {
+                "type": "string",
+              },
+            },
+            "required": [],
+            "type": "object",
+          },
+          "name": "agentspec_gap",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "file": {
+                "type": "string",
+              },
+              "framework": {
+                "type": "string",
+              },
+              "out": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "file",
+              "framework",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_generate",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "agentName": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "file": {
+                "type": "string",
+              },
+              "sidecarUrl": {
+                "type": "string",
+              },
+            },
+            "required": [],
+            "type": "object",
+          },
+          "name": "agentspec_health",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "dir": {
+                "type": "string",
+              },
+            },
+            "required": [],
+            "type": "object",
+          },
+          "name": "agentspec_list_agents",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "agentName": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "sidecarUrl": {
+                "type": "string",
+              },
+            },
+            "required": [],
+            "type": "object",
+          },
+          "name": "agentspec_proof",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "dir": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "dir",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_scan",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "file": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "file",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_validate",
+        },
+      ]
+    `)
+  })
+})
+
+// ── handleRpc dispatch ────────────────────────────────────────────────────────
+
+describe('handleRpc — JSON-RPC dispatch', () => {
+  it('returns server info and protocol version on initialize', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 1, method: 'initialize' })
+    expect(res).toMatchObject({
+      jsonrpc: '2.0',
+      id: 1,
+      result: {
+        protocolVersion: '2025-03-26',
+        serverInfo: { name: 'agentspec', version: '1.0.0' },
+        capabilities: { tools: {} },
+      },
+    })
+  })
+
+  it('returns the registered TOOLS list on tools/list', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 2, method: 'tools/list' })
+    const result = (res as { result: { tools: unknown[] } }).result
+    expect(result.tools).toHaveLength(TOOLS.length)
+  })
+
+  it('returns an empty result on ping', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 3, method: 'ping' })
+    expect(res).toEqual({ jsonrpc: '2.0', id: 3, result: {} })
+  })
+
+  it('returns -32601 method-not-found for unknown methods', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 4, method: 'nope/nope' })
+    expect(res).toMatchObject({ jsonrpc: '2.0', id: 4, error: { code: -32601 } })
+  })
+
+  it('returns -32600 invalid-request when jsonrpc field is wrong', async () => {
+    const res = await handleRpc({ jsonrpc: '1.0' as '2.0', id: 5, method: 'ping' })
+    expect(res).toMatchObject({ jsonrpc: '2.0', id: 5, error: { code: -32600 } })
+  })
+
+  it('returns -32602 missing-name when tools/call is missing tool name', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 6, method: 'tools/call', params: {} })
+    expect(res).toMatchObject({ jsonrpc: '2.0', id: 6, error: { code: -32602 } })
+  })
+
+  it('wraps tool errors as isError content blocks (does not throw to the JSON-RPC layer)', async () => {
+    const res = await handleRpc({
+      jsonrpc: '2.0',
+      id: 7,
+      method: 'tools/call',
+      params: { name: 'agentspec_unknown_tool', arguments: {} },
+    })
+    expect(res).toMatchObject({
+      jsonrpc: '2.0',
+      id: 7,
+      result: {
+        isError: true,
+        content: [{ type: 'text', text: expect.stringContaining('Unknown tool') }],
+      },
+    })
+  })
+})
diff --git a/packages/mcp-server/src/__tests__/scan.test.ts b/packages/mcp-server/src/__tests__/scan.test.ts
new file mode 100644
index 0000000..7f8a0dd
--- /dev/null
+++ b/packages/mcp-server/src/__tests__/scan.test.ts
@@ -0,0 +1,33 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+
+vi.mock('../cli-runner.js', () => ({
+  spawnCli: vi.fn(),
+}))
+
+import { spawnCli } from '../cli-runner.js'
+import { scan } from '../tools/scan.js'
+
+const spawnCliMock = vi.mocked(spawnCli)
+
+describe('scan()', () => {
+  beforeEach(() => {
+    spawnCliMock.mockReset()
+  })
+
+  it('invokes spawnCli with scan, --dir, and --dry-run flags', async () => {
+    spawnCliMock.mockResolvedValue('apiVersion: agentspec.io/v1\nkind: AgentSpec\n')
+    await scan('/workspace/my-agent')
+    expect(spawnCliMock).toHaveBeenCalledWith(['scan', '--dir', '/workspace/my-agent', '--dry-run'])
+  })
+
+  it('returns the trimmed CLI stdout', async () => {
+    spawnCliMock.mockResolvedValue('  apiVersion: agentspec.io/v1\nkind: AgentSpec\n  \n')
+    const result = await scan('/workspace/my-agent')
+    expect(result).toBe('apiVersion: agentspec.io/v1\nkind: AgentSpec')
+  })
+
+  it('propagates errors from the CLI (e.g. directory not found)', async () => {
+    spawnCliMock.mockRejectedValue(new Error('ENOENT: no such directory'))
+    await expect(scan('/does/not/exist')).rejects.toThrow('ENOENT')
+  })
+})
diff --git a/packages/mcp-server/src/index.ts b/packages/mcp-server/src/index.ts
index 5a1e8db..89c4d3e 100644
--- a/packages/mcp-server/src/index.ts
+++ b/packages/mcp-server/src/index.ts
@@ -393,11 +393,28 @@ function startHttp(): void {
 
 // ── Entry point ───────────────────────────────────────────────────────────────
 
-const useHttp = process.argv.includes('--http')
-if (useHttp) {
-  startHttp()
-} else {
-  startStdio()
+import { realpathSync } from 'node:fs'
+import { fileURLToPath } from 'node:url'
+
+// Only run the server when this module is the actual entry point. Without
+// this guard, importing { TOOLS, handleRpc } from a unit test would spawn a
+// readline interface and call process.exit(0) when vitest's stdin closes.
+function isMainEntry(): boolean {
+  if (!process.argv[1]) return false
+  try {
+    return realpathSync(process.argv[1]) === fileURLToPath(import.meta.url)
+  } catch {
+    return false
+  }
+}
+
+if (isMainEntry()) {
+  const useHttp = process.argv.includes('--http')
+  if (useHttp) {
+    startHttp()
+  } else {
+    startStdio()
+  }
 }
 
 export { handleRpc, callTool, TOOLS }

From 6ca46789de924df176a8153750c86005df912be7 Mon Sep 17 00:00:00 2001
From: iliassjabali <iliassjabali@gmail.com>
Date: Sun, 12 Apr 2026 01:37:52 +0100
Subject: [PATCH 4/4] test(mcp-server): cover sidecar malformed JSON, hung
 connection, and operator network errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the error-path scenarios highlighted as missing in #24:

- health.test.ts: sidecar returns 200 with malformed JSON (a common
  failure mode when a reverse proxy serves an HTML error page —
  res.json() throws SyntaxError). Plus a hung-connection / fetch
  timeout test, since a silently dropped connection is worse than a
  refused one (the client waits forever instead of failing fast).
- gap.test.ts: control-plane (operator-mode) fetch fails with a
  network error like ENOTFOUND — the existing tests covered the
  sidecar-mode equivalent but never exercised fetchFromControlPlane's
  rejection path.

123 mcp-server tests still pass.
---
 packages/mcp-server/src/__tests__/gap.test.ts | 10 ++++++++
 .../mcp-server/src/__tests__/health.test.ts   | 24 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/packages/mcp-server/src/__tests__/gap.test.ts b/packages/mcp-server/src/__tests__/gap.test.ts
index 32357c6..81875fc 100644
--- a/packages/mcp-server/src/__tests__/gap.test.ts
+++ b/packages/mcp-server/src/__tests__/gap.test.ts
@@ -97,6 +97,16 @@ describe('gap() — named agent via control plane (agentName + controlPlaneUrl)'
     const result = await gap({ agentName: 'budget-assistant', controlPlaneUrl: 'https://cp.company.com' })
     expect(JSON.parse(result)).toMatchObject({ score: 72 })
   })
+
+  it('throws when control plane fetch fails with a network error', async () => {
+    // Operator-mode network failure (e.g. DNS lookup failed, TLS handshake refused).
+    // The sidecar-mode equivalent is already covered above; this exercises the
+    // separate fetchFromControlPlane code path.
+    fetchMock.mockRejectedValue(new Error('getaddrinfo ENOTFOUND cp.company.com'))
+    await expect(
+      gap({ agentName: 'budget-assistant', controlPlaneUrl: 'https://cp.company.com' }),
+    ).rejects.toThrow('ENOTFOUND')
+  })
 })
 
 describe('gap() — missing args', () => {
diff --git a/packages/mcp-server/src/__tests__/health.test.ts b/packages/mcp-server/src/__tests__/health.test.ts
index 6d77b22..5d7c2f4 100644
--- a/packages/mcp-server/src/__tests__/health.test.ts
+++ b/packages/mcp-server/src/__tests__/health.test.ts
@@ -80,6 +80,30 @@ describe('health() — sidecar mode', () => {
     await health({ sidecarUrl: 'http://localhost:4001' })
     expect(spawnCliMock).not.toHaveBeenCalled()
   })
+
+  it('throws when sidecar returns malformed JSON', async () => {
+    // Simulates a sidecar that responds 200 but with garbage in the body —
+    // a common failure mode when a reverse proxy serves an HTML error page.
+    fetchMock.mockResolvedValue({
+      ok: true,
+      json: async () => {
+        throw new SyntaxError('Unexpected token < in JSON at position 0')
+      },
+    })
+    await expect(
+      health({ sidecarUrl: 'http://localhost:4001' }),
+    ).rejects.toThrow(/Unexpected token|JSON/)
+  })
+
+  it('throws when sidecar fetch hangs and the network layer times out', async () => {
+    // A hung connection (silently dropped packets, dead peer) is worse than a
+    // refused connection because the client waits forever. We surface it as a
+    // synthetic AbortError / timeout error from the fetch layer.
+    fetchMock.mockRejectedValue(Object.assign(new Error('fetch timed out'), { name: 'TimeoutError' }))
+    await expect(
+      health({ sidecarUrl: 'http://localhost:4001' }),
+    ).rejects.toThrow('fetch timed out')
+  })
 })
 
 // ── Operator mode ─────────────────────────────────────────────────────────────