diff --git a/packages/adapter-claude/src/__tests__/claude-adapter.test.ts b/packages/adapter-claude/src/__tests__/claude-adapter.test.ts
index fe851db..8c6ad85 100644
--- a/packages/adapter-claude/src/__tests__/claude-adapter.test.ts
+++ b/packages/adapter-claude/src/__tests__/claude-adapter.test.ts
@@ -671,5 +671,223 @@ describe('generateWithClaude()', () => {
       expect(counts.length).toBeGreaterThanOrEqual(2)
       expect(counts[counts.length - 1]).toBeGreaterThan(counts[0]!)
     })
+
+    it('surfaces an error thrown mid-stream from the streaming API', async () => {
+      mockStream.mockReturnValue((async function* () {
+        yield { type: 'content_block_delta', delta: { type: 'text_delta', text: '```json\n{"fil' } }
+        throw new Error('stream blew up')
+      })())
+      await expect(
+        generateWithClaude(baseManifest, { framework: 'langgraph', onProgress: () => {} }),
+      ).rejects.toThrow('stream blew up')
+    })
+  })
+
+  describe('Non-text content blocks', () => {
+    beforeEach(() => {
+      process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    })
+
+    it('ignores non-text content blocks (e.g. tool_use) and parses the text block', async () => {
+      mockCreate.mockResolvedValue({
+        content: [
+          { type: 'tool_use', id: 'tu_1', name: 'noop', input: {} },
+          { type: 'text', text: '```json\n{"files":{"agent.py":"# from text block"},"installCommands":[],"envVars":[]}\n```' },
+        ],
+        usage: { input_tokens: 1, output_tokens: 1 },
+      })
+      const result = await generateWithClaude(baseManifest, { framework: 'langgraph' })
+      expect(result.files['agent.py']).toBe('# from text block')
+    })
+  })
+
+  describe('Prompt structure (regression-sensitive)', () => {
+    beforeEach(() => {
+      process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    })
+
+    it('user message wraps the manifest JSON in <context_manifest> tags', async () => {
+      mockCreate.mockResolvedValue(
+        makeClaudeResponse({ files: { 'agent.py': '# x' }, installCommands: [], envVars: [] }),
+      )
+      await generateWithClaude(baseManifest, { framework: 'langgraph' })
+      const call = mockCreate.mock.calls[0]![0]
+      const userContent = call.messages[0].content as string
+      expect(userContent).toContain('<context_manifest>')
+      expect(userContent).toContain('</context_manifest>')
+      expect(userContent).toContain('"name": "test-agent"')
+      expect(userContent).toContain('"provider": "groq"')
+    })
+
+    it('user message embeds resolved $file: refs as <context_file> blocks with their content', async () => {
+      const dir = join(tmpdir(), `agentspec-test-${Date.now()}-prompt`)
+      mkdirSync(dir, { recursive: true })
+      const toolFile = join(dir, 'workout_tools.py')
+      const toolContent = 'def log_workout(exercises: list[str]) -> str:\n    return "logged"\n'
+      writeFileSync(toolFile, toolContent, 'utf-8')
+
+      const manifestWithFileTool: AgentSpecManifest = {
+        ...baseManifest,
+        spec: {
+          ...baseManifest.spec,
+          tools: [
+            {
+              name: 'log-workout',
+              description: 'Log a workout',
+              module: '$file:workout_tools.py',
+            } as unknown as NonNullable<AgentSpecManifest['spec']['tools']>[number],
+          ],
+        },
+      }
+
+      mockCreate.mockResolvedValue(
+        makeClaudeResponse({ files: { 'agent.py': '# x' }, installCommands: [], envVars: [] }),
+      )
+
+      try {
+        await generateWithClaude(manifestWithFileTool, {
+          framework: 'langgraph',
+          manifestDir: dir,
+        })
+        const call = mockCreate.mock.calls[0]![0]
+        const userContent = call.messages[0].content as string
+        // The resolved file must appear as a <context_file> block
+        expect(userContent).toContain('<context_file')
+        expect(userContent).toContain('</context_file>')
+        // …with the actual file content embedded (not just the path)
+        expect(userContent).toContain('def log_workout')
+        // …and the file path in the path attribute
+        expect(userContent).toContain('workout_tools.py')
+      } finally {
+        rmSync(dir, { recursive: true, force: true })
+      }
+    })
+  })
+
+  describe('buildContext() $file: edge cases', () => {
+    let buildContext: (opts: { manifest: AgentSpecManifest; contextFiles?: string[]; manifestDir?: string }) => string
+
+    beforeEach(async () => {
+      const mod = await import('../context-builder.js')
+      buildContext = mod.buildContext
+    })
+
+    it('silently skips $file: refs that resolve to a directory (not a file)', () => {
+      const dir = join(tmpdir(), `agentspec-test-${Date.now()}-dirref`)
+      mkdirSync(dir, { recursive: true })
+      // Create a SUBDIRECTORY that the manifest will point at via $file:
+      const subdir = join(dir, 'tools_dir')
+      mkdirSync(subdir, { recursive: true })
+
+      const manifestPointingAtDir: AgentSpecManifest = {
+        ...baseManifest,
+        spec: {
+          ...baseManifest.spec,
+          tools: [
+            {
+              name: 'dir-ref',
+              description: 'Points at a directory',
+              module: '$file:tools_dir',
+            } as unknown as NonNullable<AgentSpecManifest['spec']['tools']>[number],
+          ],
+        },
+      }
+
+      try {
+        const ctx = buildContext({ manifest: manifestPointingAtDir, manifestDir: dir })
+        // readFileSync on a directory throws EISDIR — buildContext must catch and skip
+        expect(ctx).not.toContain('<context_file')
+      } finally {
+        rmSync(dir, { recursive: true, force: true })
+      }
+    })
+  })
+})
+
+// ── repairYaml() tests ────────────────────────────────────────────────────────
+
+describe('repairYaml()', () => {
+  let repairYaml: (
+    yamlStr: string,
+    validationErrors: string,
+    options?: { model?: string },
+  ) => Promise<string>
+
+  const savedKey = process.env['ANTHROPIC_API_KEY']
+
+  beforeEach(async () => {
+    vi.clearAllMocks()
+    const mod = await import('../index.js')
+    repairYaml = mod.repairYaml
+  })
+
+  afterEach(() => {
+    if (savedKey === undefined) {
+      delete process.env['ANTHROPIC_API_KEY']
+    } else {
+      process.env['ANTHROPIC_API_KEY'] = savedKey
+    }
+  })
+
+  it('returns the repaired YAML string from the Claude response', async () => {
+    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    const repairedYaml =
+      'apiVersion: agentspec.io/v1\nkind: AgentSpec\nmetadata:\n  name: fixed-agent\n  version: 1.0.0\n'
+    mockCreate.mockResolvedValue(
+      makeClaudeResponse({
+        files: { 'agent.yaml': repairedYaml },
+        installCommands: [],
+        envVars: [],
+      }),
+    )
+    const result = await repairYaml(
+      'apiVersion: bad\nkind: AgentSpec\n',
+      'metadata.name: Required',
+    )
+    expect(result).toBe(repairedYaml)
+    expect(result).toContain('apiVersion: agentspec.io/v1')
+    expect(result).toContain('name: fixed-agent')
+  })
+
+  it('throws when ANTHROPIC_API_KEY is not set', async () => {
+    delete process.env['ANTHROPIC_API_KEY']
+    await expect(
+      repairYaml('apiVersion: bad\n', 'errors here'),
+    ).rejects.toThrow('ANTHROPIC_API_KEY')
+  })
+
+  it('throws when Claude response is missing the agent.yaml field', async () => {
+    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    mockCreate.mockResolvedValue(
+      makeClaudeResponse({
+        // valid shape (`files` present) but missing the agent.yaml key
+        files: { 'README.md': 'oops' },
+        installCommands: [],
+        envVars: [],
+      }),
+    )
+    await expect(
+      repairYaml('apiVersion: bad\n', 'metadata.name: Required'),
+    ).rejects.toThrow('agent.yaml')
+  })
+
+  it('uses REPAIR_SYSTEM_PROMPT (mentions AgentSpec v1 schema rules)', async () => {
+    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
+    mockCreate.mockResolvedValue(
+      makeClaudeResponse({
+        files: { 'agent.yaml': 'apiVersion: agentspec.io/v1\nkind: AgentSpec\n' },
+        installCommands: [],
+        envVars: [],
+      }),
+    )
+    await repairYaml('apiVersion: bad\n', 'errors')
+    const call = mockCreate.mock.calls[0]![0]
+    // System prompt should reference AgentSpec v1 rules so Claude knows how to repair
+    expect(call.system).toContain('AgentSpec v1')
+    expect(call.system).toContain('schema')
+    // The current (broken) YAML must appear in the user message so Claude has something to fix
+    const userContent = call.messages[0].content as string
+    expect(userContent).toContain('apiVersion: bad')
+    expect(userContent).toContain('errors')
   })
 })
diff --git a/packages/mcp-server/src/__tests__/gap.test.ts b/packages/mcp-server/src/__tests__/gap.test.ts
index 32357c6..81875fc 100644
--- a/packages/mcp-server/src/__tests__/gap.test.ts
+++ b/packages/mcp-server/src/__tests__/gap.test.ts
@@ -97,6 +97,16 @@ describe('gap() — named agent via control plane (agentName + controlPlaneUrl)'
     const result = await gap({ agentName: 'budget-assistant', controlPlaneUrl: 'https://cp.company.com' })
     expect(JSON.parse(result)).toMatchObject({ score: 72 })
   })
+
+  it('throws when control plane fetch fails with a network error', async () => {
+    // Operator-mode network failure (e.g. DNS lookup failed, TLS handshake refused).
+    // The sidecar-mode equivalent is already covered above; this exercises the
+    // separate fetchFromControlPlane code path.
+    fetchMock.mockRejectedValue(new Error('getaddrinfo ENOTFOUND cp.company.com'))
+    await expect(
+      gap({ agentName: 'budget-assistant', controlPlaneUrl: 'https://cp.company.com' }),
+    ).rejects.toThrow('ENOTFOUND')
+  })
 })
 
 describe('gap() — missing args', () => {
diff --git a/packages/mcp-server/src/__tests__/generate.test.ts b/packages/mcp-server/src/__tests__/generate.test.ts
new file mode 100644
index 0000000..e20a2a1
--- /dev/null
+++ b/packages/mcp-server/src/__tests__/generate.test.ts
@@ -0,0 +1,48 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+
+vi.mock('../cli-runner.js', () => ({
+  spawnCli: vi.fn(),
+}))
+
+import { spawnCli } from '../cli-runner.js'
+import { generate } from '../tools/generate.js'
+
+const spawnCliMock = vi.mocked(spawnCli)
+
+describe('generate()', () => {
+  beforeEach(() => {
+    spawnCliMock.mockReset()
+  })
+
+  it('invokes spawnCli with generate, file, and --framework when no out dir', async () => {
+    spawnCliMock.mockResolvedValue('Generated 3 files')
+    await generate('agent.yaml', 'langgraph')
+    expect(spawnCliMock).toHaveBeenCalledWith(['generate', 'agent.yaml', '--framework', 'langgraph'])
+  })
+
+  it('appends --out when an output directory is provided', async () => {
+    spawnCliMock.mockResolvedValue('Generated 3 files')
+    await generate('agent.yaml', 'crewai', '/tmp/out')
+    expect(spawnCliMock).toHaveBeenCalledWith([
+      'generate',
+      'agent.yaml',
+      '--framework',
+      'crewai',
+      '--out',
+      '/tmp/out',
+    ])
+  })
+
+  it('returns a JSON string with success and trimmed output', async () => {
+    spawnCliMock.mockResolvedValue('  Generated 3 files\n\n')
+    const result = await generate('agent.yaml', 'mastra')
+    const parsed = JSON.parse(result)
+    expect(parsed.success).toBe(true)
+    expect(parsed.output).toBe('Generated 3 files')
+  })
+
+  it('propagates errors from the CLI', async () => {
+    spawnCliMock.mockRejectedValue(new Error('Unknown framework: foo'))
+    await expect(generate('agent.yaml', 'foo')).rejects.toThrow('Unknown framework: foo')
+  })
+})
diff --git a/packages/mcp-server/src/__tests__/health.test.ts b/packages/mcp-server/src/__tests__/health.test.ts
index 6d77b22..5d7c2f4 100644
--- a/packages/mcp-server/src/__tests__/health.test.ts
+++ b/packages/mcp-server/src/__tests__/health.test.ts
@@ -80,6 +80,30 @@ describe('health() — sidecar mode', () => {
     await health({ sidecarUrl: 'http://localhost:4001' })
     expect(spawnCliMock).not.toHaveBeenCalled()
   })
+
+  it('throws when sidecar returns malformed JSON', async () => {
+    // Simulates a sidecar that responds 200 but with garbage in the body —
+    // a common failure mode when a reverse proxy serves an HTML error page.
+    fetchMock.mockResolvedValue({
+      ok: true,
+      json: async () => {
+        throw new SyntaxError('Unexpected token < in JSON at position 0')
+      },
+    })
+    await expect(
+      health({ sidecarUrl: 'http://localhost:4001' }),
+    ).rejects.toThrow(/Unexpected token|JSON/)
+  })
+
+  it('throws when sidecar fetch hangs and the network layer times out', async () => {
+    // A hung connection (silently dropped packets, dead peer) is worse than a
+    // refused connection because the client waits forever. We surface it as a
+    // synthetic AbortError / timeout error from the fetch layer.
+    fetchMock.mockRejectedValue(Object.assign(new Error('fetch timed out'), { name: 'TimeoutError' }))
+    await expect(
+      health({ sidecarUrl: 'http://localhost:4001' }),
+    ).rejects.toThrow('fetch timed out')
+  })
 })
 
 // ── Operator mode ─────────────────────────────────────────────────────────────
diff --git a/packages/mcp-server/src/__tests__/index.test.ts b/packages/mcp-server/src/__tests__/index.test.ts
new file mode 100644
index 0000000..e8695dd
--- /dev/null
+++ b/packages/mcp-server/src/__tests__/index.test.ts
@@ -0,0 +1,302 @@
+import { describe, it, expect } from 'vitest'
+import { TOOLS, handleRpc } from '../index.js'
+
+// ── Tool registry snapshot ────────────────────────────────────────────────────
+//
+// The MCP server's contract with clients is the tools/list response. Any
+// schema drift here is a breaking change for every consumer (Claude Desktop,
+// Cursor, custom clients), so we snapshot the full registry. If this snapshot
+// changes, the diff is the breaking change — review it deliberately.
+//
+// To intentionally update: run `pnpm --filter @agentspec/mcp test -u`.
+
+describe('TOOLS registry — schema drift guard', () => {
+  it('exposes the expected list of tool names', () => {
+    const names = TOOLS.map((t) => t.name).sort()
+    expect(names).toEqual([
+      'agentspec_audit',
+      'agentspec_diff',
+      'agentspec_gap',
+      'agentspec_generate',
+      'agentspec_health',
+      'agentspec_list_agents',
+      'agentspec_proof',
+      'agentspec_scan',
+      'agentspec_validate',
+    ])
+  })
+
+  it('every tool declares an object input schema with properties and required arrays', () => {
+    for (const tool of TOOLS) {
+      expect(tool.inputSchema.type, `${tool.name}.inputSchema.type`).toBe('object')
+      expect(tool.inputSchema.properties, `${tool.name}.inputSchema.properties`).toBeTypeOf('object')
+      expect(Array.isArray(tool.inputSchema.required), `${tool.name}.inputSchema.required`).toBe(true)
+    }
+  })
+
+  it('every tool has a non-empty description', () => {
+    for (const tool of TOOLS) {
+      expect(tool.description.length, `${tool.name}.description`).toBeGreaterThan(10)
+    }
+  })
+
+  it('matches the full registry snapshot (catches accidental schema drift)', () => {
+    // Normalise: strip free-form description text so wording tweaks don't churn
+    // the snapshot — only the contract (name, properties, required) is locked.
+    const contract = TOOLS.map((t) => ({
+      name: t.name,
+      inputSchema: {
+        type: t.inputSchema.type,
+        properties: Object.fromEntries(
+          Object.entries(t.inputSchema.properties).map(([k, v]) => [k, { type: v.type }]),
+        ),
+        required: [...t.inputSchema.required].sort(),
+      },
+    })).sort((a, b) => a.name.localeCompare(b.name))
+
+    expect(contract).toMatchInlineSnapshot(`
+      [
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "agentName": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "file": {
+                "type": "string",
+              },
+              "pack": {
+                "type": "string",
+              },
+              "sidecarUrl": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "file",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_audit",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "from": {
+                "type": "string",
+              },
+              "to": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "from",
+              "to",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_diff",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "agentName": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "sidecarUrl": {
+                "type": "string",
+              },
+            },
+            "required": [],
+            "type": "object",
+          },
+          "name": "agentspec_gap",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "file": {
+                "type": "string",
+              },
+              "framework": {
+                "type": "string",
+              },
+              "out": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "file",
+              "framework",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_generate",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "agentName": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "file": {
+                "type": "string",
+              },
+              "sidecarUrl": {
+                "type": "string",
+              },
+            },
+            "required": [],
+            "type": "object",
+          },
+          "name": "agentspec_health",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "dir": {
+                "type": "string",
+              },
+            },
+            "required": [],
+            "type": "object",
+          },
+          "name": "agentspec_list_agents",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "adminKey": {
+                "type": "string",
+              },
+              "agentName": {
+                "type": "string",
+              },
+              "controlPlaneUrl": {
+                "type": "string",
+              },
+              "sidecarUrl": {
+                "type": "string",
+              },
+            },
+            "required": [],
+            "type": "object",
+          },
+          "name": "agentspec_proof",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "dir": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "dir",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_scan",
+        },
+        {
+          "inputSchema": {
+            "properties": {
+              "file": {
+                "type": "string",
+              },
+            },
+            "required": [
+              "file",
+            ],
+            "type": "object",
+          },
+          "name": "agentspec_validate",
+        },
+      ]
+    `)
+  })
+})
+
+// ── handleRpc dispatch ────────────────────────────────────────────────────────
+
+describe('handleRpc — JSON-RPC dispatch', () => {
+  it('returns server info and protocol version on initialize', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 1, method: 'initialize' })
+    expect(res).toMatchObject({
+      jsonrpc: '2.0',
+      id: 1,
+      result: {
+        protocolVersion: '2025-03-26',
+        serverInfo: { name: 'agentspec', version: '1.0.0' },
+        capabilities: { tools: {} },
+      },
+    })
+  })
+
+  it('returns the registered TOOLS list on tools/list', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 2, method: 'tools/list' })
+    const result = (res as { result: { tools: unknown[] } }).result
+    expect(result.tools).toHaveLength(TOOLS.length)
+  })
+
+  it('returns an empty result on ping', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 3, method: 'ping' })
+    expect(res).toEqual({ jsonrpc: '2.0', id: 3, result: {} })
+  })
+
+  it('returns -32601 method-not-found for unknown methods', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 4, method: 'nope/nope' })
+    expect(res).toMatchObject({ jsonrpc: '2.0', id: 4, error: { code: -32601 } })
+  })
+
+  it('returns -32600 invalid-request when jsonrpc field is wrong', async () => {
+    const res = await handleRpc({ jsonrpc: '1.0' as '2.0', id: 5, method: 'ping' })
+    expect(res).toMatchObject({ jsonrpc: '2.0', id: 5, error: { code: -32600 } })
+  })
+
+  it('returns -32602 missing-name when tools/call is missing tool name', async () => {
+    const res = await handleRpc({ jsonrpc: '2.0', id: 6, method: 'tools/call', params: {} })
+    expect(res).toMatchObject({ jsonrpc: '2.0', id: 6, error: { code: -32602 } })
+  })
+
+  it('wraps tool errors as isError content blocks (does not throw to the JSON-RPC layer)', async () => {
+    const res = await handleRpc({
+      jsonrpc: '2.0',
+      id: 7,
+      method: 'tools/call',
+      params: { name: 'agentspec_unknown_tool', arguments: {} },
+    })
+    expect(res).toMatchObject({
+      jsonrpc: '2.0',
+      id: 7,
+      result: {
+        isError: true,
+        content: [{ type: 'text', text: expect.stringContaining('Unknown tool') }],
+      },
+    })
+  })
+})
diff --git a/packages/mcp-server/src/__tests__/scan.test.ts b/packages/mcp-server/src/__tests__/scan.test.ts
new file mode 100644
index 0000000..7f8a0dd
--- /dev/null
+++ b/packages/mcp-server/src/__tests__/scan.test.ts
@@ -0,0 +1,33 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+
+vi.mock('../cli-runner.js', () => ({
+  spawnCli: vi.fn(),
+}))
+
+import { spawnCli } from '../cli-runner.js'
+import { scan } from '../tools/scan.js'
+
+const spawnCliMock = vi.mocked(spawnCli)
+
+describe('scan()', () => {
+  beforeEach(() => {
+    spawnCliMock.mockReset()
+  })
+
+  it('invokes spawnCli with scan, --dir, and --dry-run flags', async () => {
+    spawnCliMock.mockResolvedValue('apiVersion: agentspec.io/v1\nkind: AgentSpec\n')
+    await scan('/workspace/my-agent')
+    expect(spawnCliMock).toHaveBeenCalledWith(['scan', '--dir', '/workspace/my-agent', '--dry-run'])
+  })
+
+  it('returns the trimmed CLI stdout', async () => {
+    spawnCliMock.mockResolvedValue('  apiVersion: agentspec.io/v1\nkind: AgentSpec\n  \n')
+    const result = await scan('/workspace/my-agent')
+    expect(result).toBe('apiVersion: agentspec.io/v1\nkind: AgentSpec')
+  })
+
+  it('propagates errors from the CLI (e.g. directory not found)', async () => {
+    spawnCliMock.mockRejectedValue(new Error('ENOENT: no such directory'))
+    await expect(scan('/does/not/exist')).rejects.toThrow('ENOENT')
+  })
+})
diff --git a/packages/mcp-server/src/__tests__/transport.test.ts b/packages/mcp-server/src/__tests__/transport.test.ts
index 62445f8..eda97e3 100644
--- a/packages/mcp-server/src/__tests__/transport.test.ts
+++ b/packages/mcp-server/src/__tests__/transport.test.ts
@@ -3,7 +3,21 @@ import { spawn, ChildProcess } from 'child_process'
 import { resolve } from 'path'
 import http from 'http'
 
-const BIN = resolve(__dirname, '../../dist/index.js')
+// Run the TypeScript source directly via tsx so the suite is independent of the
+// dist/ build artefact. Removes the "must run pnpm build first" footgun.
+const TSX = resolve(__dirname, '../../node_modules/.bin/tsx')
+const SRC = resolve(__dirname, '../index.ts')
+
+function spawnServer(args: string[] = [], env: NodeJS.ProcessEnv = process.env): ChildProcess {
+  return spawn(TSX, [SRC, ...args], { stdio: ['pipe', 'pipe', 'pipe'], env })
+}
+
+async function killAndWait(child: ChildProcess | null): Promise<void> {
+  if (!child || child.killed || child.exitCode !== null) return
+  const exited = new Promise<void>((res) => child.once('exit', () => res()))
+  child.kill('SIGTERM')
+  await exited
+}
 
 // ── Helpers ──────────────────────────────────────────────────────────────────
 
@@ -60,15 +74,13 @@ function stdioRpc(child: ChildProcess, req: object): Promise<unknown> {
 describe('transport selection', () => {
   let child: ChildProcess | null = null
 
-  afterEach(() => {
-    if (child && !child.killed) {
-      child.kill('SIGTERM')
-      child = null
-    }
+  afterEach(async () => {
+    await killAndWait(child)
+    child = null
   })
 
-  it('defaults to stdio when no flags are passed', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('defaults to stdio when no flags are passed', { timeout: 15000 }, async () => {
+    child = spawnServer()
 
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 1, method: 'initialize' })
 
@@ -83,12 +95,9 @@ describe('transport selection', () => {
     })
   })
 
-  it('starts HTTP server when --http flag is passed', async () => {
+  it('starts HTTP server when --http flag is passed', { timeout: 15000 }, async () => {
     const port = 14567 + Math.floor(Math.random() * 1000)
-    child = spawn('node', [BIN, '--http'], {
-      stdio: ['pipe', 'pipe', 'pipe'],
-      env: { ...process.env, MCP_PORT: String(port) },
-    })
+    child = spawnServer(['--http'], { ...process.env, MCP_PORT: String(port) })
     await waitForReady(child)
 
     const res = await sendJsonRpc(port, { jsonrpc: '2.0', id: 1, method: 'initialize' })
@@ -109,22 +118,20 @@ describe('transport selection', () => {
 describe('stdio transport', () => {
   let child: ChildProcess | null = null
 
-  afterEach(() => {
-    if (child && !child.killed) {
-      child.kill('SIGTERM')
-      child = null
-    }
+  afterEach(async () => {
+    await killAndWait(child)
+    child = null
   })
 
-  it('handles initialize request', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('handles initialize request', { timeout: 15000 }, async () => {
+    child = spawnServer()
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 1, method: 'initialize' })
 
     expect(res).toMatchObject({ jsonrpc: '2.0', id: 1, result: { protocolVersion: '2025-03-26' } })
   })
 
-  it('handles tools/list request', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('handles tools/list request', { timeout: 15000 }, async () => {
+    child = spawnServer()
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 2, method: 'tools/list' }) as { result: { tools: unknown[] } }
 
     expect(res).toMatchObject({ jsonrpc: '2.0', id: 2 })
@@ -132,15 +139,15 @@ describe('stdio transport', () => {
     expect(res.result.tools.length).toBeGreaterThan(0)
   })
 
-  it('handles ping request', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('handles ping request', { timeout: 15000 }, async () => {
+    child = spawnServer()
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 3, method: 'ping' })
 
     expect(res).toEqual({ jsonrpc: '2.0', id: 3, result: {} })
   })
 
-  it('returns parse error for invalid JSON', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('returns parse error for invalid JSON', { timeout: 15000 }, async () => {
+    child = spawnServer()
 
     const res = await new Promise<unknown>((resolve) => {
       child!.stdout!.once('data', (chunk: Buffer) => {
@@ -152,15 +159,15 @@ describe('stdio transport', () => {
     expect(res).toMatchObject({ jsonrpc: '2.0', error: { code: -32700, message: 'Parse error' } })
   })
 
-  it('returns error for unknown method', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('returns error for unknown method', { timeout: 15000 }, async () => {
+    child = spawnServer()
     const res = await stdioRpc(child, { jsonrpc: '2.0', id: 5, method: 'nonexistent/method' })
 
     expect(res).toMatchObject({ jsonrpc: '2.0', id: 5, error: { code: -32601 } })
   })
 
-  it('handles multiple sequential requests', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('handles multiple sequential requests', { timeout: 15000 }, async () => {
+    child = spawnServer()
 
     const res1 = await stdioRpc(child, { jsonrpc: '2.0', id: 1, method: 'ping' })
     const res2 = await stdioRpc(child, { jsonrpc: '2.0', id: 2, method: 'ping' })
@@ -169,8 +176,8 @@ describe('stdio transport', () => {
     expect(res2).toEqual({ jsonrpc: '2.0', id: 2, result: {} })
   })
 
-  it('skips empty lines', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('skips empty lines', { timeout: 15000 }, async () => {
+    child = spawnServer()
 
     // Send empty line then a valid request — should only get one response
     child.stdin!.write('\n')
@@ -179,12 +186,23 @@ describe('stdio transport', () => {
     expect(res).toEqual({ jsonrpc: '2.0', id: 10, result: {} })
   })
 
-  it('exits when stdin closes', async () => {
-    child = spawn('node', [BIN], { stdio: ['pipe', 'pipe', 'pipe'] })
+  it('exits when stdin closes', { timeout: 15000 }, async () => {
+    // Owns its own child lifecycle: closing stdin must let the readline 'close'
+    // handler call process.exit(0). We deliberately don't share the outer
+    // `child` variable so afterEach doesn't race the natural exit.
+    const c = spawnServer()
+    // Wait for the startup banner before closing stdin so the readline interface
+    // is fully wired up; otherwise on slow tsx cold-starts the child can exit
+    // before installing the 'close' handler.
+    await new Promise<void>((resolve) => {
+      c.stderr!.on('data', (chunk: Buffer) => {
+        if (chunk.toString().includes('running on stdio')) resolve()
+      })
+    })
 
     const exitCode = await new Promise<number | null>((resolve) => {
-      child!.on('exit', (code) => resolve(code))
-      child!.stdin!.end()
+      c.on('exit', (code) => resolve(code))
+      c.stdin!.end()
     })
 
     expect(exitCode).toBe(0)
@@ -195,23 +213,18 @@ describe('HTTP transport', () => {
   let child: ChildProcess | null = null
   let port: number
 
-  afterEach(() => {
-    if (child && !child.killed) {
-      child.kill('SIGTERM')
-      child = null
-    }
+  afterEach(async () => {
+    await killAndWait(child)
+    child = null
   })
 
   function startHttp(): Promise<void> {
     port = 15000 + Math.floor(Math.random() * 1000)
-    child = spawn('node', [BIN, '--http'], {
-      stdio: ['pipe', 'pipe', 'pipe'],
-      env: { ...process.env, MCP_PORT: String(port) },
-    })
+    child = spawnServer(['--http'], { ...process.env, MCP_PORT: String(port) })
     return waitForReady(child)
   }
 
-  it('GET /health returns status ok', async () => {
+  it('GET /health returns status ok', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await httpGet(port, '/health')
 
@@ -219,7 +232,7 @@ describe('HTTP transport', () => {
     expect(res.body).toEqual({ status: 'ok', server: 'agentspec-mcp' })
   })
 
-  it('POST /mcp handles initialize', async () => {
+  it('POST /mcp handles initialize', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await sendJsonRpc(port, { jsonrpc: '2.0', id: 1, method: 'initialize' })
 
@@ -227,7 +240,7 @@ describe('HTTP transport', () => {
     expect(res.body).toMatchObject({ jsonrpc: '2.0', id: 1, result: { protocolVersion: '2025-03-26' } })
   })
 
-  it('POST /mcp handles tools/list', async () => {
+  it('POST /mcp handles tools/list', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await sendJsonRpc(port, { jsonrpc: '2.0', id: 2, method: 'tools/list' })
 
@@ -237,7 +250,7 @@ describe('HTTP transport', () => {
     expect(body.result.tools.length).toBeGreaterThan(0)
   })
 
-  it('POST /mcp handles ping', async () => {
+  it('POST /mcp handles ping', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await sendJsonRpc(port, { jsonrpc: '2.0', id: 3, method: 'ping' })
 
@@ -245,7 +258,7 @@ describe('HTTP transport', () => {
     expect(res.body).toEqual({ jsonrpc: '2.0', id: 3, result: {} })
   })
 
-  it('POST /mcp returns parse error for invalid JSON', async () => {
+  it('POST /mcp returns parse error for invalid JSON', { timeout: 15000 }, async () => {
     await startHttp()
 
     const res = await new Promise<{ status: number; body: unknown }>((resolve, reject) => {
@@ -266,7 +279,7 @@ describe('HTTP transport', () => {
     expect(res.body).toMatchObject({ jsonrpc: '2.0', error: { code: -32700 } })
   })
 
-  it('returns 404 for unknown routes', async () => {
+  it('returns 404 for unknown routes', { timeout: 15000 }, async () => {
     await startHttp()
     const res = await httpGet(port, '/unknown')
 
@@ -274,7 +287,7 @@ describe('HTTP transport', () => {
     expect(res.body).toMatchObject({ error: expect.stringContaining('Not found') })
   })
 
-  it('returns CORS header on POST /mcp', async () => {
+  it('returns CORS header on POST /mcp', { timeout: 15000 }, async () => {
     await startHttp()
 
     const corsHeader = await new Promise<string | undefined>((resolve, reject) => {
@@ -291,12 +304,9 @@ describe('HTTP transport', () => {
     expect(corsHeader).toBe('*')
   })
 
-  it('responds to MCP_PORT env var', async () => {
+  it('responds to MCP_PORT env var', { timeout: 15000 }, async () => {
     const customPort = 16000 + Math.floor(Math.random() * 1000)
-    child = spawn('node', [BIN, '--http'], {
-      stdio: ['pipe', 'pipe', 'pipe'],
-      env: { ...process.env, MCP_PORT: String(customPort) },
-    })
+    child = spawnServer(['--http'], { ...process.env, MCP_PORT: String(customPort) })
 
     await new Promise<void>((resolve) => {
       child!.stderr!.on('data', (chunk: Buffer) => {
diff --git a/packages/mcp-server/src/index.ts b/packages/mcp-server/src/index.ts
index 5a1e8db..89c4d3e 100644
--- a/packages/mcp-server/src/index.ts
+++ b/packages/mcp-server/src/index.ts
@@ -393,11 +393,28 @@ function startHttp(): void {
 
 // ── Entry point ───────────────────────────────────────────────────────────────
 
-const useHttp = process.argv.includes('--http')
-if (useHttp) {
-  startHttp()
-} else {
-  startStdio()
+import { realpathSync } from 'node:fs'
+import { fileURLToPath } from 'node:url'
+
+// Only run the server when this module is the actual entry point. Without
+// this guard, importing { TOOLS, handleRpc } from a unit test would spawn a
+// readline interface and call process.exit(0) when vitest's stdin closes.
+function isMainEntry(): boolean {
+  if (!process.argv[1]) return false
+  try {
+    return realpathSync(process.argv[1]) === fileURLToPath(import.meta.url)
+  } catch {
+    return false
+  }
+}
+
+if (isMainEntry()) {
+  const useHttp = process.argv.includes('--http')
+  if (useHttp) {
+    startHttp()
+  } else {
+    startStdio()
+  }
 }
 
 export { handleRpc, callTool, TOOLS }