Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 218 additions & 0 deletions packages/adapter-claude/src/__tests__/claude-adapter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -671,5 +671,223 @@ describe('generateWithClaude()', () => {
expect(counts.length).toBeGreaterThanOrEqual(2)
expect(counts[counts.length - 1]).toBeGreaterThan(counts[0]!)
})

it('surfaces an error thrown mid-stream from the streaming API', async () => {
  // The stream yields one partial chunk and then dies — the error must
  // propagate out of generateWithClaude rather than being swallowed.
  const brokenStream = (async function* () {
    yield { type: 'content_block_delta', delta: { type: 'text_delta', text: '```json\n{"fil' } }
    throw new Error('stream blew up')
  })()
  mockStream.mockReturnValue(brokenStream)
  const pending = generateWithClaude(baseManifest, { framework: 'langgraph', onProgress: () => {} })
  await expect(pending).rejects.toThrow('stream blew up')
})
})

describe('Non-text content blocks', () => {
  beforeEach(() => {
    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
  })

  it('ignores non-text content blocks (e.g. tool_use) and parses the text block', async () => {
    // The response interleaves a tool_use block before the text block; only
    // the text block should be fed to the JSON parser.
    const textPayload =
      '```json\n{"files":{"agent.py":"# from text block"},"installCommands":[],"envVars":[]}\n```'
    mockCreate.mockResolvedValue({
      content: [
        { type: 'tool_use', id: 'tu_1', name: 'noop', input: {} },
        { type: 'text', text: textPayload },
      ],
      usage: { input_tokens: 1, output_tokens: 1 },
    })
    const result = await generateWithClaude(baseManifest, { framework: 'langgraph' })
    expect(result.files['agent.py']).toBe('# from text block')
  })
})

describe('Prompt structure (regression-sensitive)', () => {
  beforeEach(() => {
    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
  })

  it('user message wraps the manifest JSON in <context_manifest> tags', async () => {
    mockCreate.mockResolvedValue(
      makeClaudeResponse({ files: { 'agent.py': '# x' }, installCommands: [], envVars: [] }),
    )
    await generateWithClaude(baseManifest, { framework: 'langgraph' })
    const firstCall = mockCreate.mock.calls[0]![0]
    const userContent = firstCall.messages[0].content as string
    // Both tags, plus two manifest fields, must survive into the prompt.
    const expectedFragments = [
      '<context_manifest>',
      '</context_manifest>',
      '"name": "test-agent"',
      '"provider": "groq"',
    ]
    for (const fragment of expectedFragments) {
      expect(userContent).toContain(fragment)
    }
  })

  it('user message embeds resolved $file: refs as <context_file> blocks with their content', async () => {
    const workDir = join(tmpdir(), `agentspec-test-${Date.now()}-prompt`)
    mkdirSync(workDir, { recursive: true })
    const toolSource = 'def log_workout(exercises: list[str]) -> str:\n return "logged"\n'
    writeFileSync(join(workDir, 'workout_tools.py'), toolSource, 'utf-8')

    const manifestWithFileTool: AgentSpecManifest = {
      ...baseManifest,
      spec: {
        ...baseManifest.spec,
        tools: [
          {
            name: 'log-workout',
            description: 'Log a workout',
            module: '$file:workout_tools.py',
          } as unknown as NonNullable<AgentSpecManifest['spec']['tools']>[number],
        ],
      },
    }

    mockCreate.mockResolvedValue(
      makeClaudeResponse({ files: { 'agent.py': '# x' }, installCommands: [], envVars: [] }),
    )

    try {
      await generateWithClaude(manifestWithFileTool, {
        framework: 'langgraph',
        manifestDir: workDir,
      })
      const firstCall = mockCreate.mock.calls[0]![0]
      const userContent = firstCall.messages[0].content as string
      // The resolved file must show up as a <context_file> block…
      expect(userContent).toContain('<context_file')
      expect(userContent).toContain('</context_file>')
      // …carrying the actual file content (not merely the path)…
      expect(userContent).toContain('def log_workout')
      // …and the file path in the path attribute.
      expect(userContent).toContain('workout_tools.py')
    } finally {
      rmSync(workDir, { recursive: true, force: true })
    }
  })
})

describe('buildContext() $file: edge cases', () => {
  let buildContext: (opts: { manifest: AgentSpecManifest; contextFiles?: string[]; manifestDir?: string }) => string

  beforeEach(async () => {
    // Late import so the module picks up the mocks registered above.
    ({ buildContext } = await import('../context-builder.js'))
  })

  it('silently skips $file: refs that resolve to a directory (not a file)', () => {
    const baseDir = join(tmpdir(), `agentspec-test-${Date.now()}-dirref`)
    // Create a subdirectory that the manifest will point at via $file:
    // (mkdirSync with recursive:true creates baseDir as well).
    mkdirSync(join(baseDir, 'tools_dir'), { recursive: true })

    const manifestPointingAtDir: AgentSpecManifest = {
      ...baseManifest,
      spec: {
        ...baseManifest.spec,
        tools: [
          {
            name: 'dir-ref',
            description: 'Points at a directory',
            module: '$file:tools_dir',
          } as unknown as NonNullable<AgentSpecManifest['spec']['tools']>[number],
        ],
      },
    }

    try {
      // readFileSync on a directory throws EISDIR — buildContext must catch and skip
      const ctx = buildContext({ manifest: manifestPointingAtDir, manifestDir: baseDir })
      expect(ctx).not.toContain('<context_file')
    } finally {
      rmSync(baseDir, { recursive: true, force: true })
    }
  })
})
})

// ── repairYaml() tests ────────────────────────────────────────────────────────

describe('repairYaml()', () => {
  let repairYaml: (
    yamlStr: string,
    validationErrors: string,
    options?: { model?: string },
  ) => Promise<string>

  // Captured when the suite is collected, so afterEach can restore the
  // environment exactly as it was before these tests ran.
  const savedKey = process.env['ANTHROPIC_API_KEY']

  beforeEach(async () => {
    vi.clearAllMocks()
    ;({ repairYaml } = await import('../index.js'))
  })

  afterEach(() => {
    if (savedKey !== undefined) {
      process.env['ANTHROPIC_API_KEY'] = savedKey
    } else {
      delete process.env['ANTHROPIC_API_KEY']
    }
  })

  it('returns the repaired YAML string from the Claude response', async () => {
    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
    const repairedYaml =
      'apiVersion: agentspec.io/v1\nkind: AgentSpec\nmetadata:\n name: fixed-agent\n version: 1.0.0\n'
    mockCreate.mockResolvedValue(
      makeClaudeResponse({ files: { 'agent.yaml': repairedYaml }, installCommands: [], envVars: [] }),
    )
    const result = await repairYaml('apiVersion: bad\nkind: AgentSpec\n', 'metadata.name: Required')
    expect(result).toBe(repairedYaml)
    expect(result).toContain('apiVersion: agentspec.io/v1')
    expect(result).toContain('name: fixed-agent')
  })

  it('throws when ANTHROPIC_API_KEY is not set', async () => {
    delete process.env['ANTHROPIC_API_KEY']
    const attempt = repairYaml('apiVersion: bad\n', 'errors here')
    await expect(attempt).rejects.toThrow('ANTHROPIC_API_KEY')
  })

  it('throws when Claude response is missing the agent.yaml field', async () => {
    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
    // Shape is valid (`files` present) but the agent.yaml key is absent.
    mockCreate.mockResolvedValue(
      makeClaudeResponse({ files: { 'README.md': 'oops' }, installCommands: [], envVars: [] }),
    )
    const attempt = repairYaml('apiVersion: bad\n', 'metadata.name: Required')
    await expect(attempt).rejects.toThrow('agent.yaml')
  })

  it('uses REPAIR_SYSTEM_PROMPT (mentions AgentSpec v1 schema rules)', async () => {
    process.env['ANTHROPIC_API_KEY'] = 'sk-ant-test-key'
    mockCreate.mockResolvedValue(
      makeClaudeResponse({
        files: { 'agent.yaml': 'apiVersion: agentspec.io/v1\nkind: AgentSpec\n' },
        installCommands: [],
        envVars: [],
      }),
    )
    await repairYaml('apiVersion: bad\n', 'errors')
    const call = mockCreate.mock.calls[0]![0]
    // The system prompt must reference AgentSpec v1 rules so Claude knows how to repair.
    expect(call.system).toContain('AgentSpec v1')
    expect(call.system).toContain('schema')
    // The broken YAML and the validation errors must reach the user message.
    const userContent = call.messages[0].content as string
    expect(userContent).toContain('apiVersion: bad')
    expect(userContent).toContain('errors')
  })
})
10 changes: 10 additions & 0 deletions packages/mcp-server/src/__tests__/gap.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,16 @@ describe('gap() — named agent via control plane (agentName + controlPlaneUrl)'
const result = await gap({ agentName: 'budget-assistant', controlPlaneUrl: 'https://cp.company.com' })
expect(JSON.parse(result)).toMatchObject({ score: 72 })
})

it('throws when control plane fetch fails with a network error', async () => {
  // Operator-mode network failure (e.g. DNS lookup failed, TLS handshake refused).
  // The sidecar-mode equivalent is covered above; this exercises the separate
  // fetchFromControlPlane code path.
  fetchMock.mockRejectedValue(new Error('getaddrinfo ENOTFOUND cp.company.com'))
  const attempt = gap({ agentName: 'budget-assistant', controlPlaneUrl: 'https://cp.company.com' })
  await expect(attempt).rejects.toThrow('ENOTFOUND')
})
})

describe('gap() — missing args', () => {
Expand Down
48 changes: 48 additions & 0 deletions packages/mcp-server/src/__tests__/generate.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'

// vi.mock is hoisted above the imports below, so spawnCli resolves to the mock.
vi.mock('../cli-runner.js', () => ({
  spawnCli: vi.fn(),
}))

import { spawnCli } from '../cli-runner.js'
import { generate } from '../tools/generate.js'

const spawnCliMock = vi.mocked(spawnCli)

describe('generate()', () => {
  beforeEach(() => {
    spawnCliMock.mockReset()
  })

  it('invokes spawnCli with generate, file, and --framework when no out dir', async () => {
    spawnCliMock.mockResolvedValue('Generated 3 files')
    await generate('agent.yaml', 'langgraph')
    const expectedArgs = ['generate', 'agent.yaml', '--framework', 'langgraph']
    expect(spawnCliMock).toHaveBeenCalledWith(expectedArgs)
  })

  it('appends --out when an output directory is provided', async () => {
    spawnCliMock.mockResolvedValue('Generated 3 files')
    await generate('agent.yaml', 'crewai', '/tmp/out')
    const expectedArgs = ['generate', 'agent.yaml', '--framework', 'crewai', '--out', '/tmp/out']
    expect(spawnCliMock).toHaveBeenCalledWith(expectedArgs)
  })

  it('returns a JSON string with success and trimmed output', async () => {
    spawnCliMock.mockResolvedValue(' Generated 3 files\n\n')
    const parsed = JSON.parse(await generate('agent.yaml', 'mastra'))
    expect(parsed.success).toBe(true)
    expect(parsed.output).toBe('Generated 3 files')
  })

  it('propagates errors from the CLI', async () => {
    spawnCliMock.mockRejectedValue(new Error('Unknown framework: foo'))
    const attempt = generate('agent.yaml', 'foo')
    await expect(attempt).rejects.toThrow('Unknown framework: foo')
  })
})
24 changes: 24 additions & 0 deletions packages/mcp-server/src/__tests__/health.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,30 @@ describe('health() — sidecar mode', () => {
await health({ sidecarUrl: 'http://localhost:4001' })
expect(spawnCliMock).not.toHaveBeenCalled()
})

it('throws when sidecar returns malformed JSON', async () => {
  // Simulates a sidecar that responds 200 but with garbage in the body —
  // a common failure mode when a reverse proxy serves an HTML error page.
  const badJsonBody = async () => {
    throw new SyntaxError('Unexpected token < in JSON at position 0')
  }
  fetchMock.mockResolvedValue({ ok: true, json: badJsonBody })
  const attempt = health({ sidecarUrl: 'http://localhost:4001' })
  await expect(attempt).rejects.toThrow(/Unexpected token|JSON/)
})

it('throws when sidecar fetch hangs and the network layer times out', async () => {
  // A hung connection (silently dropped packets, dead peer) is worse than a
  // refused connection because the client waits forever. We surface it as a
  // synthetic timeout error from the fetch layer.
  const timeoutError = new Error('fetch timed out')
  timeoutError.name = 'TimeoutError'
  fetchMock.mockRejectedValue(timeoutError)
  const attempt = health({ sidecarUrl: 'http://localhost:4001' })
  await expect(attempt).rejects.toThrow('fetch timed out')
})
})

// ── Operator mode ─────────────────────────────────────────────────────────────
Expand Down
Loading
Loading