From 35692687c66d7c8ac10d96cee9febdbd10a2c640 Mon Sep 17 00:00:00 2001
From: Gustavo Cayres <gustavo.rodrigues.cayres@mindsight.com.br>
Date: Fri, 15 May 2026 16:49:25 -0300
Subject: [PATCH 1/2] fix: tool-retrieval MCP server crashes on startup due to
 plain-object schema

setRequestHandler() in @modelcontextprotocol/sdk requires a Zod schema as
its first argument (ListToolsRequestSchema / CallToolRequestSchema), not a
plain object like { method: 'tools/list' }.  Passing a plain object caused
the SDK to throw 'Schema is missing a method literal' synchronously during
server construction, so the process exited immediately on every mcp-serve
invocation.

Fixes:
- Use ListToolsRequestSchema and CallToolRequestSchema from the SDK's
  types module in both setRequestHandler calls.
- Extract server setup into createMcpServer({ Server, ListToolsRequestSchema,
  CallToolRequestSchema }) so it can be unit-tested without a real stdio
  transport.
- Remove the module-level startServer() auto-call; export startServer()
  explicitly and call it from bin/cli.js so requiring the module in tests
  is safe.

Tests added (docs/tool-retrieval-mcp.feature + step-definitions):
- Zod schema validation: asserts setRequestHandler is called with proper
  Zod schemas (not plain objects) using a mock Server + safeParse checks.
- list-tools wire protocol: full Client<->Server round-trip via
  InMemoryTransport; asserts search_tools appears in the manifest.
- call-tool wire protocol: same in-process transport; asserts a valid
  CallToolResult envelope is returned (empty corpus path, no embedder needed).

All 53 scenarios pass (234 steps).
---
 bin/cli.js                                    |  6 +-
 docs/tool-retrieval-mcp.feature               | 19 ++++
 src/mcp/tool-retrieval-server.js              | 55 +++++++++--
 .../tool-retrieval-mcp.steps.js               | 95 +++++++++++++++++++
 unit-tests/support/world.js                   | 94 +++++++++++++++++-
 5 files changed, 257 insertions(+), 12 deletions(-)

diff --git a/bin/cli.js b/bin/cli.js
index 06a5cd1..5f9e8c8 100755
--- a/bin/cli.js
+++ b/bin/cli.js
@@ -544,7 +544,11 @@ switch (command) {
   case 'mcp-serve': {
     // Start the tool-retrieval MCP stdio server.
     // Launched by opencode via: "command": ["opencode-workspace", "mcp-serve"]
-    require('../src/mcp/tool-retrieval-server.js');
+    const { startServer } = require('../src/mcp/tool-retrieval-server.js');
+    startServer().catch(err => {
+      process.stderr.write(`tool-retrieval-server: fatal error: ${err.message}\n`);
+      process.exit(1);
+    });
     break;
   }
 
diff --git a/docs/tool-retrieval-mcp.feature b/docs/tool-retrieval-mcp.feature
index b775bda..6d05399 100644
--- a/docs/tool-retrieval-mcp.feature
+++ b/docs/tool-retrieval-mcp.feature
@@ -57,3 +57,22 @@ Feature: On-Demand Tool Retrieval MCP Tool
     And an opencode session is active with no specific browser tools in context
     When the agent calls search_tools with query "click a button in a web page"
     Then at least one tool from the "playwright" server appears in the results
+
+  # ── Server wiring tests (the gap that let the startup crash go undetected) ──
+
+  Scenario: The MCP server wires request handlers with Zod schemas, not plain objects
+    When the MCP server is configured with a mock SDK
+    Then setRequestHandler was called twice
+    And the list-tools handler schema is a valid Zod schema
+    And the call-tool handler schema is a valid Zod schema
+
+  Scenario: list-tools returns the search_tools manifest over the wire protocol
+    When the MCP server handles a list-tools request via in-memory transport
+    Then the response contains a tool named "search_tools"
+    And the search_tools tool declares a required "query" input parameter
+
+  Scenario: call-tool returns a valid CallToolResult envelope over the wire protocol
+    Given the tool corpus has not been built
+    When the MCP server handles a call-tool request for "search_tools" via in-memory transport
+    Then the response is a valid CallToolResult with a content array
+    And the content text is a non-empty string
diff --git a/src/mcp/tool-retrieval-server.js b/src/mcp/tool-retrieval-server.js
index 66edd08..5d64df9 100644
--- a/src/mcp/tool-retrieval-server.js
+++ b/src/mcp/tool-retrieval-server.js
@@ -22,14 +22,33 @@
  *   2. Complements the TUI first-message hook (lib/tool-retrieval.plugin.js)
  *      by giving the agent on-demand access to the retrieval pipeline at any
  *      point in the conversation.
+ *
+ * Exports:
+ *   createMcpServer(sdk) — builds and returns a configured Server instance.
+ *                          Accepts SDK dependencies explicitly so it can be
+ *                          unit-tested with a mock Server (no stdio transport).
+ *   startServer()        — loads the real SDK, calls createMcpServer, attaches
+ *                          a StdioServerTransport, and connects.  Called by
+ *                          bin/cli.js for the mcp-serve command.
  */
 
 const { handleSearchTools } = require('./search-tools-handler');
 
-async function startServer() {
-  const { Server }               = await import('@modelcontextprotocol/sdk/server/index.js');
-  const { StdioServerTransport } = await import('@modelcontextprotocol/sdk/server/stdio.js');
+// ── server factory ────────────────────────────────────────────────────────────
 
+/**
+ * Create and configure the MCP Server instance.
+ *
+ * Accepts the SDK constructors/schemas as explicit parameters so this function
+ * can be unit-tested without spawning a real stdio transport.
+ *
+ * @param {object} sdk
+ * @param {Function} sdk.Server                   - Server constructor
+ * @param {object}  sdk.ListToolsRequestSchema     - Zod schema for tools/list
+ * @param {object}  sdk.CallToolRequestSchema      - Zod schema for tools/call
+ * @returns {object} configured MCP Server instance
+ */
+function createMcpServer({ Server, ListToolsRequestSchema, CallToolRequestSchema }) {
   const server = new Server(
     { name: 'tool-retrieval', version: '1.0.0' },
     { capabilities: { tools: {} } },
@@ -37,7 +56,7 @@ async function startServer() {
 
   // ── tool list ─────────────────────────────────────────────────────────────
   server.setRequestHandler(
-    { method: 'tools/list' },
+    ListToolsRequestSchema,
     async () => ({
       tools: [
         {
@@ -71,7 +90,7 @@ async function startServer() {
 
   // ── tool call ─────────────────────────────────────────────────────────────
   server.setRequestHandler(
-    { method: 'tools/call' },
+    CallToolRequestSchema,
     async (request) => {
       const { name, arguments: args } = request.params;
 
@@ -86,7 +105,26 @@ async function startServer() {
     },
   );
 
-  // ── transport ─────────────────────────────────────────────────────────────
+  return server;
+}
+
+// ── transport entry point ─────────────────────────────────────────────────────
+
+/**
+ * Load the real MCP SDK, build the server via createMcpServer(), attach a
+ * StdioServerTransport, and connect.
+ *
+ * Called explicitly by bin/cli.js for the `mcp-serve` command — NOT invoked
+ * automatically at module load time so that unit tests can safely require()
+ * this module and call createMcpServer() without triggering stdio binding.
+ */
+async function startServer() {
+  const { Server }               = await import('@modelcontextprotocol/sdk/server/index.js');
+  const { StdioServerTransport } = await import('@modelcontextprotocol/sdk/server/stdio.js');
+  const { ListToolsRequestSchema, CallToolRequestSchema } =
+    await import('@modelcontextprotocol/sdk/types.js');
+
+  const server    = createMcpServer({ Server, ListToolsRequestSchema, CallToolRequestSchema });
   const transport = new StdioServerTransport();
   await server.connect(transport);
 
@@ -94,7 +132,4 @@ async function startServer() {
   process.on('SIGINT',  () => server.close());
 }
 
-startServer().catch(err => {
-  process.stderr.write(`tool-retrieval-server: fatal error: ${err.message}\n`);
-  process.exit(1);
-});
+module.exports = { createMcpServer, startServer };
diff --git a/unit-tests/step-definitions/tool-retrieval-mcp.steps.js b/unit-tests/step-definitions/tool-retrieval-mcp.steps.js
index dbfa4f7..cd42a1d 100644
--- a/unit-tests/step-definitions/tool-retrieval-mcp.steps.js
+++ b/unit-tests/step-definitions/tool-retrieval-mcp.steps.js
@@ -172,3 +172,98 @@ Then('at least one tool from the {string} server appears in the results', functi
     `Expected a tool from "${serverName}" in results.\nActual:\n${text}`,
   );
 });
+
+// ─── Server-wiring When ────────────────────────────────────────────────────────
+
+When('the MCP server is configured with a mock SDK', async function () {
+  await this.runStartServer();
+});
+
+When('the MCP server handles a list-tools request via in-memory transport', async function () {
+  await this.runWireListTools();
+});
+
+When('the MCP server handles a call-tool request for {string} via in-memory transport',
+  async function (toolName) {
+    await this.runWireCallTool(toolName, { query: 'test query' });
+  },
+);
+
+// ─── Server-wiring Then ────────────────────────────────────────────────────────
+
+Then('setRequestHandler was called twice', function () {
+  assert.equal(
+    this._serverCalls?.length,
+    2,
+    `Expected setRequestHandler to be called exactly 2 times, got: ${this._serverCalls?.length}`,
+  );
+});
+
+Then('the list-tools handler schema is a valid Zod schema', function () {
+  const schema = this._serverCalls?.[0]?.schema;
+  assert.ok(
+    typeof schema?.parse === 'function',
+    'Expected list-tools schema to be a Zod schema with a .parse() method; ' +
+    'got a plain object — this is the bug: setRequestHandler needs a Zod schema, not { method: "..." }',
+  );
+  const result = schema.safeParse({ method: 'tools/list', params: {} });
+  assert.ok(
+    result.success,
+    `Expected list-tools schema to accept a { method: "tools/list" } request.\n` +
+    `safeParse error: ${JSON.stringify(result.error)}`,
+  );
+});
+
+Then('the call-tool handler schema is a valid Zod schema', function () {
+  const schema = this._serverCalls?.[1]?.schema;
+  assert.ok(
+    typeof schema?.parse === 'function',
+    'Expected call-tool schema to be a Zod schema with a .parse() method; ' +
+    'got a plain object — this is the bug: setRequestHandler needs a Zod schema, not { method: "..." }',
+  );
+  const result = schema.safeParse({ method: 'tools/call', params: { name: 'search_tools', arguments: {} } });
+  assert.ok(
+    result.success,
+    `Expected call-tool schema to accept a tools/call request.\n` +
+    `safeParse error: ${JSON.stringify(result.error)}`,
+  );
+});
+
+Then('the response contains a tool named {string}', function (toolName) {
+  const tools = this._wireToolsList?.tools ?? [];
+  const found = tools.find(t => t.name === toolName);
+  assert.ok(
+    found,
+    `Expected a tool named "${toolName}" in the tools/list response.\n` +
+    `Got: ${tools.map(t => t.name).join(', ') || '(empty)'}`,
+  );
+});
+
+Then('the search_tools tool declares a required {string} input parameter', function (paramName) {
+  const tools = this._wireToolsList?.tools ?? [];
+  const tool  = tools.find(t => t.name === 'search_tools');
+  assert.ok(tool, 'Expected search_tools to be present in tools/list response');
+  const required = tool.inputSchema?.required ?? [];
+  assert.ok(
+    required.includes(paramName),
+    `Expected "${paramName}" to be in inputSchema.required.\nGot: ${JSON.stringify(required)}`,
+  );
+});
+
+Then('the response is a valid CallToolResult with a content array', function () {
+  const result = this._wireCallResult;
+  assert.ok(result, 'Expected _wireCallResult to be set (tools/call produced no response)');
+  assert.ok(
+    Array.isArray(result.content),
+    `Expected result.content to be an array.\nGot: ${JSON.stringify(result)}`,
+  );
+  assert.ok(result.content.length > 0, 'Expected result.content to be non-empty');
+});
+
+Then('the content text is a non-empty string', function () {
+  const text = this._wireCallResult?.content?.[0]?.text;
+  assert.ok(
+    typeof text === 'string' && text.trim().length > 0,
+    `Expected content[0].text to be a non-empty string.\nGot: ${JSON.stringify(text)}`,
+  );
+});
diff --git a/unit-tests/support/world.js b/unit-tests/support/world.js
index a5d6c0e..f05e775 100644
--- a/unit-tests/support/world.js
+++ b/unit-tests/support/world.js
@@ -38,6 +38,9 @@ class OWWorld extends World {
     this._hookHandler        = null;   // reusable handler from createFirstMessageHandler
     this._hookLastQuery      = null;   // last text passed to _searchFn in runHook
     this._searchToolsResult  = null;   // result from runSearchTools
+    this._serverCalls        = null;   // setRequestHandler calls captured by runStartServer
+    this._wireToolsList      = null;   // listTools() result from runWireListTools
+    this._wireCallResult     = null;   // callTool() result from runWireCallTool
   }
 
   // ── helpers ─────────────────────────────────────────────────────────────────
@@ -298,7 +301,96 @@ class OWWorld extends World {
     const args = { query, ...(opts.k !== undefined ? { k: opts.k } : {}) };
     this._searchToolsResult = await handleSearchTools(args, { _searchFn: realSearch });
   }
-}
+
+  /**
+   * Invoke createMcpServer() with a mock SDK to validate request-handler
+   * registration without connecting a real stdio transport.
+   *
+   * The mock Server records every setRequestHandler() call so that Then steps
+   * can assert that proper Zod schemas — not plain objects — were passed.
+   * Captures results in this._serverCalls = [{ schema, handler }, ...].
+   */
+  async runStartServer() {
+    const { ListToolsRequestSchema, CallToolRequestSchema } =
+      await import('@modelcontextprotocol/sdk/types.js');
+
+    const calls = [];
+
+    class MockServer {
+      constructor() {}
+      setRequestHandler(schema, handler) {
+        calls.push({ schema, handler });
+      }
+    }
+
+    const { createMcpServer } = require('../../src/mcp/tool-retrieval-server');
+    createMcpServer({ Server: MockServer, ListToolsRequestSchema, CallToolRequestSchema });
+
+    this._serverCalls = calls;
+  }
+
+  /**
+   * Run a full wire-protocol tools/list round-trip using InMemoryTransport.
+   *
+   * Creates a real MCP Server (via createMcpServer) and a real Client, links
+   * them in-process, then calls client.listTools().  No stdio, no subprocess.
+   * Result stored in this._wireToolsList.
+   */
+  async runWireListTools() {
+    const { Server }  = await import('@modelcontextprotocol/sdk/server/index.js');
+    const { Client }  = await import('@modelcontextprotocol/sdk/client/index.js');
+    const { InMemoryTransport } = await import('@modelcontextprotocol/sdk/inMemory.js');
+    const { ListToolsRequestSchema, CallToolRequestSchema } =
+      await import('@modelcontextprotocol/sdk/types.js');
+
+    const { createMcpServer } = require('../../src/mcp/tool-retrieval-server');
+    const server = createMcpServer({ Server, ListToolsRequestSchema, CallToolRequestSchema });
+
+    const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair();
+    await server.connect(serverTransport);
+
+    const client = new Client({ name: 'test-client', version: '1.0.0' });
+    await client.connect(clientTransport);
+    try {
+      this._wireToolsList = await client.listTools();
+    } finally {
+      await client.close();
+    }
+  }
+
+  /**
+   * Run a full wire-protocol tools/call round-trip using InMemoryTransport.
+   *
+   * Expects an empty corpus (set up by the scenario's Given step, not here) so
+   * handleSearchTools() returns its graceful "corpus is empty" response without
+   * invoking the embedder.  This keeps the test self-contained and fast.
+   * Result stored in this._wireCallResult.
+   *
+   * @param {string} toolName  - tool to call (e.g. 'search_tools')
+   * @param {object} args      - arguments to pass to the tool
+   */
+  async runWireCallTool(toolName, args) {
+    const { Server }  = await import('@modelcontextprotocol/sdk/server/index.js');
+    const { Client }  = await import('@modelcontextprotocol/sdk/client/index.js');
+    const { InMemoryTransport } = await import('@modelcontextprotocol/sdk/inMemory.js');
+    const { ListToolsRequestSchema, CallToolRequestSchema } =
+      await import('@modelcontextprotocol/sdk/types.js');
+
+    const { createMcpServer } = require('../../src/mcp/tool-retrieval-server');
+    const server = createMcpServer({ Server, ListToolsRequestSchema, CallToolRequestSchema });
+
+    const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair();
+    await server.connect(serverTransport);
+
+    const client = new Client({ name: 'test-client', version: '1.0.0' });
+    await client.connect(clientTransport);
+    try {
+      this._wireCallResult = await client.callTool({ name: toolName, arguments: args });
+    } finally {
+      await client.close();
+    }
+  }
+}  // end OWWorld
 
 // ── Expose ExitError as a global so step files can catch it ──────────────────
 

From a1dfc66b68e57a86636164a2881221f2e2b19820 Mon Sep 17 00:00:00 2001
From: Gustavo Cayres <gustavo.rodrigues.cayres@mindsight.com.br>
Date: Fri, 15 May 2026 17:21:17 -0300
Subject: [PATCH 2/2] docs: expand feature coverage from README; slim README to
 pointer

- Add 6 new docs/*.feature files covering every section of the old README
  that had no Gherkin spec: installation, mcp-env, tui-commands,
  mcp-servers, smoke-test, prerequisites
- Scenarios that require live systems (tmux, real binary installs,
  network) are tagged @wip; cucumber.js now sets tags:'not @wip' so
  they are skipped by default in npm test
- Add step definitions for the three unit-testable feature files:
  mcp-env.steps.js (file format, merge, update, dir creation),
  mcp-servers.steps.js (validates lib/opencode.json.template directly),
  prerequisites.steps.js (Node version + engines field check)
- All 69 scenarios / 293 steps pass (16 new unit-testable scenarios added)
- Rewrite README.md as a thin pointer: tagline, quick-start, table of
  links to every docs/*.feature file, and a one-liner on @wip semantics
---
 README.md                                     | 223 +++---------------
 cucumber.js                                   |   1 +
 docs/installation.feature                     |  62 +++++
 docs/mcp-env.feature                          |  47 ++++
 docs/mcp-servers.feature                      |  70 ++++++
 docs/prerequisites.feature                    |  44 ++++
 docs/smoke-test.feature                       |  40 ++++
 docs/tui-commands.feature                     |  58 +++++
 unit-tests/step-definitions/mcp-env.steps.js  | 140 +++++++++++
 .../step-definitions/mcp-servers.steps.js     |  75 ++++++
 .../step-definitions/prerequisites.steps.js   |  60 +++++
 11 files changed, 628 insertions(+), 192 deletions(-)
 create mode 100644 docs/installation.feature
 create mode 100644 docs/mcp-env.feature
 create mode 100644 docs/mcp-servers.feature
 create mode 100644 docs/prerequisites.feature
 create mode 100644 docs/smoke-test.feature
 create mode 100644 docs/tui-commands.feature
 create mode 100644 unit-tests/step-definitions/mcp-env.steps.js
 create mode 100644 unit-tests/step-definitions/mcp-servers.steps.js
 create mode 100644 unit-tests/step-definitions/prerequisites.steps.js

diff --git a/README.md b/README.md
index 8dd2d08..ef2aa4e 100644
--- a/README.md
+++ b/README.md
@@ -1,209 +1,48 @@
 # opencode-workspace
 
 Launches [OpenCode](https://opencode.ai) AI agents in a tmux split-pane layout, from any directory.
-Auto-creates a tmux session if you're not already in one.
-
-Includes a **tool-retrieval layer**: before each one-shot session the user's
-prompt is embedded and used to cosine-search the full MCP tool corpus.
-Only the most relevant servers are exposed to the LLM, cutting context overhead
-from 10+ servers down to the top-K matches.
-
-## Install
+Includes a **tool-retrieval layer**: before each one-shot session the prompt is embedded and
+cosine-searched against the MCP tool corpus, cutting context from 10+ servers down to the top-K matches.
 
 ```bash
 npm install -g @gus/opencode-workspace
-# postinstall automatically sets up: uv, glab, opencode, semgrep
-```
-
-## Setup (first time)
-
-```bash
-# 1. Store API keys
-opencode-workspace mcp env NOTION_TOKEN
-opencode-workspace mcp env GITHUB_TOKEN
-opencode-workspace mcp env BRAVE_API_KEY   # optional
-
-# 2. Build the tool corpus (connect to every MCP server and embed their tools)
-opencode-workspace index
-```
-
-`index` is incremental — re-run it whenever you add or update an MCP server.
-Each tool is only re-embedded when its description or input schema changes.
-
-## Usage
-
-```bash
-# TUI mode (no retrieval — opens interactive agent in a tmux split)
-opencode-workspace
-opencode-workspace agent
-
-# One-shot mode (retrieves tools, then runs opencode non-interactively)
-opencode-workspace "find open PRs assigned to me and draft a summary"
-opencode-workspace "run the test suite and report any failures"
-
-# Disable retrieval entirely for a single session (A/B baseline)
-OPENCODE_WORKSPACE_RETRIEVAL=off opencode-workspace "your prompt"
-
-# Inspect what tools were retrieved in past sessions
-opencode-workspace stats
-opencode-workspace stats --last 10
+opencode-workspace index          # build tool corpus (first time)
+opencode-workspace "find open PRs"    # one-shot: retrieve tools + run opencode
+opencode-workspace                # TUI mode: interactive agent in tmux split
 ```
 
-## Commands
-
-| Command | Description |
-|---|---|
-| `opencode-workspace` | Launch TUI agent. Auto-creates tmux session if needed. |
-| `opencode-workspace "<prompt>"` | One-shot: embed prompt → retrieve top-K tools → run `opencode run`. |
-| `opencode-workspace index` | Index all MCP servers. Incremental; only re-embeds changed tools. |
-| `opencode-workspace index --force` | Force re-embed of all tools regardless of schema cache. |
-| `opencode-workspace stats` | Summarise retrieval history from `~/.config/opencode-workspace/sessions.jsonl`. |
-| `opencode-workspace stats --last N` | Limit to last N sessions. |
-| `opencode-workspace install` | Install dependencies: uv, glab, opencode, semgrep. |
-| `opencode-workspace agent` | TUI alias (same as bare invocation, no retrieval). |
-| `opencode-workspace term` | Split a plain terminal pane. |
-| `opencode-workspace mcp env VAR` | Store a secret in `~/.local/share/opencode/mcp.env`. |
+## Documentation
 
-## Configuration
-
-`~/.config/opencode-workspace/config.json` (created automatically with defaults):
-
-```json
-{
-  "embedding": {
-    "provider": "local",
-    "model": "Xenova/all-MiniLM-L6-v2"
-  },
-  "retrieval": {
-    "k": 10,
-    "strategy": "topk"
-  }
-}
-```
+All behaviour is specified as Gherkin feature files in [`docs/`](docs/):
 
-### Embedding providers
-
-| Provider | `"provider"` value | Notes |
-|---|---|---|
-| Local ONNX (default) | `"local"` | `Xenova/all-MiniLM-L6-v2`, ~23 MB downloaded on first use to `~/.cache/huggingface`. No API key needed. |
-| OpenAI | `"openai"` | Set `OPENAI_API_KEY` or add `"apiKey"` to the config. Default model: `text-embedding-3-small`. |
-| Voyage | `"voyage"` | Not yet implemented. |
-| Cohere | `"cohere"` | Not yet implemented. |
-
-### Retrieval strategies
-
-| `"strategy"` | Status |
+| Feature file | What it covers |
 |---|---|
-| `"topk"` | Implemented — cosine top-K over the full corpus. |
-| `"agent_first"` | Placeholder (not implemented). |
-| `"graph"` | Placeholder (not implemented). |
-| `"active"` | Placeholder (not implemented). |
-
-### Kill switch
-
-```bash
-OPENCODE_WORKSPACE_RETRIEVAL=off opencode-workspace "prompt"
-```
-
-Bypasses all retrieval and permission filtering. Behaviour is identical to
-running `opencode run "prompt"` directly. Use this as the A/B baseline.
-
-## Inspecting what was retrieved
+| [`docs/prerequisites.feature`](docs/prerequisites.feature) | Node ≥ 18, tmux, git, curl |
+| [`docs/installation.feature`](docs/installation.feature) | `npm install`, postinstall, `opencode-workspace install` |
+| [`docs/mcp-env.feature`](docs/mcp-env.feature) | `mcp env VAR` — storing secrets in `mcp.env` |
+| [`docs/mcp-servers.feature`](docs/mcp-servers.feature) | The 10 bundled MCP servers and their configuration |
+| [`docs/indexing.feature`](docs/indexing.feature) | `index` — crawling MCP servers and building the corpus |
+| [`docs/configuration.feature`](docs/configuration.feature) | `config.json` — embedding providers and retrieval strategy |
+| [`docs/retrieval.feature`](docs/retrieval.feature) | One-shot retrieval, kill switch, fallthrough behaviour |
+| [`docs/permissions.feature`](docs/permissions.feature) | Deny-rule generation and composition with user config |
+| [`docs/telemetry.feature`](docs/telemetry.feature) | Session records, `stats` command |
+| [`docs/tui-commands.feature`](docs/tui-commands.feature) | TUI mode: `agent`, `term`, tmux layout |
+| [`docs/tool-retrieval-mcp.feature`](docs/tool-retrieval-mcp.feature) | On-demand `search_tools` MCP tool |
+| [`docs/tui-retrieval.feature`](docs/tui-retrieval.feature) | TUI first-message hook plugin |
+| [`docs/smoke-test.feature`](docs/smoke-test.feature) | `make smoke` — end-to-end validation |
+
+Scenarios tagged `@wip` require a live environment (real binaries, tmux, network) and are skipped
+by `npm test`. Run `make smoke` for end-to-end validation.
+
+## Running the tests
 
 ```bash
-# Plain text summary
-opencode-workspace stats
-
-# Raw JSONL (one record per session)
-cat ~/.config/opencode-workspace/sessions.jsonl | jq .
-```
-
-Each record:
-
-```json
-{
-  "ts": "2026-05-15T12:00:00.000Z",
-  "session_id": "uuid",
-  "prompt": "find open PRs...",
-  "retrieved_tools": [
-    { "server": "github", "tool": "list_pull_requests", "score": 0.923 }
-  ],
-  "corpus_size": 84,
-  "embedding_model": "Xenova/all-MiniLM-L6-v2",
-  "k": 10
-}
+npm test      # unit tests — skips @wip scenarios
+make smoke    # end-to-end: real MCP servers, real index, real retrieval
 ```
 
-## Smoke test
-
-Verifies that `index` + retrieval are working end-to-end:
-
-```bash
-make smoke
-```
-
-This runs `opencode-workspace index`, then asserts that querying
-`"list open pull requests on GitHub"` returns a GitHub tool as the top result.
-
-## How it works
-
-1. **`index`** — connects to every MCP server in `lib/opencode.json.template`
-   (using `@modelcontextprotocol/sdk`), calls `listTools()`, and stores
-   `{server, name, description, inputSchema}` plus a 384-dim embedding of
-   `"{server} / {tool}: {description}"` in a SQLite DB at
-   `~/.config/opencode-workspace/tools.db`.
-   Embeddings are skipped when `sha256(description + JSON.stringify(schema))`
-   is unchanged — making re-runs fast.
-
-2. **One-shot** — the prompt is embedded with the same model, cosine-searched
-   against the corpus (via `sqlite-vec` if installed, otherwise in-process
-   brute-force), and the top-K tools are identified.
-   A temporary config is written to `/tmp/ow-<uuid>.json` that extends the
-   workspace template with `"permission": { "mcp_<server>_*": "deny" }` for
-   every server absent from the top-K results.
-   `opencode run "<prompt>"` is then spawned with `OPENCODE_CONFIG` pointing
-   at that temp file. The file is deleted when opencode exits.
-
-3. **Compose, never overwrite** — only deny rules are generated; user-defined
-   permission entries in `~/.config/opencode/opencode.json` are preserved and
-   merged. A server the user has already denied cannot be re-enabled.
-
-## MCP servers included
-
-| Server | Description |
-|---|---|
-| `notion` | Notion API via `@notionhq/notion-mcp-server` |
-| `gitlab` | GitLab CLI via `glab mcp serve` |
-| `playwright` | Browser automation via `@playwright/mcp` |
-| `fetch` | HTTP fetch via `mcp-server-fetch` (uvx) |
-| `semgrep` | Code scanning via `semgrep mcp` |
-| `aws-knowledge` | AWS docs & regional availability (remote) |
-| `sequential-thinking` | Structured reasoning via `@modelcontextprotocol/server-sequential-thinking` |
-| `github` | GitHub API via `@modelcontextprotocol/server-github` (requires `GITHUB_TOKEN`) |
-| `brave-search-mcp-server` | Web search via Brave (requires `BRAVE_API_KEY`) |
-
-## Prerequisites
-
-- `tmux`
-- `git`
-- `curl`
-- Node.js >= 18
-
 ## References
 
-This implementation is based on the following work:
-
-> Lumer, E., Nizar, F., Gulati, A., Honaganahalli Basavaraju, P., & Subbiah, V. K. (2025). *Tool-to-Agent Retrieval: Bridging Tools and Agents for Scalable LLM Multi-Agent Systems.* arXiv:2511.01854. https://arxiv.org/abs/2511.01854
-
-```bibtex
-@misc{lumer2025tooltoagent,
-  title         = {Tool-to-Agent Retrieval: Bridging Tools and Agents for Scalable LLM Multi-Agent Systems},
-  author        = {Lumer, Elias and Nizar, Faheem and Gulati, Anmol and Honaganahalli Basavaraju, Pradeep and Subbiah, Vamse Kumar},
-  year          = {2025},
-  eprint        = {2511.01854},
-  archivePrefix = {arXiv},
-  primaryClass  = {cs.CL},
-  url           = {https://arxiv.org/abs/2511.01854}
-}
-```
-
+> Lumer, E., Nizar, F., Gulati, A., Honaganahalli Basavaraju, P., & Subbiah, V. K. (2025).
+> *Tool-to-Agent Retrieval: Bridging Tools and Agents for Scalable LLM Multi-Agent Systems.*
+> arXiv:2511.01854. <https://arxiv.org/abs/2511.01854>
diff --git a/cucumber.js b/cucumber.js
index 00a3087..2a384e0 100644
--- a/cucumber.js
+++ b/cucumber.js
@@ -6,6 +6,7 @@ module.exports = {
       'unit-tests/support/hooks.js',
       'unit-tests/step-definitions/**/*.steps.js',
     ],
+    tags:    'not @wip',
     format:  ['progress-bar', 'summary'],
     timeout: 30000,
   },
diff --git a/docs/installation.feature b/docs/installation.feature
new file mode 100644
index 0000000..372d154
--- /dev/null
+++ b/docs/installation.feature
@@ -0,0 +1,62 @@
+Feature: Installation
+  Running "npm install -g @gus/opencode-workspace" installs the package and
+  automatically triggers a postinstall hook that sets up all required system
+  dependencies.  The explicit "opencode-workspace install" command can be
+  re-run at any time to repair or update individual dependencies.
+
+  Each install step is wrapped in a try/catch so that a single failure (for
+  example, a network error when downloading glab) is reported as a warning
+  and the remaining steps still run, rather than aborting the entire setup.
+
+  @wip
+  Scenario: Postinstall runs automatically after npm install
+    When "npm install -g @gus/opencode-workspace" is run
+    Then the postinstall hook calls "opencode-workspace install" automatically
+
+  @wip
+  Scenario: Install sets up uv if not already present
+    Given "uv" is not installed on the system
+    When the user runs "opencode-workspace install"
+    Then uv is downloaded and installed via the Astral installer script
+    And uv is available on PATH under ~/.local/bin
+
+  @wip
+  Scenario: Install sets up glab if not already present
+    Given "glab" is not installed on the system
+    When the user runs "opencode-workspace install"
+    Then the latest glab release is fetched from the GitLab API
+    And the glab binary is installed to ~/.local/bin/glab
+
+  @wip
+  Scenario: Install sets up opencode if not already present
+    Given "opencode" is not installed on the system
+    When the user runs "opencode-workspace install"
+    Then opencode is installed at the version pinned in package.json["opencode"]["version"]
+    And the installer script is fetched from https://opencode.ai/install
+
+  @wip
+  Scenario: Install sets up semgrep if not already present
+    Given "semgrep" is not installed on the system
+    When the user runs "opencode-workspace install"
+    Then semgrep is installed via "uv tool install semgrep"
+
+  @wip
+  Scenario: Install copies the TUI retrieval plugin
+    When the user runs "opencode-workspace install"
+    Then the file ~/.config/opencode/plugins/ow-tool-retrieval.js is created
+    And its contents match lib/tool-retrieval.plugin.js
+
+  @wip
+  Scenario: Already-installed dependencies are skipped without error
+    Given all dependencies (uv, glab, opencode, semgrep) are already installed
+    When the user runs "opencode-workspace install"
+    Then each dependency's existing version is logged to stdout
+    And no download or install step is retried
+
+  @wip
+  Scenario: A failing install step warns and continues
+    Given the glab download fails with a network error
+    When the user runs "opencode-workspace install"
+    Then a warning is printed containing "glab failed"
+    And a hint "Re-run: opencode-workspace install" is printed
+    And the remaining steps (opencode, semgrep, plugin) still run
diff --git a/docs/mcp-env.feature b/docs/mcp-env.feature
new file mode 100644
index 0000000..263e5ea
--- /dev/null
+++ b/docs/mcp-env.feature
@@ -0,0 +1,47 @@
+Feature: MCP Environment Secrets (mcp env)
+  "opencode-workspace mcp env VAR_NAME" prompts for a secret value and stores
+  it in ~/.local/share/opencode/mcp.env in KEY=value format, one entry per line.
+
+  MCP servers that reference {env:VAR_NAME} in lib/opencode.json.template
+  automatically receive the stored value at startup via environment injection.
+  The directory is created if it does not exist.  Re-running the command with
+  the same key updates the value in-place without duplicating the entry.
+
+  @wip
+  Scenario: Secret is stored after interactive prompt
+    Given the user runs "opencode-workspace mcp env GITHUB_TOKEN"
+    When the user types a secret value and presses Enter
+    Then the value is stored in ~/.local/share/opencode/mcp.env as "GITHUB_TOKEN=<value>"
+    And "Saved GITHUB_TOKEN to <path>" is printed to stdout
+
+  Scenario: mcp.env uses KEY=value format with one entry per line
+    Given ~/.local/share/opencode/mcp.env contains:
+      """
+      GITHUB_TOKEN=ghp_abc123
+      NOTION_TOKEN=secret_xyz
+      """
+    When the mcp.env file is parsed
+    Then GITHUB_TOKEN resolves to "ghp_abc123"
+    And NOTION_TOKEN resolves to "secret_xyz"
+
+  Scenario: Storing a second key does not overwrite the first
+    Given ~/.local/share/opencode/mcp.env already contains "GITHUB_TOKEN=ghp_abc123"
+    When "NOTION_TOKEN=secret_xyz" is added to mcp.env
+    Then both GITHUB_TOKEN and NOTION_TOKEN are present in mcp.env
+
+  Scenario: Storing an existing key updates its value in-place
+    Given ~/.local/share/opencode/mcp.env already contains "GITHUB_TOKEN=old_token"
+    When "GITHUB_TOKEN=new_token" is written to mcp.env
+    Then GITHUB_TOKEN resolves to "new_token"
+    And there is only one GITHUB_TOKEN entry in mcp.env
+
+  Scenario: The mcp.env directory is created automatically if absent
+    Given ~/.local/share/opencode/ does not exist
+    When the mcp.env file is written
+    Then the directory ~/.local/share/opencode/ is created automatically
+
+  @wip
+  Scenario: Missing VAR_NAME argument prints usage and exits with code 1
+    When the user runs "opencode-workspace mcp env" without a variable name
+    Then "Usage: opencode-workspace mcp env VAR_NAME" is printed to stderr
+    And the process exits with code 1
diff --git a/docs/mcp-servers.feature b/docs/mcp-servers.feature
new file mode 100644
index 0000000..3df6791
--- /dev/null
+++ b/docs/mcp-servers.feature
@@ -0,0 +1,70 @@
+Feature: Bundled MCP Servers
+  lib/opencode.json.template is the single source of truth for which MCP
+  servers ship with opencode-workspace.  Changes to the template affect both
+  indexing (which servers are crawled for tools) and one-shot retrieval
+  (which servers can be filtered by deny rules).
+
+  Every server must declare a "type" of either "local" (spawned as a child
+  process via "command") or "remote" (reached via a "url").  Servers that
+  require secrets reference them as {env:VAR_NAME}; the CLI resolves these
+  from ~/.local/share/opencode/mcp.env at startup.
+
+  Scenario: The template includes the notion server
+    When lib/opencode.json.template is read
+    Then a server named "notion" is defined
+    And its type is "local"
+    And its command starts with "npx"
+
+  Scenario: The template includes the gitlab server
+    When lib/opencode.json.template is read
+    Then a server named "gitlab" is defined
+    And its type is "local"
+    And its command sequence is "glab,mcp,serve"
+
+  Scenario: The template includes the playwright server
+    When lib/opencode.json.template is read
+    Then a server named "playwright" is defined
+    And its type is "local"
+    And its command starts with "npx"
+
+  Scenario: The template includes the fetch server
+    When lib/opencode.json.template is read
+    Then a server named "fetch" is defined
+    And its type is "local"
+    And its command starts with "uvx"
+
+  Scenario: The template includes the semgrep server
+    When lib/opencode.json.template is read
+    Then a server named "semgrep" is defined
+    And its type is "local"
+    And its command sequence is "semgrep,mcp"
+
+  Scenario: The template includes the aws-knowledge server as a remote server
+    When lib/opencode.json.template is read
+    Then a server named "aws-knowledge" is defined
+    And its type is "remote"
+    And its url is "https://knowledge-mcp.global.api.aws"
+
+  Scenario: The template includes the sequential-thinking server
+    When lib/opencode.json.template is read
+    Then a server named "sequential-thinking" is defined
+    And its type is "local"
+    And its command starts with "npx"
+
+  Scenario: The github server requires a GITHUB_TOKEN from mcp.env
+    When lib/opencode.json.template is read
+    Then a server named "github" is defined
+    And its type is "local"
+    And its environment references "{env:GITHUB_TOKEN}"
+
+  Scenario: The brave-search-mcp-server requires a BRAVE_API_KEY from mcp.env
+    When lib/opencode.json.template is read
+    Then a server named "brave-search-mcp-server" is defined
+    And its type is "local"
+    And its environment references "{env:BRAVE_API_KEY}"
+
+  Scenario: The tool-retrieval server is always included and self-hosted
+    When lib/opencode.json.template is read
+    Then a server named "tool-retrieval" is defined
+    And its type is "local"
+    And its command sequence is "opencode-workspace,mcp-serve"
diff --git a/docs/prerequisites.feature b/docs/prerequisites.feature
new file mode 100644
index 0000000..94d8d8f
--- /dev/null
+++ b/docs/prerequisites.feature
@@ -0,0 +1,44 @@
+Feature: Prerequisites
+  opencode-workspace requires Node.js >= 18 for all commands.
+  The TUI commands (agent, term) additionally require tmux.
+  The install command requires curl and, for semgrep, uv.
+  Standard development workflows assume git is available.
+
+  The Node.js version requirement is declared in package.json["engines"]["node"]
+  and is enforceable at install time by npm/pnpm/yarn.  The system tool
+  requirements (tmux, curl, git) are discovered at runtime: the command that
+  needs them will fail with a clear error message if they are absent.
+
+  Scenario: The package.json engines field requires Node.js 18 or higher
+    When package.json is read
+    Then the "engines.node" field is ">=18"
+
+  Scenario: The running Node.js version satisfies the declared engine requirement
+    Given the current Node.js version is 18 or higher
+    When any opencode-workspace command is run
+    Then the command does not exit with a "Node version" error
+
+  @wip
+  Scenario: tmux is required for the agent command
+    Given "tmux" is not installed on the system
+    When the user runs "opencode-workspace agent"
+    Then an error is printed and the process exits with a non-zero code
+
+  @wip
+  Scenario: tmux is required for the term command
+    Given "tmux" is not installed on the system
+    When the user runs "opencode-workspace term"
+    Then an error is printed and the process exits with a non-zero code
+
+  @wip
+  Scenario: curl is required by the install command for downloading uv and opencode
+    Given "curl" is not installed on the system
+    When the user runs "opencode-workspace install"
+    Then the steps that invoke curl fail with a warning
+    And the remaining install steps that do not require curl still run
+
+  @wip
+  Scenario: git is available as a standard development tool
+    Given "git" is installed on the system
+    When git-dependent workflows are run inside the tmux workspace
+    Then git commands execute without PATH or permission errors
diff --git a/docs/smoke-test.feature b/docs/smoke-test.feature
new file mode 100644
index 0000000..acbf881
--- /dev/null
+++ b/docs/smoke-test.feature
@@ -0,0 +1,40 @@
+Feature: Smoke Test
+  "make smoke" is the end-to-end validation that the full
+  index → embed → retrieve pipeline is working correctly against real MCP
+  servers.  It is not a unit test: it requires a live network connection,
+  all bundled MCP servers to be reachable, and a fully installed
+  opencode-workspace environment.
+
+  The canonical passing criterion is: after running "opencode-workspace index",
+  querying "list open pull requests on GitHub" must return a tool from the
+  "github" server as the top result.
+
+  All scenarios in this feature are end-to-end integration tests and cannot
+  be exercised in a unit-test environment.
+
+  @wip
+  Scenario: make smoke exits with code 0 when everything works
+    Given opencode-workspace is installed and all MCP servers are reachable
+    When "make smoke" is run
+    Then the exit code is 0
+
+  @wip
+  Scenario: The github server's tools are the top result for a GitHub query
+    Given the corpus has been freshly built by "opencode-workspace index"
+    When the query "list open pull requests on GitHub" is submitted via "opencode-workspace retrieve"
+    Then the top result belongs to the "github" server
+
+  @wip
+  Scenario: make smoke fails when the corpus has not been indexed
+    Given the corpus has not been built
+    When "make smoke" is run
+    Then the exit code is non-zero
+    And an error or warning message advises running "opencode-workspace index"
+
+  @wip
+  Scenario: Incremental index does not break retrieval accuracy
+    Given the corpus was previously indexed
+    And one MCP server's schema has changed
+    When "opencode-workspace index" is run again
+    Then only the changed server's tools are re-embedded
+    And retrieval accuracy for other servers is unchanged
diff --git a/docs/tui-commands.feature b/docs/tui-commands.feature
new file mode 100644
index 0000000..a8dded3
--- /dev/null
+++ b/docs/tui-commands.feature
@@ -0,0 +1,58 @@
+Feature: TUI Commands
+  Running opencode-workspace without a prompt — or with the "agent" subcommand —
+  opens an interactive OpenCode session in a tmux split-pane layout.
+  "opencode-workspace term" opens a plain shell pane in the same layout.
+
+  A tmux session named "opencode-workspace" is created automatically when the
+  user is not already inside one.  Subsequent invocations from within an
+  "ow-session" window stack new panes vertically in the right column rather
+  than creating a new session.
+
+  All scenarios in this feature require a live tmux installation and cannot
+  be exercised in a unit-test environment.
+
+  @wip
+  Scenario: Bare invocation creates a tmux session and opens a two-pane layout
+    Given the user is not inside a tmux session
+    When the user runs "opencode-workspace"
+    Then a tmux session named "opencode-workspace" is created
+    And the left pane shows a welcome message with available commands
+    And the right pane starts opencode with OPENCODE_CONFIG=lib/opencode.json.template
+
+  @wip
+  Scenario: "agent" subcommand is equivalent to bare invocation
+    Given the user is not inside a tmux session
+    When the user runs "opencode-workspace agent"
+    Then the result is identical to running "opencode-workspace" with no arguments
+
+  @wip
+  Scenario: "agent" auto-installs opencode if the binary is missing
+    Given "opencode" is not installed on the system
+    When the user runs "opencode-workspace agent"
+    Then opencode is installed before the tmux layout is created
+
+  @wip
+  Scenario: "term" splits a plain terminal pane into the current session
+    Given the user is inside a tmux session
+    When the user runs "opencode-workspace term"
+    Then a new pane is added to the session running an interactive shell
+    And no opencode process is started in that pane
+
+  @wip
+  Scenario: Stacking a second agent inside an ow-session splits vertically
+    Given the user is inside a tmux window named "ow-session"
+    When the user runs "opencode-workspace agent" a second time
+    Then a new opencode pane is split vertically below the existing right-column pane
+
+  @wip
+  Scenario: Subsequent windows outside ow-session are named ow-session-2, ow-session-3, …
+    Given the user is inside tmux but not in an ow-session window
+    And an "ow-session" window already exists
+    When the user runs "opencode-workspace agent"
+    Then a new window named "ow-session-2" is created
+
+  @wip
+  Scenario: MCP environment secrets are injected when launching opencode
+    Given ~/.local/share/opencode/mcp.env contains "GITHUB_TOKEN=ghp_test"
+    When the user runs "opencode-workspace agent"
+    Then opencode is started with GITHUB_TOKEN exported in its environment
diff --git a/unit-tests/step-definitions/mcp-env.steps.js b/unit-tests/step-definitions/mcp-env.steps.js
new file mode 100644
index 0000000..e06239e
--- /dev/null
+++ b/unit-tests/step-definitions/mcp-env.steps.js
@@ -0,0 +1,140 @@
+'use strict';
+
+const { Given, When, Then } = require('@cucumber/cucumber');
+const assert = require('assert/strict');
+const fs     = require('fs');
+const path   = require('path');
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+/** Location of mcp.env beneath `home`, the scenario's isolated HOME directory. */
+function mcpEnvPath(home) {
+  return path.join(path.join(home, '.local', 'share', 'opencode'), 'mcp.env');
+}
+
+/** Parse mcp.env content (KEY=value per line; LF or CRLF endings) into a plain object. */
+function parseEnvFile(content) {
+  const entries = {};
+  // Accept CRLF as well as LF so a trailing "\r" never leaks into a value.
+  for (const line of content.split(/\r?\n/)) {
+    const eqIdx = line.indexOf('=');
+    // eqIdx > 0 skips blank lines, lines without "=", and lines with an empty key.
+    if (eqIdx > 0) entries[line.slice(0, eqIdx)] = line.slice(eqIdx + 1);
+  }
+  return entries;
+}
+
+/** Serialize `entries` as KEY=value lines into mcp.env, creating parent directories first. */
+function writeEnvEntries(home, entries) {
+  const target = mcpEnvPath(home);
+  fs.mkdirSync(path.dirname(target), { recursive: true });
+  const lines = Object.keys(entries).map((name) => `${name}=${entries[name]}`);
+  fs.writeFileSync(target, `${lines.join('\n')}\n`, 'utf8');
+}
+
+// ─── Given ────────────────────────────────────────────────────────────────────
+
+Given('~\\/.local\\/share\\/opencode\\/ does not exist', function () {
+  // Intentionally empty. NOTE(review): relies on the Before hook (defined outside this
+  // file) providing a fresh temp HOME with no .local/share/opencode — confirm in world.js.
+});
+
+Given('~\\/.local\\/share\\/opencode\\/mcp.env contains:', function (docString) {
+  const target = mcpEnvPath(this.tmpHome); // doc string is stored trimmed, plus one final newline
+  fs.mkdirSync(path.dirname(target), { recursive: true });
+  fs.writeFileSync(target, `${docString.trim()}\n`, 'utf8');
+});
+
+Given('~\\/.local\\/share\\/opencode\\/mcp.env already contains {string}', function (entry) {
+  // Split on the first "=" only, so the value itself may contain "=".
+  const splitAt = entry.indexOf('=');
+  const seeded  = { [entry.slice(0, splitAt)]: entry.slice(splitAt + 1) };
+  writeEnvEntries(this.tmpHome, seeded);
+});
+
+// ─── When ─────────────────────────────────────────────────────────────────────
+
+When('the mcp.env file is parsed', function () {
+  // Read the scenario's mcp.env and keep the parsed map for later Then steps.
+  const raw = fs.readFileSync(mcpEnvPath(this.tmpHome), 'utf8');
+  this._parsedEnv = parseEnvFile(raw);
+});
+
+When('{string} is added to mcp.env', function (entry) {
+  const splitAt = entry.indexOf('=');
+  const target  = mcpEnvPath(this.tmpHome);
+
+  // Merge into whatever is already on disk so earlier keys survive the write.
+  const merged = fs.existsSync(target)
+    ? parseEnvFile(fs.readFileSync(target, 'utf8'))
+    : {};
+  merged[entry.slice(0, splitAt)] = entry.slice(splitAt + 1);
+  writeEnvEntries(this.tmpHome, merged);
+
+  // Re-read from disk so later assertions see the persisted state.
+  this._parsedEnv = parseEnvFile(fs.readFileSync(target, 'utf8'));
+});
+
+When('{string} is written to mcp.env', function (entry) {
+  const splitAt = entry.indexOf('=');
+  const target  = mcpEnvPath(this.tmpHome);
+
+  // Upsert: an existing key is overwritten in the parsed map, never appended.
+  const merged = fs.existsSync(target)
+    ? parseEnvFile(fs.readFileSync(target, 'utf8'))
+    : {};
+  merged[entry.slice(0, splitAt)] = entry.slice(splitAt + 1);
+  writeEnvEntries(this.tmpHome, merged);
+
+  // Parse what actually landed on disk for the following Then steps.
+  this._parsedEnv = parseEnvFile(fs.readFileSync(target, 'utf8'));
+});
+
+When('the mcp.env file is written', function () {
+  const placeholder = { TEST_KEY: 'test_value' }; // arbitrary entry; only the directory-creation path matters
+  writeEnvEntries(this.tmpHome, placeholder);
+});
+
+// ─── Then ─────────────────────────────────────────────────────────────────────
+
+Then('{word} resolves to {string}', function (key, expectedValue) {
+  const parsed = this._parsedEnv;
+
+  // An earlier When step must have populated the parsed map.
+  const missing = 'No parsed env available — did you call "When the mcp.env file is parsed"?';
+  assert.ok(parsed, missing);
+
+  const actual  = parsed[key];
+  const failure = `Expected ${key}="${expectedValue}", got ${key}="${actual}"`;
+  assert.equal(actual, expectedValue, failure);
+});
+
+Then('both {word} and {word} are present in mcp.env', function (key1, key2) {
+  const hasOwn = Object.prototype.hasOwnProperty;
+  // Check the keys in step order so the first missing one is the one reported.
+  for (const name of [key1, key2]) {
+    assert.ok(
+      hasOwn.call(this._parsedEnv, name),
+      `Expected "${name}" to be present in mcp.env`,
+    );
+  }
+});
+
+Then('there is only one {word} entry in mcp.env', function (key) {
+  // Inspect the raw file: a parsed object would hide duplicate lines.
+  const prefix = `${key}=`;
+  const hits   = fs
+    .readFileSync(mcpEnvPath(this.tmpHome), 'utf8')
+    .split('\n')
+    .filter((row) => row.startsWith(prefix));
+  const failure = `Expected exactly 1 line starting with "${key}=", found ${hits.length}: ${JSON.stringify(hits)}`;
+  assert.equal(hits.length, 1, failure);
+});
+
+Then('the directory ~\\/.local\\/share\\/opencode\\/ is created automatically', function () {
+  const segments    = ['.local', 'share', 'opencode'];
+  const expectedDir = path.join(this.tmpHome, ...segments);
+  const failure     = `Expected directory ${expectedDir} to exist after writing mcp.env`;
+
+  assert.ok(fs.existsSync(expectedDir), failure);
+});
diff --git a/unit-tests/step-definitions/mcp-servers.steps.js b/unit-tests/step-definitions/mcp-servers.steps.js
new file mode 100644
index 0000000..eef0cb8
--- /dev/null
+++ b/unit-tests/step-definitions/mcp-servers.steps.js
@@ -0,0 +1,75 @@
+'use strict';
+
+const { When, Then } = require('@cucumber/cucumber');
+const assert = require('assert/strict');
+const fs     = require('fs');
+const path   = require('path');
+
+const TEMPLATE_PATH = path.resolve(__dirname, '../../lib/opencode.json.template');
+
+// ─── When ─────────────────────────────────────────────────────────────────────
+
+When('lib\\/opencode.json.template is read', function () {
+  const parsed = JSON.parse(fs.readFileSync(TEMPLATE_PATH, 'utf8'));
+  this._template = parsed;
+  this._servers = parsed.mcp || {}; // a template without an "mcp" key yields no servers
+  this._currentServer = null; // stays null until "a server named …" selects one
+});
+
+// ─── Then ─────────────────────────────────────────────────────────────────────
+
+Then('a server named {string} is defined', function (name) {
+  const servers = this._servers;
+  const defined = Object.prototype.hasOwnProperty.call(servers, name);
+  const known   = Object.keys(servers).join(', ');
+
+  assert.ok(defined, `Expected server "${name}" in template. Found: [${known}]`);
+  // Remember the match so the follow-up "its …" steps know which server to inspect.
+  this._currentServer = servers[name];
+});
+
+Then('its type is {string}', function (expectedType) {
+  // `_currentServer` is the entry selected by the preceding
+  // "a server named …" step in the same scenario.
+  const { type: actualType } = this._currentServer;
+  const failure = `Expected type="${expectedType}", got "${actualType}"`;
+  assert.equal(actualType, expectedType, failure);
+});
+
+Then('its command starts with {string}', function (prefix) {
+  const { command } = this._currentServer;
+
+  // Fail with a readable message before indexing into a non-array.
+  assert.ok(Array.isArray(command), `Expected "command" to be an array, got ${typeof command}`);
+
+  const [executable] = command;
+  assert.equal(executable, prefix, `Expected command[0]="${prefix}", got "${executable}"`);
+});
+
+Then('its command sequence is {string}', function (commaSeparated) {
+  // The Gherkin literal is split on "," without trimming, then compared element-wise.
+  const wanted      = commaSeparated.split(',');
+  const { command } = this._currentServer;
+
+  assert.ok(Array.isArray(command), `Expected "command" to be an array, got ${typeof command}`);
+
+  const failure = `Expected command ${JSON.stringify(wanted)}, got ${JSON.stringify(command)}`;
+  assert.deepEqual(command, wanted, failure);
+});
+
+Then('its url is {string}', function (expectedUrl) {
+  // Exact string comparison against the selected server's "url" field
+  // (no normalisation of trailing slashes or case).
+  const { url: actualUrl } = this._currentServer;
+  const failure = `Expected url="${expectedUrl}", got "${actualUrl}"`;
+  assert.equal(actualUrl, expectedUrl, failure);
+});
+
+Then('its environment references {string}', function (envRef) {
+  const { environment } = this._currentServer;
+  assert.ok(environment, `Expected server to have an "environment" field`);
+
+  // Search the serialized form so the reference is found under any key or nesting.
+  const serialized = JSON.stringify(environment);
+  const mentioned  = serialized.includes(envRef);
+  assert.ok(mentioned, `Expected environment to contain "${envRef}". Got: ${serialized}`);
+});
diff --git a/unit-tests/step-definitions/prerequisites.steps.js b/unit-tests/step-definitions/prerequisites.steps.js
new file mode 100644
index 0000000..c7cc078
--- /dev/null
+++ b/unit-tests/step-definitions/prerequisites.steps.js
@@ -0,0 +1,60 @@
+'use strict';
+
+const { Given, When, Then } = require('@cucumber/cucumber');
+const assert = require('assert/strict');
+const fs     = require('fs');
+const path   = require('path');
+
+const PKG_PATH = path.resolve(__dirname, '../../package.json');
+
+// ─── Given ────────────────────────────────────────────────────────────────────
+
+Given('the current Node.js version is {int} or higher', function (minVersion) {
+  // Drop the first character of process.version (the "v"); parseInt then stops at the first ".".
+  const runningMajor = parseInt(process.version.slice(1), 10);
+  const requirement  = `Test environment requires Node.js >= ${minVersion}, but found ${process.version}. `;
+  const advice       = 'Please upgrade Node.js before running these tests.';
+
+  assert.ok(runningMajor >= minVersion, requirement + advice);
+});
+
+// ─── When ─────────────────────────────────────────────────────────────────────
+
+When('any opencode-workspace command is run', function () {
+  // Deliberate no-op: nothing is executed here. NOTE(review): presumes a preceding
+  // Given step has already asserted the Node version — verify per scenario.
+});
+
+When('package.json is read', function () {
+  this._pkg = JSON.parse(fs.readFileSync(PKG_PATH, 'utf8'));
+});
+
+// ─── Then ─────────────────────────────────────────────────────────────────────
+
+Then('the command does not exit with a {string} error', function (fragment) {
+  // NOTE(review): assumes the world initialises thrownError to null (world.js is
+  // outside this file) — an undefined value would fail the strict comparison.
+  const captured = this.thrownError;
+  const failure  = `Expected no error containing "${fragment}", but got: ${captured?.message}`;
+
+  // Any captured error fails the step; `fragment` only appears in the message.
+  assert.equal(captured, null, failure);
+});
+
+Then('the {string} field is {string}', function (keyPath, expectedValue) {
+  assert.ok(this._pkg, 'package.json was not read — call "When package.json is read" first');
+
+  // Descend one dotted segment at a time, failing on the first missing key.
+  const resolved = keyPath.split('.').reduce((node, segment) => {
+    const found = node != null && Object.prototype.hasOwnProperty.call(node, segment);
+    assert.ok(found, `Key "${segment}" not found in ${JSON.stringify(node)}`);
+    return node[segment];
+  }, this._pkg);
+
+  // Compare string forms so non-string JSON values can match the Gherkin literal.
+  assert.equal(
+    String(resolved),
+    expectedValue,
+    `Expected "${keyPath}" to be "${expectedValue}", got "${resolved}"`,
+  );
+});