diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ad72611..3775dc2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,37 +1,149 @@ name: CI on: + workflow_dispatch: push: - branches: [ master ] + branches: [ master, main ] pull_request: +permissions: + contents: read + jobs: build-and-test: runs-on: ubuntu-latest + timeout-minutes: 15 defaults: run: working-directory: universal-refiner steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Setup Node 22 - uses: actions/setup-node@v4 + uses: actions/setup-node@v5 with: node-version: '22' - name: Install dependencies - run: npm install --no-fund + run: npm ci --no-fund - name: Rebuild native modules run: npm rebuild better-sqlite3 - - name: Generate version file - run: node scripts/sync-version.mjs - - - name: Type check - run: npx tsc --noEmit + - name: Build + run: npm run build - name: Run tests + run: npm run test:coverage + + acceptance: + runs-on: ubuntu-latest + timeout-minutes: 15 + defaults: + run: + working-directory: universal-refiner + strategy: + fail-fast: false + matrix: + model-order: + - primary + - reversed + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-node@v5 + with: + node-version: '22' + cache: npm + cache-dependency-path: universal-refiner/package-lock.json + - run: npm ci --no-fund + - run: npm rebuild better-sqlite3 + - run: npm run build + - name: Run all-tool and provider acceptance + run: npm run test:acceptance + - name: Run fake-model semantic acceptance + env: + PROMPT_REFINER_PRIMARY_MODEL: ${{ matrix.model-order == 'primary' && 'gemma3:12b' || 'gemma3:1b' }} + PROMPT_REFINER_FALLBACK_MODEL: ${{ matrix.model-order == 'primary' && 'gemma3:1b' || 'gemma3:12b' }} + run: npm run acceptance:semantic + + stress: + runs-on: ubuntu-latest + timeout-minutes: 15 + defaults: + run: + working-directory: universal-refiner + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-node@v5 + with: + node-version: 
'22' + cache: npm + cache-dependency-path: universal-refiner/package-lock.json + - run: npm ci --no-fund + - run: npm rebuild better-sqlite3 + - run: npm run build + - name: Run restart and in-process concurrency tests + run: npm run test:stress + - name: Run multi-process EventStore stress + env: + PROMPT_REFINER_STRESS_WORKERS: '4' + PROMPT_REFINER_STRESS_WRITES: '100' + run: npm run stress:event-store + + windows: + runs-on: windows-latest + timeout-minutes: 20 + defaults: + run: + working-directory: universal-refiner + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-node@v5 + with: + node-version: '22' + cache: npm + cache-dependency-path: universal-refiner/package-lock.json + - run: npm ci --no-fund + - run: npm rebuild better-sqlite3 + - run: npm run build + - run: npm run test:coverage + - run: npm run test:acceptance + - run: npm run test:stress + + supply-chain: + runs-on: ubuntu-latest + timeout-minutes: 15 + defaults: + run: + working-directory: universal-refiner + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-node@v5 + with: + node-version: '22' + cache: npm + cache-dependency-path: universal-refiner/package-lock.json + - run: npm ci --no-fund + - run: npm run security:audit + - run: npm run security:secrets + - run: npm run build + - run: npm run package:check + + release-gate: + if: always() + needs: [build-and-test, acceptance, stress, windows, supply-chain] + runs-on: ubuntu-latest + steps: + - name: Require every enterprise gate + env: + BUILD: ${{ needs['build-and-test'].result }} + ACCEPTANCE: ${{ needs.acceptance.result }} + STRESS: ${{ needs.stress.result }} + WINDOWS: ${{ needs.windows.result }} + SUPPLY_CHAIN: ${{ needs['supply-chain'].result }} run: | - chmod +x node_modules/.bin/vitest 2>/dev/null || true - node_modules/.bin/vitest run --exclude '**/correlation.test.ts' + test "$BUILD" = "success" + test "$ACCEPTANCE" = "success" + test "$STRESS" = "success" + test "$WINDOWS" = "success" + test "$SUPPLY_CHAIN" = 
"success" diff --git a/.gitignore b/.gitignore index 415fc2e..8cafa1a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Dependencies and generated outputs **/node_modules/ **/dist/ +**/coverage/ *.tgz # Runtime state and local databases @@ -23,4 +24,4 @@ .vscode/ .idea/ .DS_Store -Thumbs.db \ No newline at end of file +Thumbs.db diff --git a/.planning/phases/01-fs-watcher/01-01-PLAN.md b/.planning/phases/01-fs-watcher/01-01-PLAN.md new file mode 100644 index 0000000..9a4c996 --- /dev/null +++ b/.planning/phases/01-fs-watcher/01-01-PLAN.md @@ -0,0 +1,159 @@ +--- +phase: 01-fs-watcher +plan: 01 +type: execute +wave: 1 +depends_on: [] +files_modified: + - universal-refiner/src/watcher/file-watcher.ts + - universal-refiner/src/watcher/index.ts + - universal-refiner/tests/file-watcher.test.ts + - universal-refiner/src/index.ts +autonomous: true +requirements: + - AUTO-01 + - AUTO-02 +must_haves: + truths: + - "FileWatcher emits a 'change' event when a watched .ts file is written" + - "FileWatcher emits an 'add' event when a new .ts file appears" + - "FileWatcher does not emit for files inside node_modules" + - "FileWatcher does not emit for *.log or *.tmp files" + - "FileWatcher.stop() prevents any further events" + - "Detected changes are logged via RuntimeLogger on server startup" + artifacts: + - path: "universal-refiner/src/watcher/file-watcher.ts" + provides: "FileWatcher class with start/stop/on('change') interface" + exports: ["FileWatcher", "FileChangeEvent", "FileEventKind"] + - path: "universal-refiner/src/watcher/index.ts" + provides: "Re-exports for the watcher module" + - path: "universal-refiner/tests/file-watcher.test.ts" + provides: "5 Vitest tests covering AUTO-01 and AUTO-02" + key_links: + - from: "universal-refiner/src/index.ts" + to: "universal-refiner/src/watcher/index.ts" + via: "import FileWatcher, call start() at server init" +--- + + +Implement a real-time file system watcher (Phase 1) for the universal-refiner MCP server. 
+ +Purpose: Satisfy AUTO-01 (detect meaningful file save events) and AUTO-02 (filter noise paths) as the foundation for the Background Autonomy milestone. + +Output: +- src/watcher/file-watcher.ts — FileWatcher class wrapping chokidar v5 +- src/watcher/index.ts — re-exports +- tests/file-watcher.test.ts — 5 passing Vitest tests +- src/index.ts updated to start watcher on server init + + + +chokidar v5.0.0 is already in dependencies. No new packages needed. +Uses RuntimeLogger (stderr, JSON-RPC safe) for all output. + + + +@.planning/ROADMAP.md +@.planning/REQUIREMENTS.md +@universal-refiner/src/core/logger.ts +@universal-refiner/src/index.ts + + + + + + Task 1: Create FileWatcher module (AUTO-01, AUTO-02) + + universal-refiner/src/watcher/file-watcher.ts + universal-refiner/src/watcher/index.ts + + + FileWatcher extends EventEmitter. Constructor takes rootPath: string. + start(): watches rootPath via chokidar.watch() with ignored: CHOKIDAR_IGNORE patterns. + stop(): closes the chokidar watcher, nulls inner reference. + emitChange() applies three-layer filter before emitting: + 1. Path segment check: reject paths containing /node_modules/, /dist/, /.git/, /coverage/ + 2. Extension check: only emit for .ts, .js, .md, .txt, .prompt + 3. Suffix noise check: reject .log, .tmp + Emits: { path: string, event: 'add'|'change'|'unlink', timestamp: Date } + Logs start/stop and per-event debug via RuntimeLogger. + index.ts re-exports FileWatcher, FileChangeEvent, FileEventKind. + + npm run build -- succeeds with zero type errors + Both files exist, TypeScript compiles clean, exports are correct. 
+ + + + Task 2: Write Vitest tests (AUTO-01, AUTO-02) + universal-refiner/tests/file-watcher.test.ts + + - Test: write to existing .ts file in tmp dir -> 'change' event emitted with correct path and Date timestamp + - Test: write new .ts file -> 'add' event emitted + - Test: write .ts file inside node_modules subdirectory -> no event emitted (AUTO-02) + - Test: write .log file -> no event emitted (AUTO-02) + - Test: stop() called -> subsequent file write produces no events + + + Use vitest describe/it/expect. beforeEach creates a unique tmp dir via fs.mkdtempSync. + afterEach calls watcher.stop() and fs.rmSync. + Use a polling waitFor() helper with 6000ms timeout for positive assertions. + Allow 1500ms settle time after watcher.start() before writing files (Windows FS listener warm-up). + Set per-test timeout to 15_000. + + npm test -- shows 5/5 file-watcher tests passing + All 48 total tests pass including the 5 new watcher tests. + + + + Task 3: Wire FileWatcher into server entry point + universal-refiner/src/index.ts + + Import FileWatcher from "./watcher/index.js". + After CommandCenterDashboard.start() and before runBackgroundTasks(): + const fileWatcher = new FileWatcher(rootPath); + fileWatcher.on('change', (evt) => { + RuntimeLogger.info(`[FS] ${evt.event}: ${evt.path}`); + CommandCenterDashboard.log(`[FS] ${evt.event}: ${path.relative(rootPath, evt.path)}`); + }); + fileWatcher.start(); + BackgroundAutonomyService is left intact — FileWatcher is additive. + + npm run build succeeds; server starts without error + File watcher starts automatically when the MCP server initialises. 
+ + + + + +## Trust Boundaries + +| Boundary | Description | +|----------|-------------| +| FS path → emitChange | File paths from chokidar are OS-provided and not sanitised before logging | + +## STRIDE Threat Register + +| Threat ID | Category | Component | Disposition | Mitigation Plan | +|-----------|----------|-----------|-------------|-----------------| +| T-01-01 | Information Disclosure | RuntimeLogger path output | accept | Logs go to stderr/runtime.log, not stdout (JSON-RPC channel). No PII in paths. | +| T-01-02 | Denial of Service | Rapid file changes flood emitChange | mitigate | awaitWriteFinish debounce (100ms stability) prevents event storms. | +| T-01-03 | Tampering | Malicious path with path-traversal characters | accept | FileWatcher is read-only (no FS writes). Path is logged, not executed. | + + + +- npm run build -- zero TypeScript errors +- npm test -- 48/48 tests pass (5 new FileWatcher tests) +- Server starts and logs "[FileWatcher] Starting file system watcher" on init + + + +1. Writing a .ts file in the watched directory emits a change event within 3 seconds. +2. Files under node_modules, dist, .git, coverage are never emitted. +3. .log and .tmp files are never emitted. +4. stop() terminates all event delivery immediately. +5. Build and full test suite remain green. + + + +Create .planning/phases/01-fs-watcher/01-01-SUMMARY.md after execution. + diff --git a/README.md b/README.md index daacfee..989de9f 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,30 @@ cd Promptimprover Both installers perform a deterministic dependency install, run the full test suite, build the package, install it globally, and verify the `gemini-prompt-refiner` command. Add that command to your MCP client configuration. See the [Setup Guide](https://github.com/Coding-Autopilot-System/Promptimprover/wiki/Setup-Guide) for full configuration instructions. 
+For optional automatic pre-prompt linting and post-execution recording, see the [cross-CLI automation guide](./docs/cross-cli-automation.md). Claude Code and Gemini CLI expose the required lifecycle hooks. Codex currently requires MCP-first instructions or explicit helper invocation because its hook lifecycle does not transparently intercept each prompt. + +## Local Semantic Model + +PromptImprover uses a local OpenAI-compatible endpoint before optional MCP sampling. The safe defaults target `http://localhost:9000/v1`, use `gemma3:12b` first, and fall back to `gemma3:1b`. If neither local model nor MCP sampling is available, rule-based refinement continues without semantic output. + +Override the defaults per repository with `.gemini-refiner.json`: + +```json +{ + "semantic": { + "localEnabled": true, + "mcpSamplingEnabled": true, + "baseUrl": "http://localhost:9000/v1", + "models": ["gemma3:12b", "gemma3:1b"], + "timeoutMs": 120000, + "temperature": 0.2, + "allowNonLoopback": false + } +} +``` + +Non-loopback model endpoints are rejected unless `allowNonLoopback` is explicitly enabled. Generated lessons and templates remain pending until reviewed through the MCP learning-review tools. + ## License MIT - see [LICENSE](LICENSE) diff --git a/docs/cross-cli-automation.md b/docs/cross-cli-automation.md new file mode 100644 index 0000000..29ed0d5 --- /dev/null +++ b/docs/cross-cli-automation.md @@ -0,0 +1,32 @@ +# Cross-CLI Automation + +PromptImprover ships fail-open pre-prompt and post-execution helpers: + +- `promptimprover-hook-pre` makes one latency-safe rule-based `lint_prompt` call, creates a trackable prompt ID, and injects advisory context. Interactive MCP linting continues to use semantic providers by default. +- `promptimprover-hook-post` records privacy-safe completion metadata with `record_agent_output`. + +Both commands read hook JSON from stdin, write JSON only to stdout, report failures to stderr, and always allow the client to continue. 
They start the same built MCP server used by `gemini-prompt-refiner`. Set `PROMPTIMPROVER_SERVER_PATH` only when testing a nonstandard build. + +The helpers store only prompt ID, client name, and creation time in the OS temporary directory. They do not persist prompt or response bodies. Completion records contain output length rather than response text. + +## Claude Code + +Claude Code supports `UserPromptSubmit` and `Stop`, so both phases can run transparently. Merge [`claude.settings.fragment.json`](../universal-refiner/hooks/config/claude.settings.fragment.json) into the desired user or project settings file after installing the package globally. + +## Gemini CLI + +Gemini CLI supports `BeforeAgent` and `AfterAgent`, so both phases can run transparently. Merge [`gemini.settings.fragment.json`](../universal-refiner/hooks/config/gemini.settings.fragment.json) into the desired user or project settings file after installing the package globally. + +## Codex CLI + +Codex CLI `0.138.0` has a stable hook system, but its exposed lifecycle currently does not provide transparent per-prompt pre/post hooks. Do not claim that `SessionStart` performs prompt interception. + +Keep PromptImprover registered as an MCP server and use repo instructions that require `lint_prompt` and `record_agent_output`. External automation can pipe normalized JSON into the same helpers; see [`codex.config.fragment.toml`](../universal-refiner/hooks/config/codex.config.fragment.toml). + +## Failure And Privacy Behavior + +- MCP startup, timeout, parsing, and tool errors fail open. +- Default timeout is 15 seconds; set `PROMPTIMPROVER_HOOK_TIMEOUT_MS` to a positive millisecond value to change it. +- Hook stdout remains strict JSON. +- No credentials or environment values are read or logged. +- Prompt and response text are not written to hook state or completion summaries. 
diff --git a/docs/enterprise-release-gates.md b/docs/enterprise-release-gates.md new file mode 100644 index 0000000..30c67b6 --- /dev/null +++ b/docs/enterprise-release-gates.md @@ -0,0 +1,55 @@ +# Enterprise Release Gates + +Passing unit tests alone does not prove that PromptImprover is operationally ready. A release is eligible only when every required gate below passes. + +## Quality Target + +- All owned deterministic production logic reaches 100% statements, branches, functions, and lines. +- Coverage exclusions are limited to generated artifacts. Bootstrap and integration behavior must be validated through acceptance or end-to-end tests. +- The coverage threshold is ratcheted upward as gaps close. It must never decrease without an approved, documented exception. +- Every reproduced defect receives a regression test at the owning boundary. + +## Required Gates + +1. `npm ci` succeeds from a clean workspace. +2. TypeScript build succeeds with strict type checking. +3. Unit and integration tests pass with the enforced coverage threshold. +4. Every advertised MCP tool schema and dispatcher path passes acceptance tests. +5. Local semantic-provider primary, fallback, malformed-response, timeout, and outage paths pass. +6. SQLite restart, migration, backup/restore, contention, and multi-process tests pass. +7. Claude and Gemini hook pre/post flows pass; Codex MCP-first flow passes. +8. Dashboard API security, review mutation, health telemetry, and browser smoke flows pass. +9. Dependency audit reports no known production vulnerabilities. +10. Package dry-run, global installation, runtime startup, and post-restart smoke tests pass. +11. Secret scanning finds no committed credentials. +12. Linux and Windows CI jobs pass before merge. 
+ +## Current Coverage Baseline + +The first measured baseline on June 14, 2026 exposed substantial untested production behavior: + +| Metric | Baseline | Target | +|---|---:|---:| +| Statements | 66.33% | 100% | +| Branches | 61.95% | 100% | +| Functions | 79.92% | 100% | +| Lines | 68.09% | 100% | + +Initial high-risk gaps include MCP dispatcher behavior, background autonomy, template generation, prompt optimization, configuration failure paths, and operational dashboard branches. + +The enforced ratchet is 100% statements, branches, functions, and lines. It cannot be lowered without an approved exception. + +## Operator Recovery + +Build before invoking the recovery commands. Both operations run SQLite integrity checks and fail closed: + +```powershell +npm.cmd run db:backup -- C:\backups\promptimprover-events.db +npm.cmd run db:restore -- C:\backups\promptimprover-events.db +``` + +Stop the PromptImprover runtime before restoring a backup. + +## Meaning Of Green + +A green release pipeline means all declared gates passed in the tested environments. It substantially increases confidence but does not claim that unknown failures are impossible. Production incidents must become new automated regression gates. 
diff --git a/universal-refiner/hooks/config/claude.settings.fragment.json b/universal-refiner/hooks/config/claude.settings.fragment.json new file mode 100644 index 0000000..90028cf --- /dev/null +++ b/universal-refiner/hooks/config/claude.settings.fragment.json @@ -0,0 +1,26 @@ +{ + "hooks": { + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "promptimprover-hook-pre", + "timeout": 20 + } + ] + } + ], + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "promptimprover-hook-post", + "timeout": 20 + } + ] + } + ] + } +} diff --git a/universal-refiner/hooks/config/codex.config.fragment.toml b/universal-refiner/hooks/config/codex.config.fragment.toml new file mode 100644 index 0000000..7d9c936 --- /dev/null +++ b/universal-refiner/hooks/config/codex.config.fragment.toml @@ -0,0 +1,7 @@ +# Codex CLI 0.138.0 does not expose per-prompt pre/post lifecycle hooks. +# Keep PromptImprover registered as an MCP server and invoke lint_prompt and +# record_agent_output through repo instructions or explicit tool calls. 
import { createHash } from "crypto";
import * as fs from "fs";
import * as os from "os";
import * as path from "path";

/** Raw hook payload read from stdin. Field names differ per client, so every read is type-checked. */
export type HookInput = Record<string, unknown>;

/** Minimal per-turn state persisted between the pre-prompt and post-execution hooks. */
export interface HookState {
  promptId: string;
  client: string;
  createdAt: string;
}

/** Calls an MCP tool by name and resolves with the text content of its result. */
export interface McpToolCaller {
  (name: string, args: Record<string, unknown>): Promise<string>;
}

/** Persisted state older than this is treated as stale and deleted on load. */
const STATE_MAX_AGE_MS = 24 * 60 * 60 * 1000;

/** Upper bound on the number of lint gaps echoed back into the advisory context. */
const MAX_REPORTED_GAPS = 5;

/**
 * Parses the hook payload received on stdin.
 *
 * A leading BOM and surrounding whitespace are ignored; empty input yields `{}`.
 * Valid JSON that is not a plain object (`null`, arrays, primitives) also yields `{}`
 * so that later field lookups cannot throw on a non-object payload.
 *
 * @throws SyntaxError when the input is non-empty and not valid JSON.
 */
export function parseHookInput(raw: string): HookInput {
  const normalized = raw.replace(/^\uFEFF/, "").trim();
  if (!normalized) return {};

  const parsed: unknown = JSON.parse(normalized);
  return isRecord(parsed) ? parsed : {};
}

/**
 * Identifies the calling client: an explicit `client` field wins (lower-cased),
 * otherwise the client is inferred from `hook_event_name`, falling back to "generic".
 */
export function detectClient(input: HookInput): string {
  const explicit = stringField(input, "client");
  if (explicit) return explicit.toLowerCase();

  const event = stringField(input, "hook_event_name");
  if (event === "UserPromptSubmit" || event === "Stop") return "claude";
  if (event === "BeforeAgent" || event === "AfterAgent") return "gemini";
  return "generic";
}

/** Returns the first non-empty prompt field among `prompt`, `user_prompt` and `input`. */
export function extractPrompt(input: HookInput): string | undefined {
  return firstString(input, ["prompt", "user_prompt", "input"]);
}

/**
 * Extracts a prompt ID from tool output: a `[PROMPT_ID: ...]` tag takes precedence,
 * otherwise a string `promptId` property of a JSON body. Returns `undefined` when neither exists.
 */
export function extractPromptId(text: string): string | undefined {
  const tagged = text.match(/\[PROMPT_ID:\s*([^\]\s]+)\]/)?.[1];
  if (tagged) return tagged;
  try {
    const parsed = JSON.parse(text) as { promptId?: unknown };
    return typeof parsed.promptId === "string" ? parsed.promptId : undefined;
  } catch {
    // Not JSON, or JSON without property access (e.g. `null`): there is no ID to report.
    return undefined;
  }
}

/** Length of the first non-empty response field, or 0 when the payload carries none. */
export function extractOutputLength(input: HookInput): number {
  return firstString(input, [
    "prompt_response",
    "last_assistant_message",
    "response",
    "output",
  ])?.length ?? 0;
}

/**
 * Builds the advisory text injected into the client from a `lint_prompt` result.
 *
 * Unparseable lint output produces a neutral "continue normally" message. Otherwise up to
 * five gaps are listed; entries that are not objects fall back to a generic message
 * instead of throwing.
 */
export function buildLintContext(lintText: string, promptId?: string): string {
  let gaps: unknown[] = [];
  try {
    const parsed = JSON.parse(lintText) as { gaps?: unknown };
    gaps = Array.isArray(parsed.gaps) ? parsed.gaps : [];
  } catch {
    return promptId
      ? `PromptImprover tracked this turn as ${promptId}. Continue normally.`
      : "PromptImprover linting completed. Continue normally.";
  }

  const lines = gaps.slice(0, MAX_REPORTED_GAPS).map((gap) => {
    // A null or primitive entry must not abort the whole context.
    const fields = isRecord(gap) ? gap : {};
    const message = typeof fields.message === "string" ? fields.message : "Prompt quality gap detected.";
    const action = typeof fields.suggestedAction === "string" ? ` ${fields.suggestedAction}` : "";
    return `- ${message}${action}`;
  });

  const tracking = promptId ? ` Tracking ID: ${promptId}.` : "";
  if (lines.length === 0) return `PromptImprover found no actionable prompt gaps.${tracking}`;
  return `PromptImprover found advisory gaps.${tracking}\n${lines.join("\n")}`;
}

/**
 * Builds the hook response. The decision is always "allow"; when `additionalContext`
 * is provided it is attached under `hookSpecificOutput`, together with the event name if known.
 */
export function allowOutput(input: HookInput, additionalContext?: string): Record<string, unknown> {
  const event = stringField(input, "hook_event_name");
  if (!additionalContext) return { decision: "allow" };

  return {
    decision: "allow",
    hookSpecificOutput: {
      ...(event ? { hookEventName: event } : {}),
      additionalContext,
    },
  };
}

/**
 * Location of the state file for this client/session/working-directory combination.
 * The key is hashed, so neither the session ID nor the path appears in the file name.
 */
export function statePath(input: HookInput): string {
  const key = [
    detectClient(input),
    stringField(input, "session_id") ?? stringField(input, "sessionId") ?? "no-session",
    stringField(input, "cwd") ?? process.cwd(),
  ].join("|");
  const hash = createHash("sha256").update(key).digest("hex");
  return path.join(os.tmpdir(), "promptimprover-hooks", `${hash}.json`);
}

/** Persists the tracking state; both the directory and the file are created owner-only. */
export function saveState(input: HookInput, state: HookState): void {
  const file = statePath(input);
  // The temp directory is shared between users, so do not rely on the default directory mode.
  fs.mkdirSync(path.dirname(file), { recursive: true, mode: 0o700 });
  fs.writeFileSync(file, JSON.stringify(state), { encoding: "utf8", mode: 0o600 });
}

/**
 * Loads the tracking state saved by the pre-prompt hook.
 *
 * Returns `undefined` when the file is missing or unreadable. A file that is malformed,
 * lacks a usable `promptId`, has an unparsable `createdAt`, or is older than
 * `STATE_MAX_AGE_MS` is deleted and also yields `undefined`.
 */
export function loadState(input: HookInput): HookState | undefined {
  const file = statePath(input);
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(file, "utf8"));
    const fields = isRecord(parsed) ? parsed : {};
    const { promptId, client, createdAt } = fields;

    // An unparsable date yields NaN, which would never compare as "too old"; treat it as expired.
    const ageMs = typeof createdAt === "string" ? Date.now() - Date.parse(createdAt) : Number.NaN;
    const usable = typeof promptId === "string" && promptId.length > 0
      && Number.isFinite(ageMs) && ageMs <= STATE_MAX_AGE_MS;

    if (!usable || typeof promptId !== "string" || typeof createdAt !== "string") {
      fs.rmSync(file, { force: true });
      return undefined;
    }
    return {
      promptId,
      client: typeof client === "string" ? client : detectClient(input),
      createdAt,
    };
  } catch {
    // Missing or unreadable state is the normal "nothing tracked" case.
    return undefined;
  }
}

/** Removes the state file for this session; a missing file is not an error. */
export function clearState(input: HookInput): void {
  fs.rmSync(statePath(input), { force: true });
}

/**
 * Pre-prompt phase: lints the prompt with `semantic: false`, stores the returned
 * prompt ID for the post-execution hook, and returns an allow response carrying the advisory text.
 * Any failure results in a plain allow response.
 */
export async function runPrePrompt(input: HookInput, callTool: McpToolCaller): Promise<Record<string, unknown>> {
  const prompt = extractPrompt(input);
  if (!prompt?.trim()) return allowOutput(input);

  try {
    const lintText = await callTool("lint_prompt", { prompt, semantic: false });
    const promptId = extractPromptId(lintText);
    if (promptId) {
      saveState(input, {
        promptId,
        client: detectClient(input),
        createdAt: new Date().toISOString(),
      });
    }
    return allowOutput(input, buildLintContext(lintText, promptId));
  } catch {
    // Fail open: linting is advisory and must never block the client.
    return allowOutput(input);
  }
}

/**
 * Post-execution phase: records completion metadata (client, event name, output length,
 * status) for the tracked prompt, then clears the stored state. Response text itself is not sent.
 *
 * Errors from `callTool` propagate to the caller, and the state is left in place in that case.
 */
export async function runPostExecution(input: HookInput, callTool: McpToolCaller): Promise<Record<string, unknown>> {
  const state = loadState(input);
  const promptId = stringField(input, "prompt_id") ?? state?.promptId;
  if (!promptId) return allowOutput(input);

  const client = state?.client ?? detectClient(input);
  const outputLength = extractOutputLength(input);
  await callTool("record_agent_output", {
    prompt_id: promptId,
    output_summary: `${client} completed the tracked turn; output_length=${outputLength}.`,
    artifacts_json: JSON.stringify({
      client,
      hook_event: stringField(input, "hook_event_name") ?? "manual",
      output_length: outputLength,
    }),
    status: stringField(input, "status") === "failed" ? "failed" : "completed",
  });
  clearState(input);
  return allowOutput(input);
}

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** First field in `fields` that holds a non-empty string. */
function firstString(input: HookInput, fields: string[]): string | undefined {
  for (const field of fields) {
    const value = stringField(input, field);
    if (value) return value;
  }
  return undefined;
}

/** The field's value when it is a string, otherwise `undefined`. */
function stringField(input: HookInput, field: string): string | undefined {
  const value = input[field];
  return typeof value === "string" ? value : undefined;
}
const text = result.content.find((item) => item.type === "text"); + if (!text || text.type !== "text") throw new Error(`MCP tool ${name} returned no text content.`); + return text.text; + } finally { + await client.close().catch(() => undefined); + } +} + +export function resolveServerPath(): string { + const configured = process.env.PROMPTIMPROVER_SERVER_PATH; + if (configured) return path.resolve(configured); + + const here = path.dirname(fileURLToPath(import.meta.url)); + const candidates = [ + path.resolve(here, "../../src/index.js"), + path.resolve(here, "../../dist/src/index.js"), + ]; + return candidates.find((candidate) => fs.existsSync(candidate)) ?? candidates[0]; +} + +function timeoutMs(): number { + const configured = Number(process.env.PROMPTIMPROVER_HOOK_TIMEOUT_MS); + return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_TIMEOUT_MS; +} diff --git a/universal-refiner/hooks/post-execution.ts b/universal-refiner/hooks/post-execution.ts new file mode 100644 index 0000000..cc8a04e --- /dev/null +++ b/universal-refiner/hooks/post-execution.ts @@ -0,0 +1,17 @@ +#!/usr/bin/env node +import * as fs from "fs"; +import { callMcpTool } from "./lib/mcp-client.js"; +import { allowOutput, HookInput, parseHookInput, runPostExecution } from "./lib/hook-runtime.js"; + +async function main(): Promise { + let input: HookInput = {}; + try { + input = parseHookInput(fs.readFileSync(0, "utf8")); + console.log(JSON.stringify(await runPostExecution(input, callMcpTool))); + } catch (error) { + console.error(`[PromptImprover] Post-execution hook failed open: ${error instanceof Error ? 
error.message : "unknown error"}`); + console.log(JSON.stringify(allowOutput(input))); + } +} + +void main(); diff --git a/universal-refiner/hooks/pre-prompt.ts b/universal-refiner/hooks/pre-prompt.ts new file mode 100644 index 0000000..3520309 --- /dev/null +++ b/universal-refiner/hooks/pre-prompt.ts @@ -0,0 +1,17 @@ +#!/usr/bin/env node +import * as fs from "fs"; +import { callMcpTool } from "./lib/mcp-client.js"; +import { allowOutput, HookInput, parseHookInput, runPrePrompt } from "./lib/hook-runtime.js"; + +async function main(): Promise { + let input: HookInput = {}; + try { + input = parseHookInput(fs.readFileSync(0, "utf8")); + console.log(JSON.stringify(await runPrePrompt(input, callMcpTool))); + } catch (error) { + console.error(`[PromptImprover] Pre-prompt hook failed open: ${error instanceof Error ? error.message : "unknown error"}`); + console.log(JSON.stringify(allowOutput(input))); + } +} + +void main(); diff --git a/universal-refiner/hooks/refine_hook.ts b/universal-refiner/hooks/refine_hook.ts deleted file mode 100644 index 420faf7..0000000 --- a/universal-refiner/hooks/refine_hook.ts +++ /dev/null @@ -1,85 +0,0 @@ -import { Client } from "@modelcontextprotocol/sdk/client/index.js"; -import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; -import { CallToolResultSchema } from "@modelcontextprotocol/sdk/types.js"; -import * as fs from "fs"; -import * as path from "path"; -import { fileURLToPath } from "url"; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); - -async function run() { - const input = JSON.parse(fs.readFileSync(0, "utf-8")); - const prompt = input.prompt; - const answers = input.answers; - - // Connect to the local MCP Server via STDIO using absolute path - const serverPath = "C:/repo/Promptimprover/universal-refiner/dist/src/index.js"; - const transport = new StdioClientTransport({ - command: "node", - args: [serverPath], - }); - - const client = new Client({ 
name: "refine-hook", version: "1.0.0" }, { capabilities: {} }); - await client.connect(transport); - - if (!answers) { - // PASS 1: Lint the prompt - const lintResult = await client.request( - { method: "tools/call", params: { name: "lint_prompt", arguments: { prompt } } }, - CallToolResultSchema - ); - - const firstContent = lintResult.content[0]; - if (firstContent.type !== "text") throw new Error("Expected text content"); - const { gaps } = JSON.parse(firstContent.text); - - if (gaps && gaps.length > 0) { - // Create questions - const questionResult = await client.request( - { method: "tools/call", params: { name: "create_questions", arguments: { gaps } } }, - CallToolResultSchema - ); - const firstQContent = questionResult.content[0]; - if (firstQContent.type !== "text") throw new Error("Expected text content"); - const questions = JSON.parse(firstQContent.text); - - console.log(JSON.stringify({ - decision: "ask_user", - questions: questions - })); - process.exit(0); - } - } else { - // PASS 2: We have answers, finalize the prompt - const finalResult = await client.request( - { - method: "tools/call", - params: { - name: "finalize_prompt", - arguments: { original_prompt: prompt, answers } - } - }, - CallToolResultSchema - ); - - const firstFinalContent = finalResult.content[0]; - if (firstFinalContent.type !== "text") throw new Error("Expected text content"); - const refinedPrompt = firstFinalContent.text; - - console.log(JSON.stringify({ - prompt: refinedPrompt - })); - process.exit(0); - } - - // No refinement needed or error - console.log(JSON.stringify({ decision: "allow" })); - process.exit(0); -} - -run().catch((err) => { - console.error(err); - console.log(JSON.stringify({ decision: "allow" })); // Fail-open - process.exit(0); -}); diff --git a/universal-refiner/package-lock.json b/universal-refiner/package-lock.json index b0c45cb..94ab4c6 100644 --- a/universal-refiner/package-lock.json +++ b/universal-refiner/package-lock.json @@ -7,7 +7,7 @@ "": { 
"name": "gemini-prompt-refiner", "version": "8.0.0", - "license": "ISC", + "license": "MIT", "dependencies": { "@modelcontextprotocol/sdk": "^1.29.0", "better-sqlite3": "^12.8.0", @@ -16,14 +16,80 @@ "zod": "^4.3.6" }, "bin": { - "gemini-prompt-refiner": "dist/src/index.js" + "gemini-prompt-refiner": "dist/src/index.js", + "promptimprover-hook-post": "dist/hooks/post-execution.js", + "promptimprover-hook-pre": "dist/hooks/pre-prompt.js" }, "devDependencies": { "@types/better-sqlite3": "^7.6.13", "@types/node": "^22.19.17", + "@vitest/coverage-v8": "4.1.4", "ts-node": "^10.9.2", "typescript": "^5.9.3", "vitest": "^4.1.4" + }, + "engines": { + "node": ">=22" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.29.7", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.29.7.tgz", + "integrity": "sha512-Pb5ijPrZ89GDH8223L4UP8i6QApWxs04RbPQJTeWDV0/keR2E36MeKnyr6LYmUUvqRRI+Iv87SuF1W6ErINzYw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.29.7", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.29.7.tgz", + "integrity": "sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.29.7", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.7.tgz", + "integrity": "sha512-hnORnjP/1P/zFEndoeX+n+t1RwWRJiJpM/jO7FW32Kn9r5+sJB2JWOdYo4L6k78j15eCwY3Gm/7364B1EMwtNg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.29.7" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/types": { + "version": "7.29.7", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.7.tgz", + 
"integrity": "sha512-4zBIxpPzowiZpusoFkyGVwakdRJUyuH5PxQ/PrqghfdFWWasvnCdPfQXHrenDai+gyLARulZjZowCOj6fjT4pA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.29.7", + "@babel/helper-validator-identifier": "^7.29.7" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@bcoe/v8-coverage": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@bcoe/v8-coverage/-/v8-coverage-1.0.2.tgz", + "integrity": "sha512-6zABk/ECA/QYSCQ1NGiVwwbQerUCZ+TQbp64Q3AgmfNvurHH0j8TtXa1qbShXA6qqkpAj4V5W8pP6mLe1mcMqA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" } }, "node_modules/@cspotcode/source-map-support": { @@ -46,6 +112,7 @@ "dev": true, "license": "MIT", "optional": true, + "peer": true, "dependencies": { "@emnapi/wasi-threads": "1.2.2", "tslib": "^2.4.0" @@ -58,6 +125,19 @@ "dev": true, "license": "MIT", "optional": true, + "peer": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@emnapi/runtime": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.11.1.tgz", + "integrity": "sha512-vgj7R3y3Wgx24IQaGPA/R6YFXLHVMOZ0uVEyIQPaWs+rd1AzfEMXlAC22FYwO1XkKR6NPsq7mUandH8oIRdZFw==", + "dev": true, + "license": "MIT", + "optional": true, + "peer": true, "dependencies": { "tslib": "^2.4.0" } @@ -556,11 +636,41 @@ "integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~6.21.0" } }, + "node_modules/@vitest/coverage-v8": { + "version": "4.1.4", + "resolved": "https://registry.npmjs.org/@vitest/coverage-v8/-/coverage-v8-4.1.4.tgz", + "integrity": "sha512-x7FptB5oDruxNPDNY2+S8tCh0pcq7ymCe1gTHcsp733jYjrJl8V1gMUlVysuCD9Kz46Xz9t1akkv08dPcYDs1w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@bcoe/v8-coverage": "^1.0.2", + "@vitest/utils": "4.1.4", + "ast-v8-to-istanbul": "^1.0.0", + 
"istanbul-lib-coverage": "^3.2.2", + "istanbul-lib-report": "^3.0.1", + "istanbul-reports": "^3.2.0", + "magicast": "^0.5.2", + "obug": "^2.1.1", + "std-env": "^4.0.0-rc.1", + "tinyrainbow": "^3.1.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "@vitest/browser": "4.1.4", + "vitest": "4.1.4" + }, + "peerDependenciesMeta": { + "@vitest/browser": { + "optional": true + } + } + }, "node_modules/@vitest/expect": { "version": "4.1.4", "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-4.1.4.tgz", @@ -763,6 +873,29 @@ "node": ">=12" } }, + "node_modules/ast-v8-to-istanbul": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/ast-v8-to-istanbul/-/ast-v8-to-istanbul-1.0.4.tgz", + "integrity": "sha512-0bC0/4bTSrnwdhU3IsZDwEdojvuPrSg59OYZfKsLRtJZ0u8VBx9DebfqqG8bRdCC0I7vjgxmPi41P0lpkhJHtA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.31", + "estree-walker": "^3.0.3", + "js-tokens": "^10.0.0" + } + }, + "node_modules/ast-v8-to-istanbul/node_modules/@jridgewell/trace-mapping": { + "version": "0.3.31", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", + "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -1233,7 +1366,6 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", "license": "MIT", - "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.1", @@ -1466,6 +1598,16 @@ "url": "https://github.com/sponsors/ljharb" } }, + 
"node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/has-symbols": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", @@ -1495,11 +1637,17 @@ "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.25.tgz", "integrity": "sha512-2NFaIyNVgJmBs/ecmtGzlmluTFs5cHEWGTdu0t1HBwYzoGXOL5nUQBRMXsXWla5i4KkG//QMzVP88m1+I3fdAQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=16.9.0" } }, + "node_modules/html-escaper": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", + "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", + "dev": true, + "license": "MIT" + }, "node_modules/http-errors": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", @@ -1598,6 +1746,45 @@ "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", "license": "ISC" }, + "node_modules/istanbul-lib-coverage": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", + "integrity": "sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=8" + } + }, + "node_modules/istanbul-lib-report": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-report/-/istanbul-lib-report-3.0.1.tgz", + "integrity": "sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + 
"istanbul-lib-coverage": "^3.0.0", + "make-dir": "^4.0.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/istanbul-reports": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/istanbul-reports/-/istanbul-reports-3.2.0.tgz", + "integrity": "sha512-HGYWWS/ehqTV3xN10i23tkPkpH46MLCIMFNCaaKNavAXTF1RkqxawEPtnjnGZ6XKSInBKkiOA5BKS+aZiY3AvA==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "html-escaper": "^2.0.0", + "istanbul-lib-report": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/jose": { "version": "6.2.2", "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.2.tgz", @@ -1607,6 +1794,13 @@ "url": "https://github.com/sponsors/panva" } }, + "node_modules/js-tokens": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-10.0.0.tgz", + "integrity": "sha512-lM/UBzQmfJRo9ABXbPWemivdCW8V2G8FHaHdypQaIy523snUjog0W71ayWXTjiR+ixeMyVHN2XcpnTd/liPg/Q==", + "dev": true, + "license": "MIT" + }, "node_modules/json-schema-traverse": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", @@ -1890,6 +2084,34 @@ "@jridgewell/sourcemap-codec": "^1.5.5" } }, + "node_modules/magicast": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/magicast/-/magicast-0.5.3.tgz", + "integrity": "sha512-pVKE4UdSQ7DvHzivsCIFx2BJn1mHG6KsyrFcaxFx6tONdneEuThrDx0Cj3AMg58KyN4pzYT+LHOotxDQDjNvkw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.29.3", + "@babel/types": "^7.29.0", + "source-map-js": "^1.2.1" + } + }, + "node_modules/make-dir": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-4.0.0.tgz", + "integrity": "sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==", + "dev": true, + "license": "MIT", + "dependencies": { + "semver": "^7.5.3" + }, + "engines": { + "node": ">=10" + }, 
+ "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/make-error": { "version": "1.3.6", "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", @@ -2132,7 +2354,6 @@ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -2653,6 +2874,19 @@ "node": ">=0.10.0" } }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/tar-fs": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", @@ -2818,7 +3052,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -2871,7 +3104,6 @@ "integrity": "sha512-dbU7/iLVa8KZALJyLOBOQ88nOXtNG8vxKuOT4I2mD+Ya70KPceF4IAmDsmU0h1Qsn5bPrvsY9HJstCRh3hG6Uw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "lightningcss": "^1.32.0", "picomatch": "^4.0.4", @@ -3087,7 +3319,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/universal-refiner/package.json b/universal-refiner/package.json index ad821c1..023f91e 100644 --- a/universal-refiner/package.json +++ b/universal-refiner/package.json @@ -1,55 +1,71 @@ { - "name": "gemini-prompt-refiner", - "version": "8.0.0", - 
"description": "Cross-CLI prompt refinement using an MCP server.", - "main": "dist/src/index.js", - "bin": { - "gemini-prompt-refiner": "./dist/src/index.js" - }, - "scripts": { - "prebuild": "node scripts/sync-version.mjs", - "build": "tsc \u0026\u0026 node scripts/copy-dashboard.mjs", - "start": "node dist/src/index.js", - "dev": "ts-node src/index.ts", - "pretest": "node scripts/sync-version.mjs", - "test": "vitest run" - }, - "keywords": [ - "mcp", - "mcp-server", - "gemini", - "prompt-refinement" - ], - "author": "Kim Harjamaki", - "license": "MIT", - "type": "module", - "dependencies": { - "@modelcontextprotocol/sdk": "^1.29.0", - "better-sqlite3": "^12.8.0", - "chokidar": "^5.0.0", - "flexsearch": "^0.7.43", - "zod": "^4.3.6" - }, - "devDependencies": { - "@types/better-sqlite3": "^7.6.13", - "@types/node": "^22.19.17", - "ts-node": "^10.9.2", - "typescript": "^5.9.3", - "vitest": "^4.1.4" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/Coding-Autopilot-System/Promptimprover.git", - "directory": "universal-refiner" - }, - "homepage": "https://github.com/Coding-Autopilot-System/Promptimprover#readme", - "bugs": { - "url": "https://github.com/Coding-Autopilot-System/Promptimprover/issues" - }, - "engines": { - "node": "\u003e=22" - }, - "files": [ - "dist/" - ] + "name": "gemini-prompt-refiner", + "version": "8.0.0", + "description": "Cross-CLI prompt refinement using an MCP server.", + "main": "dist/src/index.js", + "bin": { + "gemini-prompt-refiner": "./dist/src/index.js", + "promptimprover-hook-pre": "./dist/hooks/pre-prompt.js", + "promptimprover-hook-post": "./dist/hooks/post-execution.js" + }, + "scripts": { + "prebuild": "node scripts/sync-version.mjs && node scripts/clean-dist.mjs", + "build": "tsc && node scripts/copy-dashboard.mjs", + "start": "node dist/src/index.js", + "dev": "ts-node src/index.ts", + "pretest": "node scripts/sync-version.mjs", + "test": "vitest run", + "test:coverage": "vitest run --coverage", + 
"test:acceptance": "vitest run tests/acceptance", + "test:stress": "vitest run tests/stress", + "security:secrets": "node scripts/security/scan-secrets.mjs", + "security:audit": "npm audit --omit=dev --audit-level=high", + "package:check": "npm pack --dry-run", + "db:backup": "node scripts/operations/event-store-recovery.mjs backup", + "db:restore": "node scripts/operations/event-store-recovery.mjs restore", + "acceptance:semantic": "node scripts/acceptance/semantic-provider-acceptance.mjs", + "stress:event-store": "node scripts/stress/event-store-stress.mjs", + "release:verify": "npm run build && npm run test:coverage && npm run test:acceptance && npm run acceptance:semantic && npm run test:stress && npm run stress:event-store && npm run security:audit && npm run security:secrets && npm run package:check" + }, + "keywords": [ + "mcp", + "mcp-server", + "gemini", + "prompt-refinement" + ], + "author": "Kim Harjamaki", + "license": "MIT", + "type": "module", + "dependencies": { + "@modelcontextprotocol/sdk": "^1.29.0", + "better-sqlite3": "^12.8.0", + "chokidar": "^5.0.0", + "flexsearch": "^0.7.43", + "zod": "^4.3.6" + }, + "devDependencies": { + "@types/better-sqlite3": "^7.6.13", + "@types/node": "^22.19.17", + "@vitest/coverage-v8": "4.1.4", + "ts-node": "^10.9.2", + "typescript": "^5.9.3", + "vitest": "^4.1.4" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/Coding-Autopilot-System/Promptimprover.git", + "directory": "universal-refiner" + }, + "homepage": "https://github.com/Coding-Autopilot-System/Promptimprover#readme", + "bugs": { + "url": "https://github.com/Coding-Autopilot-System/Promptimprover/issues" + }, + "engines": { + "node": ">=22" + }, + "files": [ + "dist/", + "hooks/config/", + "scripts/operations/" + ] } diff --git a/universal-refiner/scripts/acceptance/semantic-provider-acceptance.mjs b/universal-refiner/scripts/acceptance/semantic-provider-acceptance.mjs new file mode 100644 index 0000000..5a72e58 --- /dev/null +++ 
b/universal-refiner/scripts/acceptance/semantic-provider-acceptance.mjs @@ -0,0 +1,67 @@ +import assert from "node:assert/strict"; +import { startFakeOpenAiServer } from "../support/fake-openai-server.mjs"; +import { + LocalOpenAiProvider, + SemanticProviderChain, +} from "../../dist/src/core/semantic-provider.js"; + +const primary = process.env.PROMPT_REFINER_PRIMARY_MODEL || "gemma3:12b"; +const fallback = process.env.PROMPT_REFINER_FALLBACK_MODEL || "gemma3:1b"; +const liveBaseUrl = process.env.PROMPT_REFINER_ACCEPTANCE_BASE_URL; +const fake = await startFakeOpenAiServer({ + unavailableModels: [primary], + responses: { [fallback]: "fallback accepted" }, +}); + +try { + if (liveBaseUrl) { + for (const model of [primary, fallback]) { + const liveProvider = new LocalOpenAiProvider({ + baseUrl: liveBaseUrl, + models: [model], + timeoutMs: Number.parseInt(process.env.PROMPT_REFINER_ACCEPTANCE_TIMEOUT_MS || "120000", 10), + temperature: 0, + allowNonLoopback: process.env.PROMPT_REFINER_ACCEPTANCE_ALLOW_NON_LOOPBACK === "true", + }); + const liveResult = await liveProvider.requestText({ taskName: "live acceptance", prompt: "Reply with accepted.", maxTokens: 16 }); + assert.equal(liveResult?.model, model, `Live endpoint did not return a response from ${model}.`); + } + console.log(`Live semantic acceptance passed for ${primary} and ${fallback} at ${liveBaseUrl}.`); + } + + const local = new LocalOpenAiProvider({ + baseUrl: fake.baseUrl, + models: [primary, fallback], + timeoutMs: 2000, + temperature: 0, + allowNonLoopback: false, + }); + const lastResort = { + name: "acceptance-fallback", + requestText: async () => ({ + text: "provider fallback accepted", + provider: "acceptance-fallback", + model: "deterministic", + latencyMs: 0, + }), + }; + + const localResult = await local.requestText({ taskName: "acceptance", prompt: "hello", maxTokens: 16 }); + assert.equal(localResult?.model, fallback); + assert.deepEqual(localResult?.fallbackFrom, [primary]); + + const 
outageProvider = new LocalOpenAiProvider({ + baseUrl: "http://127.0.0.1:1/v1", + models: [primary, fallback], + timeoutMs: 100, + temperature: 0, + allowNonLoopback: false, + }); + const chainResult = await new SemanticProviderChain([outageProvider, lastResort]) + .requestText({ taskName: "outage", prompt: "hello", maxTokens: 16 }); + assert.equal(chainResult, "provider fallback accepted"); + + console.log(`Semantic acceptance passed: ${primary} -> ${fallback} and outage provider fallback.`); +} finally { + await fake.close(); +} diff --git a/universal-refiner/scripts/clean-dist.mjs b/universal-refiner/scripts/clean-dist.mjs new file mode 100644 index 0000000..19a17e4 --- /dev/null +++ b/universal-refiner/scripts/clean-dist.mjs @@ -0,0 +1,4 @@ +import { rmSync } from "fs"; +import { resolve } from "path"; + +rmSync(resolve("dist"), { recursive: true, force: true }); diff --git a/universal-refiner/scripts/operations/event-store-recovery.mjs b/universal-refiner/scripts/operations/event-store-recovery.mjs new file mode 100644 index 0000000..33621c0 --- /dev/null +++ b/universal-refiner/scripts/operations/event-store-recovery.mjs @@ -0,0 +1,28 @@ +#!/usr/bin/env node + +import path from "node:path"; +import { EventStore } from "../../dist/src/history/event-store.js"; + +const [operation, destination] = process.argv.slice(2); + +if (!["backup", "restore"].includes(operation) || !destination) { + console.error("Usage: event-store-recovery.mjs "); + process.exitCode = 2; +} else { + const resolvedPath = path.resolve(destination); + try { + if (operation === "backup") { + const store = EventStore.getInstance(); + await store.backup(resolvedPath); + store.close(); + console.log(`EventStore backup created: ${resolvedPath}`); + } else { + const store = EventStore.restore(resolvedPath); + store.close(); + console.log(`EventStore restored from: ${resolvedPath}`); + } + } catch (error) { + console.error(`EventStore ${operation} failed:`, error); + process.exitCode = 1; + } +} 
diff --git a/universal-refiner/scripts/security/scan-secrets.mjs b/universal-refiner/scripts/security/scan-secrets.mjs new file mode 100644 index 0000000..6ba8338 --- /dev/null +++ b/universal-refiner/scripts/security/scan-secrets.mjs @@ -0,0 +1,37 @@ +import { execFileSync } from "node:child_process"; +import { readFileSync } from "node:fs"; + +const PATTERNS = [ + { name: "GitHub personal access token", expression: /\b(?:ghp|github_pat)_[A-Za-z0-9_]{20,}\b/g }, + { name: "OpenAI API key", expression: /\bsk-[A-Za-z0-9_-]{20,}\b/g }, + { name: "Azure storage connection string", expression: /\bDefaultEndpointsProtocol=https?;AccountName=[^;\s]+;AccountKey=[^;\s]+/gi }, + { name: "Private key", expression: /-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----/g }, +]; + +const ALLOWED_FIXTURE_MARKER = "secret-scan: allow-fixture"; +const files = execFileSync("git", ["ls-files", "-z"], { encoding: "utf8" }) + .split("\0") + .filter(Boolean); +const findings = []; + +for (const file of files) { + let content; + try { + content = readFileSync(file, "utf8"); + } catch { + continue; + } + if (content.includes(ALLOWED_FIXTURE_MARKER)) continue; + + for (const pattern of PATTERNS) { + pattern.expression.lastIndex = 0; + if (pattern.expression.test(content)) findings.push(`${pattern.name}: ${file}`); + } +} + +if (findings.length > 0) { + console.error(`Secret scan failed:\n${findings.join("\n")}`); + process.exit(1); +} + +console.log(`Secret scan passed for ${files.length} tracked files.`); diff --git a/universal-refiner/scripts/stress/event-store-stress.mjs b/universal-refiner/scripts/stress/event-store-stress.mjs new file mode 100644 index 0000000..d9706a7 --- /dev/null +++ b/universal-refiner/scripts/stress/event-store-stress.mjs @@ -0,0 +1,36 @@ +import assert from "node:assert/strict"; +import { mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { spawn } from "node:child_process"; +import { fileURLToPath 
} from "node:url"; +import Database from "better-sqlite3"; + +const workers = Number.parseInt(process.env.PROMPT_REFINER_STRESS_WORKERS || "4", 10); +const writes = Number.parseInt(process.env.PROMPT_REFINER_STRESS_WRITES || "100", 10); +const directory = await mkdtemp(join(tmpdir(), "prompt-refiner-stress-")); +const workerScript = fileURLToPath(new URL("./event-store-worker.mjs", import.meta.url)); + +function runWorker(workerId) { + return new Promise((resolve, reject) => { + const child = spawn(process.execPath, [workerScript, String(workerId), String(writes)], { + env: { ...process.env, PROMPT_REFINER_GLOBAL_DIR: directory }, + stdio: ["ignore", "pipe", "pipe"], + }); + let stderr = ""; + child.stderr.on("data", chunk => stderr += chunk); + child.on("error", reject); + child.on("exit", code => code === 0 ? resolve() : reject(new Error(`worker ${workerId} exited ${code}: ${stderr}`))); + }); +} + +try { + await Promise.all(Array.from({ length: workers }, (_, index) => runWorker(index))); + const database = new Database(join(directory, "events.db"), { readonly: true }); + const row = database.prepare("SELECT COUNT(*) AS count FROM events WHERE event_type = 'stress'").get(); + database.close(); + assert.equal(row.count, workers * writes); + console.log(`EventStore stress passed: ${workers} workers wrote ${row.count} events.`); +} finally { + await rm(directory, { recursive: true, force: true }); +} diff --git a/universal-refiner/scripts/stress/event-store-worker.mjs b/universal-refiner/scripts/stress/event-store-worker.mjs new file mode 100644 index 0000000..2a72bdf --- /dev/null +++ b/universal-refiner/scripts/stress/event-store-worker.mjs @@ -0,0 +1,15 @@ +import { EventStore } from "../../dist/src/history/event-store.js"; + +const workerId = process.argv[2]; +const writes = Number.parseInt(process.argv[3] || "100", 10); +const store = EventStore.getInstance(); + +for (let index = 0; index < writes; index += 1) { + store.recordEvent({ + id: 
`worker-${workerId}-${index}`, + event_type: "stress", + summary: `worker ${workerId} event ${index}`, + }); +} + +store.close(); diff --git a/universal-refiner/scripts/support/fake-openai-server.mjs b/universal-refiner/scripts/support/fake-openai-server.mjs new file mode 100644 index 0000000..b1bc219 --- /dev/null +++ b/universal-refiner/scripts/support/fake-openai-server.mjs @@ -0,0 +1,56 @@ +import { createServer } from "node:http"; + +export async function startFakeOpenAiServer(options = {}) { + const { + host = "127.0.0.1", + port = 0, + responses = {}, + unavailableModels = [], + } = options; + const requests = []; + const unavailable = new Set(unavailableModels); + + const server = createServer((request, response) => { + if (request.method !== "POST" || request.url !== "/v1/chat/completions") { + response.writeHead(404).end(); + return; + } + + let body = ""; + request.setEncoding("utf8"); + request.on("data", chunk => body += chunk); + request.on("end", () => { + const payload = JSON.parse(body); + requests.push(payload); + response.setHeader("content-type", "application/json"); + + if (unavailable.has(payload.model)) { + response.writeHead(503).end(JSON.stringify({ error: "model unavailable" })); + return; + } + + response.end(JSON.stringify({ + choices: [{ message: { content: responses[payload.model] ?? `response from ${payload.model}` } }], + usage: { prompt_tokens: 4, completion_tokens: 3 }, + })); + }); + }); + + await new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(port, host, resolve); + }); + + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("Fake OpenAI server did not expose a TCP address."); + } + + return { + baseUrl: `http://${host}:${address.port}/v1`, + requests, + close: () => new Promise((resolve, reject) => { + server.close(error => error ? 
reject(error) : resolve()); + }), + }; +} diff --git a/universal-refiner/src/core/background-service.ts b/universal-refiner/src/core/background-service.ts index 040bd4b..0f5895c 100644 --- a/universal-refiner/src/core/background-service.ts +++ b/universal-refiner/src/core/background-service.ts @@ -2,21 +2,35 @@ import * as chokidar from 'chokidar'; import { CommitIngester } from "../history/commit-ingest.js"; import { LessonExtractor } from "../history/lesson-extractor.js"; import { CorrelationEngine } from "../history/correlation-engine.js"; +import { GitPoller } from "../history/git-poller.js"; import { RuntimeLogger } from "./logger.js"; import { CommandCenterDashboard } from "./dashboard.js"; +import { SerializedJobQueue } from "./job-queue.js"; export class BackgroundAutonomyService { private watcher: chokidar.FSWatcher | null = null; + private gitPoller: GitPoller | null = null; private debounceTimer: NodeJS.Timeout | null = null; private rootPath: string; private requestModelText: (taskName: string, userPrompt: string, maxTokens: number) => Promise; + private queue = new SerializedJobQueue(); + /** + * @param gitPollIntervalMs Pass a number in milliseconds to enable git polling. + * Defaults to null (disabled) so tests that use vi.runAllTimersAsync() are not + * affected by an infinite setInterval. The production server passes 30_000. 
+ */ constructor( rootPath: string, - requestModelText: (taskName: string, userPrompt: string, maxTokens: number) => Promise + requestModelText: (taskName: string, userPrompt: string, maxTokens: number) => Promise, + gitPollIntervalMs: number | null = null, ) { this.rootPath = rootPath; this.requestModelText = requestModelText; + if (gitPollIntervalMs !== null) { + this.gitPoller = new GitPoller(rootPath, gitPollIntervalMs); + this.gitPoller.on("commits", () => this.triggerAutonomy()); + } } public start() { @@ -39,10 +53,12 @@ export class BackgroundAutonomyService { }); this.watcher.on('all', (event, filePath) => { - // Don't log every single file change to dashboard to avoid noise, but log to RuntimeLogger RuntimeLogger.debug(`File change detected: ${event} ${filePath}`); this.triggerAutonomy(); }); + + this.gitPoller?.start(); + this.queue.enqueue(`autonomy:${this.rootPath}`, () => this.runCycles()); } private triggerAutonomy() { @@ -50,33 +66,30 @@ export class BackgroundAutonomyService { clearTimeout(this.debounceTimer); } - this.debounceTimer = setTimeout(async () => { - await this.runCycles(); + this.debounceTimer = setTimeout(() => { + const accepted = this.queue.enqueue(`autonomy:${this.rootPath}`, () => this.runCycles()); + if (!accepted) { + RuntimeLogger.debug("Background autonomy cycle coalesced", { rootPath: this.rootPath }); + } }, 3000); } private async runCycles() { try { CommandCenterDashboard.log("Background Autonomy: Change detected. 
Triggering intelligence cycles..."); - - // a) CommitIngester.ingestLatest() - // We increase the limit significantly because the ingester now fetches only since last SHA const ingestedCount = await CommitIngester.ingestLatest(this.rootPath, 100); CommandCenterDashboard.log(`Background Autonomy: Ingested ${ingestedCount} commits.`); - // b) CorrelationEngine.correlateAll() const engine = new CorrelationEngine(); - await engine.correlateAll(); - CommandCenterDashboard.log("Background Autonomy: Correlation complete."); - - // c) LessonExtractor.extractNewLessons() const extractor = new LessonExtractor(this.requestModelText); + await engine.correlateAll(); await extractor.extractNewLessons(); - CommandCenterDashboard.log("Background Autonomy: Lesson extraction complete."); + CommandCenterDashboard.log("Background Autonomy: Correlation and lesson extraction complete."); } catch (error) { RuntimeLogger.error("Background Autonomy cycle failed", error); CommandCenterDashboard.log("Background Autonomy: Cycle failed. 
See logs."); + throw error; } } @@ -89,5 +102,10 @@ export class BackgroundAutonomyService { clearTimeout(this.debounceTimer); this.debounceTimer = null; } + this.gitPoller?.stop(); + } + + public async idle() { + await this.queue.idle(); } } diff --git a/universal-refiner/src/core/config.ts b/universal-refiner/src/core/config.ts index a1e4d20..68d6af3 100644 --- a/universal-refiner/src/core/config.ts +++ b/universal-refiner/src/core/config.ts @@ -5,10 +5,30 @@ import { AgenticBlackboard } from "./blackboard.js"; export interface RefinerConfig { mandates?: string[]; ignoredPaths?: string[]; + semantic?: Partial; +} + +export interface SemanticConfig { + localEnabled: boolean; + mcpSamplingEnabled: boolean; + baseUrl: string; + models: string[]; + timeoutMs: number; + temperature: number; + allowNonLoopback: boolean; } export class ConfigManager { private static CONFIG_FILE = ".gemini-refiner.json"; + private static DEFAULT_SEMANTIC_CONFIG: SemanticConfig = { + localEnabled: true, + mcpSamplingEnabled: true, + baseUrl: "http://localhost:9000/v1", + models: ["gemma3:12b", "gemma3:1b"], + timeoutMs: 120000, + temperature: 0.2, + allowNonLoopback: false, + }; static loadConfig(rootPath: string = "."): RefinerConfig { const configPath = path.join(rootPath, this.CONFIG_FILE); @@ -25,6 +45,26 @@ export class ConfigManager { } } + static getSemanticConfig(rootPath: string = "."): SemanticConfig { + const semantic = this.loadConfig(rootPath).semantic || {}; + const defaults = this.DEFAULT_SEMANTIC_CONFIG; + return { + localEnabled: typeof semantic.localEnabled === "boolean" ? semantic.localEnabled : defaults.localEnabled, + mcpSamplingEnabled: typeof semantic.mcpSamplingEnabled === "boolean" ? semantic.mcpSamplingEnabled : defaults.mcpSamplingEnabled, + baseUrl: typeof semantic.baseUrl === "string" && semantic.baseUrl.trim() ? 
semantic.baseUrl.trim() : defaults.baseUrl, + models: Array.isArray(semantic.models) && semantic.models.length > 0 && semantic.models.every(model => typeof model === "string" && model.trim()) + ? semantic.models.map(model => model.trim()) + : defaults.models, + timeoutMs: typeof semantic.timeoutMs === "number" && Number.isFinite(semantic.timeoutMs) && semantic.timeoutMs > 0 + ? semantic.timeoutMs + : defaults.timeoutMs, + temperature: typeof semantic.temperature === "number" && Number.isFinite(semantic.temperature) && semantic.temperature >= 0 && semantic.temperature <= 2 + ? semantic.temperature + : defaults.temperature, + allowNonLoopback: typeof semantic.allowNonLoopback === "boolean" ? semantic.allowNonLoopback : defaults.allowNonLoopback, + }; + } + static getPredictiveMandates(): string[] { const logs = AgenticBlackboard.getLogs(); const recent = logs.slice(0, 10).map(l => l.message.toLowerCase()); diff --git a/universal-refiner/src/core/dashboard.html b/universal-refiner/src/core/dashboard.html index 97b79c4..726ae8b 100644 --- a/universal-refiner/src/core/dashboard.html +++ b/universal-refiner/src/core/dashboard.html @@ -62,6 +62,14 @@ .lesson-card { border-left: 4px solid var(--accent); background: rgba(56, 189, 248, 0.03); padding: 1rem; border-radius: 0 0.5rem 0.5rem 0; margin-bottom: 1rem; } .confidence-high { color: #22c55e; } .confidence-medium { color: #eab308; } + .actions { display: flex; gap: 0.5rem; margin-top: 1rem; } + button { border: 1px solid var(--border); border-radius: 0.4rem; padding: 0.45rem 0.8rem; color: var(--text); background: #1e293b; cursor: pointer; } + button:hover { border-color: var(--accent); } + button.approve { background: rgba(34, 197, 94, 0.15); color: #86efac; } + button.reject { background: rgba(239, 68, 68, 0.15); color: #fca5a5; } + .health-good { color: #22c55e; } + .health-warn { color: #eab308; } + .notice { min-height: 1.25rem; color: var(--dim); font-size: 0.75rem; margin-bottom: 1rem; } @@ -75,6 +83,7 @@

PROMPT🧠 COMMIT INTELLIGENCE +
Selected Project
@@ -125,6 +134,7 @@

Commit History & Linkage

Predictive Engineering Lessons

+
@@ -135,10 +145,29 @@

Predictive Engineering Lessons

Autonomous Prompt Templates

+
+ + + @@ -148,6 +177,7 @@

Autonomous Prompt Templates

let currentView = 'stream'; const escapeHtml = (v) => String(v ?? '').replace(/&/g, '&').replace(//g, '>'); + const encodeCandidateId = (v) => encodeURIComponent(String(v)).replace(/'/g, '%27'); const projectName = (p) => p.split(/[\\\/]/).filter(Boolean).pop() || 'ROOT'; function switchView(viewId) { @@ -156,7 +186,7 @@

Autonomous Prompt Templates

document.querySelectorAll('.nav-item').forEach(n => n.classList.remove('active')); event.target.classList.add('active'); - const titles = { 'stream': 'Global Intelligence Stream', 'intelligence': 'Commit Intelligence', 'learning': 'Learning Layer', 'library': 'Prompt Library' }; + const titles = { 'stream': 'Global Intelligence Stream', 'intelligence': 'Commit Intelligence', 'learning': 'Learning Layer', 'library': 'Prompt Library', 'health': 'Provider Health' }; document.getElementById('view-title').textContent = titles[viewId]; currentView = viewId; refreshData(); @@ -224,10 +254,11 @@

Autonomous Prompt Templates

${escapeHtml(l.title)} - ${l.confidence.toUpperCase()} CONFIDENCE + ${escapeHtml(l.confidence).toUpperCase()} CONFIDENCE

${escapeHtml(l.summary)}

-
TYPE: ${l.lesson_type.toUpperCase()} • SOURCE: ${l.source}
+
TYPE: ${escapeHtml(l.lesson_type).toUpperCase()} • SOURCE: ${escapeHtml(l.source)} • REVIEW: ${l.approved === 1 ? 'APPROVED' : l.approved === -1 ? 'REJECTED' : 'PENDING'}
+ ${l.approved === 0 ? `
` : ''}
`).join('') || '

No lessons extracted yet. Run derive_lessons!

'; } @@ -237,13 +268,64 @@

Autonomous Prompt Templates

const data = await res.json(); document.getElementById('template-list').innerHTML = data.map(t => `
- ${escapeHtml(t.name)} -

${escapeHtml(t.description)}

+ ${escapeHtml(t.title)} +

${escapeHtml(t.usage_notes)}

${escapeHtml(t.template_text)}
-
SUCCESS SCORE: ${t.success_score}%
+
SUCCESS SCORE: ${t.success_score}% • REVIEW: ${t.approved === 1 ? 'APPROVED' : t.deprecated === 1 ? 'REJECTED' : 'PENDING'}
+ ${t.approved === 0 && t.deprecated === 0 ? `
` : ''}
`).join('') || '

Prompt library is currently empty.

'; } + + if (currentView === 'health') { + const res = await fetch(`/api/health?project=${encodeURIComponent(currentProject)}`); + const health = await res.json(); + if (!res.ok) throw new Error(health.error || 'Provider health unavailable'); + const semantic = health.semantic; + document.getElementById('runtime-health').innerHTML = ` +
STATUS${escapeHtml(health.runtime.status).toUpperCase()}
+
UPTIME${health.runtime.uptimeSeconds}s
+
NODE${escapeHtml(health.runtime.nodeVersion)}
+
CHECKED${new Date(health.runtime.checkedAt).toLocaleString()}
+ `; + document.getElementById('semantic-health').innerHTML = ` +
STATUS${escapeHtml(semantic.status).toUpperCase()}
+
LOCAL PROVIDER${semantic.local.enabled ? 'ENABLED' : 'DISABLED'}
+
ENDPOINT${escapeHtml(semantic.local.endpoint)}
+
MODELS${semantic.local.models.map(escapeHtml).join(', ')}
+
MCP SAMPLING${semantic.mcpSampling.enabled ? 'ENABLED' : 'DISABLED'}
+
COMPLETIONS${semantic.totals.completed}
+
AVG LATENCY${semantic.totals.averageLatencyMs === null ? 'N/A' : semantic.totals.averageLatencyMs + 'ms'}
+
FALLBACK COMPLETIONS${semantic.totals.fallbackCompletions}
+ `; + document.getElementById('provider-metrics').innerHTML = semantic.providers.map(p => ` +
+ ${escapeHtml(p.provider)} +
COMPLETIONS${p.completions}
+
AVG LATENCY${p.averageLatencyMs === null ? 'N/A' : p.averageLatencyMs + 'ms'}
+
MODELS${p.models.map(escapeHtml).join(', ')}
+
LAST SUCCESS${new Date(p.lastSuccessAt).toLocaleString()}
+
+ `).join('') || '

No semantic completion telemetry recorded for this project.

'; + } + } + + async function reviewCandidate(kind, encodedId, decision) { + const notice = document.getElementById(currentView === 'learning' ? 'review-notice-learning' : 'review-notice-library'); + notice.textContent = `${decision === 'approve' ? 'Approving' : 'Rejecting'} ${kind}...`; + try { + const response = await fetch(`/api/review/${kind}/${encodedId}?project=${encodeURIComponent(currentProject)}`, { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ decision }) + }); + const result = await response.json(); + if (!response.ok) throw new Error(result.error || 'Review failed'); + notice.textContent = `${kind === 'lesson' ? 'Lesson' : 'Template'} ${decision === 'approve' ? 'approved' : 'rejected'}.`; + await refreshData(); + } catch (error) { + notice.textContent = error instanceof Error ? error.message : String(error); + } } // Initial Load diff --git a/universal-refiner/src/core/dashboard.ts b/universal-refiner/src/core/dashboard.ts index 2f647f4..7a0b6a4 100644 --- a/universal-refiner/src/core/dashboard.ts +++ b/universal-refiner/src/core/dashboard.ts @@ -8,6 +8,7 @@ import { fileURLToPath } from "url"; import { streamSSE } from "hono/streaming"; import { getDisplayVersion } from "./version.js"; import { RuntimeLogger } from "./logger.js"; +import { ConfigManager } from "./config.js"; import { TimelineProvider } from "../history/timeline.js"; import { EventStore } from "../history/event-store.js"; @@ -30,6 +31,44 @@ interface DashboardState { pattern: string; } +interface SemanticEventDetails { + taskName?: unknown; + provider?: unknown; + model?: unknown; + latencyMs?: unknown; + fallbackFrom?: unknown; +} + +interface SemanticEventRow { + timestamp: string; + details_json: string; +} + +function safeString(value: unknown, maxLength = 120): string | null { + return typeof value === "string" && value.length > 0 ? 
value.slice(0, maxLength) : null; +} + +function sanitizeEndpoint(rawUrl: string): string { + try { + const url = new URL(rawUrl); + return `${url.protocol}//${url.hostname}${url.port ? `:${url.port}` : ""}`; + } catch { + return "invalid"; + } +} + +export function isSameOriginRequest(origin: string | undefined, requestUrl: string): boolean { + if (!origin) { + return true; + } + + try { + return new URL(origin).origin === new URL(requestUrl).origin; + } catch { + return false; + } +} + export class CommandCenterDashboard { private static rootPath: string = "."; @@ -88,7 +127,102 @@ export class CommandCenterDashboard { }; } - static start(port = 3000, defaultPath = ".") { + private static buildHealth(selectedPath: string) { + const repoId = EventStore.getInstance().ensureRepository(selectedPath).id; + const config = ConfigManager.getSemanticConfig(selectedPath); + const db = (EventStore.getInstance() as any).db; + const rows = db.prepare(` + SELECT timestamp, details_json + FROM events + WHERE repo_id = ? AND event_type = 'semantic_request_completed' + ORDER BY timestamp DESC + LIMIT 100 + `).all(repoId) as SemanticEventRow[]; + + const events = rows.map(row => { + let details: SemanticEventDetails = {}; + try { + details = JSON.parse(row.details_json) as SemanticEventDetails; + } catch { + // Malformed historical telemetry is ignored rather than exposed. + } + return { + timestamp: row.timestamp, + taskName: safeString(details.taskName), + provider: safeString(details.provider) || "unknown", + model: safeString(details.model) || "unknown", + latencyMs: typeof details.latencyMs === "number" && Number.isFinite(details.latencyMs) + ? Math.max(0, Math.round(details.latencyMs)) + : null, + fallbackFrom: Array.isArray(details.fallbackFrom) + ? 
details.fallbackFrom.map(item => safeString(item, 80)).filter((item): item is string => item !== null).slice(0, 10) + : [], + }; + }); + + const providerMetrics = new Map; + }>(); + for (const event of events) { + const metric = providerMetrics.get(event.provider) || { + completions: 0, + latencyTotal: 0, + latencyCount: 0, + lastSuccessAt: event.timestamp, + models: new Set(), + }; + metric.completions += 1; + if (event.latencyMs !== null) { + metric.latencyTotal += event.latencyMs; + metric.latencyCount += 1; + } + metric.models.add(event.model); + providerMetrics.set(event.provider, metric); + } + + const totalLatency = events.reduce((sum, event) => sum + (event.latencyMs || 0), 0); + const latencyCount = events.filter(event => event.latencyMs !== null).length; + const semanticEnabled = config.localEnabled || config.mcpSamplingEnabled; + + return { + runtime: { + status: "online", + uptimeSeconds: Math.floor(process.uptime()), + nodeVersion: process.version, + checkedAt: new Date().toISOString(), + }, + semantic: { + status: !semanticEnabled ? "disabled" : events.length > 0 ? "healthy" : "configured", + local: { + enabled: config.localEnabled, + endpoint: sanitizeEndpoint(config.baseUrl), + models: config.models, + timeoutMs: config.timeoutMs, + allowNonLoopback: config.allowNonLoopback, + }, + mcpSampling: { enabled: config.mcpSamplingEnabled }, + totals: { + completed: events.length, + averageLatencyMs: latencyCount > 0 ? Math.round(totalLatency / latencyCount) : null, + fallbackCompletions: events.filter(event => event.fallbackFrom.length > 0).length, + }, + lastSuccess: events[0] || null, + providers: [...providerMetrics.entries()].map(([provider, metric]) => ({ + provider, + completions: metric.completions, + averageLatencyMs: metric.latencyCount > 0 ? 
Math.round(metric.latencyTotal / metric.latencyCount) : null, + lastSuccessAt: metric.lastSuccessAt, + models: [...metric.models], + })), + }, + }; + } + + static createApp(defaultPath = ".") { this.rootPath = defaultPath; const app = new Hono(); @@ -116,8 +250,9 @@ export class CommandCenterDashboard { app.get("/api/commits", async (c) => { try { - const repoId = path.basename(this.resolveSelectedPath(c.req.query("project"))); - const db = (EventStore.getInstance() as any).db; + const store = EventStore.getInstance(); + const repoId = store.ensureRepository(this.resolveSelectedPath(c.req.query("project"))).id; + const db = (store as any).db; const commits = db.prepare(` SELECT c.*, e.prompt_id, e.id as execution_id FROM commits c @@ -135,8 +270,9 @@ export class CommandCenterDashboard { app.get("/api/lessons", async (c) => { try { - const repoId = path.basename(this.resolveSelectedPath(c.req.query("project"))); - const db = (EventStore.getInstance() as any).db; + const store = EventStore.getInstance(); + const repoId = store.ensureRepository(this.resolveSelectedPath(c.req.query("project"))).id; + const db = (store as any).db; const lessons = db.prepare("SELECT * FROM lessons WHERE repo_id = ? ORDER BY created_at DESC").all(repoId); return c.json(lessons); } catch (error) { @@ -147,8 +283,9 @@ export class CommandCenterDashboard { app.get("/api/templates", async (c) => { try { - const repoId = path.basename(this.resolveSelectedPath(c.req.query("project"))); - const db = (EventStore.getInstance() as any).db; + const store = EventStore.getInstance(); + const repoId = store.ensureRepository(this.resolveSelectedPath(c.req.query("project"))).id; + const db = (store as any).db; const templates = db.prepare("SELECT * FROM prompt_templates WHERE repo_id = ? 
ORDER BY success_score DESC").all(repoId); return c.json(templates); } catch (error) { @@ -157,6 +294,70 @@ export class CommandCenterDashboard { } }); + app.post("/api/review/:kind/:id", async (c) => { + const selectedPath = this.resolveSelectedPath(c.req.query("project")); + try { + if (!isSameOriginRequest(c.req.header("origin"), c.req.url)) { + return c.json({ error: "Cross-origin review requests are not allowed" }, 403); + } + if (!c.req.header("content-type")?.toLowerCase().startsWith("application/json")) { + return c.json({ error: "Review requests must use application/json" }, 415); + } + + const kind = c.req.param("kind"); + if (kind !== "lesson" && kind !== "template") { + return c.json({ error: "Unsupported review candidate type" }, 400); + } + + let body: { decision?: unknown }; + try { + body = await c.req.json() as { decision?: unknown }; + } catch { + return c.json({ error: "Review request body must be valid JSON" }, 400); + } + if (body.decision !== "approve" && body.decision !== "reject") { + return c.json({ error: "Decision must be approve or reject" }, 400); + } + + const id = c.req.param("id"); + if (!id || id.length > 200) { + return c.json({ error: "Review candidate ID is invalid" }, 400); + } + const store = EventStore.getInstance(); + const repoId = store.ensureRepository(selectedPath).id; + const approved = body.decision === "approve"; + const changed = kind === "lesson" + ? store.reviewLesson(repoId, id, approved) + : store.reviewTemplate(repoId, id, approved); + if (!changed) { + return c.json({ error: `Pending ${kind} not found for selected repository` }, 404); + } + + store.recordEvent({ + id: `evt_dashboard_review_${Date.now()}_${Math.floor(Math.random() * 100000)}`, + event_type: `${kind}_reviewed`, + repo_id: repoId, + summary: `${kind === "lesson" ? "Lesson" : "Template"} ${approved ? 
"approved" : "rejected"} from dashboard`, + details_json: JSON.stringify({ candidateId: id, decision: body.decision }), + }); + this.log(`${kind === "lesson" ? "Lesson" : "Template"} ${approved ? "approved" : "rejected"}: ${id}`, selectedPath); + return c.json({ id, kind, decision: body.decision, repoId }); + } catch (error) { + this.logRouteError("api/review", error, selectedPath); + return c.json({ error: "Review request failed" }, 500); + } + }); + + app.get("/api/health", async (c) => { + const selectedPath = this.resolveSelectedPath(c.req.query("project")); + try { + return c.json(this.buildHealth(selectedPath)); + } catch (error) { + this.logRouteError("api/health", error, selectedPath); + return c.json({ error: "Provider health unavailable" }, 500); + } + }); + app.get("/api/events", async (c) => { try { return streamSSE(c, async (stream) => { @@ -228,6 +429,11 @@ export class CommandCenterDashboard { } }); + return app; + } + + static start(port = 3000, defaultPath = ".") { + const app = this.createApp(defaultPath); try { const server = serve({ fetch: app.fetch, port, hostname: resolveDashboardHost() }); server.on("error", (e: any) => { diff --git a/universal-refiner/src/core/job-queue.ts b/universal-refiner/src/core/job-queue.ts new file mode 100644 index 0000000..7a15276 --- /dev/null +++ b/universal-refiner/src/core/job-queue.ts @@ -0,0 +1,49 @@ +import { RuntimeLogger } from "./logger.js"; + +export interface QueueJobOptions { + retries?: number; + retryDelayMs?: number; +} + +export class SerializedJobQueue { + private tail: Promise = Promise.resolve(); + private pendingKeys = new Set(); + + enqueue(key: string, job: () => Promise, options: QueueJobOptions = {}): boolean { + if (this.pendingKeys.has(key)) { + return false; + } + + this.pendingKeys.add(key); + this.tail = this.tail + .then(() => this.runWithRetry(key, job, options)) + .catch(error => RuntimeLogger.error(`Queued job failed permanently: ${key}`, error)) + .finally(() => 
this.pendingKeys.delete(key)); + return true; + } + + async idle(): Promise { + await this.tail; + } + + private async runWithRetry(key: string, job: () => Promise, options: QueueJobOptions): Promise { + const retries = options.retries ?? 2; + const retryDelayMs = options.retryDelayMs ?? 500; + + for (let attempt = 0; attempt <= retries; attempt++) { + try { + await job(); + return; + } catch (error) { + if (attempt === retries) { + throw error; + } + RuntimeLogger.warn(`Queued job retry: ${key}`, { + attempt: attempt + 1, + error: error instanceof Error ? error.message : String(error), + }); + await new Promise(resolve => setTimeout(resolve, retryDelayMs * (attempt + 1))); + } + } + } +} diff --git a/universal-refiner/src/core/logger.ts b/universal-refiner/src/core/logger.ts index ed20076..ff29554 100644 --- a/universal-refiner/src/core/logger.ts +++ b/universal-refiner/src/core/logger.ts @@ -87,8 +87,6 @@ export class RuntimeLogger { } static error(message: string, meta?: unknown) { - if (shouldLog("error")) { - write("error", message, meta); - } + write("error", message, meta); } } diff --git a/universal-refiner/src/core/semantic-provider.ts b/universal-refiner/src/core/semantic-provider.ts new file mode 100644 index 0000000..716c45c --- /dev/null +++ b/universal-refiner/src/core/semantic-provider.ts @@ -0,0 +1,165 @@ +import { RuntimeLogger } from "./logger.js"; + +export interface SemanticRequest { + taskName: string; + prompt: string; + maxTokens: number; +} + +export interface SemanticResponse { + text: string; + provider: string; + model: string; + latencyMs: number; + promptTokens?: number; + completionTokens?: number; + fallbackFrom?: string[]; +} + +export interface SemanticProvider { + readonly name: string; + requestText(request: SemanticRequest): Promise; +} + +export interface LocalOpenAiProviderOptions { + baseUrl: string; + models: string[]; + timeoutMs: number; + temperature: number; + allowNonLoopback: boolean; +} + +function 
isLoopbackUrl(rawUrl: string): boolean { + try { + const hostname = new URL(rawUrl).hostname.toLowerCase(); + return hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1" || hostname === "[::1]"; + } catch { + return false; + } +} + +function getErrorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +export class LocalOpenAiProvider implements SemanticProvider { + readonly name = "local-openai"; + private readonly options: LocalOpenAiProviderOptions; + + constructor(options: LocalOpenAiProviderOptions) { + if (!options.allowNonLoopback && !isLoopbackUrl(options.baseUrl)) { + throw new Error("Local semantic provider base URL must use a loopback host unless allowNonLoopback is enabled."); + } + this.options = options; + } + + async requestText(request: SemanticRequest): Promise { + const failedModels: string[] = []; + for (const model of this.options.models) { + const startedAt = Date.now(); + try { + const response = await fetch(`${this.options.baseUrl.replace(/\/$/, "")}/chat/completions`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + model, + messages: [{ role: "user", content: request.prompt }], + stream: false, + temperature: this.options.temperature, + max_tokens: request.maxTokens, + }), + signal: AbortSignal.timeout(this.options.timeoutMs), + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}`); + } + + const payload = await response.json() as { + choices?: Array<{ message?: { content?: unknown } }>; + usage?: { prompt_tokens?: number; completion_tokens?: number }; + }; + const text = payload.choices?.[0]?.message?.content; + if (typeof text !== "string" || text.trim().length === 0) { + throw new Error("Response did not contain assistant text."); + } + + return { + text, + provider: this.name, + model, + latencyMs: Date.now() - startedAt, + promptTokens: payload.usage?.prompt_tokens, + completionTokens: 
payload.usage?.completion_tokens, + fallbackFrom: failedModels, + }; + } catch (error) { + failedModels.push(model); + RuntimeLogger.warn(`${request.taskName} local model attempt failed`, { + provider: this.name, + model, + latencyMs: Date.now() - startedAt, + error: getErrorMessage(error), + }); + } + } + + return null; + } +} + +export class McpSamplingProvider implements SemanticProvider { + readonly name = "mcp-sampling"; + private readonly sample: (prompt: string, maxTokens: number) => Promise; + + constructor(sample: (prompt: string, maxTokens: number) => Promise) { + this.sample = sample; + } + + async requestText(request: SemanticRequest): Promise { + const startedAt = Date.now(); + const text = await this.sample(request.prompt, request.maxTokens); + if (!text) { + return null; + } + + return { + text, + provider: this.name, + model: "client-selected", + latencyMs: Date.now() - startedAt, + }; + } +} + +export class SemanticProviderChain { + private readonly providers: SemanticProvider[]; + private readonly onSuccess?: (response: SemanticResponse, request: SemanticRequest) => void; + + constructor( + providers: SemanticProvider[], + onSuccess?: (response: SemanticResponse, request: SemanticRequest) => void, + ) { + this.providers = providers; + this.onSuccess = onSuccess; + } + + async requestText(request: SemanticRequest): Promise { + const unavailableProviders: string[] = []; + for (const provider of this.providers) { + const response = await provider.requestText(request); + if (response) { + response.fallbackFrom = [ + ...unavailableProviders.map(name => `provider:${name}`), + ...(response.fallbackFrom || []).map(model => `model:${model}`), + ]; + this.onSuccess?.(response, request); + return response.text; + } + unavailableProviders.push(provider.name); + } + + RuntimeLogger.warn(`${request.taskName} exhausted all semantic providers`); + return null; + } +} diff --git a/universal-refiner/src/core/server.ts b/universal-refiner/src/core/server.ts index 
dff0a51..1111afb 100644 --- a/universal-refiner/src/core/server.ts +++ b/universal-refiner/src/core/server.ts @@ -26,6 +26,17 @@ import { CorrelationEngine } from "../history/correlation-engine.js"; import { PromptOptimizer } from "../refiners/prompt-optimizer.js"; import { TemplateGenerator } from "../history/template-generator.js"; import { BackgroundAutonomyService } from "./background-service.js"; +import { + LocalOpenAiProvider, + McpSamplingProvider, + SemanticProvider, + SemanticProviderChain, + SemanticResponse, +} from "./semantic-provider.js"; +import { parseStructuredResponse } from "./structured-response.js"; +import { RepositoryIdentity } from "../history/repository-identity.js"; +import { ApprovedTemplateSelector } from "../refiners/template-selector.js"; +import { createABEvaluationRecord, evaluatePrompt } from "../evaluation/prompt-evaluator.js"; export class PromptRefinerServer { private server: Server; @@ -33,17 +44,64 @@ export class PromptRefinerServer { private samplingUnavailableReason: string | null = null; private eventStore: EventStore; private backgroundAutonomy: BackgroundAutonomyService | null = null; + private semanticProviders: SemanticProviderChain; + private repository: RepositoryIdentity; + private templateSelector: ApprovedTemplateSelector; constructor(rootPath: string = ".") { this.rootPath = rootPath; this.eventStore = EventStore.getInstance(); + this.repository = this.eventStore.ensureRepository(rootPath); + this.templateSelector = new ApprovedTemplateSelector(this.eventStore); this.server = new Server( { name: "prompt-refiner", version: getPackageVersion() }, { capabilities: { tools: {}, logging: {}, experimental: { sampling: {} } } } ); + this.semanticProviders = this.createSemanticProviderChain(); this.setupToolHandlers(); } + private createSemanticProviderChain(): SemanticProviderChain { + const config = ConfigManager.getSemanticConfig(this.rootPath); + const providers: SemanticProvider[] = []; + + if (config.localEnabled) { 
+ try { + providers.push(new LocalOpenAiProvider(config)); + } catch (error) { + RuntimeLogger.warn("Local semantic provider configuration rejected", { + error: String(error), + }); + } + } + if (config.mcpSamplingEnabled) { + providers.push(new McpSamplingProvider(this.requestMcpSamplingText.bind(this))); + } + + return new SemanticProviderChain(providers, this.recordSemanticSuccess.bind(this)); + } + + private recordSemanticSuccess(response: SemanticResponse, request: { taskName: string }) { + const details = { + taskName: request.taskName, + provider: response.provider, + model: response.model, + latencyMs: response.latencyMs, + promptTokens: response.promptTokens, + completionTokens: response.completionTokens, + fallbackFrom: response.fallbackFrom, + }; + RuntimeLogger.info(`${request.taskName} semantic request completed`, details); + this.eventStore.recordEvent({ + id: `evt_semantic_${Date.now()}_${Math.floor(Math.random() * 100000)}`, + event_type: "semantic_request_completed", + repo_id: this.repository.id, + summary: `${request.taskName} completed with ${response.provider}/${response.model}`, + details_json: JSON.stringify(details), + }); + CommandCenterDashboard.log(`${request.taskName}: ${response.provider}/${response.model} in ${response.latencyMs}ms`); + } + private async scoutProject(query?: string): Promise { const nodeCtx = await NodeDetector.detect(this.rootPath); const pyCtx = await PythonDetector.detect(this.rootPath); @@ -53,7 +111,7 @@ export class PromptRefinerServer { const predictive = ConfigManager.getPredictiveMandates(); const snippets = query ? 
await NeuralSnippets.search(query, this.rootPath) : []; const activeIntents = AgenticBlackboard.getActiveIntents(this.rootPath); - const repoId = path.basename(this.rootPath); + const repoId = this.repository.id; const predictiveLessons = this.eventStore.getRecentLessons(repoId, 5); return { @@ -89,12 +147,12 @@ export class PromptRefinerServer { RuntimeLogger.warn("MCP sampling is unavailable; semantic features will fall back to local-only behavior", { rootPath: this.rootPath, reason, - error: error instanceof Error ? error.stack || error.message : String(error), + error: String(error), }); CommandCenterDashboard.log(`Semantic Intelligence unavailable: ${reason}`); } - public async requestModelText(taskName: string, userPrompt: string, maxTokens: number): Promise { + private async requestMcpSamplingText(userPrompt: string, maxTokens: number): Promise { if (this.samplingUnavailableReason) { return null; } @@ -120,14 +178,18 @@ export class PromptRefinerServer { return null; } - RuntimeLogger.warn(`${taskName} sampling request failed`, { + RuntimeLogger.warn(`MCP sampling request failed`, { rootPath: this.rootPath, - error: error instanceof Error ? error.stack || error.message : String(error), + error: String(error), }); return null; } } + public async requestModelText(taskName: string, userPrompt: string, maxTokens: number): Promise { + return this.semanticProviders.requestText({ taskName, prompt: userPrompt, maxTokens }); + } + private async lintSemantic(prompt: string, ctx: ProjectContext): Promise { CommandCenterDashboard.log(`Executing Semantic Intelligence Analysis...`); const responseText = await this.requestModelText( @@ -152,13 +214,13 @@ Output ONLY the JSON array. 
If no gaps, return [].`, } try { - return JSON.parse(responseText); + return parseStructuredResponse(responseText); } catch (error) { RuntimeLogger.warn("Semantic analysis returned invalid JSON", { rootPath: this.rootPath, promptPreview: prompt.substring(0, 120), responsePreview: responseText.substring(0, 300), - error: error instanceof Error ? error.stack || error.message : String(error), + error: String(error), }); CommandCenterDashboard.log(`Semantic Analysis returned invalid JSON.`); return []; @@ -179,7 +241,10 @@ Output ONLY the JSON array. If no gaps, return [].`, description: "Performs modular analysis of a prompt and codebase.", inputSchema: { type: "object", - properties: { prompt: { type: "string" } }, + properties: { + prompt: { type: "string" }, + semantic: { type: "boolean", description: "Set false for latency-sensitive automation that needs rule-based linting only." } + }, required: ["prompt"], }, }, @@ -237,6 +302,35 @@ Output ONLY the JSON array. If no gaps, return [].`, required: ["id"] } }, + { + name: "list_learning_candidates", + description: "Lists review-gated lesson and prompt-template candidates for the current repository.", + inputSchema: { type: "object", properties: {} } + }, + { + name: "review_lesson", + description: "Approves or rejects a proposed lesson before it can influence future prompts.", + inputSchema: { + type: "object", + properties: { + id: { type: "string" }, + approved: { type: "boolean" } + }, + required: ["id", "approved"] + } + }, + { + name: "review_template", + description: "Approves or rejects a proposed prompt template.", + inputSchema: { + type: "object", + properties: { + id: { type: "string" }, + approved: { type: "boolean" } + }, + required: ["id", "approved"] + } + }, { name: "ingest_pattern", description: "Saves a learned engineering pattern to the project's persistent memory.", @@ -295,11 +389,57 @@ Output ONLY the JSON array. 
If no gaps, return [].`, properties: { prompt_id: { type: "string", description: "The tracking ID found in the refined prompt (e.g., 'ref_123...')" }, output_summary: { type: "string", description: "A concise summary of what was achieved or the final response text." }, - artifacts_json: { type: "string", description: "Optional: JSON string of any artifacts created (files, links, etc.)." } + artifacts_json: { type: "string", description: "Optional: JSON string of any artifacts created (files, links, etc.)." }, + status: { type: "string", enum: ["completed", "failed"], description: "Verified execution outcome. Defaults to completed." } }, required: ["prompt_id", "output_summary"] } }, + { + name: "evaluate_prompt", + description: "Scores a prompt with deterministic evidence indicators across five quality dimensions.", + inputSchema: { + type: "object", + properties: { + prompt: { type: "string" }, + baseline_prompt: { type: "string", description: "Optional original prompt used to measure intent preservation." 
} + }, + required: ["prompt"] + } + }, + { + name: "compare_prompt_variants", + description: "Compares prompt variants and separates heuristic preference from optional observed execution evidence.", + inputSchema: { + type: "object", + properties: { + baseline_prompt: { type: "string" }, + variant_a: { type: "string" }, + variant_b: { type: "string" }, + outcome_a: { + type: "object", + properties: { + status: { type: "string", enum: ["completed", "failed", "cancelled"] }, + testsPassed: { type: "number" }, + testsFailed: { type: "number" }, + reworkCount: { type: "number" } + }, + required: ["status"] + }, + outcome_b: { + type: "object", + properties: { + status: { type: "string", enum: ["completed", "failed", "cancelled"] }, + testsPassed: { type: "number" }, + testsFailed: { type: "number" }, + reworkCount: { type: "number" } + }, + required: ["status"] + } + }, + required: ["baseline_prompt", "variant_a", "variant_b"] + } + }, ], })); @@ -310,7 +450,10 @@ Output ONLY the JSON array. If no gaps, return [].`, switch (request.params.name) { case "lint_prompt": { - const { prompt } = z.object({ prompt: z.string() }).parse(request.params.arguments); + const { prompt, semantic } = z.object({ + prompt: z.string(), + semantic: z.boolean().optional(), + }).parse(request.params.arguments); AgenticBlackboard.postIntent(agentName, "lint", prompt, this.rootPath); CommandCenterDashboard.log(`Scouting project for prompt: "${prompt.substring(0, 30)}..."`); @@ -320,16 +463,16 @@ Output ONLY the JSON array. If no gaps, return [].`, client: "MCP", agent_name: agentName, raw_prompt: prompt, - repo_id: path.basename(this.rootPath) + repo_id: this.repository.id }); const ctx = await this.scoutProject(prompt); const ruleGaps = PromptLinter.lint(prompt, ctx); - const semanticGaps = await this.lintSemantic(prompt, ctx); + const semanticGaps = semantic === false ? 
[] : await this.lintSemantic(prompt, ctx); const gaps = PromptLinter.mergeGaps(ruleGaps, semanticGaps); CommandCenterDashboard.log(`Found ${gaps.length} total gaps (Rule: ${ruleGaps.length}, Semantic: ${semanticGaps.length}).`); - return { content: [{ type: "text", text: JSON.stringify({ gaps, context: ctx }) }] }; + return { content: [{ type: "text", text: JSON.stringify({ promptId, gaps, context: ctx }) }] }; } case "create_questions": { const { gaps } = z.object({ gaps: z.array(z.any()) }).parse(request.params.arguments); @@ -349,8 +492,13 @@ Output ONLY the JSON array. If no gaps, return [].`, const ctx = await this.scoutProject(original_prompt); const promptId = `ref_${Date.now()}`; - const refined = PromptRefiner.refine(original_prompt, ctx, answers, promptId); - const gain = PromptRefiner.calculateGain(original_prompt, refined, ctx); + const approvedTemplates = await this.templateSelector.select({ + repoId: this.repository.id, + prompt: original_prompt, + }); + const refinementContext = { approvedTemplates }; + const refined = PromptRefiner.refine(original_prompt, ctx, answers, promptId, refinementContext); + const gain = PromptRefiner.calculateGain(original_prompt, refined, ctx, refinementContext); this.eventStore.recordPrompt({ id: promptId, @@ -359,11 +507,11 @@ Output ONLY the JSON array. If no gaps, return [].`, raw_prompt: original_prompt, normalized_prompt: refined, intent: "refine", - repo_id: path.basename(this.rootPath) + repo_id: this.repository.id }); CommandCenterDashboard.setLastRefinement(original_prompt, refined, this.rootPath, gain); - CommandCenterDashboard.log(`Refinement Complete. Quality Gain: ${gain}%. Injected ${ctx.learnedPatterns?.length || 0} Mandates.`); + CommandCenterDashboard.log(`Refinement Complete. Quality Gain: ${gain}%. 
Injected ${ctx.learnedPatterns?.length || 0} mandates and ${approvedTemplates.length} approved templates.`); return { content: [{ type: "text", text: refined }] }; } @@ -381,7 +529,7 @@ Output ONLY the JSON array. If no gaps, return [].`, } try { - const proposals = JSON.parse(responseText); + const proposals = parseStructuredResponse>(responseText); for (const p of proposals) { LocalBrain.savePattern({ ...p, isProposed: true }, this.rootPath); } @@ -398,6 +546,28 @@ Output ONLY the JSON array. If no gaps, return [].`, CommandCenterDashboard.log(`Rule Approved: ${id}`); return { content: [{ type: "text", text: "Rule '" + id + "' promoted!" }] }; } + case "list_learning_candidates": { + const candidates = this.eventStore.getLearningCandidates(this.repository.id); + return { content: [{ type: "text", text: JSON.stringify(candidates) }] }; + } + case "review_lesson": { + const { id, approved } = z.object({ id: z.string(), approved: z.boolean() }).parse(request.params.arguments); + const changed = this.eventStore.reviewLesson(this.repository.id, id, approved); + if (!changed) { + throw new McpError(ErrorCode.InvalidParams, `Pending lesson not found: ${id}`); + } + CommandCenterDashboard.log(`Lesson ${approved ? "approved" : "rejected"}: ${id}`); + return { content: [{ type: "text", text: `Lesson ${id} ${approved ? "approved" : "rejected"}.` }] }; + } + case "review_template": { + const { id, approved } = z.object({ id: z.string(), approved: z.boolean() }).parse(request.params.arguments); + const changed = this.eventStore.reviewTemplate(this.repository.id, id, approved); + if (!changed) { + throw new McpError(ErrorCode.InvalidParams, `Pending template not found: ${id}`); + } + CommandCenterDashboard.log(`Template ${approved ? "approved" : "rejected"}: ${id}`); + return { content: [{ type: "text", text: `Template ${id} ${approved ? 
"approved" : "rejected"}.` }] }; + } case "ingest_pattern": { const args = z.object({ id: z.string(), @@ -473,17 +643,18 @@ Output ONLY the JSON array. If no gaps, return [].`, } case "generate_templates": { CommandCenterDashboard.log(`Executing Autonomous Template Synthesis...`); - const repoId = path.basename(this.rootPath); + const repoId = this.repository.id; const generator = new TemplateGenerator(this.requestModelText.bind(this)); await generator.generateNewTemplates(repoId); CommandCenterDashboard.log(`Template Synthesis complete.`); return { content: [{ type: "text", text: "Successfully completed template generation cycle." }] }; } case "record_agent_output": { - const { prompt_id, output_summary, artifacts_json } = z.object({ + const { prompt_id, output_summary, artifacts_json, status } = z.object({ prompt_id: z.string(), output_summary: z.string(), - artifacts_json: z.string().optional() + artifacts_json: z.string().optional(), + status: z.enum(["completed", "failed"]).optional() }).parse(request.params.arguments); CommandCenterDashboard.log(`Recording agent output for prompt ${prompt_id.substring(0, 10)}...`); @@ -498,7 +669,7 @@ Output ONLY the JSON array. If no gaps, return [].`, prompt_id: prompt_id, workflow_name: "external-agent", executor_name: agentName, - status: "completed", + status: status || "completed", started_at: now, ended_at: now, result_summary: output_summary, @@ -507,7 +678,7 @@ Output ONLY the JSON array. If no gaps, return [].`, } else { this.eventStore.updateExecution({ id: execution.id, - status: "completed", + status: status || "completed", ended_at: now, result_summary: output_summary, artifacts_json: artifacts_json || execution.artifacts_json @@ -516,6 +687,36 @@ Output ONLY the JSON array. 
If no gaps, return [].`, return { content: [{ type: "text", text: `Successfully recorded output for prompt ${prompt_id}.` }] }; } + case "evaluate_prompt": { + const { prompt, baseline_prompt } = z.object({ + prompt: z.string(), + baseline_prompt: z.string().optional(), + }).parse(request.params.arguments); + const evaluation = baseline_prompt ? evaluatePrompt(prompt, baseline_prompt) : evaluatePrompt(prompt); + return { content: [{ type: "text", text: JSON.stringify(evaluation) }] }; + } + case "compare_prompt_variants": { + const outcomeSchema = z.object({ + status: z.enum(["completed", "failed", "cancelled"]), + testsPassed: z.number().nonnegative().optional(), + testsFailed: z.number().nonnegative().optional(), + reworkCount: z.number().nonnegative().optional(), + }); + const { baseline_prompt, variant_a, variant_b, outcome_a, outcome_b } = z.object({ + baseline_prompt: z.string(), + variant_a: z.string(), + variant_b: z.string(), + outcome_a: outcomeSchema.optional(), + outcome_b: outcomeSchema.optional(), + }).parse(request.params.arguments); + const experiment = createABEvaluationRecord({ + experimentId: `exp_${Date.now()}`, + baselinePrompt: baseline_prompt, + variantA: { id: "A", prompt: variant_a, observedOutcome: outcome_a }, + variantB: { id: "B", prompt: variant_b, observedOutcome: outcome_b }, + }); + return { content: [{ type: "text", text: JSON.stringify(experiment) }] }; + } default: throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`); } @@ -536,7 +737,8 @@ Output ONLY the JSON array. 
If no gaps, return [].`, // Start Background Autonomy this.backgroundAutonomy = new BackgroundAutonomyService( this.rootPath, - this.requestModelText.bind(this) + this.requestModelText.bind(this), + 30_000, // AUTO-03: poll git every 30s ); this.backgroundAutonomy.start(); diff --git a/universal-refiner/src/core/structured-response.ts b/universal-refiner/src/core/structured-response.ts new file mode 100644 index 0000000..9c659c4 --- /dev/null +++ b/universal-refiner/src/core/structured-response.ts @@ -0,0 +1,28 @@ +export function parseStructuredResponse(response: string): T { + const trimmed = response.trim(); + const unfenced = trimmed + .replace(/^```(?:json)?\s*/i, "") + .replace(/\s*```$/, "") + .trim(); + + try { + return JSON.parse(unfenced) as T; + } catch { + const objectStart = unfenced.indexOf("{"); + const arrayStart = unfenced.indexOf("["); + const starts = [objectStart, arrayStart].filter(index => index >= 0); + if (starts.length === 0) { + throw new Error("Structured response did not contain JSON."); + } + + const start = Math.min(...starts); + const opening = unfenced[start]; + const closing = opening === "{" ? 
"}" : "]"; + const end = unfenced.lastIndexOf(closing); + if (end <= start) { + throw new Error("Structured response contained incomplete JSON."); + } + + return JSON.parse(unfenced.slice(start, end + 1)) as T; + } +} diff --git a/universal-refiner/src/evaluation/prompt-evaluator.ts b/universal-refiner/src/evaluation/prompt-evaluator.ts new file mode 100644 index 0000000..032410d --- /dev/null +++ b/universal-refiner/src/evaluation/prompt-evaluator.ts @@ -0,0 +1,196 @@ +export type EvaluationDimension = + | "intentPreservation" + | "specificity" + | "actionability" + | "testability" + | "riskControls"; + +export interface DimensionEvaluation { + score: number; + maximum: 20; + evidence: readonly string[]; +} + +export interface DeterministicPromptEvaluation { + heuristicScore: number; + maximumScore: 100; + dimensions: Readonly>; + disclaimer: "Deterministic evidence indicators only; this is not an LLM quality judgment."; +} + +export interface PromptComparison { + original: DeterministicPromptEvaluation; + refined: DeterministicPromptEvaluation; + heuristicDelta: number; + heuristicPreference: "original" | "refined" | "tie"; +} + +export interface ObservedVariantOutcome { + status: "completed" | "failed" | "cancelled"; + testsPassed?: number; + testsFailed?: number; + reworkCount?: number; +} + +export interface ABVariantRecord { + id: string; + prompt: string; + evaluation: DeterministicPromptEvaluation; + observedOutcome?: ObservedVariantOutcome; +} + +export interface ABEvaluationRecord { + experimentId: string; + createdAt: string; + baselinePrompt: string; + variantA: ABVariantRecord; + variantB: ABVariantRecord; + heuristicPreference: "A" | "B" | "tie"; + observedWinner?: "A" | "B"; + interpretation: "heuristic-only" | "observed-evidence"; +} + +const DISCLAIMER = "Deterministic evidence indicators only; this is not an LLM quality judgment." 
as const; +const WORD_PATTERN = /[a-z0-9][a-z0-9_.\\/-]*/g; + +export function evaluatePrompt(prompt: string, baselinePrompt = prompt): DeterministicPromptEvaluation { + const promptTokens = tokenize(prompt); + const baselineTokens = tokenize(baselinePrompt); + + const dimensions: Record = { + intentPreservation: evaluateIntentPreservation(promptTokens, baselineTokens), + specificity: evaluateSpecificity(prompt), + actionability: evaluateActionability(prompt), + testability: evaluateTestability(prompt), + riskControls: evaluateRiskControls(prompt) + }; + + return { + heuristicScore: Object.values(dimensions).reduce((total, dimension) => total + dimension.score, 0), + maximumScore: 100, + dimensions, + disclaimer: DISCLAIMER + }; +} + +export function comparePrompts(original: string, refined: string): PromptComparison { + const originalEvaluation = evaluatePrompt(original, original); + const refinedEvaluation = evaluatePrompt(refined, original); + const heuristicDelta = refinedEvaluation.heuristicScore - originalEvaluation.heuristicScore; + + return { + original: originalEvaluation, + refined: refinedEvaluation, + heuristicDelta, + heuristicPreference: heuristicDelta > 0 ? "refined" : heuristicDelta < 0 ? "original" : "tie" + }; +} + +export function createABEvaluationRecord(input: { + experimentId: string; + baselinePrompt: string; + variantA: Omit; + variantB: Omit; + createdAt?: string; +}): ABEvaluationRecord { + const variantA: ABVariantRecord = { + ...input.variantA, + evaluation: evaluatePrompt(input.variantA.prompt, input.baselinePrompt) + }; + const variantB: ABVariantRecord = { + ...input.variantB, + evaluation: evaluatePrompt(input.variantB.prompt, input.baselinePrompt) + }; + const scoreDelta = variantA.evaluation.heuristicScore - variantB.evaluation.heuristicScore; + const observedWinner = determineObservedWinner(variantA.observedOutcome, variantB.observedOutcome); + + return { + experimentId: input.experimentId, + createdAt: input.createdAt ?? 
new Date().toISOString(), + baselinePrompt: input.baselinePrompt, + variantA, + variantB, + heuristicPreference: scoreDelta > 0 ? "A" : scoreDelta < 0 ? "B" : "tie", + observedWinner, + interpretation: observedWinner ? "observed-evidence" : "heuristic-only" + }; +} + +function evaluateIntentPreservation(promptTokens: ReadonlySet, baselineTokens: ReadonlySet): DimensionEvaluation { + if (baselineTokens.size === 0) return dimension(20, ["empty-baseline"]); + const overlap = [...baselineTokens].filter(token => promptTokens.has(token)).length; + const ratio = overlap / baselineTokens.size; + return dimension(Math.round(ratio * 20), [`baseline-token-coverage:${Math.round(ratio * 100)}%`]); +} + +function evaluateSpecificity(prompt: string): DimensionEvaluation { + return indicatorDimension([ + ["file-or-path-reference", /(?:[a-z0-9_-]+[\\/])+[a-z0-9_.-]+|[a-z0-9_-]+\.(?:ts|js|py|md|json|yml|yaml)/i, 6], + ["named-interface-or-symbol", /\b(?:interface|class|function|module|type|API|schema|endpoint)\b/i, 5], + ["explicit-constraint", /\b(?:must|only|do not|without|preserve|limit|scope|ownership)\b/i, 5], + ["concrete-value", /\b\d+(?:\.\d+)?%?\b/, 4] + ], prompt); +} + +function evaluateActionability(prompt: string): DimensionEvaluation { + return indicatorDimension([ + ["implementation-verb", /\b(?:add|build|create|edit|fix|implement|remove|replace|update|wire)\b/i, 7], + ["ordered-or-bulleted-steps", /(?:^|\n)\s*(?:[-*]|\d+\.)\s+/m, 5], + ["deliverable", /\b(?:report|return|output|result|changed files|artifact)\b/i, 4], + ["acceptance-language", /\b(?:acceptance|success criteria|should|must)\b/i, 4] + ], prompt); +} + +function evaluateTestability(prompt: string): DimensionEvaluation { + return indicatorDimension([ + ["test-requirement", /\b(?:test|tests|coverage)\b/i, 7], + ["verification-requirement", /\b(?:verify|verification|validate|build|lint|smoke)\b/i, 6], + ["observable-outcome", /\b(?:pass|fail|working|expected|result|report)\b/i, 4], + ["test-command", 
/\b(?:npm|pnpm|yarn|vitest|jest|pytest|dotnet)\s+(?:run\s+)?(?:test|build|lint)\b/i, 3] + ], prompt); +} + +function evaluateRiskControls(prompt: string): DimensionEvaluation { + return indicatorDimension([ + ["error-or-fallback-handling", /\b(?:error|failure|fallback|retry|timeout|rollback)\b/i, 6], + ["security-or-privacy", /\b(?:security|secret|token|privacy|OWASP|sanitize|validate input)\b/i, 5], + ["compatibility-or-regression", /\b(?:compatibility|regression|preserve|existing behavior|backward)\b/i, 5], + ["scope-boundary", /\b(?:do not edit|only|scope|ownership|unrelated changes)\b/i, 4] + ], prompt); +} + +function indicatorDimension(indicators: ReadonlyArray, prompt: string): DimensionEvaluation { + const matched = indicators.filter(([, pattern]) => pattern.test(prompt)); + return dimension( + matched.reduce((total, [, , score]) => total + score, 0), + matched.map(([name]) => name) + ); +} + +function dimension(score: number, evidence: readonly string[]): DimensionEvaluation { + return { score: Math.min(Math.max(score, 0), 20), maximum: 20, evidence }; +} + +function tokenize(value: string): Set { + return new Set(value.toLowerCase().match(WORD_PATTERN) ?? []); +} + +function determineObservedWinner( + outcomeA: ObservedVariantOutcome | undefined, + outcomeB: ObservedVariantOutcome | undefined +): "A" | "B" | undefined { + if (!outcomeA || !outcomeB) return undefined; + + const scoreA = observedOutcomeScore(outcomeA); + const scoreB = observedOutcomeScore(outcomeB); + if (scoreA === scoreB) return undefined; + return scoreA > scoreB ? "A" : "B"; +} + +function observedOutcomeScore(outcome: ObservedVariantOutcome): number { + const statusScore = outcome.status === "completed" ? 1_000 : outcome.status === "cancelled" ? 100 : 0; + return statusScore + + (outcome.testsPassed ?? 0) + - ((outcome.testsFailed ?? 0) * 10) + - ((outcome.reworkCount ?? 
0) * 5); +} diff --git a/universal-refiner/src/history/commit-ingest.ts b/universal-refiner/src/history/commit-ingest.ts index 1d6f0dc..e6cd8dd 100644 --- a/universal-refiner/src/history/commit-ingest.ts +++ b/universal-refiner/src/history/commit-ingest.ts @@ -1,5 +1,4 @@ -import { execSync } from "child_process"; -import * as path from "path"; +import { execFileSync } from "child_process"; import { EventStore } from "./event-store.js"; import { RuntimeLogger } from "../core/logger.js"; @@ -36,7 +35,7 @@ export class CommitIngester { async ingest(repoPath: string, limit = 10): Promise { try { RuntimeLogger.info(`Starting commit ingestion for ${repoPath}...`); - const repoId = path.basename(repoPath); + const repoId = this.eventStore.ensureRepository(repoPath).id; const lastSha = this.eventStore.getLastCommitSha(repoId); const commits = this.getLatestCommits(repoPath, limit, lastSha); @@ -68,23 +67,23 @@ export class CommitIngester { private getLatestCommits(repoPath: string, limit: number, lastSha: string | null = null): GitCommit[] { // Format: SHA|Author|Date(ISO)|Message const format = "%H|%an|%ai|%s"; - let logCommand = `git log -n ${limit} --pretty=format:"${format}"`; + let logArgs = ["log", "-n", String(limit), `--pretty=format:${format}`]; if (lastSha) { // Fetch all commits from lastSha to HEAD // We use --reverse to ingest in chronological order - logCommand = `git log ${lastSha}..HEAD --pretty=format:"${format}" --reverse`; + logArgs = ["log", `${lastSha}..HEAD`, `--pretty=format:${format}`, "--reverse"]; RuntimeLogger.info(`Fetching commits since ${lastSha.substring(0, 7)}...`); } let logOutput: string; try { - logOutput = execSync(logCommand, { cwd: repoPath, encoding: "utf-8" }); + logOutput = this.runGit(repoPath, logArgs); } catch (error) { // Fallback if lastSha is not found in the repo (e.g. 
force push or shallow clone issues) if (lastSha) { RuntimeLogger.warn(`Failed to fetch commits since ${lastSha}, falling back to last ${limit} commits.`); - logOutput = execSync(`git log -n ${limit} --pretty=format:"${format}"`, { cwd: repoPath, encoding: "utf-8" }); + logOutput = this.runGit(repoPath, ["log", "-n", String(limit), `--pretty=format:${format}`]); } else { throw error; } @@ -98,20 +97,14 @@ export class CommitIngester { const [sha, author, date, message] = line.split("|"); // Get changed files - const filesOutput = execSync( - `git show --name-only --pretty=format: ${sha}`, - { cwd: repoPath, encoding: "utf-8" } - ); + const filesOutput = this.runGit(repoPath, ["show", "--name-only", "--pretty=format:", sha]); const files = filesOutput.split("\n").filter(f => f.trim().length > 0); // Get diff stats (shortstat) // Example output: " 1 file changed, 10 insertions(+), 5 deletions(-)" let stats = { insertions: 0, deletions: 0 }; try { - const statOutput = execSync( - `git show --shortstat --pretty=format: ${sha}`, - { cwd: repoPath, encoding: "utf-8" } - ).trim(); + const statOutput = this.runGit(repoPath, ["show", "--shortstat", "--pretty=format:", sha]).trim(); if (statOutput) { const insMatch = statOutput.match(/(\d+) insertion/); @@ -128,4 +121,12 @@ export class CommitIngester { return commits; } + + private runGit(repoPath: string, args: string[]): string { + return execFileSync("git", args, { + cwd: repoPath, + encoding: "utf-8", + stdio: ["ignore", "pipe", "pipe"], + }); + } } diff --git a/universal-refiner/src/history/event-store.ts b/universal-refiner/src/history/event-store.ts index 46a63d1..2599967 100644 --- a/universal-refiner/src/history/event-store.ts +++ b/universal-refiner/src/history/event-store.ts @@ -4,26 +4,39 @@ import * as os from "os"; import * as fs from "fs"; import { SCHEMA_V1 } from "./schema.js"; import { RuntimeLogger } from "../core/logger.js"; +import { RepositoryIdentity, resolveRepositoryIdentity } from 
"./repository-identity.js"; +import type { PromptTemplateCandidate } from "../refiners/template-selector.js"; export class EventStore { private db: Database.Database; + private dbPath: string; private static instance: EventStore | null = null; - private constructor() { - const dbPath = this.getDatabasePath(); - this.ensureDirectory(path.dirname(dbPath)); - this.db = new Database(dbPath); - this.initializeSchema(); + private constructor(dbPath: string) { + this.dbPath = dbPath; + this.ensureDirectory(path.dirname(this.dbPath)); + this.db = new Database(this.dbPath); + try { + this.initializeSchema(); + } catch (error) { + this.db.close(); + throw error; + } } static getInstance(): EventStore { - if (!this.instance) { - this.instance = new EventStore(); + const dbPath = this.resolveDatabasePath(); + if (!this.instance || this.instance.dbPath !== dbPath) { + try { + this.instance?.close(); + } catch { + } + this.instance = new EventStore(dbPath); } return this.instance; } - private getDatabasePath(): string { + private static resolveDatabasePath(): string { const globalDir = process.env.PROMPT_REFINER_GLOBAL_DIR || path.join(os.homedir(), ".refiner"); return path.join(globalDir, "events.db"); } @@ -36,6 +49,13 @@ export class EventStore { private initializeSchema() { try { + try { + this.db.pragma("journal_mode = WAL"); + } catch (error) { + RuntimeLogger.warn("EventStore could not enable WAL mode; continuing with the current journal mode", error); + } + this.db.pragma("busy_timeout = 5000"); + this.db.pragma("foreign_keys = ON"); this.db.exec(SCHEMA_V1); RuntimeLogger.info("EventStore schema initialized successfully."); } catch (error) { @@ -233,7 +253,7 @@ export class EventStore { ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
`); - stmt.run( + const result = stmt.run( commit.id, commit.repo_id, commit.sha, @@ -245,6 +265,10 @@ export class EventStore { commit.diff_stats_json || "{}" ); + if (result.changes === 0) { + return; + } + this.recordEvent({ id: `evt_${commit.sha}`, event_type: "commit_detected", @@ -360,18 +384,137 @@ export class EventStore { ); } + ensureRepository(repoPath: string): RepositoryIdentity { + const identity = resolveRepositoryIdentity(repoPath); + const now = new Date().toISOString(); + const existing = this.db.prepare("SELECT id FROM repos WHERE path = ?").get(identity.path) as { id: string } | undefined; + if (existing) { + return { ...identity, id: existing.id }; + } + + const transaction = this.db.transaction(() => { + this.db.prepare(` + INSERT OR IGNORE INTO repos (id, path, name, trusted, created_at, updated_at) + VALUES (?, ?, ?, 1, ?, ?) + `).run(identity.id, identity.path, identity.name, now, now); + + const tables = ["sessions", "prompts", "commits", "events", "lessons", "prompt_clusters", "prompt_templates"]; + for (const table of tables) { + this.db.prepare(`UPDATE ${table} SET repo_id = ? WHERE repo_id = ?`).run(identity.id, identity.legacyId); + } + }); + transaction(); + RuntimeLogger.info("Registered canonical repository identity", { + repoId: identity.id, + path: identity.path, + legacyId: identity.legacyId, + }); + return identity; + } + + getTemplates(repoId: string): PromptTemplateCandidate[] { + return this.db.prepare(` + SELECT + id, + repo_id AS repoId, + category, + title, + template_text AS templateText, + usage_notes AS usageNotes, + success_score AS successScore, + approved, + deprecated + FROM prompt_templates + WHERE repo_id = ? AND approved = 1 AND deprecated = 0 + ORDER BY success_score DESC, updated_at DESC + LIMIT 100 + `).all(repoId) as PromptTemplateCandidate[]; + } + getRecentLessons(repoId: string, limit = 10): any[] { const stmt = this.db.prepare(` SELECT * FROM lessons WHERE repo_id = ? 
- AND (approved = 1 OR confidence = 'high') + AND approved = 1 ORDER BY created_at DESC LIMIT ? `); return stmt.all(repoId, limit); } + getLearningCandidates(repoId: string): { lessons: any[]; templates: any[] } { + return { + lessons: this.db.prepare(` + SELECT id, lesson_type, title, summary, confidence, source, created_at + FROM lessons WHERE repo_id = ? AND approved = 0 ORDER BY created_at DESC + `).all(repoId), + templates: this.db.prepare(` + SELECT id, category, title, template_text, usage_notes, success_score, source_type, created_at + FROM prompt_templates WHERE repo_id = ? AND approved = 0 AND deprecated = 0 ORDER BY created_at DESC + `).all(repoId), + }; + } + + async backup(destinationPath: string): Promise { + this.ensureDirectory(path.dirname(destinationPath)); + await this.db.backup(destinationPath); + + const backup = new Database(destinationPath, { readonly: true }); + try { + const result = backup.pragma("integrity_check", { simple: true }); + if (result !== "ok") { + throw new Error(`Backup integrity check failed: ${String(result)}`); + } + } finally { + backup.close(); + } + + return destinationPath; + } + + static restore(backupPath: string): EventStore { + if (!fs.existsSync(backupPath)) { + throw new Error(`Backup does not exist: ${backupPath}`); + } + + const backup = new Database(backupPath, { readonly: true }); + try { + const result = backup.pragma("integrity_check", { simple: true }); + if (result !== "ok") { + throw new Error(`Backup integrity check failed: ${String(result)}`); + } + } finally { + backup.close(); + } + + const dbPath = this.resolveDatabasePath(); + this.instance?.close(); + this.instance = null; + fs.copyFileSync(backupPath, dbPath); + this.instance = new EventStore(dbPath); + return this.instance; + } + + reviewLesson(repoId: string, id: string, approved: boolean): boolean { + const result = this.db.prepare(` + UPDATE lessons SET approved = ?, updated_at = ? WHERE repo_id = ? AND id = ? 
AND approved = 0 + `).run(approved ? 1 : -1, new Date().toISOString(), repoId, id); + return result.changes > 0; + } + + reviewTemplate(repoId: string, id: string, approved: boolean): boolean { + const result = approved + ? this.db.prepare(`UPDATE prompt_templates SET approved = 1, updated_at = ? WHERE repo_id = ? AND id = ? AND approved = 0 AND deprecated = 0`) + .run(new Date().toISOString(), repoId, id) + : this.db.prepare(`UPDATE prompt_templates SET deprecated = 1, updated_at = ? WHERE repo_id = ? AND id = ? AND approved = 0`) + .run(new Date().toISOString(), repoId, id); + return result.changes > 0; + } + close() { this.db.close(); + if (EventStore.instance === this) { + EventStore.instance = null; + } } } diff --git a/universal-refiner/src/history/git-poller.ts b/universal-refiner/src/history/git-poller.ts new file mode 100644 index 0000000..919c375 --- /dev/null +++ b/universal-refiner/src/history/git-poller.ts @@ -0,0 +1,74 @@ +import { EventEmitter } from "node:events"; +import { CommitIngester } from "./commit-ingest.js"; +import { RuntimeLogger } from "../core/logger.js"; +import { CommandCenterDashboard } from "../core/dashboard.js"; + +export const DEFAULT_POLL_INTERVAL_MS = 30_000; + +/** + * GitPoller satisfies AUTO-03: continuously monitors a git repository for new + * commits on a configurable interval, independent of file system events. + * + * Emits "commits" with the count of newly ingested commits whenever the poll + * detects work. Consumers (BackgroundAutonomyService) subscribe to "commits" + * to trigger the full learning pipeline (AUTO-04). + */ +export class GitPoller extends EventEmitter { + private readonly repoPath: string; + private readonly intervalMs: number; + private timer: NodeJS.Timeout | null = null; + private running = false; + + constructor(repoPath: string, intervalMs = DEFAULT_POLL_INTERVAL_MS) { + super(); + this.repoPath = repoPath; + this.intervalMs = intervalMs; + } + + /** Start polling. 
Idempotent; calling twice is a no-op. */ + public start(): void { + if (this.running) { + return; + } + this.running = true; + RuntimeLogger.info("[GitPoller] Starting git commit polling", { + repoPath: this.repoPath, + intervalMs: this.intervalMs, + }); + CommandCenterDashboard.log(`Background Autonomy: Git polling every ${this.intervalMs / 1000}s.`); + this.timer = setInterval(() => void this.poll(), this.intervalMs); + } + + /** Stop polling and release the interval. */ + public stop(): void { + if (!this.running) { + return; + } + this.running = false; + if (this.timer !== null) { + clearInterval(this.timer); + this.timer = null; + } + RuntimeLogger.info("[GitPoller] Stopped git commit polling"); + } + + /** + * One-shot poll: ingest any new commits. If new commits are found, emits + * "commits" with the count so subscribers can trigger downstream pipelines. + * Safe to call externally for an immediate check. + */ + public async poll(): Promise { + try { + const count = await CommitIngester.ingestLatest(this.repoPath, 50); + if (count > 0) { + RuntimeLogger.debug(`[GitPoller] Ingested ${count} new commit(s)`, { repoPath: this.repoPath }); + CommandCenterDashboard.log(`Background Autonomy: ${count} new commit(s) detected.`); + this.emit("commits", count); + } + return count; + } catch (err) { + RuntimeLogger.error("[GitPoller] Poll failed", err); + return 0; + } + } +} diff --git a/universal-refiner/src/history/lesson-extractor.ts b/universal-refiner/src/history/lesson-extractor.ts index 48ecbf6..80af3d6 100644 --- a/universal-refiner/src/history/lesson-extractor.ts +++ b/universal-refiner/src/history/lesson-extractor.ts @@ -1,5 +1,6 @@ import { EventStore } from "./event-store.js"; import { RuntimeLogger } from "../core/logger.js"; +import { parseStructuredResponse } from "../core/structured-response.js"; export class LessonExtractor { private eventStore: EventStore; @@ -21,80 +22,13 @@ export class LessonExtractor { JOIN execution_commits ec ON e.id = 
ec.execution_id JOIN commits c ON ec.commit_id = c.id LEFT JOIN lessons l ON p.id = l.prompt_id AND c.id = l.commit_id - WHERE l.id IS NULL + WHERE l.id IS NULL AND e.status = 'completed' `).all(); for (const pair of unanalyzedPairs) { await this.analyzePair(pair); } - // 2. Also analyze successful executions that might not be linked to commits yet - const standaloneExecutions = db.prepare(` - SELECT p.id as prompt_id, p.raw_prompt, p.normalized_prompt, e.id as execution_id, e.result_summary, p.repo_id - FROM prompts p - JOIN executions e ON p.id = e.prompt_id - LEFT JOIN lessons l ON p.id = l.prompt_id - WHERE e.status = 'completed' - AND l.id IS NULL - LIMIT 10 - `).all(); - - for (const exec of standaloneExecutions) { - await this.analyzeExecution(exec); - } - } - - private async analyzeExecution(exec: any) { - RuntimeLogger.info(`Analyzing Successful Execution for lesson: ${exec.execution_id}`); - - const analysisPrompt = ` -Act as a senior software architect. Analyze this successful prompt execution and extract a reusable "Engineering Mandate" for future prompts in this project. - -USER PROMPT: -"${exec.raw_prompt}" - -REFINED PROMPT: -"${exec.normalized_prompt || "N/A"}" - -EXECUTION SUMMARY: -"${exec.result_summary || "Task completed successfully."}" - -What is the reusable "lesson" here? (e.g., "When using framework X, ensure the prompt specifies Y for proper Z"). - -Output a JSON object: -{ - "title": "Short descriptive title", - "summary": "The reusable engineering mandate", - "lesson_type": "architecture | security | quality | convention", - "confidence": "high | medium | low" -} - -Output ONLY the JSON object. 
-`; - - const response = await this.requestModelText("Execution lesson extraction", analysisPrompt, 1000); - if (!response) return; - - try { - const lessonData = JSON.parse(response); - const lessonId = `lsn_exec_${Date.now()}_${Math.floor(Math.random() * 1000)}`; - - this.eventStore.recordLesson({ - id: lessonId, - repo_id: exec.repo_id, - prompt_id: exec.prompt_id, - execution_id: exec.execution_id, - lesson_type: lessonData.lesson_type, - title: lessonData.title, - summary: lessonData.summary, - confidence: lessonData.confidence, - source: "execution-analysis" - }); - - RuntimeLogger.info(`Successfully extracted execution lesson: ${lessonData.title}`); - } catch (error) { - RuntimeLogger.error("Failed to parse execution lesson JSON", error); - } } private async analyzePair(pair: any) { @@ -130,7 +64,12 @@ Output ONLY the JSON object. if (!response) return; try { - const lessonData = JSON.parse(response); + const lessonData = parseStructuredResponse<{ + title: string; + summary: string; + lesson_type: string; + confidence: string; + }>(response); const lessonId = `lsn_${Date.now()}_${Math.floor(Math.random() * 1000)}`; this.eventStore.recordLesson({ diff --git a/universal-refiner/src/history/repository-identity.ts b/universal-refiner/src/history/repository-identity.ts new file mode 100644 index 0000000..43951e7 --- /dev/null +++ b/universal-refiner/src/history/repository-identity.ts @@ -0,0 +1,23 @@ +import { createHash } from "node:crypto"; +import * as path from "node:path"; + +export interface RepositoryIdentity { + id: string; + legacyId: string; + name: string; + path: string; +} + +export function resolveRepositoryIdentity(repoPath: string): RepositoryIdentity { + const canonicalPath = path.resolve(repoPath); + const normalizedPath = canonicalPath.replace(/\\/g, "/").toLowerCase(); + const hash = createHash("sha256").update(normalizedPath).digest("hex").slice(0, 16); + const name = path.basename(canonicalPath); + + return { + id: `repo_${hash}`, + 
legacyId: name, + name, + path: canonicalPath, + }; +} diff --git a/universal-refiner/src/history/template-generator.ts b/universal-refiner/src/history/template-generator.ts index 36a2099..4b5656a 100644 --- a/universal-refiner/src/history/template-generator.ts +++ b/universal-refiner/src/history/template-generator.ts @@ -1,5 +1,6 @@ import { EventStore } from "./event-store.js"; import { RuntimeLogger } from "../core/logger.js"; +import { parseStructuredResponse } from "../core/structured-response.js"; export class TemplateGenerator { private eventStore: EventStore; @@ -68,7 +69,15 @@ Output ONLY the JSON object. if (!response) return; try { - const data = JSON.parse(response); + const data = parseStructuredResponse<{ + templates: Array<{ + name: string; + category: string; + template_text: string; + usage_notes: string; + success_score: number; + }>; + }>(response); for (const t of data.templates) { const templateId = `tpl_${Date.now()}_${Math.floor(Math.random() * 1000)}`; this.eventStore.recordTemplate({ diff --git a/universal-refiner/src/index.ts b/universal-refiner/src/index.ts index c8028f7..2dca833 100644 --- a/universal-refiner/src/index.ts +++ b/universal-refiner/src/index.ts @@ -1,9 +1,8 @@ -#!/usr/bin/env node +#!/usr/bin/env node import { PromptRefinerServer } from "./core/server.js"; import { CommandCenterDashboard } from "./core/dashboard.js"; -import { CommitIngester } from "./history/commit-ingest.js"; -import { CorrelationEngine } from "./history/correlation-engine.js"; -import { LessonExtractor } from "./history/lesson-extractor.js"; +import { FileWatcher } from "./watcher/index.js"; +import { RuntimeLogger } from "./core/logger.js"; import * as path from "path"; // Start the Web Dashboard in the background @@ -13,16 +12,15 @@ CommandCenterDashboard.start(port, rootPath); const server = new PromptRefinerServer(rootPath); -// Initial background ingestion and correlation -async function runBackgroundTasks() { - await 
CommitIngester.ingestLatest(rootPath); - const correlation = new CorrelationEngine(); - await correlation.correlateAll(); - - const extractor = new LessonExtractor((task, prompt, tokens) => server.requestModelText(task, prompt, tokens)); - await extractor.extractNewLessons(); -} -void runBackgroundTasks(); +// Phase 1 (AUTO-01, AUTO-02): Real-time file system watcher +// Starts watching for meaningful source/prompt file changes and logs them. +const fileWatcher = new FileWatcher(rootPath); +fileWatcher.on("change", (evt) => { + RuntimeLogger.info(`[FS] ${evt.event}: ${evt.path}`); + CommandCenterDashboard.log(`[FS] ${evt.event}: ${path.relative(rootPath, evt.path)}`); +}); +fileWatcher.start(); + server.run().catch((error) => { console.error("[FATAL ERROR]", error); process.exit(1); diff --git a/universal-refiner/src/memory/local-brain.ts b/universal-refiner/src/memory/local-brain.ts index 91cc1f3..56b2fac 100644 --- a/universal-refiner/src/memory/local-brain.ts +++ b/universal-refiner/src/memory/local-brain.ts @@ -49,17 +49,19 @@ export class LocalBrain { try { const data = JSON.parse(fs.readFileSync(storagePath, "utf-8")); + const patterns: LearnedPattern[] = Array.isArray(data.patterns) ? 
data.patterns : []; const newPattern: LearnedPattern = { ...pattern, learnedAt: new Date().toISOString() }; - const existingIndex = data.patterns.findIndex((p: any) => p.id === pattern.id); + const existingIndex = patterns.findIndex((p: LearnedPattern) => p.id === pattern.id); if (existingIndex >= 0) { - data.patterns[existingIndex] = newPattern; + patterns[existingIndex] = newPattern; } else { - data.patterns.push(newPattern); + patterns.push(newPattern); } + data.patterns = patterns; fs.writeFileSync(storagePath, JSON.stringify(data, null, 2)); return newPattern; @@ -75,7 +77,8 @@ export class LocalBrain { try { const data = JSON.parse(fs.readFileSync(storagePath, "utf-8")); - const pattern = data.patterns.find((p: LearnedPattern) => p.id === id); + const patterns: LearnedPattern[] = Array.isArray(data.patterns) ? data.patterns : []; + const pattern = patterns.find((p: LearnedPattern) => p.id === id); if (pattern) { pattern.isProposed = false; fs.writeFileSync(storagePath, JSON.stringify(data, null, 2)); diff --git a/universal-refiner/src/refiners/prompt-refiner.ts b/universal-refiner/src/refiners/prompt-refiner.ts index 1a9954f..8420dbb 100644 --- a/universal-refiner/src/refiners/prompt-refiner.ts +++ b/universal-refiner/src/refiners/prompt-refiner.ts @@ -1,24 +1,33 @@ import { ProjectContext } from "../detectors/project-scout.js"; +import type { SelectedApprovedTemplate } from "./template-selector.js"; + +export interface RefinementContext { + approvedTemplates?: readonly SelectedApprovedTemplate[]; +} export class PromptRefiner { - static calculateGain(original: string, refined: string, ctx: ProjectContext): number { - const originalLen = original.length; - const refinedLen = refined.length; - - // Base gain from length/detail - let gain = (refinedLen / originalLen) * 10; - - // Context bonuses + static calculateGain(original: string, refined: string, ctx: ProjectContext, refinementContext: RefinementContext = {}): number { + // Score evidence-backed context 
enrichment, not output verbosity. + let gain = 10; + + if (original.trim().length > 0 && refined.includes(original)) gain += 5; if (ctx.relevantSnippets && ctx.relevantSnippets.length > 0) gain += 25; if (ctx.learnedPatterns && ctx.learnedPatterns.length > 0) gain += 15; if (ctx.customMandates && ctx.customMandates.length > 0) gain += 20; if (ctx.predictiveLessons && ctx.predictiveLessons.length > 0) gain += 15; + if (refinementContext.approvedTemplates && refinementContext.approvedTemplates.length > 0) gain += 15; if (ctx.framework !== "Unknown") gain += 10; return Math.min(Math.round(gain), 100); } - static refine(originalPrompt: string, ctx: ProjectContext, answers: Record, promptId?: string): string { + static refine( + originalPrompt: string, + ctx: ProjectContext, + answers: Record, + promptId?: string, + refinementContext: RefinementContext = {} + ): string { let refined = `**REFINED PROMPT (COMMAND CENTER v10.0)**\n`; if (promptId) { refined += `[PROMPT_ID: ${promptId}]\n`; @@ -63,6 +72,17 @@ export class PromptRefiner { } } + if (refinementContext.approvedTemplates && refinementContext.approvedTemplates.length > 0) { + refined += `\n### Approved Prompt Templates\n`; + refined += `Use these reviewed templates as context. 
Adapt them to the task; do not replace the user's intent.\n\n`; + for (const template of refinementContext.approvedTemplates) { + refined += `**${template.title}** (${template.category}; relevance ${template.relevanceScore})\n`; + refined += `${template.templateText.trim()}\n`; + if (template.usageNotes) refined += `Usage notes: ${template.usageNotes.trim()}\n`; + refined += `\n`; + } + } + refined += `\n### 💡 User Requirements\n`; for (const [key, value] of Object.entries(answers)) { refined += `- ${key}: ${value}\n`; diff --git a/universal-refiner/src/refiners/template-selector.ts b/universal-refiner/src/refiners/template-selector.ts new file mode 100644 index 0000000..5224f79 --- /dev/null +++ b/universal-refiner/src/refiners/template-selector.ts @@ -0,0 +1,114 @@ +export interface PromptTemplateCandidate { + id: string; + repoId: string; + category: string; + title: string; + templateText: string; + usageNotes?: string; + successScore: number; + approved: boolean | number; + deprecated?: boolean | number; +} + +export interface ApprovedTemplateSource { + getTemplates(repoId: string): Promise | readonly PromptTemplateCandidate[]; +} + +export interface TemplateSelectionRequest { + repoId: string; + prompt: string; + category?: string; + limit?: number; +} + +export interface SelectedApprovedTemplate { + id: string; + category: string; + title: string; + templateText: string; + usageNotes?: string; + relevanceScore: number; + selectionReasons: readonly string[]; +} + +const MAX_TEMPLATE_LENGTH = 4_000; +const WORD_PATTERN = /[a-z0-9][a-z0-9_-]*/g; + +export class ApprovedTemplateSelector { + constructor(private readonly source: ApprovedTemplateSource) {} + + async select(request: TemplateSelectionRequest): Promise { + const limit = Math.max(0, Math.min(request.limit ?? 
3, 10)); + if (limit === 0) return []; + + const promptTokens = tokenize(request.prompt); + const category = request.category?.trim().toLowerCase() || inferCategory(promptTokens); + const candidates = await this.source.getTemplates(request.repoId); + + return candidates + .filter(template => template.repoId === request.repoId) + .filter(template => isEnabled(template.approved) && !isEnabled(template.deprecated)) + .map(template => rankTemplate(template, promptTokens, category)) + .sort((left, right) => + right.relevanceScore - left.relevanceScore + || right.successScore - left.successScore + || left.id.localeCompare(right.id) + ) + .slice(0, limit) + .map(({ successScore: _successScore, ...selection }) => selection); + } +} + +function rankTemplate( + template: PromptTemplateCandidate, + promptTokens: ReadonlySet, + category: string | undefined +): SelectedApprovedTemplate & { successScore: number } { + const templateTokens = tokenize(`${template.title} ${template.category} ${template.usageNotes ?? ""} ${template.templateText}`); + const overlap = [...promptTokens].filter(token => templateTokens.has(token)).length; + const lexicalScore = promptTokens.size === 0 ? 0 : Math.round((overlap / promptTokens.size) * 50); + const categoryMatches = Boolean(category && template.category.toLowerCase() === category); + const categoryScore = categoryMatches ? 25 : 0; + const successScore = clamp(template.successScore, 0, 100); + const historicalScore = Math.round(successScore * 0.25); + const selectionReasons = [ + ...(categoryMatches ? [`category:${category}`] : []), + ...(overlap > 0 ? 
[`shared-keywords:${overlap}`] : []), + `approved-history:${Math.round(successScore)}` + ]; + + return { + id: template.id, + category: template.category, + title: template.title, + templateText: template.templateText.slice(0, MAX_TEMPLATE_LENGTH), + usageNotes: template.usageNotes?.slice(0, MAX_TEMPLATE_LENGTH), + relevanceScore: clamp(lexicalScore + categoryScore + historicalScore, 0, 100), + selectionReasons, + successScore + }; +} + +function inferCategory(tokens: ReadonlySet): string | undefined { + if (hasAny(tokens, ["fix", "bug", "defect", "error", "failure"])) return "bugfix"; + if (hasAny(tokens, ["test", "tests", "coverage", "verify", "verification"])) return "test"; + if (hasAny(tokens, ["refactor", "cleanup", "simplify", "restructure"])) return "refactor"; + if (hasAny(tokens, ["add", "build", "create", "implement", "feature"])) return "feature"; + return undefined; +} + +function tokenize(value: string): Set { + return new Set(value.toLowerCase().match(WORD_PATTERN) ?? []); +} + +function hasAny(tokens: ReadonlySet, candidates: readonly string[]): boolean { + return candidates.some(candidate => tokens.has(candidate)); +} + +function isEnabled(value: boolean | number | undefined): boolean { + return value === true || value === 1; +} + +function clamp(value: number, minimum: number, maximum: number): number { + return Math.min(Math.max(Number.isFinite(value) ? 
value : minimum, minimum), maximum); +} diff --git a/universal-refiner/src/watcher/file-watcher.ts b/universal-refiner/src/watcher/file-watcher.ts new file mode 100644 index 0000000..5a9ae22 --- /dev/null +++ b/universal-refiner/src/watcher/file-watcher.ts @@ -0,0 +1,162 @@ +import { EventEmitter } from "node:events"; +import { watch as chokidarWatch, FSWatcher } from "chokidar"; +import { RuntimeLogger } from "../core/logger.js"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export type FileEventKind = "add" | "change" | "unlink"; + +export interface FileChangeEvent { + path: string; + event: FileEventKind; + timestamp: Date; +} + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +/** File extensions considered "meaningful" for AUTO-01 */ +export const MEANINGFUL_EXTENSIONS = new Set([".ts", ".js", ".md", ".txt", ".prompt"]); + +/** + * Path segments that mark noise directories (AUTO-02). + * Used as a secondary in-process guard after chokidar's ignore patterns. + */ +export const NOISE_PATH_SEGMENTS = ["node_modules", "dist", ".git", "coverage", ".pytest_cache"]; + +/** File suffixes that mark noise files (AUTO-02). */ +export const NOISE_SUFFIXES = [".log", ".tmp"]; + +/** Paths and patterns passed to chokidar `ignored` option (AUTO-02). */ +const CHOKIDAR_IGNORE: (string | RegExp)[] = [ + "**/node_modules/**", + "**/dist/**", + "**/.git/**", + "**/*.log", + "**/*.tmp", + "**/coverage/**", + "**/.pytest_cache/**", +]; + +// --------------------------------------------------------------------------- +// FileWatcher +// --------------------------------------------------------------------------- + +/** + * FileWatcher wraps chokidar with a focused, typed EventEmitter interface. 
+ * + * Responsibilities: + * - Detect meaningful source/prompt file changes (AUTO-01) + * - Filter transient/noise paths before emitting (AUTO-02) + * + * Usage: + * const watcher = new FileWatcher('/path/to/project'); + * watcher.on('change', (evt) => console.log(evt)); + * watcher.start(); + * // later + * await watcher.stop(); + */ +export class FileWatcher extends EventEmitter { + private readonly rootPath: string; + private inner: FSWatcher | null = null; + + constructor(rootPath: string) { + super(); + this.rootPath = rootPath; + } + + // ------------------------------------------------------------------ + // Public API + // ------------------------------------------------------------------ + + /** Start watching. Idempotent — calling twice is a no-op. */ + public start(): void { + if (this.inner) { + return; + } + + RuntimeLogger.info("[FileWatcher] Starting file system watcher", { + rootPath: this.rootPath, + }); + + this.inner = chokidarWatch(this.rootPath, { + ignored: CHOKIDAR_IGNORE, + persistent: true, + ignoreInitial: true, + awaitWriteFinish: { + stabilityThreshold: 100, + pollInterval: 50, + }, + }); + + this.inner.on("add", (filePath) => this.emitChange("add", filePath)); + this.inner.on("change", (filePath) => this.emitChange("change", filePath)); + this.inner.on("unlink", (filePath) => this.emitChange("unlink", filePath)); + this.inner.on("error", (err: unknown) => { + RuntimeLogger.error("[FileWatcher] Watcher error", err); + if (this.listenerCount("error") > 0) { + this.emit("error", err instanceof Error ? err : new Error(String(err))); + } + }); + } + + /** Stop watching and release all resources. 
*/ + public async stop(): Promise { + if (!this.inner) { + return; + } + await this.inner.close(); + this.inner = null; + RuntimeLogger.info("[FileWatcher] Stopped file system watcher"); + } + + // ------------------------------------------------------------------ + // Internal helpers + // ------------------------------------------------------------------ + + /** + * Apply both AUTO-01 and AUTO-02 filters before emitting. + * + * Two-layer defence: + * 1. chokidar `ignored` patterns (coarse, path-glob based) + * 2. In-process extension + segment checks (precise, cross-platform) + * + * The in-process layer catches edge cases where chokidar's glob ignore + * may lag (e.g. directories created before the watcher starts on Windows). + */ + private emitChange(kind: FileEventKind, filePath: string): void { + // Normalise separators for consistent matching on Windows + const normalised = filePath.replace(/\\/g, "/"); + + // AUTO-02: segment-level noise filter + for (const seg of NOISE_PATH_SEGMENTS) { + if (normalised.includes(`/${seg}/`) || normalised.includes(`/${seg}`)) { + return; + } + } + + // AUTO-01: extension filter — only emit for meaningful file types + const dotIdx = normalised.lastIndexOf("."); + const ext = dotIdx >= 0 ? normalised.slice(dotIdx).toLowerCase() : ""; + + // AUTO-02: suffix-level noise filter (e.g. 
.log, .tmp) + if (NOISE_SUFFIXES.includes(ext)) { + return; + } + + if (!MEANINGFUL_EXTENSIONS.has(ext)) { + return; + } + + const evt: FileChangeEvent = { + path: filePath, + event: kind, + timestamp: new Date(), + }; + RuntimeLogger.debug(`[FileWatcher] ${kind}: ${filePath}`); + this.emit("change", evt); + } +} diff --git a/universal-refiner/src/watcher/index.ts b/universal-refiner/src/watcher/index.ts new file mode 100644 index 0000000..56676cf --- /dev/null +++ b/universal-refiner/src/watcher/index.ts @@ -0,0 +1,2 @@ +export { FileWatcher } from "./file-watcher.js"; +export type { FileChangeEvent, FileEventKind } from "./file-watcher.js"; diff --git a/universal-refiner/tests/acceptance/mcp-tools.acceptance.test.ts b/universal-refiner/tests/acceptance/mcp-tools.acceptance.test.ts new file mode 100644 index 0000000..df57466 --- /dev/null +++ b/universal-refiner/tests/acceptance/mcp-tools.acceptance.test.ts @@ -0,0 +1,150 @@ +import { mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { PromptRefinerServer } from "../../src/core/server.js"; +import { EventStore } from "../../src/history/event-store.js"; + +const handlers: Array<(request: unknown) => unknown> = []; +vi.mock("@modelcontextprotocol/sdk/server/index.js", () => ({ + Server: class { + setRequestHandler = vi.fn((_schema, handler) => handlers.push(handler)); + connect = vi.fn(); + createMessage = vi.fn(); + }, +})); +vi.mock("@modelcontextprotocol/sdk/server/stdio.js", () => ({ StdioServerTransport: vi.fn() })); + +describe("MCP all-tool acceptance", () => { + let directory: string; + + beforeEach(() => { + directory = mkdtempSync(join(tmpdir(), "mcp-tools-acceptance-")); + process.env.PROMPT_REFINER_GLOBAL_DIR = directory; + (EventStore as unknown as { instance: EventStore | null }).instance = null; + }); + + afterEach(() => { + const holder = 
EventStore as unknown as { instance: EventStore | null }; + holder.instance?.close(); + holder.instance = null; + rmSync(directory, { recursive: true, force: true }); + }); + + it("advertises valid schemas and implements a dispatcher case for every tool", async () => { + handlers.length = 0; + new PromptRefinerServer("."); + const listResponse = await handlers[0]({}) as { + tools: Array<{ name: string; description: string; inputSchema: { type: string; properties: Record; required?: string[] } }>; + }; + const source = readFileSync(new URL("../../src/core/server.ts", import.meta.url), "utf8"); + const names = listResponse.tools.map(tool => tool.name); + + expect(new Set(names).size).toBe(names.length); + expect(names).toHaveLength(19); + for (const tool of listResponse.tools) { + expect(tool.description.length).toBeGreaterThan(10); + expect(tool.inputSchema.type).toBe("object"); + expect(tool.inputSchema.properties).toBeDefined(); + expect(source).toContain(`case "${tool.name}"`); + for (const required of tool.inputSchema.required ?? []) { + expect(tool.inputSchema.properties).toHaveProperty(required); + } + } + }); + + it("dispatches deterministic evaluation and A/B comparison tools", async () => { + handlers.length = 0; + new PromptRefinerServer("."); + const dispatch = handlers[1] as (request: unknown) => Promise<{ content: Array<{ type: string; text: string }> }>; + + const evaluation = await dispatch({ + params: { + name: "evaluate_prompt", + arguments: { prompt: "Implement src/auth.ts and run npm test." 
}, + }, + }); + expect(JSON.parse(evaluation.content[0].text)).toMatchObject({ maximumScore: 100 }); + + const comparison = await dispatch({ + params: { + name: "compare_prompt_variants", + arguments: { + baseline_prompt: "Fix login", + variant_a: "Fix login", + variant_b: "Fix login in src/auth.ts and run npm test.", + outcome_a: { status: "failed", testsFailed: 1 }, + outcome_b: { status: "completed", testsPassed: 4 }, + }, + }, + }); + expect(JSON.parse(comparison.content[0].text)).toMatchObject({ + observedWinner: "B", + interpretation: "observed-evidence", + }); + }); + + it("executes every advertised dispatcher path with valid arguments", async () => { + handlers.length = 0; + const server = new PromptRefinerServer(directory); + (server as any).requestModelText = vi.fn(async (taskName: string) => { + if (taskName === "Rule discovery") return "[]"; + if (taskName.startsWith("Prompt Optimization")) return "---REWRITTEN PROMPT---\nImproved prompt"; + return "model response"; + }); + const list = await handlers[0]({}) as { tools: Array<{ name: string }> }; + const dispatch = handlers[1] as (request: unknown) => Promise<{ content: Array<{ type: string; text: string }> }>; + const store = EventStore.getInstance(); + const repoId = (server as any).repository.id; + store.recordLesson({ + id: "pending-lesson", + repo_id: repoId, + lesson_type: "quality", + title: "Pending lesson", + summary: "Review me", + confidence: "high", + source: "test", + }); + store.recordTemplate({ + id: "pending-template", + repo_id: repoId, + category: "feature", + title: "Pending template", + template_text: "Implement and verify.", + usage_notes: "", + source_type: "test", + success_score: 90, + }); + + const args: Record> = { + lint_prompt: { prompt: "Implement a feature", semantic: false }, + create_questions: { gaps: [{ message: "Missing tests.", suggestedAction: "Add tests." 
}] }, + finalize_prompt: { original_prompt: "Implement a feature", answers: { scope: "src" } }, + proactive_suggest: { prompt: "Implement a feature" }, + generate_agent_onboarding: {}, + discover_rules: {}, + approve_rule: { id: "missing-rule" }, + list_learning_candidates: {}, + review_lesson: { id: "pending-lesson", approved: true }, + review_template: { id: "pending-template", approved: true }, + ingest_pattern: { id: "pattern", category: "quality", description: "Verify changes." }, + ingest_commits: { limit: 1 }, + derive_lessons: {}, + correlate_history: {}, + optimize_prompt: { prompt: "Implement a feature", iterations: 1 }, + generate_templates: {}, + record_agent_output: { prompt_id: "external-prompt", output_summary: "Completed." }, + evaluate_prompt: { prompt: "Implement src/a.ts and run npm test." }, + compare_prompt_variants: { + baseline_prompt: "Implement feature", + variant_a: "Implement feature", + variant_b: "Implement feature and run tests", + }, + }; + + for (const tool of list.tools) { + await expect(dispatch({ params: { name: tool.name, arguments: args[tool.name] } })) + .resolves.toHaveProperty("content"); + } + }); +}); diff --git a/universal-refiner/tests/acceptance/semantic-provider.acceptance.test.ts b/universal-refiner/tests/acceptance/semantic-provider.acceptance.test.ts new file mode 100644 index 0000000..9a033b3 --- /dev/null +++ b/universal-refiner/tests/acceptance/semantic-provider.acceptance.test.ts @@ -0,0 +1,56 @@ +import { afterEach, describe, expect, it } from "vitest"; +import { startFakeOpenAiServer } from "../../scripts/support/fake-openai-server.mjs"; +import { + LocalOpenAiProvider, + SemanticProvider, + SemanticProviderChain, +} from "../../src/core/semantic-provider.js"; + +describe("semantic provider acceptance", () => { + const servers: Array<{ close: () => Promise }> = []; + + afterEach(async () => { + await Promise.all(servers.splice(0).map(server => server.close())); + }); + + it.each(["gemma3:12b", 
"gemma3:1b"])("accepts configured local model %s", async model => { + const fake = await startFakeOpenAiServer({ responses: { [model]: `${model} accepted` } }); + servers.push(fake); + const provider = new LocalOpenAiProvider({ + baseUrl: fake.baseUrl, + models: [model], + timeoutMs: 1000, + temperature: 0, + allowNonLoopback: false, + }); + + await expect(provider.requestText({ taskName: "acceptance", prompt: "hello", maxTokens: 20 })) + .resolves.toMatchObject({ text: `${model} accepted`, model }); + }); + + it("falls back across models and then across providers during outages", async () => { + const fake = await startFakeOpenAiServer({ unavailableModels: ["gemma3:12b", "gemma3:1b"] }); + servers.push(fake); + const local = new LocalOpenAiProvider({ + baseUrl: fake.baseUrl, + models: ["gemma3:12b", "gemma3:1b"], + timeoutMs: 1000, + temperature: 0, + allowNonLoopback: false, + }); + const fallback: SemanticProvider = { + name: "deterministic-test", + requestText: async () => ({ + text: "fallback accepted", + provider: "deterministic-test", + model: "none", + latencyMs: 0, + }), + }; + + await expect(new SemanticProviderChain([local, fallback]) + .requestText({ taskName: "outage", prompt: "hello", maxTokens: 20 })) + .resolves.toBe("fallback accepted"); + expect(fake.requests.map(request => request.model)).toEqual(["gemma3:12b", "gemma3:1b"]); + }); +}); diff --git a/universal-refiner/tests/background-service.test.ts b/universal-refiner/tests/background-service.test.ts new file mode 100644 index 0000000..e4ff700 --- /dev/null +++ b/universal-refiner/tests/background-service.test.ts @@ -0,0 +1,114 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const mocks = vi.hoisted(() => ({ + watcher: { on: vi.fn(), close: vi.fn() }, + watch: vi.fn(), + ingest: vi.fn(), + correlate: vi.fn(), + extract: vi.fn(), + info: vi.fn(), + debug: vi.fn(), + error: vi.fn(), + dashboard: vi.fn(), + poller: { on: vi.fn(), start: vi.fn(), stop: vi.fn() }, + 
pollerConstructor: vi.fn(), +})); + +vi.mock("chokidar", () => ({ watch: mocks.watch })); +vi.mock("../src/history/commit-ingest.js", () => ({ CommitIngester: { ingestLatest: mocks.ingest } })); +vi.mock("../src/history/correlation-engine.js", () => ({ CorrelationEngine: class { correlateAll = mocks.correlate; } })); +vi.mock("../src/history/lesson-extractor.js", () => ({ LessonExtractor: class { extractNewLessons = mocks.extract; } })); +vi.mock("../src/core/logger.js", () => ({ RuntimeLogger: { info: mocks.info, debug: mocks.debug, error: mocks.error } })); +vi.mock("../src/core/dashboard.js", () => ({ CommandCenterDashboard: { log: mocks.dashboard } })); +vi.mock("../src/history/git-poller.js", () => ({ + GitPoller: class { + constructor(rootPath: string, interval: number) { + mocks.pollerConstructor(rootPath, interval); + } + on = mocks.poller.on; + start = mocks.poller.start; + stop = mocks.poller.stop; + }, +})); + +import { BackgroundAutonomyService } from "../src/core/background-service.js"; + +describe("BackgroundAutonomyService", () => { + beforeEach(() => { + vi.clearAllMocks(); + mocks.watch.mockReturnValue(mocks.watcher); + mocks.watcher.on.mockReturnValue(mocks.watcher); + mocks.ingest.mockResolvedValue(2); + mocks.correlate.mockResolvedValue(undefined); + mocks.extract.mockResolvedValue(undefined); + }); + + it("starts once, runs the initial serialized cycle, and stops the watcher", async () => { + const service = new BackgroundAutonomyService("C:/repo", vi.fn()); + service.start(); + service.start(); + await service.idle(); + service.stop(); + + expect(mocks.watch).toHaveBeenCalledTimes(1); + expect(mocks.ingest).toHaveBeenCalledWith("C:/repo", 100); + expect(mocks.correlate).toHaveBeenCalledBefore(mocks.extract); + expect(mocks.watcher.close).toHaveBeenCalledOnce(); + }); + + it("debounces file changes and logs cycle failures for queue retries", async () => { + vi.useFakeTimers(); + mocks.correlate.mockRejectedValue(new Error("correlation 
failed")); + const service = new BackgroundAutonomyService("C:/repo", vi.fn()); + service.start(); + const changeHandler = mocks.watcher.on.mock.calls.find(call => call[0] === "all")?.[1]; + changeHandler("change", "src/a.ts"); + changeHandler("change", "src/b.ts"); + await vi.runAllTimersAsync(); + await service.idle(); + service.stop(); + vi.useRealTimers(); + + expect(mocks.debug).toHaveBeenCalledWith(expect.stringContaining("src/b.ts")); + expect(mocks.error).toHaveBeenCalledWith("Background Autonomy cycle failed", expect.any(Error)); + }); + + it("starts and stops git polling and reacts to discovered commits", async () => { + vi.useFakeTimers(); + const service = new BackgroundAutonomyService("C:/repo", vi.fn(), 25); + service.start(); + await service.idle(); + + const commitHandler = mocks.poller.on.mock.calls.find(call => call[0] === "commits")?.[1]; + commitHandler(); + await vi.advanceTimersByTimeAsync(3000); + await service.idle(); + service.stop(); + vi.useRealTimers(); + + expect(mocks.pollerConstructor).toHaveBeenCalledWith("C:/repo", 25); + expect(mocks.poller.start).toHaveBeenCalledOnce(); + expect(mocks.poller.stop).toHaveBeenCalledOnce(); + expect(mocks.ingest).toHaveBeenCalledTimes(2); + }); + + it("coalesces a triggered cycle while the initial cycle is pending", async () => { + vi.useFakeTimers(); + let release!: () => void; + mocks.ingest.mockReturnValue(new Promise(resolve => { + release = () => resolve(1); + })); + const service = new BackgroundAutonomyService("C:/repo", vi.fn()); + service.start(); + const changeHandler = mocks.watcher.on.mock.calls.find(call => call[0] === "all")?.[1]; + changeHandler("change", "src/a.ts"); + await vi.advanceTimersByTimeAsync(3000); + + expect(mocks.debug).toHaveBeenCalledWith("Background autonomy cycle coalesced", { rootPath: "C:/repo" }); + release(); + await service.idle(); + service.stop(); + service.stop(); + vi.useRealTimers(); + }); +}); diff --git a/universal-refiner/tests/blackboard.test.ts 
b/universal-refiner/tests/blackboard.test.ts new file mode 100644 index 0000000..dcd9b0c --- /dev/null +++ b/universal-refiner/tests/blackboard.test.ts @@ -0,0 +1,202 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import * as fs from "fs"; +import * as os from "os"; +import * as path from "path"; +import { AgenticBlackboard, type AgentIntent, type SystemLog } from "../src/core/blackboard.js"; +import { RuntimeLogger } from "../src/core/logger.js"; + +describe("AgenticBlackboard", () => { + let tmpDir: string; + let globalDir: string; + + beforeEach(async () => { + await AgenticBlackboard.flushPendingWrites(); + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "blackboard-test-")); + fs.mkdirSync(path.join(tmpDir, ".refiner")); + globalDir = path.join(tmpDir, "global"); + process.env.PROMPT_REFINER_GLOBAL_DIR = globalDir; + }); + + afterEach(async () => { + await AgenticBlackboard.flushPendingWrites(); + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + vi.restoreAllMocks(); + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it("stores nested-project activity at the nearest project root", async () => { + const project = path.join(tmpDir, "project"); + const nested = path.join(project, "src", "feature"); + fs.mkdirSync(path.join(project, ".refiner"), { recursive: true }); + fs.mkdirSync(nested, { recursive: true }); + + AgenticBlackboard.postLog("nested activity", nested); + await AgenticBlackboard.flushPendingWrites(); + + const storagePath = path.join(project, ".refiner", "blackboard.json"); + expect(fs.existsSync(storagePath)).toBe(true); + expect(fs.existsSync(path.join(nested, ".refiner"))).toBe(false); + expect(AgenticBlackboard.getLogs(nested)[0]?.message).toBe("nested activity"); + }); + + it("initializes missing collections, bounds histories, and de-duplicates projects", async () => { + const refinerDir = path.join(tmpDir, ".refiner"); + const storagePath = path.join(refinerDir, "blackboard.json"); + 
const globalPath = path.join(globalDir, "global_history.json"); + const projectPath = path.resolve(tmpDir); + const projectLogs = Array.from({ length: 200 }, (_, index): SystemLog => ({ + timestamp: new Date(0).toISOString(), + message: `project-${index}`, + })); + const globalLogs = Array.from({ length: 500 }, (_, index): SystemLog => ({ + timestamp: new Date(0).toISOString(), + message: `global-${index}`, + })); + fs.mkdirSync(refinerDir, { recursive: true }); + fs.mkdirSync(globalDir); + fs.writeFileSync(storagePath, JSON.stringify({ activeIntents: [], logs: projectLogs })); + fs.writeFileSync(globalPath, JSON.stringify({ logs: globalLogs, projects: [projectPath] })); + + AgenticBlackboard.postLog("bounded", tmpDir); + await AgenticBlackboard.flushPendingWrites(); + + expect(AgenticBlackboard.getLogs(tmpDir)).toHaveLength(200); + expect(AgenticBlackboard.getGlobalData()).toMatchObject({ + logs: expect.arrayContaining([expect.objectContaining({ message: "bounded" })]), + projects: [projectPath], + }); + expect(AgenticBlackboard.getGlobalData().logs).toHaveLength(500); + + fs.writeFileSync(storagePath, "{}"); + fs.writeFileSync(globalPath, "{}"); + AgenticBlackboard.postLog("initialize arrays", tmpDir); + await AgenticBlackboard.flushPendingWrites(); + + expect(AgenticBlackboard.getLogs(tmpDir)[0]?.message).toBe("initialize arrays"); + expect(AgenticBlackboard.getGlobalData().projects).toEqual([projectPath]); + }); + + it("notifies healthy listeners, isolates failing listeners, and supports unsubscribe", async () => { + const healthy = vi.fn(); + const failing = vi.fn(() => { + throw new Error("listener failed"); + }); + const error = vi.spyOn(RuntimeLogger, "error").mockImplementation(() => undefined); + const unsubscribeHealthy = AgenticBlackboard.onUpdate(healthy); + const unsubscribeFailing = AgenticBlackboard.onUpdate(failing); + + AgenticBlackboard.postLog("first", tmpDir); + await AgenticBlackboard.flushPendingWrites(); + + 
expect(healthy).toHaveBeenCalledOnce(); + expect(failing).toHaveBeenCalledOnce(); + expect(error).toHaveBeenCalledWith("Listener notification failed", expect.any(Error)); + + unsubscribeHealthy(); + unsubscribeFailing(); + AgenticBlackboard.postLog("second", tmpDir); + await AgenticBlackboard.flushPendingWrites(); + + expect(healthy).toHaveBeenCalledOnce(); + expect(failing).toHaveBeenCalledOnce(); + }); + + it("replaces duplicate intents, removes expired intents, and preserves other active agents", async () => { + const refinerDir = path.join(tmpDir, ".refiner"); + const storagePath = path.join(refinerDir, "blackboard.json"); + const active = (agentName: string, expiresAt: string): AgentIntent => ({ + agentName, + toolType: "CLI", + intent: "existing", + timestamp: new Date(0).toISOString(), + expiresAt, + }); + fs.mkdirSync(refinerDir, { recursive: true }); + fs.writeFileSync(storagePath, JSON.stringify({ + logs: [], + activeIntents: [ + active("expired", new Date(0).toISOString()), + active("replacement", new Date(Date.now() + 60_000).toISOString()), + active("other", new Date(Date.now() + 60_000).toISOString()), + ], + })); + + const created = AgenticBlackboard.postIntent("replacement", "lint", "x".repeat(80), tmpDir); + await AgenticBlackboard.flushPendingWrites(); + + expect(created.projectPath).toBe(path.resolve(tmpDir)); + expect(new Date(created.expiresAt).getTime()).toBeGreaterThan(Date.now()); + expect(AgenticBlackboard.getActiveIntents(tmpDir).map(intent => intent.agentName)).toEqual([ + "other", + "replacement", + ]); + expect(AgenticBlackboard.getLogs(tmpDir)[0]?.message).toContain(`${"x".repeat(50)}...`); + + fs.writeFileSync(storagePath, "{}"); + AgenticBlackboard.postIntent("fresh", "lint", "new", tmpDir); + await AgenticBlackboard.flushPendingWrites(); + expect(AgenticBlackboard.getActiveIntents(tmpDir).map(intent => intent.agentName)).toEqual(["fresh"]); + }); + + it("returns safe fallbacks for missing, malformed, and incomplete project data", () 
=> { + const error = vi.spyOn(RuntimeLogger, "error").mockImplementation(() => undefined); + + expect(AgenticBlackboard.getLogs(tmpDir)).toEqual([]); + expect(AgenticBlackboard.getActiveIntents(tmpDir)).toEqual([]); + expect(AgenticBlackboard.getLastRefinement(tmpDir)).toBeNull(); + + const refinerDir = path.join(tmpDir, ".refiner"); + fs.mkdirSync(refinerDir, { recursive: true }); + fs.writeFileSync(path.join(refinerDir, "blackboard.json"), "{"); + + expect(AgenticBlackboard.getLogs(tmpDir)).toEqual([]); + expect(error).toHaveBeenCalledWith(expect.stringContaining("Failed to read JSON file"), expect.any(Error)); + + fs.writeFileSync(path.join(refinerDir, "blackboard.json"), "{}"); + expect(AgenticBlackboard.getLogs(tmpDir)).toEqual([]); + expect(AgenticBlackboard.getActiveIntents(tmpDir)).toEqual([]); + expect(AgenticBlackboard.getLastRefinement(tmpDir)).toBeNull(); + }); + + it("persists and returns the latest refinement with default gain", async () => { + await AgenticBlackboard.setLastRefinement("before", "after", tmpDir); + await AgenticBlackboard.flushPendingWrites(); + + expect(AgenticBlackboard.getLastRefinement(tmpDir)).toMatchObject({ + original: "before", + refined: "after", + gain: 0, + }); + expect(AgenticBlackboard.getLogs(tmpDir)[0]?.message).toBe("Refinement Complete (Gain: 0%)"); + }); + + it("logs and recovers from project-root and storage failures", async () => { + const error = vi.spyOn(RuntimeLogger, "error").mockImplementation(() => undefined); + const warn = vi.spyOn(RuntimeLogger, "warn").mockImplementation(() => undefined); + const blockedProject = path.join(tmpDir, "blocked-project"); + fs.mkdirSync(blockedProject); + fs.writeFileSync(path.join(blockedProject, ".refiner"), "not a directory"); + fs.writeFileSync(globalDir, "not a directory"); + + AgenticBlackboard.postLog("cannot persist", blockedProject); + await AgenticBlackboard.setLastRefinement("before", "after", blockedProject, 5); + await AgenticBlackboard.flushPendingWrites(); + + 
expect(AgenticBlackboard.getGlobalData()).toEqual({ logs: [], projects: [] }); + expect(error).toHaveBeenCalledWith("Failed to ensure project blackboard storage", expect.any(Error)); + expect(error).toHaveBeenCalledWith("Failed to ensure global blackboard storage", expect.any(Error)); + expect(error).toHaveBeenCalledWith(expect.stringContaining("Atomic update failed"), expect.any(Error)); + + expect(Array.isArray(AgenticBlackboard.getLogs(null as unknown as string))).toBe(true); + expect(warn).toHaveBeenCalledWith(expect.stringContaining("Failed to resolve project root"), expect.any(Error)); + }); + + it("uses the home-directory global store when no override is configured", () => { + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + + const data = AgenticBlackboard.getGlobalData(); + + expect(data).toHaveProperty("logs"); + expect(data).toHaveProperty("projects"); + }); +}); diff --git a/universal-refiner/tests/commit-ingest.test.ts b/universal-refiner/tests/commit-ingest.test.ts index 8da4a08..a80b39e 100644 --- a/universal-refiner/tests/commit-ingest.test.ts +++ b/universal-refiner/tests/commit-ingest.test.ts @@ -1,13 +1,13 @@ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; import { CommitIngester } from "../src/history/commit-ingest.js"; import { EventStore } from "../src/history/event-store.js"; -import { execSync } from "child_process"; +import { execFileSync } from "child_process"; import * as fs from "fs"; import * as path from "path"; import * as os from "os"; vi.mock("child_process", () => ({ - execSync: vi.fn() + execFileSync: vi.fn() })); describe("CommitIngester", () => { @@ -35,10 +35,10 @@ describe("CommitIngester", () => { const mockFiles = "file1.ts\nfile2.ts"; const mockStats = " 2 files changed, 15 insertions(+), 5 deletions(-)"; - (execSync as any).mockImplementation((cmd: string) => { - if (cmd.includes("git log")) return mockLog; - if (cmd.includes("git show --shortstat")) return mockStats; - if (cmd.includes("git show 
--name-only")) return mockFiles; + (execFileSync as any).mockImplementation((_file: string, args: string[]) => { + if (args.includes("log")) return mockLog; + if (args.includes("--shortstat")) return mockStats; + if (args.includes("--name-only")) return mockFiles; return ""; }); @@ -60,4 +60,79 @@ describe("CommitIngester", () => { expect(stats.insertions).toBe(15); expect(stats.deletions).toBe(5); }); + + it("fails quietly and returns zero for a non-git directory", async () => { + const gitError = new Error("not a git repository"); + (execFileSync as any).mockImplementation(() => { + throw gitError; + }); + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => undefined); + + const ingester = new CommitIngester(); + const count = await ingester.ingest(testDir, 1); + + expect(count).toBe(0); + expect(execFileSync).toHaveBeenCalledWith( + "git", + ["log", "-n", "1", "--pretty=format:%H|%an|%ai|%s"], + expect.objectContaining({ + cwd: testDir, + stdio: ["ignore", "pipe", "pipe"], + }), + ); + expect(errorSpy).toHaveBeenCalled(); + errorSpy.mockRestore(); + }); + + it("uses the static default limit and ignores an empty git log", async () => { + (execFileSync as any).mockReturnValue(""); + + await expect(CommitIngester.ingestLatest("C:/repo/empty")).resolves.toBe(0); + expect(execFileSync).toHaveBeenCalledWith( + "git", + ["log", "-n", "10", "--pretty=format:%H|%an|%ai|%s"], + expect.any(Object), + ); + }); + + it("falls back when the stored SHA is unavailable and tolerates incomplete stats", async () => { + const store = EventStore.getInstance(); + const identity = store.ensureRepository("C:/repo/rebased"); + store.recordCommit({ + id: "old", + repo_id: identity.id, + sha: "old-sha", + author: "author", + message: "old", + committed_at: "2026-01-01T00:00:00Z", + }); + + (execFileSync as any).mockImplementation((_file: string, args: string[]) => { + if (args[0] === "log" && args[1] === "old-sha..HEAD") { + throw new Error("unknown revision"); + } + if 
(args[0] === "log") { + return "\nsha-new|author|2026-01-02T00:00:00Z|new\nsha-statless|author|2026-01-03T00:00:00Z|statless"; + } + if (args.includes("--name-only")) { + return "\nsrc/new.ts\n"; + } + if (args.includes("--shortstat") && args.at(-1) === "sha-statless") { + throw new Error("shortstat unavailable"); + } + if (args.includes("--shortstat")) { + return " 1 file changed"; + } + return ""; + }); + + await expect(new CommitIngester().ingest("C:/repo/rebased", 2)).resolves.toBe(2); + + const rows = (store as any).db.prepare("SELECT sha, diff_stats_json FROM commits WHERE repo_id = ? ORDER BY sha").all(identity.id); + expect(rows).toMatchObject([ + { sha: "old-sha" }, + { sha: "sha-new", diff_stats_json: JSON.stringify({ insertions: 0, deletions: 0 }) }, + { sha: "sha-statless", diff_stats_json: JSON.stringify({ insertions: 0, deletions: 0 }) }, + ]); + }); }); diff --git a/universal-refiner/tests/config.test.ts b/universal-refiner/tests/config.test.ts index eedc2d9..8bc486a 100644 --- a/universal-refiner/tests/config.test.ts +++ b/universal-refiner/tests/config.test.ts @@ -1,8 +1,9 @@ -import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; import * as fs from "fs"; import * as path from "path"; import * as os from "os"; import { ConfigManager } from "../src/core/config.js"; +import { AgenticBlackboard } from "../src/core/blackboard.js"; describe("ConfigManager", () => { let tmpDir: string; @@ -12,6 +13,7 @@ describe("ConfigManager", () => { }); afterEach(() => { + vi.restoreAllMocks(); fs.rmSync(tmpDir, { recursive: true, force: true }); }); @@ -30,4 +32,151 @@ describe("ConfigManager", () => { const loaded = ConfigManager.loadConfig(tmpDir); expect(loaded).toEqual({}); }); + + it("should use quality-first local semantic defaults", () => { + const config = ConfigManager.getSemanticConfig(tmpDir); + expect(config.baseUrl).toBe("http://localhost:9000/v1"); + 
expect(config.models).toEqual(["gemma3:12b", "gemma3:1b"]); + expect(config.allowNonLoopback).toBe(false); + }); + + it("should merge semantic overrides with safe defaults", () => { + fs.writeFileSync(path.join(tmpDir, ".gemini-refiner.json"), JSON.stringify({ + semantic: { models: ["gemma3:1b"], timeoutMs: 5000 } + })); + + const config = ConfigManager.getSemanticConfig(tmpDir); + expect(config.models).toEqual(["gemma3:1b"]); + expect(config.timeoutMs).toBe(5000); + expect(config.allowNonLoopback).toBe(false); + }); + + it("should reject malformed semantic overrides", () => { + fs.writeFileSync(path.join(tmpDir, ".gemini-refiner.json"), JSON.stringify({ + semantic: { models: [42], timeoutMs: -1, temperature: 99, baseUrl: "" } + })); + + const config = ConfigManager.getSemanticConfig(tmpDir); + expect(config.models).toEqual(["gemma3:12b", "gemma3:1b"]); + expect(config.timeoutMs).toBe(120000); + expect(config.temperature).toBe(0.2); + expect(config.baseUrl).toBe("http://localhost:9000/v1"); + }); + + it("returns an empty config and reports invalid JSON", () => { + fs.writeFileSync(path.join(tmpDir, ".gemini-refiner.json"), "{"); + const error = vi.spyOn(console, "error").mockImplementation(() => undefined); + + expect(ConfigManager.loadConfig(tmpDir)).toEqual({}); + expect(error).toHaveBeenCalled(); + }); + + it("accepts all bounded semantic overrides", () => { + fs.writeFileSync(path.join(tmpDir, ".gemini-refiner.json"), JSON.stringify({ + semantic: { + localEnabled: false, + mcpSamplingEnabled: false, + baseUrl: " http://127.0.0.1:1234/v1 ", + models: [" primary ", " fallback "], + timeoutMs: 1, + temperature: 2, + allowNonLoopback: true, + }, + })); + + expect(ConfigManager.getSemanticConfig(tmpDir)).toEqual({ + localEnabled: false, + mcpSamplingEnabled: false, + baseUrl: "http://127.0.0.1:1234/v1", + models: ["primary", "fallback"], + timeoutMs: 1, + temperature: 2, + allowNonLoopback: true, + }); + }); + + it("rejects every invalid semantic override boundary", 
() => { + vi.spyOn(ConfigManager, "loadConfig").mockReturnValue({ + semantic: { + localEnabled: "yes" as unknown as boolean, + mcpSamplingEnabled: 1 as unknown as boolean, + baseUrl: 42 as unknown as string, + models: [], + timeoutMs: Number.POSITIVE_INFINITY, + temperature: -1, + allowNonLoopback: null as unknown as boolean, + }, + }); + + expect(ConfigManager.getSemanticConfig(tmpDir)).toEqual({ + localEnabled: true, + mcpSamplingEnabled: true, + baseUrl: "http://localhost:9000/v1", + models: ["gemma3:12b", "gemma3:1b"], + timeoutMs: 120000, + temperature: 0.2, + allowNonLoopback: false, + }); + }); + + it.each([ + [[""], "blank model"], + [["valid", " "], "one blank model"], + ["not-an-array", "non-array models"], + ])("rejects %s", (models) => { + vi.spyOn(ConfigManager, "loadConfig").mockReturnValue({ + semantic: { models: models as unknown as string[] }, + }); + + expect(ConfigManager.getSemanticConfig(tmpDir).models).toEqual(["gemma3:12b", "gemma3:1b"]); + }); + + it.each([ + [{ timeoutMs: "fast" as unknown as number }, "non-number timeout"], + [{ timeoutMs: 0 }, "zero timeout"], + [{ temperature: Number.NaN }, "non-finite temperature"], + [{ temperature: -0.1 }, "negative temperature"], + ])("rejects %s", (semantic) => { + vi.spyOn(ConfigManager, "loadConfig").mockReturnValue({ semantic }); + + const config = ConfigManager.getSemanticConfig(tmpDir); + expect(config.timeoutMs).toBe(120000); + expect(config.temperature).toBe(0.2); + }); + + it("uses default-path overloads without requiring a config file", () => { + expect(ConfigManager.loadConfig()).toEqual({}); + expect(ConfigManager.getSemanticConfig()).toMatchObject({ + localEnabled: true, + mcpSamplingEnabled: true, + }); + }); + + it("derives supported predictive mandates and ignores unsupported recurring keywords", () => { + vi.spyOn(AgenticBlackboard, "getLogs").mockReturnValue([ + { timestamp: "", message: "test security doc performance error refactor" }, + { timestamp: "", message: "TEST SECURITY DOC 
PERFORMANCE ERROR REFACTOR" }, + { timestamp: "", message: "test security doc performance error refactor" }, + { timestamp: "", message: "unrelated" }, + ]); + + expect(ConfigManager.getPredictiveMandates()).toEqual([ + "Predictive: You've asked for tests in 30% of recent prompts. Ensure comprehensive testing.", + "Predictive: Security is a recurring theme. Apply OWASP principles strictly.", + "Predictive: Frequent documentation requests detected. Ensure JSDoc/README updates.", + ]); + }); + + it("only considers the ten most recent logs and requires three keyword matches", () => { + vi.spyOn(AgenticBlackboard, "getLogs").mockReturnValue([ + { timestamp: "", message: "test" }, + { timestamp: "", message: "test" }, + ...Array.from({ length: 8 }, () => ({ timestamp: "", message: "unrelated" })), + { timestamp: "", message: "test security doc" }, + { timestamp: "", message: "security doc" }, + { timestamp: "", message: "security doc" }, + ]); + + expect(ConfigManager.getPredictiveMandates()).toEqual([]); + }); }); diff --git a/universal-refiner/tests/correlation.test.ts b/universal-refiner/tests/correlation.test.ts index 8fbe747..86812a0 100644 --- a/universal-refiner/tests/correlation.test.ts +++ b/universal-refiner/tests/correlation.test.ts @@ -81,4 +81,101 @@ describe("CorrelationEngine", () => { const link = db.prepare("SELECT * FROM execution_commits WHERE commit_id = ?").get("c_unrelated"); expect(link).toBeUndefined(); }); + + it("leaves a commit unlinked when there are no prompt candidates", async () => { + const store = EventStore.getInstance(); + const db = (store as any).db; + db.prepare("INSERT INTO commits (id, repo_id, sha, author, message, committed_at) VALUES (?, ?, ?, ?, ?, ?)") + .run("orphan", "repo", "sha-orphan", "author", "feat: orphan", "2026-04-12T10:00:00Z"); + + await new CorrelationEngine().correlateAll(); + + expect(db.prepare("SELECT * FROM execution_commits WHERE commit_id = ?").get("orphan")).toBeUndefined(); + }); + + it("uses file 
awareness and an existing execution when correlating", async () => { + const store = EventStore.getInstance(); + const db = (store as any).db; + store.recordPrompt({ + id: "file-prompt", + repo_id: "repo", + client: "test", + raw_prompt: "Update target.ts and docs", + normalized_prompt: "Modify target.ts", + timestamp: "2026-04-12T10:00:00Z", + }); + store.recordExecution({ + id: "existing-execution", + prompt_id: "file-prompt", + workflow_name: "manual", + executor_name: "test", + status: "completed", + }); + db.prepare("INSERT INTO commits (id, repo_id, sha, author, message, committed_at, changed_files_json) VALUES (?, ?, ?, ?, ?, ?, ?)") + .run("file-commit", "repo", "sha-file", "author", "chore: unrelated wording", "2026-04-12T10:01:00Z", JSON.stringify(["src/target.ts", "/", "a"])); + + await new CorrelationEngine().correlateAll(); + + expect(db.prepare("SELECT execution_id FROM execution_commits WHERE commit_id = ?").get("file-commit")) + .toEqual({ execution_id: "existing-execution" }); + }); + + it("tolerates malformed changed files while correlating by content", async () => { + const store = EventStore.getInstance(); + const db = (store as any).db; + store.recordPrompt({ + id: "short-prompt", + repo_id: "repo", + client: "test", + raw_prompt: "matching change", + timestamp: "2026-04-12T10:00:00Z", + }); + db.prepare("INSERT INTO commits (id, repo_id, sha, author, message, committed_at, changed_files_json) VALUES (?, ?, ?, ?, ?, ?, ?)") + .run("malformed-commit", "repo", "sha-malformed", "author", "matching change", "2026-04-12T10:01:00Z", "{"); + + await new CorrelationEngine().correlateAll(); + + expect(db.prepare("SELECT * FROM execution_commits WHERE commit_id = ?").get("malformed-commit")).toBeDefined(); + }); + + it("evaluates but does not correlate a prompt with no usable keywords", async () => { + const store = EventStore.getInstance(); + const db = (store as any).db; + store.recordPrompt({ + id: "short-prompt", + repo_id: "repo", + client: "test", 
+ raw_prompt: "a to an", + timestamp: "2026-04-12T10:00:00Z", + }); + db.prepare("INSERT INTO commits (id, repo_id, sha, author, message, committed_at) VALUES (?, ?, ?, ?, ?, ?)") + .run("short-commit", "repo", "sha-short", "author", "anything", "2026-04-12T10:01:00Z"); + + await new CorrelationEngine().correlateAll(); + + expect(db.prepare("SELECT * FROM execution_commits WHERE commit_id = ?").get("short-commit")).toBeUndefined(); + }); + + it("uses an empty changed-file list when defensive input omits it", () => { + const engine = new CorrelationEngine(); + const candidate = { + id: "defensive-prompt", + raw_prompt: "matching change", + normalized_prompt: null, + intent: null, + timestamp: "2026-04-12T10:00:00Z", + }; + (engine as any).eventStore = { + db: { + prepare: vi.fn().mockReturnValue({ all: vi.fn().mockReturnValue([candidate]) }), + }, + }; + + expect((engine as any).findBestPromptMatch({ + repo_id: "repo", + committed_at: "2026-04-12T10:01:00Z", + message: "matching change", + changed_files_json: null, + })).toBe(candidate); + }); }); diff --git a/universal-refiner/tests/dashboard-api.test.ts b/universal-refiner/tests/dashboard-api.test.ts new file mode 100644 index 0000000..872ed8b --- /dev/null +++ b/universal-refiner/tests/dashboard-api.test.ts @@ -0,0 +1,174 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import * as fs from "fs"; +import * as os from "os"; +import * as path from "path"; +import { CommandCenterDashboard, isSameOriginRequest } from "../src/core/dashboard.js"; +import { EventStore } from "../src/history/event-store.js"; + +describe("dashboard review and health APIs", () => { + const testDir = path.join(os.tmpdir(), `refiner-dashboard-api-${Date.now()}`); + const repoDir = path.join(testDir, "selected-repo"); + + beforeEach(() => { + fs.mkdirSync(repoDir, { recursive: true }); + process.env.PROMPT_REFINER_GLOBAL_DIR = path.join(testDir, "global"); + (EventStore as any).instance = null; + }); + + afterEach(() 
=> { + const instance = (EventStore as any).instance as EventStore | null; + instance?.close(); + (EventStore as any).instance = null; + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + fs.rmSync(testDir, { recursive: true, force: true }); + }); + + it("allows missing or same-origin origins and rejects cross-origin requests", () => { + expect(isSameOriginRequest(undefined, "http://127.0.0.1:3000/api/review/lesson/1")).toBe(true); + expect(isSameOriginRequest("http://127.0.0.1:3000", "http://127.0.0.1:3000/api/review/lesson/1")).toBe(true); + expect(isSameOriginRequest("https://attacker.example", "http://127.0.0.1:3000/api/review/lesson/1")).toBe(false); + }); + + it("reviews only pending candidates in the selected repository", async () => { + const store = EventStore.getInstance(); + const repoId = store.ensureRepository(repoDir).id; + store.recordLesson({ + id: "selected-lesson", + repo_id: repoId, + lesson_type: "quality", + title: "Selected", + summary: "Selected repo lesson", + confidence: "high", + source: "test", + }); + store.recordLesson({ + id: "other-lesson", + repo_id: "other-repo", + lesson_type: "quality", + title: "Other", + summary: "Other repo lesson", + confidence: "high", + source: "test", + }); + store.recordTemplate({ + id: "selected-template", + repo_id: repoId, + category: "quality", + title: "Selected template", + template_text: "Do the work", + usage_notes: "test", + source_type: "test", + success_score: 90, + }); + const app = CommandCenterDashboard.createApp(repoDir); + + const lessonResponse = await app.request("/api/review/lesson/selected-lesson", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ decision: "approve" }), + }); + expect(lessonResponse.status).toBe(200); + + const templateResponse = await app.request("/api/review/template/selected-template", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ decision: "reject" }), + }); + 
expect(templateResponse.status).toBe(200); + + const scopedResponse = await app.request("/api/review/lesson/other-lesson", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ decision: "approve" }), + }); + expect(scopedResponse.status).toBe(404); + + expect(store.getLearningCandidates(repoId)).toEqual({ lessons: [], templates: [] }); + expect(store.getLearningCandidates("other-repo").lessons).toHaveLength(1); + const db = (store as any).db; + expect(db.prepare("SELECT COUNT(*) AS count FROM events WHERE repo_id = ? AND event_type IN ('lesson_reviewed', 'template_reviewed')") + .get(repoId).count).toBe(2); + }); + + it("rejects cross-origin and malformed review mutations", async () => { + const app = CommandCenterDashboard.createApp(repoDir); + const crossOrigin = await app.request("http://localhost/api/review/lesson/x", { + method: "POST", + headers: { "content-type": "application/json", origin: "https://attacker.example" }, + body: JSON.stringify({ decision: "approve" }), + }); + expect(crossOrigin.status).toBe(403); + + const invalidDecision = await app.request("/api/review/lesson/x", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ decision: "delete" }), + }); + expect(invalidDecision.status).toBe(400); + + const invalidJson = await app.request("/api/review/lesson/x", { + method: "POST", + headers: { "content-type": "application/json" }, + body: "{", + }); + expect(invalidJson.status).toBe(400); + }); + + it("returns sanitized semantic provider and runtime health", async () => { + fs.writeFileSync(path.join(repoDir, ".gemini-refiner.json"), JSON.stringify({ + semantic: { + baseUrl: "http://secret-user:secret-pass@localhost:9000/v1?token=secret", + models: ["gemma3:12b"], + }, + })); + const store = EventStore.getInstance(); + const repoId = store.ensureRepository(repoDir).id; + store.recordEvent({ + id: "semantic-safe", + event_type: "semantic_request_completed", + 
repo_id: repoId, + summary: "safe", + timestamp: "2026-06-14T10:00:00.000Z", + details_json: JSON.stringify({ + taskName: "lint_prompt", + provider: "local-openai", + model: "gemma3:12b", + latencyMs: 125, + fallbackFrom: ["model:gemma3:1b"], + prompt: "private prompt", + apiKey: "private key", + }), + }); + const app = CommandCenterDashboard.createApp(repoDir); + + const response = await app.request("/api/health"); + const health = await response.json() as any; + const serialized = JSON.stringify(health); + + expect(response.status).toBe(200); + expect(health.semantic.status).toBe("healthy"); + expect(health.semantic.local.endpoint).toBe("http://localhost:9000"); + expect(health.semantic.totals).toEqual({ + completed: 1, + averageLatencyMs: 125, + fallbackCompletions: 1, + }); + expect(health.semantic.lastSuccess.provider).toBe("local-openai"); + expect(serialized).not.toContain("secret"); + expect(serialized).not.toContain("private prompt"); + expect(serialized).not.toContain("apiKey"); + }); + + it("renders review controls and provider health UI", async () => { + const app = CommandCenterDashboard.createApp(repoDir); + const response = await app.request("/"); + const html = await response.text(); + + expect(response.status).toBe(200); + expect(html).toContain("reviewCandidate"); + expect(html).toContain("Approve"); + expect(html).toContain("Reject"); + expect(html).toContain("PROVIDER HEALTH"); + expect(html).toContain("/api/health"); + }); +}); diff --git a/universal-refiner/tests/dashboard-coverage.test.ts b/universal-refiner/tests/dashboard-coverage.test.ts new file mode 100644 index 0000000..8347d2a --- /dev/null +++ b/universal-refiner/tests/dashboard-coverage.test.ts @@ -0,0 +1,189 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { AgenticBlackboard } from "../src/core/blackboard.js"; +import { ConfigManager } from 
"../src/core/config.js"; +import { CommandCenterDashboard, isSameOriginRequest } from "../src/core/dashboard.js"; +import { RuntimeLogger } from "../src/core/logger.js"; +import { ArchitecturalScout, NodeDetector, PythonDetector } from "../src/detectors/project-scout.js"; +import { EventStore } from "../src/history/event-store.js"; +import { TimelineProvider } from "../src/history/timeline.js"; + +describe("dashboard deterministic fallbacks", () => { + let directory: string; + let store: EventStore; + + beforeEach(() => { + directory = fs.mkdtempSync(path.join(os.tmpdir(), "dashboard-coverage-")); + process.env.PROMPT_REFINER_GLOBAL_DIR = path.join(directory, "global"); + (EventStore as unknown as { instance: EventStore | null }).instance = null; + store = EventStore.getInstance(); + vi.spyOn(console, "error").mockImplementation(() => undefined); + }); + + afterEach(() => { + vi.restoreAllMocks(); + (EventStore as unknown as { instance: EventStore | null }).instance?.close(); + (EventStore as unknown as { instance: EventStore | null }).instance = null; + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + fs.rmSync(directory, { recursive: true, force: true }); + }); + + it("rejects malformed origin URLs", () => { + expect(isSameOriginRequest("not a url", "also invalid")).toBe(false); + }); + + it("sanitizes endpoints without ports and logs plain route errors", () => { + expect((CommandCenterDashboard as any).buildHealth).toBeTypeOf("function"); + vi.spyOn(ConfigManager, "getSemanticConfig").mockReturnValue({ + localEnabled: false, + mcpSamplingEnabled: false, + baseUrl: "https://localhost/v1", + models: [], + timeoutMs: 1, + temperature: 0, + allowNonLoopback: false, + }); + expect((CommandCenterDashboard as any).buildHealth(directory).semantic.local.endpoint).toBe("https://localhost"); + expect(() => (CommandCenterDashboard as any).logRouteError("plain", "plain failure")).not.toThrow(); + }); + + it("builds fallback state and filters missing projects", async () => { + 
const missing = path.join(directory, "missing"); + vi.spyOn(AgenticBlackboard, "getGlobalData").mockReturnValue({ + projects: [missing], + } as any); + vi.spyOn(AgenticBlackboard, "getLogs").mockReturnValue([]); + vi.spyOn(AgenticBlackboard, "getActiveIntents").mockReturnValue([]); + vi.spyOn(AgenticBlackboard, "getLastRefinement").mockReturnValue(null); + vi.spyOn(ArchitecturalScout, "detectPatterns").mockResolvedValue([]); + vi.spyOn(NodeDetector, "detect").mockResolvedValue({}); + vi.spyOn(PythonDetector, "detect").mockResolvedValue({}); + CommandCenterDashboard.createApp(directory); + + const state = await (CommandCenterDashboard as any).buildState(directory); + + expect(state).toMatchObject({ + selectedPath: directory, + globalLogs: [], + stack: "Unknown", + framework: "None", + pattern: "Standard", + }); + expect(state.projects).toEqual([path.resolve(directory)]); + }); + + it("handles global dashboard data without a projects collection", async () => { + vi.spyOn(AgenticBlackboard, "getGlobalData").mockReturnValue({} as any); + CommandCenterDashboard.createApp(directory); + expect((CommandCenterDashboard as any).getVisibleProjects()).toEqual([path.resolve(directory)]); + }); + + it("sanitizes malformed, duplicated, and disabled health telemetry", () => { + vi.spyOn(ConfigManager, "getSemanticConfig").mockReturnValue({ + localEnabled: false, + mcpSamplingEnabled: false, + baseUrl: "invalid", + models: [], + timeoutMs: 1, + temperature: 0, + allowNonLoopback: false, + }); + const db = (store as any).db; + const repoId = store.ensureRepository(directory).id; + store.recordEvent({ + id: "malformed", + event_type: "semantic_request_completed", + repo_id: repoId, + summary: "malformed", + timestamp: "2026-01-02T00:00:00.000Z", + details_json: "{", + }); + store.recordEvent({ + id: "typed", + event_type: "semantic_request_completed", + repo_id: repoId, + summary: "typed", + timestamp: "2026-01-01T00:00:00.000Z", + details_json: JSON.stringify({ + taskName: 42, + 
provider: "local", + model: "m", + latencyMs: -4.6, + fallbackFrom: ["a", "", 3, "b"], + }), + }); + expect(db.prepare("SELECT COUNT(*) AS count FROM events").get().count).toBe(2); + + const health = (CommandCenterDashboard as any).buildHealth(directory); + + expect(health.semantic.status).toBe("disabled"); + expect(health.semantic.local.endpoint).toBe("invalid"); + expect(health.semantic.totals).toMatchObject({ + completed: 2, + averageLatencyMs: 0, + fallbackCompletions: 1, + }); + expect(health.semantic.providers).toEqual(expect.arrayContaining([ + expect.objectContaining({ provider: "unknown", averageLatencyMs: null }), + expect.objectContaining({ provider: "local", averageLatencyMs: 0, models: ["m"] }), + ])); + }); + + it("returns route-owned failures without leaking thrown details", async () => { + const app = CommandCenterDashboard.createApp(directory); + const routeCases: Array<[string, () => void]> = [ + ["/api/state", () => vi.spyOn(CommandCenterDashboard as any, "buildState").mockRejectedValueOnce(new Error("state secret"))], + ["/api/timeline", () => vi.spyOn(TimelineProvider.prototype, "getUnifiedTimeline").mockImplementationOnce(() => { throw new Error("timeline secret"); })], + ["/api/commits", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("commit secret"); })], + ["/api/lessons", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("lesson secret"); })], + ["/api/templates", () => vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw new Error("template secret"); })], + ["/api/health", () => vi.spyOn(CommandCenterDashboard as any, "buildHealth").mockImplementationOnce(() => { throw new Error("health secret"); })], + ["/", () => vi.spyOn(CommandCenterDashboard as any, "buildState").mockRejectedValueOnce("root failure")], + ]; + + for (const [route, arrange] of routeCases) { + arrange(); + const response = await app.request(route); + 
expect(response.status, route).toBe(500); + } + expect(RuntimeLogger.error).toBeDefined(); + }); + + it("renders an Error without a stack in the root failure page", async () => { + const app = CommandCenterDashboard.createApp(directory); + const error = new Error("root message"); + error.stack = ""; + vi.spyOn(CommandCenterDashboard as any, "buildState").mockRejectedValueOnce(error); + const response = await app.request("/"); + expect(await response.text()).toContain("root message"); + }); + + it("handles review persistence failures and successful template approval", async () => { + const app = CommandCenterDashboard.createApp(directory); + const request = (kind: string, id: string, decision: "approve" | "reject") => app.request( + `/api/review/${kind}/${id}`, + { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ decision }), + }, + ); + vi.spyOn(EventStore, "getInstance").mockImplementationOnce(() => { throw "review failure"; }); + expect((await request("lesson", "id", "approve")).status).toBe(500); + + const repoId = store.ensureRepository(directory).id; + store.recordTemplate({ + id: "template", + repo_id: repoId, + category: "quality", + title: "Template", + template_text: "Verify.", + usage_notes: "", + source_type: "test", + success_score: 1, + }); + expect((await request("template", "template", "approve")).status).toBe(200); + }); +}); diff --git a/universal-refiner/tests/dashboard-events.test.ts b/universal-refiner/tests/dashboard-events.test.ts new file mode 100644 index 0000000..18a55b7 --- /dev/null +++ b/universal-refiner/tests/dashboard-events.test.ts @@ -0,0 +1,228 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +const mocks = vi.hoisted(() => ({ + missingHtml: false, + streamSSE: vi.fn(), +})); + +vi.mock("hono/streaming", () => ({ + streamSSE: mocks.streamSSE, +})); 
+vi.mock("fs", async (importOriginal) => { + const actual = await importOriginal<typeof import("fs")>(); + return { + ...actual, + existsSync: (target: fs.PathLike) => mocks.missingHtml ? false : actual.existsSync(target), + }; +}); + +import { AgenticBlackboard } from "../src/core/blackboard.js"; +import { ConfigManager } from "../src/core/config.js"; +import { CommandCenterDashboard } from "../src/core/dashboard.js"; +import { ArchitecturalScout, NodeDetector, PythonDetector } from "../src/detectors/project-scout.js"; +import { EventStore } from "../src/history/event-store.js"; + +describe("dashboard event stream and render failures", () => { + let controller: AbortController; + let directory: string; + let store: EventStore; + + beforeEach(() => { + mocks.missingHtml = false; + controller = new AbortController(); + directory = fs.mkdtempSync(path.join(os.tmpdir(), "dashboard-events-")); + process.env.PROMPT_REFINER_GLOBAL_DIR = path.join(directory, "global"); + (EventStore as unknown as { instance: EventStore | null }).instance = null; + store = EventStore.getInstance(); + vi.spyOn(console, "error").mockImplementation(() => undefined); + }); + + afterEach(() => { + vi.restoreAllMocks(); + store.close(); + (EventStore as unknown as { instance: EventStore | null }).instance = null; + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + fs.rmSync(directory, { recursive: true, force: true }); + }); + + it("streams ready, update, and ping events, then unsubscribes on abort", async () => { + const writes: Array<{ event: string; data: string; id: string }> = []; + const unsubscribe = vi.fn(); + let pushUpdate: (() => void) | undefined; + vi.spyOn(AgenticBlackboard, "onUpdate").mockImplementation((callback) => { + pushUpdate = callback; + return unsubscribe; + }); + mocks.streamSSE.mockImplementation(async (context, callback) => { + const stream = { + writeSSE: vi.fn(async (event) => { + writes.push(event); + }), + sleep: vi.fn(async () => { + pushUpdate?.(); + controller.abort(); +
pushUpdate?.(); + }), + }; + await callback(stream); + return context.text("closed"); + }); + + const app = CommandCenterDashboard.createApp(directory); + const response = await app.request("/api/events", { signal: controller.signal }); + + expect(response.status).toBe(200); + expect(writes.map(({ event }) => event)).toEqual(["ready", "update", "ping"]); + expect(unsubscribe).toHaveBeenCalledOnce(); + }); + + it("forwards dashboard refinement and log updates to the blackboard", async () => { + const setLastRefinement = vi.spyOn(AgenticBlackboard, "setLastRefinement").mockResolvedValue(); + const postLog = vi.spyOn(AgenticBlackboard, "postLog").mockImplementation(() => undefined); + + await CommandCenterDashboard.setLastRefinement("before", "after", directory, 7); + CommandCenterDashboard.log("completed", directory); + + expect(setLastRefinement).toHaveBeenCalledWith("before", "after", directory, 7); + expect(postLog).toHaveBeenCalledWith("completed", directory); + }); + + it("builds state from Python and architectural detector fallbacks", async () => { + vi.spyOn(AgenticBlackboard, "getGlobalData").mockReturnValue({ projects: [], logs: [] } as any); + vi.spyOn(AgenticBlackboard, "getLogs").mockReturnValue([]); + vi.spyOn(AgenticBlackboard, "getActiveIntents").mockReturnValue([]); + vi.spyOn(AgenticBlackboard, "getLastRefinement").mockReturnValue(null); + vi.spyOn(ArchitecturalScout, "detectPatterns").mockResolvedValue(["Layered", "Ignored"]); + vi.spyOn(NodeDetector, "detect").mockResolvedValue({}); + vi.spyOn(PythonDetector, "detect").mockResolvedValue({ language: "Python", framework: "FastAPI" }); + CommandCenterDashboard.createApp(directory); + + const state = await (CommandCenterDashboard as any).buildState(directory); + + expect(state).toMatchObject({ + stack: "Python", + framework: "FastAPI", + pattern: "Layered", + }); + }); + + it("builds configured health from duplicate and sanitized provider telemetry", () => { + vi.spyOn(ConfigManager, 
"getSemanticConfig").mockReturnValue({ + localEnabled: true, + mcpSamplingEnabled: false, + baseUrl: "http://localhost:1234/path?secret=value", + models: ["local"], + timeoutMs: 10, + temperature: 0, + allowNonLoopback: false, + }); + const repoId = store.ensureRepository(directory).id; + const longValue = "x".repeat(200); + store.recordEvent({ + id: "latest", + event_type: "semantic_request_completed", + repo_id: repoId, + summary: "latest", + timestamp: "2026-06-15T02:00:00.000Z", + details_json: JSON.stringify({ + taskName: longValue, + provider: "local", + model: "", + latencyMs: Number.POSITIVE_INFINITY, + fallbackFrom: "not-an-array", + }), + }); + store.recordEvent({ + id: "earlier", + event_type: "semantic_request_completed", + repo_id: repoId, + summary: "earlier", + timestamp: "2026-06-15T01:00:00.000Z", + details_json: JSON.stringify({ + provider: "local", + model: "model", + latencyMs: 9.6, + fallbackFrom: Array.from({ length: 12 }, (_, index) => `fallback-${index}`), + }), + }); + + const health = (CommandCenterDashboard as any).buildHealth(directory); + + expect(health.semantic.status).toBe("healthy"); + expect(health.semantic.local.endpoint).toBe("http://localhost:1234"); + expect(health.semantic.lastSuccess).toMatchObject({ + taskName: longValue.slice(0, 120), + provider: "local", + model: "unknown", + latencyMs: null, + fallbackFrom: [], + }); + expect(health.semantic.providers).toEqual([ + expect.objectContaining({ + provider: "local", + completions: 2, + averageLatencyMs: 10, + models: ["unknown", "model"], + }), + ]); + expect(health.semantic.totals.averageLatencyMs).toBe(10); + expect(health.semantic.totals.fallbackCompletions).toBe(1); + }); + + it("reports configured semantic health before the first completion", () => { + vi.spyOn(ConfigManager, "getSemanticConfig").mockReturnValue({ + localEnabled: false, + mcpSamplingEnabled: true, + baseUrl: "http://localhost:1234", + models: [], + timeoutMs: 10, + temperature: 0, + allowNonLoopback: 
false, + }); + + const health = (CommandCenterDashboard as any).buildHealth(directory); + + expect(health.semantic.status).toBe("configured"); + expect(health.semantic.lastSuccess).toBeNull(); + expect(health.semantic.totals.averageLatencyMs).toBeNull(); + expect(health.semantic.providers).toEqual([]); + }); + + it("returns the route-owned event stream error response", async () => { + mocks.streamSSE.mockImplementation(() => { + throw new Error("stream failed"); + }); + const app = CommandCenterDashboard.createApp(directory); + + const response = await app.request("/api/events"); + + expect(response.status).toBe(500); + expect(await response.text()).toBe("Dashboard event stream unavailable"); + }); + + it("renders a route-owned error when dashboard HTML is missing", async () => { + vi.spyOn(CommandCenterDashboard as any, "buildState").mockResolvedValue({ + selectedPath: directory, + projects: [directory], + globalLogs: [], + logs: [], + intents: [], + lastRefinement: null, + stack: "Unknown", + framework: "None", + pattern: "Standard", + }); + mocks.missingHtml = true; + const app = CommandCenterDashboard.createApp(directory); + + const response = await app.request("/"); + const html = await response.text(); + + expect(response.status).toBe(500); + expect(html).toContain("Dashboard Error"); + expect(html).toContain("Could not find dashboard.html"); + }); +}); diff --git a/universal-refiner/tests/dashboard-routes.test.ts b/universal-refiner/tests/dashboard-routes.test.ts new file mode 100644 index 0000000..446eafe --- /dev/null +++ b/universal-refiner/tests/dashboard-routes.test.ts @@ -0,0 +1,82 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { CommandCenterDashboard } from "../src/core/dashboard.js"; +import { EventStore } from "../src/history/event-store.js"; + +describe("dashboard route coverage", () => { + let testDir: string; + 
let repoDir: string; + let store: EventStore; + + beforeEach(() => { + testDir = fs.mkdtempSync(path.join(os.tmpdir(), "dashboard-routes-")); + repoDir = path.join(testDir, "repo"); + fs.mkdirSync(repoDir); + fs.writeFileSync(path.join(repoDir, "package.json"), JSON.stringify({ scripts: { test: "vitest" } })); + process.env.PROMPT_REFINER_GLOBAL_DIR = path.join(testDir, "global"); + (EventStore as any).instance = null; + store = EventStore.getInstance(); + }); + + afterEach(() => { + store.close(); + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + fs.rmSync(testDir, { recursive: true, force: true }); + }); + + it("serves state, timeline, commits, lessons, templates, health, and HTML", async () => { + const repoId = store.ensureRepository(repoDir).id; + store.recordPrompt({ id: "prompt", repo_id: repoId, client: "test", raw_prompt: "Implement feature" }); + store.recordCommit({ + id: "commit", + repo_id: repoId, + sha: "abc", + message: "feat: test", + committed_at: new Date().toISOString(), + }); + store.recordLesson({ + id: "lesson", + repo_id: repoId, + lesson_type: "quality", + title: "Lesson", + summary: "Summary", + confidence: "high", + source: "test", + }); + store.recordTemplate({ + id: "template", + repo_id: repoId, + category: "feature", + title: "Template", + template_text: "Implement it.", + usage_notes: "", + source_type: "test", + success_score: 80, + }); + const app = CommandCenterDashboard.createApp(repoDir); + + for (const route of ["/api/state", "/api/timeline", "/api/commits", "/api/lessons", "/api/templates", "/api/health", "/"]) { + const response = await app.request(route); + expect(response.status, route).toBe(200); + } + const fallback = await app.request(`/api/state?project=${encodeURIComponent(path.join(testDir, "not-visible"))}`); + expect((await fallback.json() as any).selectedPath).toBe(path.resolve(repoDir)); + }); + + it("validates every review mutation boundary", async () => { + const app = CommandCenterDashboard.createApp(repoDir); 
+ const request = (route: string, body = "{}", headers: Record<string, string> = { "content-type": "application/json" }) => + app.request(route, { method: "POST", headers, body }); + + expect((await request("/api/review/unsupported/id", JSON.stringify({ decision: "approve" }))).status).toBe(400); + expect((await request("/api/review/lesson/id", "{}", {})).status).toBe(415); + expect((await request("/api/review/lesson/id", "{")).status).toBe(400); + expect((await request("/api/review/lesson/id", JSON.stringify({ decision: "approve" }), { + "content-type": "application/json", + origin: "https://attacker.example", + })).status).toBe(403); + expect((await request(`/api/review/lesson/${"x".repeat(201)}`, JSON.stringify({ decision: "approve" }))).status).toBe(400); + }); +}); diff --git a/universal-refiner/tests/dashboard-start.test.ts b/universal-refiner/tests/dashboard-start.test.ts new file mode 100644 index 0000000..9310cd0 --- /dev/null +++ b/universal-refiner/tests/dashboard-start.test.ts @@ -0,0 +1,40 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const mocks = vi.hoisted(() => ({ + serve: vi.fn(), + on: vi.fn(), + error: vi.fn(), +})); + +vi.mock("@hono/node-server", () => ({ serve: mocks.serve })); +vi.mock("../src/core/logger.js", () => ({ + RuntimeLogger: { error: mocks.error, info: vi.fn(), warn: vi.fn(), debug: vi.fn() }, +})); + +import { CommandCenterDashboard } from "../src/core/dashboard.js"; + +describe("dashboard server startup", () => { + beforeEach(() => { + vi.clearAllMocks(); + mocks.serve.mockReturnValue({ on: mocks.on }); + }); + + it("binds the configured host and reports server errors", () => { + process.env.PROMPT_REFINER_DASHBOARD_HOST = "127.0.0.1"; + CommandCenterDashboard.start(3999, "."); + expect(mocks.serve).toHaveBeenCalledWith(expect.objectContaining({ port: 3999, hostname: "127.0.0.1" })); + const handler = mocks.on.mock.calls[0][1]; + handler(Object.assign(new Error("busy"), { code: "EADDRINUSE" })); +
handler(Object.assign(new Error("failure"), { code: "EIO" })); + expect(mocks.error).toHaveBeenCalledTimes(2); + delete process.env.PROMPT_REFINER_DASHBOARD_HOST; + }); + + it("rethrows synchronous startup failures after logging", () => { + mocks.serve.mockImplementation(() => { + throw new Error("startup failed"); + }); + expect(() => CommandCenterDashboard.start(3999, ".")).toThrow("startup failed"); + expect(mocks.error).toHaveBeenCalledWith("Dashboard failed to start on port 3999", expect.any(Error)); + }); +}); diff --git a/universal-refiner/tests/evaluation.test.ts b/universal-refiner/tests/evaluation.test.ts new file mode 100644 index 0000000..dcc7bab --- /dev/null +++ b/universal-refiner/tests/evaluation.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from "vitest"; +import { + comparePrompts, + createABEvaluationRecord, + evaluatePrompt +} from "../src/evaluation/prompt-evaluator.js"; + +describe("deterministic prompt evaluation", () => { + it("scores evidence-oriented dimensions and labels the result as heuristic", () => { + const evaluation = evaluatePrompt( + "Implement src/auth.ts. Preserve existing behavior, add tests, run npm test, and report results.", + "Implement auth" + ); + + expect(evaluation.heuristicScore).toBeGreaterThan(50); + expect(evaluation.dimensions.testability.evidence).toContain("test-command"); + expect(evaluation.dimensions.riskControls.evidence).toContain("compatibility-or-regression"); + expect(evaluation.disclaimer).toContain("not an LLM quality judgment"); + }); + + it("compares original and refined prompts without rewarding length alone", () => { + const original = "Fix login"; + const verbose = `${original} ${"background ".repeat(500)}`; + const actionable = "Fix login in src/auth.ts. 
Preserve existing behavior, add tests, run npm test, and report results."; + + expect(comparePrompts(original, verbose).heuristicDelta).toBe(0); + expect(comparePrompts(original, actionable).heuristicPreference).toBe("refined"); + }); + + it("records A/B heuristic preference separately from observed evidence", () => { + const record = createABEvaluationRecord({ + experimentId: "exp-1", + createdAt: "2026-06-14T00:00:00.000Z", + baselinePrompt: "Fix login", + variantA: { + id: "A", + prompt: "Fix login and add tests.", + observedOutcome: { status: "failed", testsFailed: 1 } + }, + variantB: { + id: "B", + prompt: "Fix login in src/auth.ts. Preserve behavior and run npm test.", + observedOutcome: { status: "completed", testsPassed: 10 } + } + }); + + expect(record.heuristicPreference).toBe("B"); + expect(record.observedWinner).toBe("B"); + expect(record.interpretation).toBe("observed-evidence"); + }); + + it("does not declare an observed winner without outcomes from both variants", () => { + const record = createABEvaluationRecord({ + experimentId: "exp-2", + baselinePrompt: "Add login", + variantA: { id: "A", prompt: "Add login" }, + variantB: { id: "B", prompt: "Add login with tests." 
} + }); + + expect(record.observedWinner).toBeUndefined(); + expect(record.interpretation).toBe("heuristic-only"); + }); +}); diff --git a/universal-refiner/tests/file-watcher.test.ts b/universal-refiner/tests/file-watcher.test.ts new file mode 100644 index 0000000..42f4e3b --- /dev/null +++ b/universal-refiner/tests/file-watcher.test.ts @@ -0,0 +1,172 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import * as os from "node:os"; +import { FileWatcher, FileChangeEvent } from "../src/watcher/file-watcher.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Wait up to `timeout` ms for `predicate` to return true, polling every `interval` ms. + * Resolves on the first truthy poll; rejects with "waitFor: timeout exceeded" once the deadline has passed. + */ +function waitFor( + predicate: () => boolean, + timeout = 6000, + interval = 50 +): Promise<void> { + return new Promise<void>((resolve, reject) => { + const deadline = Date.now() + timeout; + const timer = setInterval(() => { + if (predicate()) { + clearInterval(timer); + resolve(); + } else if (Date.now() > deadline) { + clearInterval(timer); + reject(new Error("waitFor: timeout exceeded")); + } + }, interval); + }); +} + +/** + * Wait for chokidar to finish setting up FS listeners. + * Windows file-system events/polling can need 500-1500ms before reliably firing.
+ */ +const WATCHER_SETTLE_MS = 1500; + +// --------------------------------------------------------------------------- +// Suite +// --------------------------------------------------------------------------- + +describe("FileWatcher", () => { + let tmpDir: string; + let watcher: FileWatcher; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "fw-test-")); + }); + + afterEach(async () => { + if (watcher) { + await watcher.stop(); + } + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + // ----------------------------------------------------------------------- + // AUTO-01: detect meaningful file changes + // ----------------------------------------------------------------------- + + it("detects a file write as a 'change' event (AUTO-01)", async () => { + // Create file before watcher starts so initial scan won't fire 'add' + const filePath = path.join(tmpDir, "test.ts"); + fs.writeFileSync(filePath, "// initial"); + + const events: FileChangeEvent[] = []; + watcher = new FileWatcher(tmpDir); + watcher.on("change", (evt) => events.push(evt)); + watcher.start(); + + // Allow chokidar to finish indexing the directory + await new Promise((r) => setTimeout(r, WATCHER_SETTLE_MS)); + + // Modify the file + fs.writeFileSync(filePath, "// updated"); + + // Wait for event + await waitFor(() => events.some((e) => e.event === "change" && e.path.endsWith("test.ts"))); + + const hit = events.find((e) => e.event === "change"); + expect(hit).toBeDefined(); + expect(hit!.path).toContain("test.ts"); + expect(hit!.timestamp).toBeInstanceOf(Date); + }, 15_000); + + it("detects a new file as an 'add' event (AUTO-01)", async () => { + const events: FileChangeEvent[] = []; + watcher = new FileWatcher(tmpDir); + watcher.on("change", (evt) => events.push(evt)); + watcher.start(); + + await new Promise((r) => setTimeout(r, WATCHER_SETTLE_MS)); + + fs.writeFileSync(path.join(tmpDir, "new.ts"), "export {}"); + + await waitFor(() => events.some((e) => e.event 
=== "add" && e.path.endsWith("new.ts"))); + + const hit = events.find((e) => e.event === "add"); + expect(hit).toBeDefined(); + expect(hit!.event).toBe("add"); + expect(hit!.timestamp).toBeInstanceOf(Date); + }, 15_000); + + // ----------------------------------------------------------------------- + // AUTO-02: noise filter + // ----------------------------------------------------------------------- + + it("does NOT emit events for paths inside node_modules (AUTO-02)", async () => { + const nmDir = path.join(tmpDir, "node_modules", "some-pkg"); + fs.mkdirSync(nmDir, { recursive: true }); + + const events: FileChangeEvent[] = []; + watcher = new FileWatcher(tmpDir); + watcher.on("change", (evt) => events.push(evt)); + watcher.start(); + + await new Promise((r) => setTimeout(r, WATCHER_SETTLE_MS)); + + fs.writeFileSync(path.join(nmDir, "index.ts"), "// ignored"); + + // Wait and assert silence + await new Promise((r) => setTimeout(r, 800)); + const nmEvents = events.filter((e) => e.path.includes("node_modules")); + expect(nmEvents).toHaveLength(0); + }, 15_000); + + it("does NOT emit events for *.log files (AUTO-02)", async () => { + const events: FileChangeEvent[] = []; + watcher = new FileWatcher(tmpDir); + watcher.on("change", (evt) => events.push(evt)); + watcher.start(); + + await new Promise((r) => setTimeout(r, WATCHER_SETTLE_MS)); + + fs.writeFileSync(path.join(tmpDir, "debug.log"), "some log line"); + + await new Promise((r) => setTimeout(r, 800)); + expect(events.filter((e) => e.path.endsWith(".log"))).toHaveLength(0); + }, 15_000); + + it("does not crash when chokidar reports an error without an error listener", () => { + watcher = new FileWatcher(tmpDir); + watcher.start(); + + expect(() => (watcher as any).inner.emit("error", new Error("permission denied"))).not.toThrow(); + }); + + // ----------------------------------------------------------------------- + // stop() — no further events after stop + // 
----------------------------------------------------------------------- + + it("stop() prevents further events from being emitted", async () => { + const filePath = path.join(tmpDir, "track.ts"); + fs.writeFileSync(filePath, "// v1"); + + const events: FileChangeEvent[] = []; + watcher = new FileWatcher(tmpDir); + watcher.on("change", (evt) => events.push(evt)); + watcher.start(); + + await new Promise((r) => setTimeout(r, WATCHER_SETTLE_MS)); + await watcher.stop(); + + const countBefore = events.length; + fs.writeFileSync(filePath, "// v2"); + await new Promise((r) => setTimeout(r, 800)); + + expect(events.length).toBe(countBefore); + }, 15_000); +}); diff --git a/universal-refiner/tests/git-poller.test.ts b/universal-refiner/tests/git-poller.test.ts new file mode 100644 index 0000000..4121362 --- /dev/null +++ b/universal-refiner/tests/git-poller.test.ts @@ -0,0 +1,144 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { GitPoller } from "../src/history/git-poller.js"; +import { CommitIngester } from "../src/history/commit-ingest.js"; + +// --------------------------------------------------------------------------- +// Stub CommitIngester so tests don't touch the real DB / git log +// --------------------------------------------------------------------------- + +vi.mock("../src/history/commit-ingest.js", () => ({ + CommitIngester: { + ingestLatest: vi.fn(), + }, +})); + +vi.mock("../src/core/logger.js", () => ({ + RuntimeLogger: { info: vi.fn(), debug: vi.fn(), error: vi.fn() }, +})); + +vi.mock("../src/core/dashboard.js", () => ({ + CommandCenterDashboard: { log: vi.fn() }, +})); + +const ingestLatest = vi.mocked(CommitIngester.ingestLatest); + +// --------------------------------------------------------------------------- +// Suite +// --------------------------------------------------------------------------- + +describe("GitPoller", () => { + beforeEach(() => { + vi.useFakeTimers(); + ingestLatest.mockResolvedValue(0); + 
}); + + afterEach(() => { + vi.useRealTimers(); + vi.clearAllMocks(); + }); + + // ------------------------------------------------------------------------- + // AUTO-03: poll() calls CommitIngester.ingestLatest + // ------------------------------------------------------------------------- + + it("poll() calls CommitIngester.ingestLatest with the repo path (AUTO-03)", async () => { + const poller = new GitPoller("/repo"); + await poller.poll(); + expect(ingestLatest).toHaveBeenCalledWith("/repo", 50); + }); + + // ------------------------------------------------------------------------- + // AUTO-04: emits "commits" when new commits found + // ------------------------------------------------------------------------- + + it("emits 'commits' event when ingestLatest returns > 0 (AUTO-04)", async () => { + ingestLatest.mockResolvedValue(3); + const poller = new GitPoller("/repo"); + const handler = vi.fn(); + poller.on("commits", handler); + + await poller.poll(); + + expect(handler).toHaveBeenCalledWith(3); + }); + + it("does NOT emit 'commits' when ingestLatest returns 0", async () => { + ingestLatest.mockResolvedValue(0); + const poller = new GitPoller("/repo"); + const handler = vi.fn(); + poller.on("commits", handler); + + await poller.poll(); + + expect(handler).not.toHaveBeenCalled(); + }); + + // ------------------------------------------------------------------------- + // poll() return value + // ------------------------------------------------------------------------- + + it("poll() returns the count from ingestLatest", async () => { + ingestLatest.mockResolvedValue(7); + const poller = new GitPoller("/repo"); + const count = await poller.poll(); + expect(count).toBe(7); + }); + + it("poll() returns 0 and does not throw when ingestLatest rejects", async () => { + ingestLatest.mockRejectedValue(new Error("git failure")); + const poller = new GitPoller("/repo"); + const count = await poller.poll(); + expect(count).toBe(0); + }); + + // 
------------------------------------------------------------------------- + // AUTO-03: start() triggers polling on interval + // ------------------------------------------------------------------------- + + it("start() triggers poll at the configured interval", async () => { + const poller = new GitPoller("/repo", 1000); + poller.start(); + + await vi.advanceTimersByTimeAsync(2100); + poller.stop(); + + expect(ingestLatest.mock.calls.length).toBeGreaterThanOrEqual(2); + }); + + it("start() is idempotent; calling twice does not double the interval", async () => { + const poller = new GitPoller("/repo", 1000); + poller.start(); + poller.start(); + + await vi.advanceTimersByTimeAsync(1000); + poller.stop(); + + // One interval fired exactly once + expect(ingestLatest.mock.calls.length).toBe(1); + }); + + // ------------------------------------------------------------------------- + // stop() clears the interval + // ------------------------------------------------------------------------- + + it("stop() prevents further interval polls", async () => { + const poller = new GitPoller("/repo", 1000); + poller.start(); + + await vi.advanceTimersByTimeAsync(1000); + const callsBeforeStop = ingestLatest.mock.calls.length; + poller.stop(); + + await vi.advanceTimersByTimeAsync(5000); + expect(ingestLatest.mock.calls.length).toBe(callsBeforeStop); + }); + + it("stop() is idempotent before start and handles a missing timer", () => { + const poller = new GitPoller("/repo"); + expect(() => poller.stop()).not.toThrow(); + + (poller as any).running = true; + (poller as any).timer = null; + expect(() => poller.stop()).not.toThrow(); + }); +}); diff --git a/universal-refiner/tests/history.test.ts b/universal-refiner/tests/history.test.ts index b9dd66e..c2fc274 100644 --- a/universal-refiner/tests/history.test.ts +++ b/universal-refiner/tests/history.test.ts @@ -3,6 +3,7 @@ import { EventStore } from "../src/history/event-store.js"; import * as fs from "fs"; import * as path from 
"path"; import * as os from "os"; +import Database from "better-sqlite3"; describe("EventStore", () => { const testDir = path.join(os.tmpdir(), "refiner-test-" + Date.now()); @@ -72,4 +73,308 @@ describe("EventStore", () => { expect(eRow).toBeDefined(); expect(eRow.event_type).toBe("prompt_received"); }); + + it("records a global lesson without a repository id", () => { + const store = EventStore.getInstance(); + store.recordLesson({ + id: "global-lesson", + lesson_type: "quality", + title: "Global", + summary: "Global lesson", + confidence: "high", + source: "test", + }); + const db = (store as any).db; + expect(db.prepare("SELECT repo_id FROM lessons WHERE id = ?").get("global-lesson")).toEqual({ repo_id: null }); + }); + + it("uses the default global directory when no override is configured", () => { + const previous = process.env.PROMPT_REFINER_GLOBAL_DIR; + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + expect((EventStore as any).resolveDatabasePath()).toContain(".refiner"); + process.env.PROMPT_REFINER_GLOBAL_DIR = previous; + }); + + it("should expose only approved lessons to future refinements", () => { + const store = EventStore.getInstance(); + store.recordLesson({ + id: "pending", + repo_id: "repo", + lesson_type: "quality", + title: "Pending", + summary: "Do not inject yet", + confidence: "high", + source: "test", + }); + store.recordLesson({ + id: "approved", + repo_id: "repo", + lesson_type: "quality", + title: "Approved", + summary: "Inject this", + confidence: "medium", + source: "test", + approved: 1, + }); + + expect(store.getRecentLessons("repo").map(lesson => lesson.id)).toEqual(["approved"]); + }); + + it("should persist learning candidate approval and rejection", () => { + const store = EventStore.getInstance(); + store.recordLesson({ + id: "lesson", + repo_id: "repo", + lesson_type: "quality", + title: "Candidate", + summary: "Candidate summary", + confidence: "high", + source: "test", + }); + store.recordTemplate({ + id: "template", + 
repo_id: "repo", + category: "feature", + title: "Candidate template", + template_text: "Build [THING]", + usage_notes: "test", + source_type: "test", + success_score: 80, + }); + + expect(store.getLearningCandidates("repo").lessons).toHaveLength(1); + expect(store.reviewLesson("other-repo", "lesson", true)).toBe(false); + expect(store.reviewLesson("repo", "lesson", true)).toBe(true); + expect(store.reviewTemplate("other-repo", "template", false)).toBe(false); + expect(store.reviewTemplate("repo", "template", false)).toBe(true); + expect(store.getLearningCandidates("repo")).toEqual({ lessons: [], templates: [] }); + expect(store.getRecentLessons("repo").map(lesson => lesson.id)).toContain("lesson"); + }); + + it("should idempotently record an already ingested commit", () => { + const store = EventStore.getInstance(); + const commit = { + id: "commit-1", + repo_id: "repo", + sha: "abc123", + author: "Acceptance", + message: "feat: acceptance", + committed_at: "2026-06-14T10:00:00Z", + }; + + store.recordCommit(commit); + expect(() => store.recordCommit(commit)).not.toThrow(); + + const db = (store as any).db; + expect(db.prepare("SELECT COUNT(*) AS count FROM commits WHERE id = ?").get(commit.id).count).toBe(1); + expect(db.prepare("SELECT COUNT(*) AS count FROM events WHERE commit_id = ?").get(commit.id).count).toBe(1); + }); + + it("should migrate legacy basename repository records to canonical identity", () => { + const store = EventStore.getInstance(); + store.recordPrompt({ id: "legacy-prompt", repo_id: "service", client: "test", raw_prompt: "legacy" }); + + const identity = store.ensureRepository("C:/repo/team/service"); + const db = (store as any).db; + expect(identity.id).not.toBe("service"); + expect(db.prepare("SELECT repo_id FROM prompts WHERE id = ?").get("legacy-prompt").repo_id).toBe(identity.id); + }); + + it("returns only approved active templates for a repository", () => { + const store = EventStore.getInstance(); + const identity = 
store.ensureRepository("C:/repo/team/service"); + for (const id of ["approved", "pending"]) { + store.recordTemplate({ + id, + repo_id: identity.id, + category: "bugfix", + title: id, + template_text: `${id} template`, + usage_notes: "", + source_type: "test", + success_score: id === "approved" ? 90 : 95, + }); + } + expect(store.reviewTemplate(identity.id, "approved", true)).toBe(true); + + expect(store.getTemplates(identity.id)).toMatchObject([ + { id: "approved", repoId: identity.id, approved: 1, deprecated: 0 }, + ]); + }); + + it("records complete optional metadata and all execution updates", () => { + const store = EventStore.getInstance(); + store.recordEvent({ + id: "complete-event", + event_type: "complete", + repo_id: "repo", + session_id: "session", + prompt_id: "prompt", + execution_id: "execution", + commit_id: "commit", + timestamp: "2026-06-15T00:00:00Z", + severity: "warning", + summary: "complete", + details_json: JSON.stringify({ complete: true }), + }); + store.recordPrompt({ + id: "complete-prompt", + repo_id: "repo", + session_id: "session", + timestamp: "2026-06-15T00:00:00Z", + client: "test", + agent_name: "agent", + raw_prompt: "complete", + normalized_prompt: "normalized", + intent: "test", + complexity: "high", + scope: "module", + risk: "low", + tags_json: "[\"complete\"]", + }); + store.recordExecution({ + id: "complete-execution", + prompt_id: "complete-prompt", + workflow_name: "test", + executor_name: "test", + status: "started", + started_at: "2026-06-15T00:00:00Z", + ended_at: "2026-06-15T00:01:00Z", + result_summary: "initial", + artifacts_json: "{\"initial\":true}", + }); + + store.updateExecution({ id: "complete-execution" }); + store.updateExecution({ + id: "complete-execution", + status: "completed", + ended_at: "2026-06-15T00:02:00Z", + result_summary: "done", + artifacts_json: "{\"done\":true}", + }); + + const db = (store as any).db; + expect(store.getExecutionByPromptId("missing")).toBeNull(); + expect(db.prepare("SELECT * 
FROM executions WHERE id = ?").get("complete-execution")).toMatchObject({ + status: "completed", + ended_at: "2026-06-15T00:02:00Z", + result_summary: "done", + artifacts_json: "{\"done\":true}", + }); + }); + + it("records clusters and returns an existing canonical repository", () => { + const store = EventStore.getInstance(); + store.recordCluster({ + id: "cluster", + repo_id: "repo", + intent: "test", + category: "quality", + cluster_title: "Quality", + cluster_summary: "Quality cluster", + representative_prompt: "Test it", + prompt_count: 2, + success_rate: 100, + }); + + const first = store.ensureRepository("C:/repo/existing"); + const second = store.ensureRepository("C:/repo/existing"); + expect(second).toEqual(first); + expect((store as any).db.prepare("SELECT * FROM prompt_clusters WHERE id = ?").get("cluster")).toMatchObject({ + prompt_count: 2, + success_rate: 100, + }); + }); + + it("backs up and restores a verified database", async () => { + const store = EventStore.getInstance(); + store.recordEvent({ id: "before-backup", event_type: "test", summary: "persist me" }); + const backupPath = path.join(testDir, "backups", "events.db"); + + await expect(store.backup(backupPath)).resolves.toBe(backupPath); + store.recordEvent({ id: "after-backup", event_type: "test", summary: "remove me" }); + const restored = EventStore.restore(backupPath); + const db = (restored as any).db; + + expect(db.prepare("SELECT id FROM events WHERE id = ?").get("before-backup")).toBeDefined(); + expect(db.prepare("SELECT id FROM events WHERE id = ?").get("after-backup")).toBeUndefined(); + }); + + it("rejects missing and integrity-failing backups", async () => { + const store = EventStore.getInstance(); + /* restore() throws synchronously, so this matcher is not awaited. */ expect(() => EventStore.restore(path.join(testDir, "missing.db"))).toThrow("Backup does not exist"); + + const originalPragma = Database.prototype.pragma; + const pragmaSpy = vi.spyOn(Database.prototype, "pragma").mockImplementation(function (this: Database.Database, source: 
string, options?: any) { + if (source === "integrity_check") { + return "corrupt" as any; + } + return originalPragma.call(this, source, options); + }); + + const backupPath = path.join(testDir, "bad-backup.db"); + await expect(store.backup(backupPath)).rejects.toThrow("Backup integrity check failed"); + pragmaSpy.mockRestore(); + + await store.backup(backupPath); + const restorePragmaSpy = vi.spyOn(Database.prototype, "pragma").mockImplementation(function (this: Database.Database, source: string, options?: any) { + if (source === "integrity_check") { + return "corrupt" as any; + } + return originalPragma.call(this, source, options); + }); + expect(() => EventStore.restore(backupPath)).toThrow("Backup integrity check failed"); + restorePragmaSpy.mockRestore(); + }); + + it("continues when WAL mode is unavailable", () => { + const originalPragma = Database.prototype.pragma; + const pragmaSpy = vi.spyOn(Database.prototype, "pragma").mockImplementation(function (this: Database.Database, source: string, options?: any) { + if (source === "journal_mode = WAL") { + throw new Error("WAL unavailable"); + } + return originalPragma.call(this, source, options); + }); + expect(EventStore.getInstance()).toBeDefined(); + EventStore.getInstance().close(); + pragmaSpy.mockRestore(); + }); + + it("logs and rethrows schema initialization errors", () => { + const execSpy = vi.spyOn(Database.prototype, "exec").mockImplementationOnce(function (this: Database.Database) { + throw new Error("schema failure"); + }); + + expect(() => EventStore.getInstance()).toThrow("schema failure"); + execSpy.mockRestore(); + expect(() => fs.rmSync(testDir, { recursive: true, force: true })).not.toThrow(); + }); + + it("supports rejection decisions and closing a non-singleton store", () => { + const store = EventStore.getInstance(); + store.recordLesson({ + id: "reject-lesson", + repo_id: "repo", + lesson_type: "quality", + title: "Reject", + summary: "Reject", + confidence: "low", + source: "test", + }); 
+ store.recordTemplate({ + id: "approve-template", + repo_id: "repo", + category: "test", + title: "Approve", + template_text: "Approve", + usage_notes: "", + source_type: "test", + success_score: 1, + }); + + expect(store.reviewLesson("repo", "reject-lesson", false)).toBe(true); + expect(store.reviewTemplate("repo", "approve-template", true)).toBe(true); + (EventStore as any).instance = null; + expect(() => store.close()).not.toThrow(); + }); }); diff --git a/universal-refiner/tests/hook-runtime.test.ts b/universal-refiner/tests/hook-runtime.test.ts new file mode 100644 index 0000000..6950ab4 --- /dev/null +++ b/universal-refiner/tests/hook-runtime.test.ts @@ -0,0 +1,170 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import * as fs from "fs"; +import * as path from "path"; +import { + allowOutput, + buildLintContext, + clearState, + detectClient, + extractOutputLength, + extractPrompt, + extractPromptId, + loadState, + parseHookInput, + runPostExecution, + runPrePrompt, + saveState, + statePath, +} from "../hooks/lib/hook-runtime.js"; + +const input = { + session_id: "hook-test-session", + cwd: "C:\\repo\\example", + hook_event_name: "BeforeAgent", + prompt: "Implement the feature", +}; + +afterEach(() => clearState(input)); + +describe("cross-CLI hook runtime", () => { + it("normalizes prompt fields and extracts tracking IDs", () => { + expect(parseHookInput('\uFEFF{"prompt":"hello"}')).toEqual({ prompt: "hello" }); + expect(parseHookInput(" \n ")).toEqual({}); + expect(() => parseHookInput("{")).toThrow(); + expect(extractPrompt({ user_prompt: "hello" })).toBe("hello"); + expect(extractPrompt({ input: "fallback" })).toBe("fallback"); + expect(extractPrompt({ prompt: 1 })).toBeUndefined(); + expect(extractPromptId("[PROMPT_ID: ref_123]\nTask")).toBe("ref_123"); + expect(extractPromptId('{"promptId":"prm_123","gaps":[]}')).toBe("prm_123"); + expect(extractPromptId('{"promptId":123}')).toBeUndefined(); + expect(extractPromptId("not 
json")).toBeUndefined(); + }); + + it("detects explicit and event-derived clients", () => { + expect(detectClient({ client: "CoDeX" })).toBe("codex"); + expect(detectClient({ hook_event_name: "UserPromptSubmit" })).toBe("claude"); + expect(detectClient({ hook_event_name: "Stop" })).toBe("claude"); + expect(detectClient({ hook_event_name: "BeforeAgent" })).toBe("gemini"); + expect(detectClient({ hook_event_name: "AfterAgent" })).toBe("gemini"); + expect(detectClient({})).toBe("generic"); + }); + + it("extracts output lengths from every supported field", () => { + expect(extractOutputLength({ prompt_response: "one" })).toBe(3); + expect(extractOutputLength({ last_assistant_message: "four" })).toBe(4); + expect(extractOutputLength({ response: "12345" })).toBe(5); + expect(extractOutputLength({ output: "123456" })).toBe(6); + expect(extractOutputLength({ output: 7 })).toBe(0); + }); + + it("formats advisory lint context without exposing the original prompt", () => { + const context = buildLintContext(JSON.stringify({ + gaps: [{ message: "Testing is unspecified.", suggestedAction: "Add acceptance criteria." }], + }), "ref_123"); + + expect(context).toContain("Testing is unspecified."); + expect(context).toContain("ref_123"); + expect(context).not.toContain(input.prompt); + expect(buildLintContext('{"gaps":"invalid"}')).toBe("PromptImprover found no actionable prompt gaps."); + expect(buildLintContext('{"gaps":[]}', "tracked")).toContain("tracked"); + expect(buildLintContext('{"gaps":[{}]}')).toContain("Prompt quality gap detected."); + expect(buildLintContext("invalid", "tracked")).toContain("tracked"); + expect(buildLintContext("invalid")).toBe("PromptImprover linting completed. 
Continue normally."); + expect(buildLintContext(JSON.stringify({ gaps: Array.from({ length: 6 }, (_, index) => ({ message: `${index}` })) }))) + .not.toContain("- 5"); + }); + + it("creates client-compatible fail-open output", () => { + expect(allowOutput(input, "advice")).toEqual({ + decision: "allow", + hookSpecificOutput: { + hookEventName: "BeforeAgent", + additionalContext: "advice", + }, + }); + expect(allowOutput(input)).toEqual({ decision: "allow" }); + expect(allowOutput({}, "advice")).toEqual({ + decision: "allow", + hookSpecificOutput: { additionalContext: "advice" }, + }); + }); + + it("uses stable state paths and removes invalid or stale state", () => { + expect(statePath(input)).toBe(statePath(input)); + expect(statePath({ sessionId: "camel", cwd: "C:/repo" })).not.toBe(statePath(input)); + expect(statePath({})).toMatch(/promptimprover-hooks[\\/].+\.json$/); + + saveState(input, { promptId: "", client: "gemini", createdAt: new Date().toISOString() }); + expect(loadState(input)).toBeUndefined(); + expect(fs.existsSync(statePath(input))).toBe(false); + + saveState(input, { promptId: "old", client: "gemini", createdAt: "2000-01-01T00:00:00.000Z" }); + expect(loadState(input)).toBeUndefined(); + + fs.mkdirSync(path.dirname(statePath(input)), { recursive: true }); + fs.writeFileSync(statePath(input), "{", "utf8"); + expect(loadState(input)).toBeUndefined(); + }); + + it("lints, creates a trackable prompt, and persists only correlation metadata", async () => { + const call = vi.fn().mockResolvedValueOnce('{"promptId":"prm_456","gaps":[]}'); + + await expect(runPrePrompt(input, call)).resolves.toMatchObject({ decision: "allow" }); + expect(call).toHaveBeenCalledWith("lint_prompt", { prompt: input.prompt, semantic: false }); + expect(loadState(input)).toMatchObject({ promptId: "prm_456", client: "gemini" }); + /* The persisted state file must never contain the raw prompt text. */ expect(fs.readFileSync(statePath(input), "utf8")).not.toContain(input.prompt); + }); + + it("fails open when linting times out", async () => { + const call = vi.fn().mockRejectedValueOnce(new 
Error("timeout")); + + await expect(runPrePrompt(input, call)).resolves.toEqual({ decision: "allow" }); + expect(loadState(input)).toBeUndefined(); + }); + + it("fails open for empty prompts and does not persist untracked lint results", async () => { + const call = vi.fn().mockResolvedValue('{"gaps":[]}'); + await expect(runPrePrompt({ ...input, prompt: " " }, call)).resolves.toEqual({ decision: "allow" }); + await expect(runPrePrompt({ ...input, prompt: "usable" }, call)).resolves.toMatchObject({ decision: "allow" }); + expect(call).toHaveBeenCalledTimes(1); + expect(loadState(input)).toBeUndefined(); + }); + + it("records a privacy-safe completion and clears correlation state", async () => { + const preCall = vi.fn().mockResolvedValueOnce('{"promptId":"prm_789","gaps":[]}'); + await runPrePrompt(input, preCall); + + const postCall = vi.fn().mockResolvedValue("ok"); + await runPostExecution({ + ...input, + hook_event_name: "AfterAgent", + prompt_response: "private response body", + }, postCall); + + expect(postCall).toHaveBeenCalledWith("record_agent_output", expect.objectContaining({ + prompt_id: "prm_789", + output_summary: "gemini completed the tracked turn; output_length=21.", + })); + expect(JSON.stringify(postCall.mock.calls)).not.toContain("private response body"); + expect(loadState(input)).toBeUndefined(); + }); + + it("records explicit failed completions without prior state and permits untracked completions", async () => { + const call = vi.fn().mockResolvedValue("ok"); + await expect(runPostExecution({}, call)).resolves.toEqual({ decision: "allow" }); + await runPostExecution({ + client: "CODEX", + prompt_id: "explicit", + status: "failed", + output: "result", + }, call); + + expect(call).toHaveBeenCalledOnce(); + expect(call).toHaveBeenCalledWith("record_agent_output", expect.objectContaining({ + prompt_id: "explicit", + status: "failed", + output_summary: "codex completed the tracked turn; output_length=6.", + artifacts_json: JSON.stringify({ client: 
"codex", hook_event: "manual", output_length: 6 }), + })); + }); +}); diff --git a/universal-refiner/tests/index.test.ts b/universal-refiner/tests/index.test.ts new file mode 100644 index 0000000..1663920 --- /dev/null +++ b/universal-refiner/tests/index.test.ts @@ -0,0 +1,72 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const mocks = vi.hoisted(() => ({ + dashboardStart: vi.fn(), + dashboardLog: vi.fn(), + serverRun: vi.fn(), + watcherStart: vi.fn(), + watcherOn: vi.fn(), + loggerInfo: vi.fn(), + serverConstructor: vi.fn(), + watcherConstructor: vi.fn(), +})); + +vi.mock("../src/core/dashboard.js", () => ({ + CommandCenterDashboard: { start: mocks.dashboardStart, log: mocks.dashboardLog }, +})); +vi.mock("../src/core/server.js", () => ({ + PromptRefinerServer: class { + constructor(rootPath: string) { + mocks.serverConstructor(rootPath); + } + run = mocks.serverRun; + }, +})); +vi.mock("../src/watcher/index.js", () => ({ + FileWatcher: class { + constructor(rootPath: string) { + mocks.watcherConstructor(rootPath); + } + on = mocks.watcherOn; + start = mocks.watcherStart; + }, +})); +vi.mock("../src/core/logger.js", () => ({ RuntimeLogger: { info: mocks.loggerInfo } })); + +describe("runtime bootstrap", () => { + beforeEach(() => { + vi.resetModules(); + vi.clearAllMocks(); + mocks.serverRun.mockResolvedValue(undefined); + delete process.env.PORT; + }); + + it("starts dashboard, watcher, and MCP server and forwards file events", async () => { + await import("../src/index.js"); + + expect(mocks.dashboardStart).toHaveBeenCalledOnce(); + expect(mocks.dashboardStart).toHaveBeenCalledWith(3000, process.cwd()); + expect(mocks.serverConstructor).toHaveBeenCalledWith(process.cwd()); + expect(mocks.watcherConstructor).toHaveBeenCalledWith(process.cwd()); + expect(mocks.watcherStart).toHaveBeenCalledOnce(); + expect(mocks.serverRun).toHaveBeenCalledOnce(); + const changeHandler = mocks.watcherOn.mock.calls.find(call => call[0] === "change")?.[1]; + 
changeHandler({ event: "change", path: `${process.cwd()}\\src\\a.ts` }); + expect(mocks.loggerInfo).toHaveBeenCalledWith(expect.stringContaining("[FS] change")); + expect(mocks.dashboardLog).toHaveBeenCalledWith(expect.stringContaining("[FS] change")); + }); + + it("uses the configured dashboard port and exits on fatal server failure", async () => { + process.env.PORT = "4321"; + const error = new Error("startup failed"); + mocks.serverRun.mockRejectedValue(error); + const consoleError = vi.spyOn(console, "error").mockImplementation(() => undefined); + const exit = vi.spyOn(process, "exit").mockImplementation((() => undefined) as never); + + await import("../src/index.js"); + await vi.waitFor(() => expect(exit).toHaveBeenCalledWith(1)); + + expect(mocks.dashboardStart).toHaveBeenCalledWith(4321, process.cwd()); + expect(consoleError).toHaveBeenCalledWith("[FATAL ERROR]", error); + }); +}); diff --git a/universal-refiner/tests/job-queue.test.ts b/universal-refiner/tests/job-queue.test.ts new file mode 100644 index 0000000..2af615c --- /dev/null +++ b/universal-refiner/tests/job-queue.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, it, vi } from "vitest"; +import { SerializedJobQueue } from "../src/core/job-queue.js"; +import { RuntimeLogger } from "../src/core/logger.js"; + +describe("SerializedJobQueue", () => { + it("coalesces duplicate pending jobs and serializes distinct jobs", async () => { + const queue = new SerializedJobQueue(); + const order: string[] = []; + expect(queue.enqueue("same", async () => { + order.push("first-start"); + await new Promise(resolve => setTimeout(resolve, 20)); + order.push("first-end"); + })).toBe(true); + expect(queue.enqueue("same", async () => order.push("duplicate"))).toBe(false); + expect(queue.enqueue("second", async () => order.push("second"))).toBe(true); + await queue.idle(); + expect(order).toEqual(["first-start", "first-end", "second"]); + }); + + it("retries transient failures", async () => { + const queue = new 
SerializedJobQueue(); + const job = vi.fn() + .mockRejectedValueOnce(new Error("transient")) + .mockResolvedValue(undefined); + queue.enqueue("retry", job, { retries: 1, retryDelayMs: 1 }); + await queue.idle(); + expect(job).toHaveBeenCalledTimes(2); + }); + + it("logs permanent failures and accepts the key again after completion", async () => { + const queue = new SerializedJobQueue(); + const error = vi.spyOn(RuntimeLogger, "error").mockImplementation(() => undefined); + const job = vi.fn().mockRejectedValue("permanent"); + + expect(queue.enqueue("failure", job, { retries: 0 })).toBe(true); + await queue.idle(); + expect(error).toHaveBeenCalledWith("Queued job failed permanently: failure", "permanent"); + expect(queue.enqueue("failure", async () => undefined)).toBe(true); + await queue.idle(); + }); + + it("uses default retry options and renders non-Error failures", async () => { + vi.useFakeTimers(); + const warn = vi.spyOn(RuntimeLogger, "warn").mockImplementation(() => undefined); + const queue = new SerializedJobQueue(); + const job = vi.fn() + .mockRejectedValueOnce("transient") + .mockResolvedValue(undefined); + + queue.enqueue("defaults", job); + await vi.runAllTimersAsync(); + await queue.idle(); + vi.useRealTimers(); + + expect(warn).toHaveBeenCalledWith("Queued job retry: defaults", { + attempt: 1, + error: "transient", + }); + }); +}); diff --git a/universal-refiner/tests/lessons.test.ts b/universal-refiner/tests/lessons.test.ts index 00aa97c..f0ab75b 100644 --- a/universal-refiner/tests/lessons.test.ts +++ b/universal-refiner/tests/lessons.test.ts @@ -49,4 +49,52 @@ describe("LessonExtractor", () => { expect(lesson).toBeDefined(); expect(lesson.title).toBe("Authentication Best Practice"); }); + + it("should not learn from failed executions", async () => { + const store = EventStore.getInstance(); + const db = (store as any).db; + store.recordPrompt({ id: "p-failed", repo_id: "test", client: "cli", raw_prompt: "Broken task" }); + db.prepare("INSERT 
INTO commits (id, repo_id, sha, message, committed_at) VALUES (?, ?, ?, ?, ?)") + .run("c-failed", "test", "sha-failed", "fix: failed attempt", "2026-04-12T10:00:00Z"); + db.prepare("INSERT INTO executions (id, prompt_id, workflow_name, executor_name, status, started_at) VALUES (?, ?, ?, ?, ?, ?)") + .run("e-failed", "p-failed", "test", "test", "failed", "2026-04-12T09:00:00Z"); + db.prepare("INSERT INTO execution_commits (execution_id, commit_id) VALUES (?, ?)") + .run("e-failed", "c-failed"); + + const mockRequestModel = vi.fn(); + await new LessonExtractor(mockRequestModel).extractNewLessons(); + + expect(mockRequestModel).not.toHaveBeenCalled(); + expect(db.prepare("SELECT * FROM lessons WHERE prompt_id = ?").get("p-failed")).toBeUndefined(); + }); + + it("does not record a lesson when the model is unavailable or returns malformed output", async () => { + const store = EventStore.getInstance(); + const db = (store as any).db; + store.recordPrompt({ id: "p-model", repo_id: "test", client: "cli", raw_prompt: "Create model test" }); + store.recordCommit({ + id: "c-model", + repo_id: "test", + sha: "sha-model", + author: "test", + message: "test: model", + committed_at: "2026-04-12T10:00:00Z", + }); + store.recordExecution({ + id: "e-model", + prompt_id: "p-model", + workflow_name: "test", + executor_name: "test", + status: "completed", + }); + store.linkCommitToExecution("e-model", "c-model"); + + const request = vi.fn().mockResolvedValueOnce(null).mockResolvedValueOnce("not json"); + const extractor = new LessonExtractor(request); + await extractor.extractNewLessons(); + await extractor.extractNewLessons(); + + expect(request).toHaveBeenCalledTimes(2); + expect(db.prepare("SELECT * FROM lessons WHERE prompt_id = ?").get("p-model")).toBeUndefined(); + }); }); diff --git a/universal-refiner/tests/logger.test.ts b/universal-refiner/tests/logger.test.ts new file mode 100644 index 0000000..7dc96ed --- /dev/null +++ b/universal-refiner/tests/logger.test.ts @@ -0,0 
+1,89 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import * as fs from "fs"; +import * as os from "os"; +import * as path from "path"; +import { RuntimeLogger } from "../src/core/logger.js"; + +describe("RuntimeLogger", () => { + let directory: string; + /* Spy on console.error, re-created in beforeEach and restored in afterEach. */ let consoleError: ReturnType<typeof vi.spyOn>; + + beforeEach(() => { + directory = fs.mkdtempSync(path.join(os.tmpdir(), "promptimprover-logger-")); + process.env.PROMPT_REFINER_GLOBAL_DIR = directory; + delete process.env.PROMPT_REFINER_LOG_LEVEL; + consoleError = vi.spyOn(console, "error").mockImplementation(() => undefined); + }); + + afterEach(() => { + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + delete process.env.PROMPT_REFINER_LOG_LEVEL; + fs.rmSync(directory, { recursive: true, force: true }); + vi.restoreAllMocks(); + }); + + it("writes enabled levels and suppresses levels below the configured threshold", () => { + process.env.PROMPT_REFINER_LOG_LEVEL = "warn"; + RuntimeLogger.debug("hidden debug"); + RuntimeLogger.info("hidden info"); + RuntimeLogger.warn("visible warn"); + RuntimeLogger.error("visible error"); + + const log = fs.readFileSync(path.join(directory, "runtime.log"), "utf8"); + expect(log).not.toContain("hidden"); + expect(log).toContain("[WARN] visible warn"); + expect(log).toContain("[ERROR] visible error"); + expect(consoleError).toHaveBeenCalledTimes(2); + }); + + it("suppresses warn below the error threshold while always recording errors", () => { + process.env.PROMPT_REFINER_LOG_LEVEL = "error"; + RuntimeLogger.warn("hidden warn"); + expect(fs.existsSync(path.join(directory, "runtime.log"))).toBe(false); + + process.env.PROMPT_REFINER_LOG_LEVEL = "error"; + RuntimeLogger.error("visible error"); + expect(fs.readFileSync(path.join(directory, "runtime.log"), "utf8")).toContain("visible error"); + }); + + it("defaults invalid levels to info and serializes every metadata shape", () => { + process.env.PROMPT_REFINER_LOG_LEVEL = "INVALID"; + RuntimeLogger.debug("hidden"); 
+ RuntimeLogger.info("none"); + RuntimeLogger.info("string", "detail"); + RuntimeLogger.info("object", { ready: true }); + const errorWithoutStack = new Error("failure"); + errorWithoutStack.stack = ""; + RuntimeLogger.info("error", errorWithoutStack); + + const circular: { self?: unknown } = {}; + circular.self = circular; + RuntimeLogger.info("circular", circular); + + const log = fs.readFileSync(path.join(directory, "runtime.log"), "utf8"); + expect(log).toContain("[INFO] none"); + expect(log).toContain("string | detail"); + expect(log).toContain('object | {"ready":true}'); + expect(log).toContain("error | Error: failure"); + expect(log).toContain("circular | [object Object]"); + }); + + it("uses the home default and continues when file output fails", () => { + const originalProfile = process.env.USERPROFILE; + const originalHome = process.env.HOME; + process.env.USERPROFILE = directory; + process.env.HOME = directory; + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + process.env.PROMPT_REFINER_LOG_LEVEL = "DEBUG"; + const blockedPath = path.join(directory, ".refiner"); + fs.writeFileSync(blockedPath, "not a directory"); + + RuntimeLogger.debug("still rendered", new Error("with stack")); + + expect(consoleError).toHaveBeenCalledWith(expect.stringContaining("[DEBUG] still rendered")); + if (originalProfile === undefined) delete process.env.USERPROFILE; + else process.env.USERPROFILE = originalProfile; + if (originalHome === undefined) delete process.env.HOME; + else process.env.HOME = originalHome; + }); +}); diff --git a/universal-refiner/tests/mcp-client.test.ts b/universal-refiner/tests/mcp-client.test.ts new file mode 100644 index 0000000..6cb59a4 --- /dev/null +++ b/universal-refiner/tests/mcp-client.test.ts @@ -0,0 +1,80 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const mocks = vi.hoisted(() => ({ + connect: vi.fn(), + request: vi.fn(), + close: vi.fn(), + transport: vi.fn(), + existsSync: vi.fn(), +})); + 
+vi.mock("@modelcontextprotocol/sdk/client/index.js", () => ({ + Client: class { + connect = mocks.connect; + request = mocks.request; + close = mocks.close; + }, +})); +vi.mock("@modelcontextprotocol/sdk/client/stdio.js", () => ({ + StdioClientTransport: class { + constructor(options: unknown) { + mocks.transport(options); + } + }, +})); +vi.mock("fs", () => ({ existsSync: mocks.existsSync })); + +import { callMcpTool, resolveServerPath } from "../hooks/lib/mcp-client.js"; + +describe("hook MCP client", () => { + beforeEach(() => { + mocks.close.mockResolvedValue(undefined); + mocks.connect.mockResolvedValue(undefined); + mocks.existsSync.mockReturnValue(false); + }); + + afterEach(() => { + vi.restoreAllMocks(); + vi.clearAllMocks(); + delete process.env.PROMPTIMPROVER_SERVER_PATH; + delete process.env.PROMPTIMPROVER_HOOK_TIMEOUT_MS; + }); + + it("calls a tool, returns text, honors timeout, and closes the client", async () => { + process.env.PROMPTIMPROVER_SERVER_PATH = "./custom-server.js"; + process.env.PROMPTIMPROVER_HOOK_TIMEOUT_MS = "25"; + mocks.request.mockResolvedValue({ content: [{ type: "text", text: "result" }] }); + + await expect(callMcpTool("lint_prompt", { prompt: "test" })).resolves.toBe("result"); + expect(mocks.transport).toHaveBeenCalledWith(expect.objectContaining({ args: [resolveServerPath()] })); + expect(mocks.request.mock.calls[0][2]).toEqual({ timeout: 25 }); + expect(mocks.close).toHaveBeenCalledOnce(); + }); + + it("throws for missing text and still closes after request failures", async () => { + mocks.request.mockResolvedValueOnce({ content: [] }).mockRejectedValueOnce(new Error("closed")); + + await expect(callMcpTool("lint_prompt", {})).rejects.toThrow(/no text/); + await expect(callMcpTool("lint_prompt", {})).rejects.toThrow("closed"); + expect(mocks.close).toHaveBeenCalledTimes(2); + }); + + it("resolves built server candidates and uses the default timeout", async () => { + mocks.request.mockResolvedValue({ content: [{ type: 
"text", text: "ok" }] }); + + expect(resolveServerPath()).toMatch(/src[\\/]index\.js$/); + await callMcpTool("lint_prompt", {}); + expect(mocks.request.mock.calls[0][2]).toEqual({ timeout: 15_000 }); + }); + + it("selects an existing built candidate, rejects invalid timeouts, and tolerates close failures", async () => { + mocks.existsSync.mockImplementation((candidate: string) => candidate.includes("dist")); + mocks.request.mockResolvedValue({ content: [{ type: "text", text: "ok" }] }); + mocks.close.mockRejectedValue(new Error("close failed")); + process.env.PROMPTIMPROVER_HOOK_TIMEOUT_MS = "-1"; + + expect(resolveServerPath()).toMatch(/dist[\\/]src[\\/]index\.js$/); + await expect(callMcpTool("lint_prompt", {})).resolves.toBe("ok"); + expect(mocks.request.mock.calls[0][2]).toEqual({ timeout: 15_000 }); + }); +}); diff --git a/universal-refiner/tests/owned-modules-coverage.test.ts b/universal-refiner/tests/owned-modules-coverage.test.ts new file mode 100644 index 0000000..94bc202 --- /dev/null +++ b/universal-refiner/tests/owned-modules-coverage.test.ts @@ -0,0 +1,395 @@ +import { EventEmitter } from "node:events"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { ArchitecturalScout, NodeDetector, PythonDetector, type ProjectContext } from "../src/detectors/project-scout.js"; +import { comparePrompts, createABEvaluationRecord, evaluatePrompt } from "../src/evaluation/prompt-evaluator.js"; +import { PromptLinter } from "../src/linters/prompt-linter.js"; +import { LocalBrain } from "../src/memory/local-brain.js"; +import { NeuralSnippets } from "../src/memory/neural-snippets.js"; +import { PromptOptimizer } from "../src/refiners/prompt-optimizer.js"; +import { PromptRefiner } from "../src/refiners/prompt-refiner.js"; +import { ApprovedTemplateSelector, type PromptTemplateCandidate } from "../src/refiners/template-selector.js"; +import { 
FileWatcher, MEANINGFUL_EXTENSIONS, NOISE_PATH_SEGMENTS, NOISE_SUFFIXES } from "../src/watcher/file-watcher.js"; + +const directories: string[] = []; + +function tempDir(prefix: string): string { + const directory = fs.mkdtempSync(path.join(os.tmpdir(), prefix)); + directories.push(directory); + return directory; +} + +function writeJson(directory: string, value: unknown): void { + fs.writeFileSync(path.join(directory, "package.json"), JSON.stringify(value)); +} + +afterEach(() => { + vi.restoreAllMocks(); + NeuralSnippets.reset(); + for (const directory of directories.splice(0)) { + fs.rmSync(directory, { recursive: true, force: true }); + } +}); + +describe("LocalBrain failure and update behavior", () => { + it("creates storage, handles missing pattern arrays, and updates existing patterns", () => { + const directory = tempDir("brain-"); + expect(LocalBrain.getPatterns(directory)).toEqual([]); + + const storage = path.join(directory, ".refiner", "memory.json"); + fs.writeFileSync(storage, "{}"); + expect(LocalBrain.getPatterns(directory, true)).toEqual([]); + expect(() => LocalBrain.savePattern({ id: "migrated", category: "quality", description: "migrated" }, directory)).not.toThrow(); + expect(LocalBrain.getPatterns(directory, true)).toMatchObject([{ id: "migrated" }]); + fs.writeFileSync(storage, "{}"); + expect(() => LocalBrain.approvePattern("missing", directory)).not.toThrow(); + + fs.writeFileSync(storage, JSON.stringify({ patterns: [] })); + LocalBrain.savePattern({ id: "one", category: "quality", description: "first", isProposed: true }, directory); + LocalBrain.savePattern({ id: "one", category: "quality", description: "updated" }, directory); + expect(LocalBrain.getPatterns(directory, true)).toMatchObject([{ id: "one", description: "updated" }]); + + LocalBrain.approvePattern("missing", directory); + expect(LocalBrain.getPatterns(directory, true)).toHaveLength(1); + }); + + it("returns an empty list for corrupt storage and surfaces corrupt writes", () 
=> { + const directory = tempDir("brain-corrupt-"); + LocalBrain.getPatterns(directory); + const storage = path.join(directory, ".refiner", "memory.json"); + fs.writeFileSync(storage, "{"); + + expect(LocalBrain.getPatterns(directory)).toEqual([]); + expect(() => LocalBrain.savePattern({ id: "bad", category: "test", description: "bad" }, directory)).toThrow(); + expect(() => LocalBrain.approvePattern("bad", directory)).toThrow(); + }); +}); + +describe("NeuralSnippets traversal, parsing, and ranking behavior", () => { + it("walks only meaningful source files and handles missing roots", async () => { + const directory = tempDir("snippets-walk-"); + for (const ignored of ["node_modules", "dist", "build", "out", "coverage", "tests", "test", ".hidden"]) { + fs.mkdirSync(path.join(directory, ignored)); + fs.writeFileSync(path.join(directory, ignored, "ignored.ts"), "export function ignored() {}"); + } + fs.writeFileSync(path.join(directory, "kept.ts"), "export function kept() {}"); + fs.mkdirSync(path.join(directory, "nested")); + fs.writeFileSync(path.join(directory, "nested", "nested.js"), "export function nested() {}"); + fs.writeFileSync(path.join(directory, "ignored.test.ts"), "export function ignoredTest() {}"); + fs.writeFileSync(path.join(directory, "ignored.spec.js"), "export function ignoredSpec() {}"); + fs.writeFileSync(path.join(directory, "ignored.md"), "not source"); + + await NeuralSnippets.initialize(path.join(directory, "missing")); + expect(NeuralSnippets.isInitialized).toBe(true); + + NeuralSnippets.reset(); + await NeuralSnippets.initialize(directory); + expect(await NeuralSnippets.search("kept", directory)).toMatchObject([{ symbolName: "kept" }]); + expect(await NeuralSnippets.search("ignored", directory)).toEqual([]); + await NeuralSnippets.initialize(directory); + }); + + it("extracts JavaScript-style blocks at closing, safety, and maximum-length boundaries", () => { + const extract = (NeuralSnippets as 
any).extractSymbolBlock.bind(NeuralSnippets); + expect(extract(["function short() {", " return true;", "}"], 0, "ts")).toContain("}"); + expect(extract(["declaration", "one", "two", "three", "four", "five", "six"], 0, "ts").split("\n")).toHaveLength(6); + const long = ["function long() {", ...Array.from({ length: 55 }, () => " work();")]; + expect(extract(long, 0, "ts").split("\n")).toHaveLength(51); + }); + + it("extracts Python indentation boundaries and all named TypeScript symbol types", async () => { + const directory = tempDir("snippets-symbols-"); + fs.writeFileSync(path.join(directory, "symbols.py"), [ + "class Service:", + " def run(self):", + " return True", + "", + "def helper():", + " return False", + "after = 1", + ].join("\n")); + fs.writeFileSync(path.join(directory, "symbols.ts"), [ + "export interface Contract { value: string }", + "export type Alias = string;", + "export default class {}", + "export default function () {}", + "export function searchableOne() {}", + "export function searchableTwo() {}", + `export const longText = "${"searchable ".repeat(30)}";`, + ].join("\n")); + + await NeuralSnippets.initialize(directory); + expect((await NeuralSnippets.search("Contract", directory))[0]?.symbolType).toBe("interface"); + expect((await NeuralSnippets.search("Alias", directory))[0]?.symbolType).toBe("type"); + expect((await NeuralSnippets.search("helper", directory))[0]?.content).not.toContain("after = 1"); + expect(["chunk", "function"]).toContain((await NeuralSnippets.search("searchable", directory, 1))[0]?.symbolType); + expect(await NeuralSnippets.search("missing", directory)).toEqual([]); + expect(await NeuralSnippets.search("searchable", directory, 1)).toHaveLength(1); + }); + + it("defensively indexes unnamed symbols and ranks unique symbols before chunks", async () => { + const directory = tempDir("snippets-ranking-"); + fs.writeFileSync(path.join(directory, "source.ts"), "export const value = 1;"); + const originalParse = (NeuralSnippets 
as any).parseSymbols; + (NeuralSnippets as any).parseSymbols = () => [{ content: "unnamed symbol", symbolType: "function" }]; + await NeuralSnippets.initialize(directory); + (NeuralSnippets as any).parseSymbols = originalParse; + + (NeuralSnippets as any).store = new Map([ + [1, { id: 1, filePath: "a.ts", content: "symbol", symbolName: "symbol", symbolType: "function" }], + [2, { id: 2, filePath: "a.ts", content: "chunk", symbolType: "chunk" }], + ]); + (NeuralSnippets as any).symbolIndex = { search: vi.fn().mockResolvedValueOnce([1, 1, 999]).mockResolvedValue(null) }; + (NeuralSnippets as any).contentIndex = { search: vi.fn().mockResolvedValueOnce([1, 2, 999]).mockResolvedValue(null) }; + (NeuralSnippets as any).isInitialized = true; + + expect(await NeuralSnippets.search("symbol extra", directory)).toEqual([ + expect.objectContaining({ id: 1 }), + expect.objectContaining({ id: 2 }), + ]); + }); +}); + +describe("project detector fallbacks and precedence", () => { + it("covers empty, modular, git, nested fallback, and unreadable architecture roots", async () => { + expect(await ArchitecturalScout.detectPatterns(path.join(tempDir("arch-missing-"), "missing"))).toEqual([]); + + const modular = tempDir("arch-modular-"); + fs.mkdirSync(path.join(modular, "src")); + fs.mkdirSync(path.join(modular, ".git")); + writeJson(modular, {}); + expect(await ArchitecturalScout.detectPatterns(modular)).toEqual(expect.arrayContaining([ + "Modular TypeScript/Node Project", + "Git Repository", + ])); + + const nested = tempDir("arch-nested-"); + fs.mkdirSync(path.join(nested, ".hidden")); + fs.mkdirSync(path.join(nested, "module")); + fs.mkdirSync(path.join(nested, "module", "src")); + expect(await ArchitecturalScout.detectPatterns(nested)).toContain("Multi-Module (module)"); + + const nestedPackage = tempDir("arch-nested-package-"); + fs.mkdirSync(path.join(nestedPackage, "module")); + writeJson(path.join(nestedPackage, "module"), {}); + expect(await 
ArchitecturalScout.detectPatterns(nestedPackage)).toContain("Multi-Module (module)"); + + const monorepo = tempDir("arch-hidden-monorepo-"); + for (const name of [".hidden", "packages", "one", "two"]) fs.mkdirSync(path.join(monorepo, name)); + expect((await ArchitecturalScout.detectPatterns(monorepo))[0]).not.toContain(".hidden"); + + const file = path.join(tempDir("arch-file-"), "file"); + fs.writeFileSync(file, ""); + expect(await ArchitecturalScout.detectPatterns(file)).toEqual([]); + }); + + it("detects all Node alternatives, package managers, and empty metadata", async () => { + const none = tempDir("node-none-"); + fs.mkdirSync(path.join(none, ".hidden")); + fs.mkdirSync(path.join(none, "empty")); + expect(await NodeDetector.detect(none)).toEqual({}); + + const npmProject = tempDir("node-npm-"); + fs.writeFileSync(path.join(npmProject, "package-lock.json"), ""); + writeJson(npmProject, { + dependencies: { + express: "1", hono: "1", "@nestjs/core": "1", react: "1", next: "1", + prisma: "1", "drizzle-orm": "1", typeorm: "1", sequelize: "1", + tailwindcss: "1", "styled-components": "1", + "@azure/functions": "1", "@aws-sdk/client-lambda": "1", + jest: "1", vitest: "1", cypress: "1", + }, + }); + expect(await NodeDetector.detect(npmProject)).toMatchObject({ + language: "JavaScript", + framework: "Next.js", + orm: "Sequelize", + styling: "Styled Components", + cloud: "AWS Lambda", + testing: "Cypress", + packageManager: "npm", + scripts: [], + }); + + const honoProject = tempDir("node-hono-"); + writeJson(honoProject, { dependencies: { hono: "1" } }); + expect(await NodeDetector.detect(honoProject)).toMatchObject({ framework: "Hono", packageManager: "pnpm" }); + }); + + it("covers Python absence, read failures, and all detection alternatives", async () => { + const absent = tempDir("python-absent-"); + expect(await PythonDetector.detect(absent)).toEqual({}); + + const unreadable = tempDir("python-unreadable-"); + fs.mkdirSync(path.join(unreadable, 
"requirements.txt")); + expect(await PythonDetector.detect(unreadable)).toEqual({ language: "Python", isTypeScript: false }); + + const project = tempDir("python-all-"); + fs.writeFileSync(path.join(project, "requirements.txt"), "fastapi\ndjango\nflask\nsqlalchemy\ntortoise-orm\npeewee\npytest\nunittest"); + fs.writeFileSync(path.join(project, "pyproject.toml"), ""); + expect(await PythonDetector.detect(project)).toMatchObject({ + framework: "Flask", + orm: "Peewee", + testing: "Unittest", + }); + }); +}); + +describe("PromptLinter complete context behavior", () => { + it("adds every context-aware gap and de-duplicates merged semantic gaps", () => { + const context: ProjectContext = { + language: "Unknown", framework: "Unknown", testing: "Unknown", isTypeScript: false, + orm: "Prisma", styling: "Tailwind CSS", cloud: "Azure Functions", + }; + const gaps = PromptLinter.lint("refactor entire system module", context); + expect(gaps.map(gap => gap.id)).toEqual([ + "testing", "tech-stack", "architecture", "documentation", "security", + "orm-context", "styling-context", "cloud-context", + ]); + expect(PromptLinter.mergeGaps(gaps, [gaps[0], { id: "semantic", message: "m", suggestedAction: "a" }])) + .toHaveLength(gaps.length + 1); + }); + + it("adds no gaps when all requirements and context details are explicit", () => { + const context: ProjectContext = { + language: "Unknown", framework: "Unknown", testing: "Unknown", isTypeScript: false, + orm: "Prisma", styling: "Tailwind CSS", cloud: "Azure Functions", + }; + expect(PromptLinter.lint( + "Using framework language architecture pattern: test docs comments readme error handling security schema migration model class style responsive trigger handler env", + context, + )).toEqual([]); + }); +}); + +describe("PromptRefiner optional context behavior", () => { + it("renders every optional context section, mandate, and prompt id", () => { + const context: ProjectContext = { + language: "TypeScript", framework: "Unknown", 
testing: "Unknown", isTypeScript: true, + orm: "Prisma", styling: "Tailwind CSS", cloud: "Azure Functions", packageManager: "npm", + architecturalPatterns: ["Clean Architecture / DDD", "Modern Component-Based Architecture (React/Vue style)", "MVC (Model-View-Controller)"], + learnedPatterns: [{ id: "pattern", category: "quality", description: "Preserve behavior", learnedAt: "now" }], + relevantSnippets: [ + { id: 1, filePath: "src/a.ts", content: " function a() {} ", symbolName: "a", symbolType: "function" }, + { id: 2, filePath: "src/b.ts", content: " chunk content ", symbolType: "chunk" }, + ], + customMandates: ["Use strict types"], + predictiveLessons: [{ title: "Regression", summary: "Add tests", confidence: 0.9 }], + }; + const output = PromptRefiner.refine("Implement feature", context, { size: "small" }, "prompt-1"); + expect(output).toContain("[PROMPT_ID: prompt-1]"); + expect(output).toContain("ORM/Database"); + expect(output).toContain("MVC Mandate"); + expect(output).toContain("[chunk]"); + expect(output).toContain("Predictive Autonomous Lessons"); + expect(output).toContain("industry standards"); + expect(PromptRefiner.calculateGain("", "", context)).toBe(85); + + expect(PromptRefiner.refine("Task", context, {}, undefined, { + approvedTemplates: [{ + id: "template", category: "feature", title: "No notes", templateText: "Implement.", relevanceScore: 1, selectionReasons: [], + }], + })).not.toContain("Usage notes:"); + }); +}); + +describe("PromptOptimizer fallback branches", () => { + it("handles absent optional context, zero iterations, and an empty fallback rewrite", async () => { + const context: ProjectContext = { language: "Unknown", framework: "Unknown", testing: "Unknown", isTypeScript: false }; + const request = vi.fn() + .mockResolvedValueOnce("Here is the rewritten") + .mockResolvedValueOnce(" "); + expect(await new PromptOptimizer(request).optimize("Original", context, 1)).toBe("Here is the rewritten"); + expect(await new 
PromptOptimizer(request).optimize("Original", { + ...context, + relevantSnippets: [{ id: 1, filePath: "a.ts", content: "content", symbolType: "chunk" }], + }, 1)).toBe(""); + expect(await new PromptOptimizer(request).optimize("Original", context, 0)).toBe("Original"); + }); +}); + +describe("ApprovedTemplateSelector edge ranking behavior", () => { + it("infers each category and clamps malformed scores and lengths", async () => { + const longText = "x".repeat(4_100); + const candidates: PromptTemplateCandidate[] = [ + { id: "high", repoId: "r", category: "bugfix", title: "Fix failure", templateText: longText, usageNotes: longText, successScore: 200, approved: true }, + { id: "low", repoId: "r", category: "test", title: "Test", templateText: "verify", successScore: -1, approved: 1 }, + { id: "nan", repoId: "r", category: "refactor", title: "Cleanup", templateText: "simplify", successScore: Number.NaN, approved: 1 }, + { id: "feature", repoId: "r", category: "feature", title: "Build", templateText: "create", successScore: 0, approved: 1 }, + ]; + const selector = new ApprovedTemplateSelector({ getTemplates: async () => candidates }); + + expect((await selector.select({ repoId: "r", prompt: "fix bug error failure", category: " BUGFIX ", limit: 99 }))[0]) + .toMatchObject({ id: "high", relevanceScore: 75 }); + expect((await selector.select({ repoId: "r", prompt: "fix defect" }))[0].selectionReasons).toContain("category:bugfix"); + expect((await selector.select({ repoId: "r", prompt: "tests coverage verification" })) + .find(item => item.category === "test")?.selectionReasons).toContain("category:test"); + expect((await selector.select({ repoId: "r", prompt: "refactor cleanup restructure" })) + .find(item => item.category === "refactor")?.selectionReasons).toContain("category:refactor"); + expect((await selector.select({ repoId: "r", prompt: "add build create implement feature" })) + .find(item => item.category === 
"feature")?.selectionReasons).toContain("category:feature"); + const unspecified = await selector.select({ repoId: "r", prompt: "", limit: 10 }); + expect(unspecified.find(item => item.id === "high")?.templateText).toHaveLength(4_000); + expect(unspecified.find(item => item.id === "high")?.usageNotes).toHaveLength(4_000); + }); +}); + +describe("deterministic evaluation edge behavior", () => { + it("covers empty intent, original/tie preferences, generated timestamps, and observed outcome ties", () => { + expect(evaluatePrompt("", "").dimensions.intentPreservation.evidence).toEqual(["empty-baseline"]); + expect(comparePrompts("Fix src/a.ts with tests", "fix").heuristicPreference).toBe("original"); + expect(comparePrompts("same", "same").heuristicPreference).toBe("tie"); + expect(createABEvaluationRecord({ + experimentId: "heuristic-a", baselinePrompt: "fix", + variantA: { id: "A", prompt: "Fix src/a.ts with tests and verify results" }, + variantB: { id: "B", prompt: "fix" }, + }).heuristicPreference).toBe("A"); + + const completeA = createABEvaluationRecord({ + experimentId: "a", baselinePrompt: "", variantA: { id: "A", prompt: "", observedOutcome: { status: "completed" } }, + variantB: { id: "B", prompt: "", observedOutcome: { status: "cancelled", testsPassed: 100, testsFailed: 1, reworkCount: 1 } }, + }); + expect(completeA.heuristicPreference).toBe("tie"); + expect(completeA.observedWinner).toBe("A"); + expect(completeA.createdAt).toBeTruthy(); + + const tie = createABEvaluationRecord({ + experimentId: "tie", baselinePrompt: "x", + variantA: { id: "A", prompt: "x", observedOutcome: { status: "failed" } }, + variantB: { id: "B", prompt: "x", observedOutcome: { status: "failed" } }, + }); + expect(tie.observedWinner).toBeUndefined(); + }); +}); + +describe("FileWatcher lifecycle and filtering behavior", () => { + it("covers idempotent lifecycle, error forwarding, unlink, and all in-process filters", async () => { + 
expect(MEANINGFUL_EXTENSIONS.has(".prompt")).toBe(true); + expect(NOISE_PATH_SEGMENTS).toContain("coverage"); + expect(NOISE_SUFFIXES).toContain(".tmp"); + + const watcher = new FileWatcher(tempDir("watcher-unit-")); + watcher.start(); + watcher.start(); + const inner = (watcher as any).inner as EventEmitter; + + const errors: Error[] = []; + const changes: unknown[] = []; + watcher.on("error", error => errors.push(error)); + watcher.on("change", event => changes.push(event)); + inner.emit("error", "failure"); + inner.emit("error", new Error("typed failure")); + expect(errors[0]).toEqual(new Error("failure")); + expect(errors[1]).toEqual(new Error("typed failure")); + + for (const noisy of ["C:\\repo\\node_modules\\a.ts", "/repo/coverage/a.ts", "/repo/a.log", "/repo/a.bin", "/repo/no-extension"]) { + (watcher as any).emitChange("change", noisy); + } + inner.emit("unlink", "/repo/a.md"); + expect(changes).toHaveLength(1); + await watcher.stop(); + await watcher.stop(); + }); +}); diff --git a/universal-refiner/tests/project-scout.test.ts b/universal-refiner/tests/project-scout.test.ts new file mode 100644 index 0000000..20c31e0 --- /dev/null +++ b/universal-refiner/tests/project-scout.test.ts @@ -0,0 +1,81 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { ArchitecturalScout, NodeDetector, PythonDetector } from "../src/detectors/project-scout.js"; + +describe("project scouts", () => { + const directories: string[] = []; + + afterEach(() => { + vi.restoreAllMocks(); + for (const directory of directories.splice(0)) { + rmSync(directory, { recursive: true, force: true }); + } + }); + + function createDirectory(): string { + const directory = mkdtempSync(join(tmpdir(), "project-scout-")); + directories.push(directory); + return directory; + } + + it("detects architecture markers and nested modules", 
async () => { + const directory = createDirectory(); + for (const name of ["domain", "application", "infrastructure", "components", "hooks", "services", "skills", "packages"]) { + mkdirSync(join(directory, name)); + } + writeFileSync(join(directory, "package.json"), "{}"); + + const patterns = await ArchitecturalScout.detectPatterns(directory); + + expect(patterns).toContain("Clean Architecture / DDD"); + expect(patterns).toContain("Modern Component-Based Architecture (React/Vue style)"); + expect(patterns).toContain("Gemini CLI Extension Project"); + expect(patterns.some(pattern => pattern.startsWith("Monorepo"))).toBe(true); + }); + + it("detects a nested Node package and its full stack", async () => { + const directory = createDirectory(); + const app = join(directory, "app"); + mkdirSync(app); + writeFileSync(join(app, "tsconfig.json"), "{}"); + writeFileSync(join(app, "yarn.lock"), ""); + writeFileSync(join(app, "package.json"), JSON.stringify({ + scripts: { test: "vitest" }, + dependencies: { + next: "1", react: "1", prisma: "1", tailwindcss: "1", + "@azure/functions": "1", vitest: "1", + }, + })); + + await expect(NodeDetector.detect(directory)).resolves.toMatchObject({ + language: "TypeScript", + framework: "Next.js", + orm: "Prisma", + styling: "Tailwind CSS", + cloud: "Azure Functions", + testing: "Vitest", + packageManager: "yarn", + scripts: ["test"], + }); + }); + + it("returns an empty Node context for malformed package metadata", async () => { + const directory = createDirectory(); + writeFileSync(join(directory, "package.json"), "{"); + await expect(NodeDetector.detect(directory)).resolves.toEqual({}); + }); + + it("detects Python framework, ORM, and test runner", async () => { + const directory = createDirectory(); + writeFileSync(join(directory, "requirements.txt"), "fastapi\nsqlalchemy\npytest\n"); + await expect(PythonDetector.detect(directory)).resolves.toMatchObject({ + language: "Python", + framework: "FastAPI", + orm: "SQLAlchemy", + 
testing: "Pytest", + isTypeScript: false, + }); + }); +}); diff --git a/universal-refiner/tests/prompt-optimizer.test.ts b/universal-refiner/tests/prompt-optimizer.test.ts new file mode 100644 index 0000000..a01bc51 --- /dev/null +++ b/universal-refiner/tests/prompt-optimizer.test.ts @@ -0,0 +1,39 @@ +import { describe, expect, it, vi } from "vitest"; +import { PromptOptimizer } from "../src/refiners/prompt-optimizer.js"; +import { ProjectContext } from "../src/detectors/project-scout.js"; + +const context: ProjectContext = { + language: "TypeScript", + framework: "Node.js", + testing: "Vitest", + isTypeScript: true, + customMandates: ["Preserve behavior"], + predictiveLessons: [{ summary: "Add regression tests" }], + relevantSnippets: [{ filePath: "src/auth.ts", content: "export function login() {}", symbolName: "login", symbolType: "function" }], +}; + +describe("PromptOptimizer", () => { + it("iteratively adopts rewritten prompts and passes project context to the model", async () => { + const request = vi.fn() + .mockResolvedValueOnce("critique\n---REWRITTEN PROMPT---\nFirst rewrite") + .mockResolvedValueOnce("critique\n---REWRITTEN PROMPT---\nFinal rewrite"); + + await expect(new PromptOptimizer(request).optimize("Fix login", context, 2)).resolves.toBe("Final rewrite"); + expect(request).toHaveBeenCalledTimes(2); + expect(request.mock.calls[0][1]).toContain("Preserve behavior"); + expect(request.mock.calls[0][1]).toContain("Add regression tests"); + expect(request.mock.calls[0][1]).toContain("src/auth.ts"); + expect(request.mock.calls[1][1]).toContain("First rewrite"); + }); + + it("uses fallback response parsing and stops when the provider is unavailable", async () => { + const request = vi.fn() + .mockResolvedValueOnce("Rewritten Prompt: Add tests and verify login") + .mockResolvedValueOnce(null); + + await expect(new PromptOptimizer(request).optimize("Fix login", { ...context, relevantSnippets: undefined }, 3)) + .resolves.toBe("Rewritten Prompt: Add tests 
and verify login"); + expect(request).toHaveBeenCalledTimes(2); + expect(request.mock.calls[0][1]).toContain("No relevant snippets found."); + }); +}); diff --git a/universal-refiner/tests/refiners.test.ts b/universal-refiner/tests/refiners.test.ts index 0ddcfb4..a83e674 100644 --- a/universal-refiner/tests/refiners.test.ts +++ b/universal-refiner/tests/refiners.test.ts @@ -44,4 +44,30 @@ describe("PromptRefiner", () => { expect(refined).toContain("Agent A"); expect(refined).toContain("Agent B"); }); + + it("should not treat prompt verbosity as quality gain", () => { + const shortRefinement = PromptRefiner.calculateGain("task", "task with context", baseCtx); + const verboseRefinement = PromptRefiner.calculateGain("task", `task ${"detail ".repeat(1000)}`, baseCtx); + expect(verboseRefinement).toBe(shortRefinement); + }); + + it("should use selected approved templates as bounded refinement context", () => { + const approvedTemplates = [{ + id: "feature-template", + category: "feature", + title: "Reviewed feature template", + templateText: "Implement [FEATURE] using existing project patterns.", + usageNotes: "Verify behavior with focused tests.", + relevanceScore: 92, + selectionReasons: ["category:feature"] + }]; + + const refined = PromptRefiner.refine("Create a login button", baseCtx, {}, undefined, { approvedTemplates }); + const gain = PromptRefiner.calculateGain("Create a login button", refined, baseCtx, { approvedTemplates }); + + expect(refined).toContain("Approved Prompt Templates"); + expect(refined).toContain("Reviewed feature template"); + expect(refined).toContain("do not replace the user's intent"); + expect(gain).toBeGreaterThan(PromptRefiner.calculateGain("Create a login button", refined, baseCtx)); + }); }); diff --git a/universal-refiner/tests/repository-identity.test.ts b/universal-refiner/tests/repository-identity.test.ts new file mode 100644 index 0000000..e2fe364 --- /dev/null +++ b/universal-refiner/tests/repository-identity.test.ts @@ -0,0 
+1,15 @@ +import { describe, expect, it } from "vitest"; +import { resolveRepositoryIdentity } from "../src/history/repository-identity.js"; + +describe("resolveRepositoryIdentity", () => { + it("distinguishes same-named repositories at different paths", () => { + const first = resolveRepositoryIdentity("C:/repo/team-a/service"); + const second = resolveRepositoryIdentity("C:/repo/team-b/service"); + expect(first.legacyId).toBe(second.legacyId); + expect(first.id).not.toBe(second.id); + }); + + it("normalizes path casing and separators", () => { + expect(resolveRepositoryIdentity("C:\\Repo\\Service").id).toBe(resolveRepositoryIdentity("c:/repo/service").id); + }); +}); diff --git a/universal-refiner/tests/semantic-provider.test.ts b/universal-refiner/tests/semantic-provider.test.ts new file mode 100644 index 0000000..4816ca2 --- /dev/null +++ b/universal-refiner/tests/semantic-provider.test.ts @@ -0,0 +1,203 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { createServer, Server } from "node:http"; +import { AddressInfo } from "node:net"; +import { + LocalOpenAiProvider, + SemanticProvider, + SemanticProviderChain, + McpSamplingProvider, +} from "../src/core/semantic-provider.js"; + +describe("semantic providers", () => { + let server: Server | undefined; + + afterEach(async () => { + vi.unstubAllGlobals(); + if (server) { + await new Promise((resolve, reject) => server?.close(error => error ? 
reject(error) : resolve())); + server = undefined; + } + }); + + it("rejects non-loopback local endpoints by default", () => { + expect(() => new LocalOpenAiProvider({ + baseUrl: "https://example.com/v1", + models: ["gemma3:12b"], + timeoutMs: 1000, + temperature: 0.2, + allowNonLoopback: false, + })).toThrow(/loopback/); + }); + + it("rejects malformed local endpoint URLs and permits explicit remote endpoints", () => { + const options = { + models: ["gemma3:12b"], + timeoutMs: 1000, + temperature: 0.2, + }; + + expect(() => new LocalOpenAiProvider({ + ...options, + baseUrl: "not a valid URL", + allowNonLoopback: false, + })).toThrow(/loopback/); + expect(() => new LocalOpenAiProvider({ + ...options, + baseUrl: "https://example.com/v1", + allowNonLoopback: true, + })).not.toThrow(); + }); + + it.each([ + "http://localhost:11434/v1", + "http://[::1]:11434/v1", + ])("accepts loopback endpoint %s", baseUrl => { + expect(() => new LocalOpenAiProvider({ + baseUrl, + models: ["gemma3:12b"], + timeoutMs: 1000, + temperature: 0.2, + allowNonLoopback: false, + })).not.toThrow(); + }); + + it("falls back to the next configured local model", async () => { + const requestedModels: string[] = []; + server = createServer((request, response) => { + let body = ""; + request.on("data", chunk => body += chunk); + request.on("end", () => { + const payload = JSON.parse(body); + requestedModels.push(payload.model); + response.setHeader("content-type", "application/json"); + if (payload.model === "gemma3:12b") { + response.statusCode = 503; + response.end(JSON.stringify({ error: "unavailable" })); + return; + } + response.end(JSON.stringify({ + choices: [{ message: { content: "fallback response" } }], + usage: { prompt_tokens: 10, completion_tokens: 2 }, + })); + }); + }); + await new Promise(resolve => server?.listen(0, "127.0.0.1", resolve)); + const port = (server.address() as AddressInfo).port; + + const provider = new LocalOpenAiProvider({ + baseUrl: `http://127.0.0.1:${port}/v1`, 
+ models: ["gemma3:12b", "gemma3:1b"], + timeoutMs: 1000, + temperature: 0.2, + allowNonLoopback: false, + }); + + const result = await provider.requestText({ taskName: "test", prompt: "hello", maxTokens: 10 }); + + expect(requestedModels).toEqual(["gemma3:12b", "gemma3:1b"]); + expect(result?.text).toBe("fallback response"); + expect(result?.model).toBe("gemma3:1b"); + expect(result?.fallbackFrom).toEqual(["gemma3:12b"]); + }); + + it("falls through provider chain when a provider is unavailable", async () => { + const unavailable: SemanticProvider = { + name: "unavailable", + requestText: async () => null, + }; + const available: SemanticProvider = { + name: "available", + requestText: async () => ({ + text: "ready", + provider: "available", + model: "test", + latencyMs: 1, + }), + }; + + const chain = new SemanticProviderChain([unavailable, available]); + await expect(chain.requestText({ taskName: "test", prompt: "hello", maxTokens: 10 })).resolves.toBe("ready"); + }); + + it("rejects malformed local responses and exhausts configured models", async () => { + server = createServer((_request, response) => { + response.setHeader("content-type", "application/json"); + response.end(JSON.stringify({ choices: [{ message: { content: " " } }] })); + }); + await new Promise(resolve => server?.listen(0, "127.0.0.1", resolve)); + const port = (server.address() as AddressInfo).port; + const provider = new LocalOpenAiProvider({ + baseUrl: `http://127.0.0.1:${port}/v1/`, + models: ["bad"], + timeoutMs: 1000, + temperature: 0, + allowNonLoopback: false, + }); + + await expect(provider.requestText({ taskName: "malformed", prompt: "hello", maxTokens: 10 })).resolves.toBeNull(); + }); + + it("rejects non-string model content and handles non-Error request failures", async () => { + vi.stubGlobal("fetch", vi.fn() + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ choices: [{ message: { content: 42 } }] }), + }) + .mockRejectedValueOnce("connection lost")); + const provider 
= new LocalOpenAiProvider({ + baseUrl: "http://localhost:11434/v1", + models: ["non-string", "offline"], + timeoutMs: 1000, + temperature: 0, + allowNonLoopback: false, + }); + + await expect(provider.requestText({ taskName: "invalid", prompt: "hello", maxTokens: 10 })).resolves.toBeNull(); + }); + + it("supports MCP sampling and records provider-chain telemetry", async () => { + const onSuccess = vi.fn(); + const sampling = new McpSamplingProvider(async () => "sampled"); + const chain = new SemanticProviderChain([ + { name: "offline", requestText: async () => null }, + sampling, + ], onSuccess); + const request = { taskName: "sample", prompt: "hello", maxTokens: 10 }; + + await expect(chain.requestText(request)).resolves.toBe("sampled"); + expect(onSuccess).toHaveBeenCalledWith( + expect.objectContaining({ provider: "mcp-sampling", fallbackFrom: ["provider:offline"] }), + request, + ); + }); + + it("maps model fallbacks from a successful provider response", async () => { + const chain = new SemanticProviderChain([{ + name: "provider", + requestText: async () => ({ + text: "ready", + provider: "provider", + model: "fallback", + latencyMs: 1, + fallbackFrom: ["primary"], + }), + }]); + await expect(chain.requestText({ taskName: "fallback", prompt: "x", maxTokens: 1 })).resolves.toBe("ready"); + }); + + it("returns null when MCP sampling and every provider are unavailable", async () => { + const sampling = new McpSamplingProvider(async () => null); + const chain = new SemanticProviderChain([sampling]); + await expect(chain.requestText({ taskName: "offline", prompt: "hello", maxTokens: 10 })).resolves.toBeNull(); + }); + + it("propagates sampling failures to the caller", async () => { + const failure = new Error("sampling failed"); + const sampling = new McpSamplingProvider(async () => { + throw failure; + }); + + await expect(sampling.requestText({ taskName: "sample", prompt: "hello", maxTokens: 10 })) + .rejects.toBe(failure); + }); +}); diff --git 
a/universal-refiner/tests/server-coverage.test.ts b/universal-refiner/tests/server-coverage.test.ts new file mode 100644 index 0000000..3526800 --- /dev/null +++ b/universal-refiner/tests/server-coverage.test.ts @@ -0,0 +1,223 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { ErrorCode, McpError } from "@modelcontextprotocol/sdk/types.js"; +import { AgenticBlackboard } from "../src/core/blackboard.js"; +import { ConfigManager } from "../src/core/config.js"; +import { PromptRefinerServer } from "../src/core/server.js"; +import { ArchitecturalScout, NodeDetector, PythonDetector } from "../src/detectors/project-scout.js"; +import { EventStore } from "../src/history/event-store.js"; +import { PromptLinter } from "../src/linters/prompt-linter.js"; +import { LocalBrain } from "../src/memory/local-brain.js"; +import { NeuralSnippets } from "../src/memory/neural-snippets.js"; + +const handlers: Array<(request: any) => any> = []; +const createMessage = vi.fn(); + +vi.mock("@modelcontextprotocol/sdk/server/index.js", () => ({ + Server: class { + setRequestHandler = vi.fn((_schema, handler) => handlers.push(handler)); + connect = vi.fn(); + createMessage = createMessage; + }, +})); +vi.mock("@modelcontextprotocol/sdk/server/stdio.js", () => ({ StdioServerTransport: vi.fn() })); + +describe("PromptRefinerServer deterministic fallbacks", () => { + let directory: string; + let server: PromptRefinerServer; + let dispatch: (request: any) => Promise<any>; + + beforeEach(() => { + vi.clearAllMocks(); + handlers.length = 0; + directory = mkdtempSync(join(tmpdir(), "server-coverage-")); + process.env.PROMPT_REFINER_GLOBAL_DIR = directory; + (EventStore as unknown as { instance: EventStore | null }).instance = null; + server = new PromptRefinerServer(directory); + dispatch = handlers[1]; + }); + + afterEach(() => { + 
vi.restoreAllMocks(); + (EventStore as unknown as { instance: EventStore | null }).instance?.close(); + (EventStore as unknown as { instance: EventStore | null }).instance = null; + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + rmSync(directory, { recursive: true, force: true }); + }); + + it("creates empty and rejected semantic provider chains", () => { + vi.spyOn(ConfigManager, "getSemanticConfig").mockReturnValue({ + localEnabled: false, + mcpSamplingEnabled: false, + baseUrl: "http://localhost:1/v1", + models: [], + timeoutMs: 1, + temperature: 0, + allowNonLoopback: false, + }); + expect((server as any).createSemanticProviderChain()).toBeDefined(); + + vi.spyOn(ConfigManager, "getSemanticConfig").mockReturnValue({ + localEnabled: true, + mcpSamplingEnabled: false, + baseUrl: "https://remote.example/v1", + models: ["m"], + timeoutMs: 1, + temperature: 0, + allowNonLoopback: false, + }); + expect((server as any).createSemanticProviderChain()).toBeDefined(); + }); + + it("scouts Python and unknown fallbacks with and without snippets", async () => { + vi.spyOn(NodeDetector, "detect").mockResolvedValue({}); + vi.spyOn(PythonDetector, "detect") + .mockResolvedValueOnce({ language: "Python", framework: "Flask", testing: "Pytest", orm: "SQLAlchemy", isTypeScript: false }) + .mockResolvedValueOnce({}); + vi.spyOn(ArchitecturalScout, "detectPatterns").mockResolvedValue([]); + vi.spyOn(LocalBrain, "getPatterns").mockReturnValue([]); + vi.spyOn(ConfigManager, "loadConfig").mockReturnValue({}); + vi.spyOn(ConfigManager, "getPredictiveMandates").mockReturnValue([]); + vi.spyOn(NeuralSnippets, "search").mockResolvedValue([]); + vi.spyOn(AgenticBlackboard, "getActiveIntents").mockReturnValue([]); + + await expect((server as any).scoutProject("query")).resolves.toMatchObject({ + language: "Python", + framework: "Flask", + testing: "Pytest", + orm: "SQLAlchemy", + isTypeScript: false, + }); + await expect((server as any).scoutProject()).resolves.toMatchObject({ + language: 
"Unknown", + framework: "Unknown", + testing: "Unknown", + }); + }); + + it("handles all semantic lint and sampling fallback outcomes", async () => { + vi.spyOn(server, "requestModelText") + .mockResolvedValueOnce(null) + .mockResolvedValueOnce('[{"id":"gap"}]') + .mockResolvedValueOnce("not json"); + + await expect((server as any).lintSemantic("prompt", {})).resolves.toEqual([]); + await expect((server as any).lintSemantic("prompt", {})).resolves.toEqual([{ id: "gap" }]); + await expect((server as any).lintSemantic("prompt", {})).resolves.toEqual([]); + expect((server as any).isSamplingUnsupportedError("-32601 unsupported")).toBe(true); + expect((server as any).isSamplingUnsupportedError("other")).toBe(false); + (server as any).disableSampling("first", "plain failure"); + (server as any).disableSampling("second", new Error("ignored")); + expect((server as any).samplingUnavailableReason).toBe("first"); + }); + + it("delegates model requests and dispatches semantic linting by default", async () => { + const requestText = vi.fn().mockResolvedValue("model response"); + (server as any).semanticProviders = { requestText }; + + await expect(server.requestModelText("Task", "Prompt", 321)).resolves.toBe("model response"); + expect(requestText).toHaveBeenCalledWith({ taskName: "Task", prompt: "Prompt", maxTokens: 321 }); + + vi.spyOn(server as any, "scoutProject").mockResolvedValue({}); + vi.spyOn(PromptLinter, "lint").mockReturnValue([]); + vi.spyOn(PromptLinter, "mergeGaps").mockImplementation((_ruleGaps, semanticGaps) => semanticGaps); + const semanticLint = vi.spyOn(server as any, "lintSemantic").mockResolvedValue([{ id: "semantic-gap" }]); + + const result = await dispatch({ params: { name: "lint_prompt", arguments: { prompt: "Analyze this" } } }); + + expect(semanticLint).toHaveBeenCalledWith("Analyze this", {}); + expect(JSON.parse(result.content[0].text).gaps).toEqual([{ id: "semantic-gap" }]); + }); + + it("persists valid discovery proposals for review", async () 
=> { + vi.spyOn(server as any, "scoutProject").mockResolvedValue({}); + vi.spyOn(server, "requestModelText").mockResolvedValue( + '[{"id":"strict-types","category":"quality","description":"Require strict types."}]', + ); + const savePattern = vi.spyOn(LocalBrain, "savePattern"); + + await expect(dispatch({ params: { name: "discover_rules", arguments: {} } })) + .resolves.toHaveProperty("content.0.text", "Successfully discovered and proposed 1 new rules."); + expect(savePattern).toHaveBeenCalledWith({ + id: "strict-types", + category: "quality", + description: "Require strict types.", + isProposed: true, + }, directory); + }); + + it("covers dispatcher fallback responses and preserved protocol errors", async () => { + vi.spyOn(server, "requestModelText").mockResolvedValue(null); + await expect(dispatch({ params: { name: "discover_rules", arguments: {} } })) + .resolves.toHaveProperty("content.0.text", "Discovery unavailable because MCP sampling is not supported by the current client/runtime."); + await expect(dispatch({ params: { name: "proactive_suggest", arguments: { prompt: "x" } } })) + .resolves.toHaveProperty("content.0.text", expect.stringContaining("unavailable")); + await expect(dispatch({ params: { name: "generate_agent_onboarding", arguments: {} } })) + .resolves.toHaveProperty("content.0.text", expect.stringContaining("unavailable")); + await expect(dispatch({ params: { name: "unknown", arguments: {} } })).rejects.toMatchObject({ code: ErrorCode.MethodNotFound }); + await expect(dispatch({ params: { name: "lint_prompt", arguments: {} } })).rejects.toMatchObject({ code: ErrorCode.InternalError }); + await expect(dispatch({ params: { name: "review_lesson", arguments: { id: "missing", approved: false } } })) + .rejects.toBeInstanceOf(McpError); + await expect(dispatch({ params: { name: "review_template", arguments: { id: "missing", approved: false } } })) + .rejects.toBeInstanceOf(McpError); + }); + + it("covers discovery parse failure, default options, 
rejection, and execution update", async () => { + vi.spyOn(server, "requestModelText").mockResolvedValueOnce("invalid"); + await expect(dispatch({ params: { name: "discover_rules", arguments: {} } })) + .resolves.toHaveProperty("content.0.text", "Discovery failed to parse."); + + const store = EventStore.getInstance(); + const repoId = (server as any).repository.id; + store.recordLesson({ + id: "lesson", + repo_id: repoId, + lesson_type: "quality", + title: "Lesson", + summary: "Summary", + confidence: "high", + source: "test", + }); + store.recordTemplate({ + id: "template", + repo_id: repoId, + category: "quality", + title: "Template", + template_text: "Verify.", + usage_notes: "", + source_type: "test", + success_score: 1, + }); + await expect(dispatch({ params: { name: "review_lesson", arguments: { id: "lesson", approved: false } } })).resolves.toBeDefined(); + await expect(dispatch({ params: { name: "review_template", arguments: { id: "template", approved: false } } })).resolves.toBeDefined(); + await expect(dispatch({ params: { name: "ingest_commits", arguments: {} } })).resolves.toBeDefined(); + vi.spyOn(server, "requestModelText").mockResolvedValue("no rewrite marker"); + await expect(dispatch({ params: { name: "optimize_prompt", arguments: { prompt: "work", iterations: 0 } } })) + .resolves.toBeDefined(); + + store.recordPrompt({ id: "prompt", repo_id: repoId, client: "test", raw_prompt: "work" }); + store.recordExecution({ + id: "execution", + prompt_id: "prompt", + workflow_name: "test", + executor_name: "test", + status: "running", + artifacts_json: '{"existing":true}', + }); + await expect(dispatch({ + params: { + name: "record_agent_output", + _meta: { progressToken: "agent-token" }, + arguments: { prompt_id: "prompt", output_summary: "done" }, + }, + })).resolves.toBeDefined(); + await expect(dispatch({ + params: { + name: "evaluate_prompt", + arguments: { prompt: "Do work and test.", baseline_prompt: "Do work." 
}, + }, + })).resolves.toBeDefined(); + }); +}); diff --git a/universal-refiner/tests/server.test.ts b/universal-refiner/tests/server.test.ts index 7b98562..940bf66 100644 --- a/universal-refiner/tests/server.test.ts +++ b/universal-refiner/tests/server.test.ts @@ -1,13 +1,23 @@ -import { describe, it, expect, vi, beforeEach } from "vitest"; +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import { PromptRefinerServer } from "../src/core/server.js"; +import { EventStore } from "../src/history/event-store.js"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; + +const lifecycle = vi.hoisted(() => ({ + connect: vi.fn(), + createMessage: vi.fn(), + backgroundStart: vi.fn(), +})); // Mock MCP SDK vi.mock("@modelcontextprotocol/sdk/server/index.js", () => { return { Server: class { setRequestHandler = vi.fn(); - connect = vi.fn(); - createMessage = vi.fn().mockResolvedValue({ content: { type: "text", text: "[]" } }); + connect = lifecycle.connect; + createMessage = lifecycle.createMessage; } }; }); @@ -17,15 +27,33 @@ vi.mock("@modelcontextprotocol/sdk/server/stdio.js", () => { StdioServerTransport: vi.fn(), }; }); +vi.mock("../src/core/background-service.js", () => ({ + BackgroundAutonomyService: class { + start = lifecycle.backgroundStart; + }, +})); describe("PromptRefinerServer", () => { let server: PromptRefinerServer; + let testDir: string; beforeEach(() => { vi.clearAllMocks(); + lifecycle.connect.mockResolvedValue(undefined); + lifecycle.createMessage.mockResolvedValue({ content: { type: "text", text: "[]" } }); + testDir = fs.mkdtempSync(path.join(os.tmpdir(), "server-test-")); + process.env.PROMPT_REFINER_GLOBAL_DIR = testDir; + (EventStore as any).instance = null; server = new PromptRefinerServer("."); }); + afterEach(() => { + EventStore.getInstance().close(); + (EventStore as any).instance = null; + delete process.env.PROMPT_REFINER_GLOBAL_DIR; + fs.rmSync(testDir, { recursive: 
true, force: true }); + }); + it("should initialize with correct tools", () => { // Access private property for testing if needed, or check setRequestHandler calls const mockServerInstance = (server as any).server; @@ -40,4 +68,43 @@ describe("PromptRefinerServer", () => { // Since we mock the schema, we can check how many times it was called expect(mockServerInstance.setRequestHandler).toHaveBeenCalledTimes(2); // One for ListTools, one for CallTool }); + + it("connects transport and starts background autonomy", async () => { + await server.run(); + expect(lifecycle.connect).toHaveBeenCalledOnce(); + expect(lifecycle.backgroundStart).toHaveBeenCalledOnce(); + }); + + it("handles MCP sampling text, non-text, unsupported, and transient failures", async () => { + lifecycle.createMessage + .mockResolvedValueOnce({ content: { type: "text", text: "sampled" } }) + .mockResolvedValueOnce({ content: { type: "image", data: "x", mimeType: "image/png" } }) + .mockRejectedValueOnce(new Error("Method not found")) + .mockRejectedValueOnce(new Error("transient")); + + await expect((server as any).requestMcpSamplingText("prompt", 20)).resolves.toBe("sampled"); + await expect((server as any).requestMcpSamplingText("prompt", 20)).resolves.toBeNull(); + await expect((server as any).requestMcpSamplingText("prompt", 20)).resolves.toBeNull(); + expect(lifecycle.createMessage).toHaveBeenCalledTimes(3); + await expect((server as any).requestMcpSamplingText("prompt", 20)).resolves.toBeNull(); + expect(lifecycle.createMessage).toHaveBeenCalledTimes(3); + + const transientServer = new PromptRefinerServer("."); + await expect((transientServer as any).requestMcpSamplingText("prompt", 20)).resolves.toBeNull(); + }); + + it("records semantic success telemetry and resolves agent names", () => { + (server as any).recordSemanticSuccess({ + text: "ok", + provider: "local", + model: "gemma", + latencyMs: 10, + promptTokens: 4, + completionTokens: 2, + }, { taskName: "test" }); + const db = 
(EventStore.getInstance() as any).db; + expect(db.prepare("SELECT * FROM events WHERE event_type = ?").get("semantic_request_completed")).toBeDefined(); + expect((server as any).getAgentName({ params: { _meta: { agentName: "agent" } } })).toBe("agent"); + expect((server as any).getAgentName({ params: {} })).toBe("User CLI"); + }); }); diff --git a/universal-refiner/tests/stress/event-store.stress.test.ts b/universal-refiner/tests/stress/event-store.stress.test.ts new file mode 100644 index 0000000..fb6b075 --- /dev/null +++ b/universal-refiner/tests/stress/event-store.stress.test.ts @@ -0,0 +1,65 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { existsSync, mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { EventStore } from "../../src/history/event-store.js"; + +describe("EventStore stress and restart persistence", () => { + let directory: string; + + beforeEach(() => { + directory = mkdtempSync(join(tmpdir(), "event-store-stress-")); + process.env.PROMPT_REFINER_GLOBAL_DIR = directory; + (EventStore as unknown as { instance: EventStore | null }).instance = null; + }); + + afterEach(() => { + const holder = EventStore as unknown as { instance: EventStore | null }; + holder.instance?.close(); + holder.instance = null; + rmSync(directory, { recursive: true, force: true }); + }); + + it("preserves all operations submitted concurrently", async () => { + const store = EventStore.getInstance(); + const operations = Array.from({ length: 500 }, (_, index) => Promise.resolve().then(() => { + store.recordEvent({ id: `concurrent-${index}`, event_type: "stress", summary: `event ${index}` }); + })); + + await Promise.all(operations); + + const db = (store as unknown as { db: { prepare: (sql: string) => { get: () => { count: number } } } }).db; + expect(db.prepare("SELECT COUNT(*) AS count FROM events WHERE event_type = 'stress'").get().count).toBe(500); + }); + + it("retains 
events after the store is closed and reopened", () => { + const holder = EventStore as unknown as { instance: EventStore | null }; + const first = EventStore.getInstance(); + first.recordEvent({ id: "before-restart", event_type: "restart", summary: "persist me" }); + first.close(); + holder.instance = null; + + const restarted = EventStore.getInstance(); + const db = (restarted as unknown as { db: { prepare: (sql: string) => { get: (id: string) => unknown } } }).db; + expect(db.prepare("SELECT id FROM events WHERE id = ?").get("before-restart")).toEqual({ id: "before-restart" }); + }); + + it("creates an integrity-checked backup and restores it", async () => { + const store = EventStore.getInstance(); + const backupPath = join(directory, "backups", "events.db"); + store.recordEvent({ id: "before-backup", event_type: "recovery", summary: "restore me" }); + + await expect(store.backup(backupPath)).resolves.toBe(backupPath); + expect(existsSync(backupPath)).toBe(true); + store.recordEvent({ id: "after-backup", event_type: "recovery", summary: "discard me" }); + + const restored = EventStore.restore(backupPath); + const db = (restored as unknown as { db: { prepare: (sql: string) => { get: (id: string) => unknown } } }).db; + expect(db.prepare("SELECT id FROM events WHERE id = ?").get("before-backup")).toEqual({ id: "before-backup" }); + expect(db.prepare("SELECT id FROM events WHERE id = ?").get("after-backup")).toBeUndefined(); + }); + + it("rejects a missing backup", () => { + expect(() => EventStore.restore(join(directory, "missing.db"))).toThrow(/Backup does not exist/); + }); +}); diff --git a/universal-refiner/tests/structured-response.test.ts b/universal-refiner/tests/structured-response.test.ts new file mode 100644 index 0000000..858ee80 --- /dev/null +++ b/universal-refiner/tests/structured-response.test.ts @@ -0,0 +1,24 @@ +import { describe, expect, it } from "vitest"; +import { parseStructuredResponse } from "../src/core/structured-response.js"; + 
+describe("parseStructuredResponse", () => { + it("parses direct JSON", () => { + expect(parseStructuredResponse<{ ready: boolean }>('{"ready":true}')).toEqual({ ready: true }); + }); + + it("parses fenced JSON from local models", () => { + expect(parseStructuredResponse<{ ready: boolean }>('```json\n{"ready":true}\n```')).toEqual({ ready: true }); + }); + + it("extracts JSON after bounded explanatory text", () => { + expect(parseStructuredResponse('Result:\n["one","two"]')).toEqual(["one", "two"]); + expect(parseStructuredResponse<{ ready: boolean }>('Result: {"ready":true} trailing')).toEqual({ ready: true }); + expect(parseStructuredResponse<{ ready: boolean }>('prefix {"ready":true} and [ignored]')).toEqual({ ready: true }); + }); + + it("rejects responses without JSON", () => { + expect(() => parseStructuredResponse("not structured")).toThrow(/JSON/); + expect(() => parseStructuredResponse("prefix { incomplete")).toThrow(/incomplete JSON/); + expect(() => parseStructuredResponse("prefix [ incomplete")).toThrow(/incomplete JSON/); + }); +}); diff --git a/universal-refiner/tests/template-generator.test.ts b/universal-refiner/tests/template-generator.test.ts new file mode 100644 index 0000000..c0be0fb --- /dev/null +++ b/universal-refiner/tests/template-generator.test.ts @@ -0,0 +1,80 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { EventStore } from "../src/history/event-store.js"; +import { TemplateGenerator } from "../src/history/template-generator.js"; + +describe("TemplateGenerator", () => { + let testDir: string; + let store: EventStore; + + beforeEach(() => { + testDir = fs.mkdtempSync(path.join(os.tmpdir(), "template-generator-")); + process.env.PROMPT_REFINER_GLOBAL_DIR = testDir; + (EventStore as any).instance = null; + store = EventStore.getInstance(); + }); + + afterEach(() => { + store.close(); + delete 
process.env.PROMPT_REFINER_GLOBAL_DIR; + fs.rmSync(testDir, { recursive: true, force: true }); + }); + + function addStory(id: string, repoId = "repo") { + const db = (store as any).db; + store.recordPrompt({ id: `prompt-${id}`, repo_id: repoId, client: "test", raw_prompt: `Prompt ${id}` }); + store.recordExecution({ + id: `execution-${id}`, + prompt_id: `prompt-${id}`, + workflow_name: "test", + executor_name: "test", + status: "completed", + result_summary: `Result ${id}`, + }); + store.recordCommit({ + id: `commit-${id}`, + repo_id: repoId, + sha: `sha-${id}`, + message: `feat: story ${id}`, + committed_at: new Date().toISOString(), + changed_files_json: JSON.stringify([`src/${id}.ts`]), + }); + db.prepare("INSERT INTO execution_commits (execution_id, commit_id) VALUES (?, ?)").run(`execution-${id}`, `commit-${id}`); + } + + it("does not invoke synthesis without enough successful history", async () => { + addStory("one"); + const request = vi.fn(); + await new TemplateGenerator(request).generateNewTemplates("repo"); + expect(request).not.toHaveBeenCalled(); + }); + + it("records synthesized templates and ignores unavailable or malformed responses", async () => { + addStory("one"); + addStory("two"); + const valid = JSON.stringify({ + templates: [{ + name: "Verified feature", + category: "feature", + template_text: "Implement [INTENT] and verify it.", + usage_notes: "Use for features.", + success_score: 95, + }], + }); + const request = vi.fn() + .mockResolvedValueOnce(null) + .mockResolvedValueOnce("not json") + .mockResolvedValueOnce(valid); + const generator = new TemplateGenerator(request); + + await generator.generateNewTemplates("repo"); + await generator.generateNewTemplates("repo"); + await generator.generateNewTemplates("repo"); + + const templates = (store as any).db.prepare("SELECT * FROM prompt_templates WHERE repo_id = ?").all("repo"); + expect(templates).toMatchObject([{ title: "Verified feature", success_score: 95 }]); + 
expect(request.mock.calls[2][1]).toContain("feat: story one"); + }); +}); diff --git a/universal-refiner/tests/template-selector.test.ts b/universal-refiner/tests/template-selector.test.ts new file mode 100644 index 0000000..ac981b0 --- /dev/null +++ b/universal-refiner/tests/template-selector.test.ts @@ -0,0 +1,88 @@ +import { describe, expect, it } from "vitest"; +import { + ApprovedTemplateSelector, + type ApprovedTemplateSource, + type PromptTemplateCandidate +} from "../src/refiners/template-selector.js"; + +describe("ApprovedTemplateSelector", () => { + const templates: PromptTemplateCandidate[] = [ + { + id: "feature", + repoId: "repo", + category: "feature", + title: "Feature implementation", + templateText: "Implement [FEATURE] and verify it.", + usageNotes: "Use for new features.", + successScore: 90, + approved: 1 + }, + { + id: "bugfix", + repoId: "repo", + category: "bugfix", + title: "Bug fix", + templateText: "Reproduce and fix [BUG].", + successScore: 75, + approved: true + }, + { + id: "pending", + repoId: "repo", + category: "feature", + title: "Pending", + templateText: "Do not select.", + successScore: 100, + approved: 0 + }, + { + id: "deprecated", + repoId: "repo", + category: "feature", + title: "Deprecated", + templateText: "Do not select.", + successScore: 100, + approved: 1, + deprecated: 1 + } + ]; + + const source: ApprovedTemplateSource = { + getTemplates: () => templates + }; + + it("retrieves only approved, active templates and ranks prompt relevance", async () => { + const selected = await new ApprovedTemplateSelector(source).select({ + repoId: "repo", + prompt: "Implement a new login feature", + limit: 2 + }); + + expect(selected.map(template => template.id)).toEqual(["feature", "bugfix"]); + expect(selected[0].selectionReasons).toContain("category:feature"); + expect(selected.some(template => template.id === "pending")).toBe(false); + expect(selected.some(template => template.id === "deprecated")).toBe(false); + }); + + it("keeps 
repository boundaries", async () => { + const selected = await new ApprovedTemplateSelector(source).select({ + repoId: "other", + prompt: "Implement a feature" + }); + + expect(selected).toEqual([]); + }); + + it("supports a disabled selection limit without querying the source", async () => { + let queried = false; + const selector = new ApprovedTemplateSelector({ + getTemplates: () => { + queried = true; + return templates; + } + }); + + expect(await selector.select({ repoId: "repo", prompt: "Implement a feature", limit: 0 })).toEqual([]); + expect(queried).toBe(false); + }); +}); diff --git a/universal-refiner/tests/timeline.test.ts b/universal-refiner/tests/timeline.test.ts index 24d4e4c..3580556 100644 --- a/universal-refiner/tests/timeline.test.ts +++ b/universal-refiner/tests/timeline.test.ts @@ -44,4 +44,18 @@ describe("TimelineProvider", () => { expect(timeline[0].summary).toBe("new event"); expect(timeline[1].type).toBe("prompt"); }); + + it("uses empty detail fallbacks and applies the requested limit", () => { + const provider = new TimelineProvider(); + const prepare = vi.fn() + .mockReturnValueOnce({ all: vi.fn().mockReturnValue([{ type: "prompt", id: "p", timestamp: "2026-01-01", summary: "p", details: null }]) }) + .mockReturnValueOnce({ all: vi.fn().mockReturnValue([{ type: "commit", id: "c", timestamp: "2026-01-03", summary: "c", details: null }]) }) + .mockReturnValueOnce({ all: vi.fn().mockReturnValue([{ type: "log", id: "e", timestamp: "2026-01-02", summary: "e", details: null }]) }); + (provider as any).eventStore = { db: { prepare } }; + + expect(provider.getUnifiedTimeline(2)).toEqual([ + expect.objectContaining({ id: "c", details: { files: [] } }), + expect.objectContaining({ id: "e", details: {} }), + ]); + }); }); diff --git a/universal-refiner/vitest.config.ts b/universal-refiner/vitest.config.ts new file mode 100644 index 0000000..a3cc3c0 --- /dev/null +++ b/universal-refiner/vitest.config.ts @@ -0,0 +1,18 @@ +import { defineConfig } from 
"vitest/config"; + +export default defineConfig({ + test: { + coverage: { + provider: "v8", + include: ["hooks/lib/**/*.ts", "src/**/*.ts"], + exclude: ["src/core/generated-version.ts"], + reporter: ["text", "json-summary", "lcov"], + thresholds: { + statements: 100, + branches: 100, + functions: 100, + lines: 100, + }, + }, + }, +});