From cdef2a8c95d61533915282710f0ecde63218c561 Mon Sep 17 00:00:00 2001 From: David Ichim Date: Sat, 2 May 2026 11:19:54 +0300 Subject: [PATCH 1/4] Introduced max-tokens as default value sometimes gave truncated replies which broke some good results for models that are verbose --- BENCH_PACK_AUTHORING.md | 2 +- BENCH_PROTOCOL_V1.md | 3 +- app/src/renderer/src/App.tsx | 5 +- package.json | 3 +- packages/benchlocal-core/src/protocol.ts | 14 +++++ packages/benchlocal-core/src/workspaces.ts | 1 + packages/benchpack-host/src/index.ts | 71 ++++++++++++++-------- 7 files changed, 71 insertions(+), 28 deletions(-) diff --git a/BENCH_PACK_AUTHORING.md b/BENCH_PACK_AUTHORING.md index 4faf9a5..fc69dd2 100644 --- a/BENCH_PACK_AUTHORING.md +++ b/BENCH_PACK_AUTHORING.md @@ -185,7 +185,7 @@ Behavior: - if a Bench Pack provides a default, BenchLocal uses it unless the user overrides it in that tab - if a field is omitted, BenchLocal falls back to platform defaults where defined -- BenchLocal currently applies `request_timeout_seconds: 300` unless your pack or the user overrides it +- BenchLocal currently applies `max_tokens: 2048` and `request_timeout_seconds: 300` unless your pack or the user overrides them ## Compatibility requirements diff --git a/BENCH_PROTOCOL_V1.md b/BENCH_PROTOCOL_V1.md index 1945150..04a7331 100644 --- a/BENCH_PROTOCOL_V1.md +++ b/BENCH_PROTOCOL_V1.md @@ -205,6 +205,7 @@ type GenerationRequest = { top_k?: number; min_p?: number; repetition_penalty?: number; + max_tokens?: number; request_timeout_seconds?: number; }; ``` @@ -213,7 +214,7 @@ Behavior: - if a field is present, the pack may forward it to the provider client - if a field is omitted, BenchLocal may still supply a platform default before the pack receives the request -- BenchLocal currently applies `request_timeout_seconds: 300` unless the pack or user overrides it +- BenchLocal currently applies `max_tokens: 2048` and `request_timeout_seconds: 300` unless the pack or user overrides them This 
allows: diff --git a/app/src/renderer/src/App.tsx b/app/src/renderer/src/App.tsx index 2964a68..0ba8e25 100644 --- a/app/src/renderer/src/App.tsx +++ b/app/src/renderer/src/App.tsx @@ -193,6 +193,7 @@ type SamplingFormState = { top_k: string; min_p: string; repetition_penalty: string; + max_tokens: string; request_timeout_seconds: string; }; @@ -290,7 +291,7 @@ type BenchPackRunBlocker = { type BenchPackMutationState = BenchPackMutationProgress; const THIRD_PARTY_INSTALL_MUTATION_ID = "__third_party_install__"; -const DEFAULT_BENCHLOCAL_GENERATION: GenerationRequest = { request_timeout_seconds: 300 }; +const DEFAULT_BENCHLOCAL_GENERATION: GenerationRequest = { max_tokens: 2048, request_timeout_seconds: 300 }; function isAbortLikeError(error: unknown): boolean { return error instanceof Error && /abort|cancel/i.test(error.name + " " + error.message); @@ -347,6 +348,7 @@ const SAMPLING_FIELDS: Array<{ { key: "top_k", label: "Top K", placeholder: "Leave blank", integer: true }, { key: "min_p", label: "Min P", placeholder: "Leave blank" }, { key: "repetition_penalty", label: "Repetition Penalty", placeholder: "Leave blank" }, + { key: "max_tokens", label: "Max Tokens", placeholder: "Leave blank", integer: true }, { key: "request_timeout_seconds", label: "Request Timeout Seconds", placeholder: "Leave blank", integer: true } ]; @@ -475,6 +477,7 @@ function createSamplingForm(input?: GenerationRequest): SamplingFormState { top_k: input?.top_k?.toString() ?? "", min_p: input?.min_p?.toString() ?? "", repetition_penalty: input?.repetition_penalty?.toString() ?? "", + max_tokens: input?.max_tokens?.toString() ?? "", request_timeout_seconds: input?.request_timeout_seconds?.toString() ?? 
"" }; } diff --git a/package.json b/package.json index d8d69dc..9a217fc 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "release:setup:mac": "node scripts/setup-macos-release.mjs", "release:doctor:mac": "node scripts/check-macos-release.mjs", "release:mac": "node scripts/build-macos-release.mjs", - "typecheck": "npm run typecheck --workspace @benchlocal/core && npm run typecheck --workspace @benchlocal/sdk && npm run typecheck --workspace @benchlocal/benchpack-host && npm run typecheck --workspace app" + "typecheck": "npm run typecheck --workspace @benchlocal/core && npm run typecheck --workspace @benchlocal/sdk && npm run typecheck --workspace @benchlocal/benchpack-host && npm run typecheck --workspace app", + "sync:benchpack": "node scripts/sync-local-benchpack.mjs" } } diff --git a/packages/benchlocal-core/src/protocol.ts b/packages/benchlocal-core/src/protocol.ts index dd7e8d5..2bb30ce 100644 --- a/packages/benchlocal-core/src/protocol.ts +++ b/packages/benchlocal-core/src/protocol.ts @@ -249,12 +249,15 @@ export interface GenerationRequest { top_k?: number; min_p?: number; repetition_penalty?: number; + max_tokens?: number; request_timeout_seconds?: number; } export const DEFAULT_BENCHLOCAL_REQUEST_TIMEOUT_SECONDS = 300; +export const DEFAULT_BENCHLOCAL_MAX_TOKENS = 2048; export const DEFAULT_BENCHLOCAL_GENERATION: GenerationRequest = { + max_tokens: DEFAULT_BENCHLOCAL_MAX_TOKENS, request_timeout_seconds: DEFAULT_BENCHLOCAL_REQUEST_TIMEOUT_SECONDS }; @@ -370,6 +373,17 @@ export interface ScenarioResultEvent { export interface ScenarioFinishedEvent { type: "scenario_finished"; scenarioId: string; + generation?: GenerationRequest; + results?: Array<{ + modelId: string; + status: ScenarioResult["status"]; + score?: number; + provider?: { + finishReason?: string | null; + usage?: Record; + contentLength?: number; + }; + }>; } export interface RunFinishedEvent { diff --git a/packages/benchlocal-core/src/workspaces.ts 
b/packages/benchlocal-core/src/workspaces.ts index c6e466b..83a9b24 100644 --- a/packages/benchlocal-core/src/workspaces.ts +++ b/packages/benchlocal-core/src/workspaces.ts @@ -74,6 +74,7 @@ const WorkspaceTabSchema = z.object({ top_k: z.number().optional(), min_p: z.number().optional(), repetition_penalty: z.number().optional(), + max_tokens: z.number().int().min(1).optional(), request_timeout_seconds: z.number().int().min(1).optional() }) .default({}), diff --git a/packages/benchpack-host/src/index.ts b/packages/benchpack-host/src/index.ts index d076e47..06e9728 100644 --- a/packages/benchpack-host/src/index.ts +++ b/packages/benchpack-host/src/index.ts @@ -1616,6 +1616,7 @@ type LoadedBenchPackRuntime = { top_k?: number; min_p?: number; repetition_penalty?: number; + max_tokens?: number; request_timeout_seconds?: number; }; }, emit: (event: ProgressEvent) => Promise | void) => Promise; @@ -1816,6 +1817,29 @@ function mergeResultsByModel( return merged; } +function scenarioFinishedEvent( + scenarioId: string, + generation: GenerationRequest, + results: Array<{ modelId: string; result: ScenarioResult }> +): Extract { + type ProviderDiagnostics = NonNullable["results"]>[number]["provider"]; + type ScenarioResultWithRuntimeDiagnostics = ScenarioResult & { + __providerDiagnostics?: ProviderDiagnostics; + }; + + return { + type: "scenario_finished", + scenarioId, + generation, + results: results.map(({ modelId, result }) => ({ + modelId, + status: result.status, + score: result.score, + provider: (result as ScenarioResultWithRuntimeDiagnostics).__providerDiagnostics + })) + }; +} + function hasCompleteRunResults(summary: BenchPackRunSummary): boolean { const modelIds = Object.keys(summary.resultsByModel); @@ -2899,10 +2923,14 @@ async function executeSerialTestCasesMode( await emit({ type: "scenario_result", modelId: model.id, scenarioId: scenario.id, result }); } - await emit({ - type: "scenario_finished", - scenarioId: scenario.id - }); + await 
emit(scenarioFinishedEvent( + scenario.id, + generation, + runnableModels.map((model) => ({ + modelId: model.id, + result: resultsByModel[model.id].find((result) => result.scenarioId === scenario.id)! + })) + )); } } @@ -2973,6 +3001,7 @@ async function executeSerialByModelMode( ): Promise { const startedScenarios = new Set(); const finishedCounts = new Map(); + const finishedResults = new Map>(); const expectedCounts = new Map( scenarios.map((scenario) => [ scenario.id, @@ -3016,15 +3045,16 @@ async function executeSerialByModelMode( resultsByModel[model.id].push(result); await emit({ type: "scenario_result", modelId: model.id, scenarioId: scenario.id, result }); + finishedResults.set(scenario.id, [ + ...(finishedResults.get(scenario.id) ?? []), + { modelId: model.id, result } + ]); const completedCount = (finishedCounts.get(scenario.id) ?? 0) + 1; finishedCounts.set(scenario.id, completedCount); if (completedCount >= (expectedCounts.get(scenario.id) ?? 0)) { - await emit({ - type: "scenario_finished", - scenarioId: scenario.id - }); + await emit(scenarioFinishedEvent(scenario.id, generation, finishedResults.get(scenario.id) ?? [])); } } } @@ -3044,6 +3074,7 @@ async function executeParallelModelsMode( ): Promise { const startedScenarios = new Set(); const finishedCounts = new Map(); + const finishedResults = new Map>(); const expectedCounts = new Map( scenarios.map((scenario) => [ scenario.id, @@ -3088,15 +3119,16 @@ async function executeParallelModelsMode( resultsByModel[model.id].push(result); await emit({ type: "scenario_result", modelId: model.id, scenarioId: scenario.id, result }); + finishedResults.set(scenario.id, [ + ...(finishedResults.get(scenario.id) ?? []), + { modelId: model.id, result } + ]); const completedCount = (finishedCounts.get(scenario.id) ?? 0) + 1; finishedCounts.set(scenario.id, completedCount); if (completedCount >= (expectedCounts.get(scenario.id) ?? 
0)) { - await emit({ - type: "scenario_finished", - scenarioId: scenario.id - }); + await emit(scenarioFinishedEvent(scenario.id, generation, finishedResults.get(scenario.id) ?? [])); } }) ); @@ -3155,10 +3187,7 @@ async function executeParallelTestCasesMode( await emit({ type: "scenario_result", modelId, scenarioId: scenario.id, result }); } - await emit({ - type: "scenario_finished", - scenarioId: scenario.id - }); + await emit(scenarioFinishedEvent(scenario.id, generation, scenarioResults)); } } @@ -3215,10 +3244,7 @@ async function executeFullParallelMode( await emit({ type: "scenario_result", modelId, scenarioId: scenario.id, result }); } - await emit({ - type: "scenario_finished", - scenarioId: scenario.id - }); + await emit(scenarioFinishedEvent(scenario.id, generation, scenarioResults)); }) ); } @@ -3548,10 +3574,7 @@ export async function retryScenarioForBenchPackRun( scenarioId: scenario.id, result }); - await emit({ - type: "scenario_finished", - scenarioId: scenario.id - }); + await emit(scenarioFinishedEvent(scenario.id, generation, [{ modelId: model.id, result }])); return await withRunSummaryLock(`${benchPackId}:${existingSummary.runId}`, async () => { const latestSummary = await loadRunSummaryForBenchPack(config, benchPackId, existingSummary.runId); From 471dc5686a537ce3d799228b13fa677eb93a3588 Mon Sep 17 00:00:00 2001 From: Tiberiu Ichim Date: Sun, 3 May 2026 11:12:53 +0300 Subject: [PATCH 2/4] feat: add web app server + HTTP API client - Add Fastify server (app/src/server/) with REST API at /api/* - Add SSE event streaming at /api/events/sse for run events, mutation progress, verifier progress - Add in-process SSE bus and active run manager - Extract Electron-free helpers (themes, app-metadata, models) for server use - Add HTTP + SSE API client (app/src/renderer/src/api/client.ts) replacing IPC bridge - Adapt App.tsx: window.benchlocal.* -> bl.*, IPC listeners -> SSE, remove update/detached-logs UI - Add Vite web config (vite.config.web.ts) for 
renderer-only builds - Add npm scripts: web:dev, web:build, web:start - Add fastify, @fastify/static, tsx, esbuild, concurrently dependencies - Stub removed Electron features (updates, logs, onOpenAbout/onOpenSettings) for backward compatibility - Web server runs on port 4300 (configurable via BENCHLOCAL_PORT) --- app/package.json | 14 +- app/src/renderer/src/App.tsx | 16718 +++++++++++++---------- app/src/renderer/src/api/client.ts | 101 + app/src/renderer/src/vite-env-web.d.ts | 20 + app/src/server/api-routes.ts | 346 + app/src/server/app-metadata.ts | 83 + app/src/server/index.ts | 47 + app/src/server/models.ts | 148 + app/src/server/run-manager.ts | 37 + app/src/server/sse-bus.ts | 20 + app/src/server/sse-route.ts | 39 + app/src/server/themes.ts | 116 + app/vite.config.web.ts | 31 + package-lock.json | 1567 ++- 14 files changed, 11702 insertions(+), 7585 deletions(-) create mode 100644 app/src/renderer/src/api/client.ts create mode 100644 app/src/renderer/src/vite-env-web.d.ts create mode 100644 app/src/server/api-routes.ts create mode 100644 app/src/server/app-metadata.ts create mode 100644 app/src/server/index.ts create mode 100644 app/src/server/models.ts create mode 100644 app/src/server/run-manager.ts create mode 100644 app/src/server/sse-bus.ts create mode 100644 app/src/server/sse-route.ts create mode 100644 app/src/server/themes.ts create mode 100644 app/vite.config.web.ts diff --git a/app/package.json b/app/package.json index fd70634..d31c211 100644 --- a/app/package.json +++ b/app/package.json @@ -20,11 +20,20 @@ "build:mac": "npm run build:compile && npm run package:mac", "build:win": "npm run build:compile && npm run package:win", "build:linux": "npm run build:compile && npm run package:linux", - "typecheck": "tsc -p tsconfig.json --noEmit" + "typecheck": "tsc -p tsconfig.json --noEmit", + "web:dev:renderer": "vite --config vite.config.web.ts", + "web:dev:server": "tsx watch src/server/index.ts", + "web:dev": "concurrently \"npm run 
web:dev:renderer\" \"npm run web:dev:server\"", + "web:build": "npm run build:compile && vite build --config vite.config.web.ts && esbuild src/server/index.ts --bundle --platform=node --target=node20 --format=esm --outdir=../dist/server --external:@benchlocal/*", + "web:start": "node ../dist/server/index.js" }, "dependencies": { "electron-updater": "^6.6.2", "lucide-react": "^1.7.0", + "@benchlocal/benchpack-host": "file:../packages/benchpack-host", + "@benchlocal/core": "file:../packages/benchlocal-core", + "@fastify/static": "^8.0.0", + "fastify": "^5.0.0", "react": "^19.0.0", "react-dom": "^19.0.0" }, @@ -41,6 +50,9 @@ "postcss": "^8.5.1", "tailwindcss": "^4.1.11", "typescript": "^5.8.3", + "concurrently": "^9.0.0", + "esbuild": "^0.25.0", + "tsx": "^4.0.0", "vite": "^6.3.5" }, "build": { diff --git a/app/src/renderer/src/App.tsx b/app/src/renderer/src/App.tsx index 0ba8e25..2ac32ac 100644 --- a/app/src/renderer/src/App.tsx +++ b/app/src/renderer/src/App.tsx @@ -1,7922 +1,9548 @@ -import { useEffect, useMemo, useRef, useState, type ReactNode } from "react"; -import benchlocalIcon from "../../../assets/benchlocal-icon.png"; -import { - ArrowRight, - ArrowUp, - CircleAlert, - Check, - Bot, - ChevronDown, - ChevronLeft, - ChevronRight, - Cog, - FolderOpen, - GripVertical, - LayoutList, - Logs, - Pencil, - Palette, - Play, - PlugZap, - Plus, - RotateCcw, - Save, - Square, - Server, - Sidebar, - SlidersHorizontal, - Trash2, - Wrench, - X -} from "lucide-react"; import type { - BenchPackRegistryEntry, - BenchLocalConfig, - BenchLocalExecutionMode, - BenchLocalModelConfig, - BenchLocalProviderConfig, - BenchLocalProviderKind, - BenchLocalThemeDefinition, - BenchLocalThemeDescriptor, - BenchLocalVerifierConfig, - BenchLocalWorkspace, - BenchLocalWorkspaceState, - BenchLocalWorkspaceTab, - BenchLocalWorkspaceTabModelSelection, - GenerationRequest, - ProgressEvent, - ScenarioResult, - BenchPackInspection, - BenchPackManifest, - BenchPackRunHistoryEntry, - 
BenchPackRunSummary, - ScenarioMeta + BenchLocalConfig, + BenchLocalExecutionMode, + BenchLocalModelConfig, + BenchLocalProviderConfig, + BenchLocalProviderKind, + BenchLocalThemeDefinition, + BenchLocalThemeDescriptor, + BenchLocalVerifierConfig, + BenchLocalWorkspace, + BenchLocalWorkspaceState, + BenchLocalWorkspaceTab, + BenchLocalWorkspaceTabModelSelection, + BenchPackInspection, + BenchPackManifest, + BenchPackRegistryEntry, + BenchPackRunHistoryEntry, + BenchPackRunSummary, + GenerationRequest, + ProgressEvent, + ScenarioMeta, + ScenarioResult, } from "@core"; +import { + ArrowRight, + ArrowUp, + Bot, + Check, + ChevronDown, + ChevronLeft, + ChevronRight, + CircleAlert, + Cog, + FolderOpen, + GripVertical, + LayoutList, + Logs, + Palette, + Pencil, + Play, + PlugZap, + Plus, + RotateCcw, + Save, + Server, + Sidebar, + SlidersHorizontal, + Square, + Trash2, + Wrench, + X, +} from "lucide-react"; +import { type ReactNode, useEffect, useMemo, useRef, useState } from "react"; import type { - BenchLocalAppMetadata, - BenchLocalUpdateState, - BenchPackMutationProgress, - BenchLocalDiscoveredModel, - DetachedLogsState, - BenchPackVerifierStatus + BenchLocalAppMetadata, + BenchLocalDiscoveredModel, + BenchLocalUpdateState, + BenchPackMutationProgress, + BenchPackVerifierStatus, + DetachedLogsState, } from "@/shared/desktop-api"; +import benchlocalIcon from "../../../assets/benchlocal-icon.png"; +import { bl } from "./api/client"; -const DETACHED_LOGS_VIEW = - typeof window !== "undefined" && new URLSearchParams(window.location.search).get("view") === "logs"; +const IS_IS_DETACHED_LOGS_VIEW = + typeof window !== "undefined" && + new URLSearchParams(window.location.search).get("view") === "logs"; function describeAppUpdateState(state: BenchLocalUpdateState | null): string { - if (!state) { - return "Updater is initializing."; - } - - if (state.message?.trim()) { - return state.message.trim(); - } - - switch (state.status) { - case "unsupported": - return "Self-update 
is unavailable in this BenchLocal build."; - case "checking": - return "Checking for BenchLocal updates."; - case "available": - return state.availableVersion - ? `BenchLocal ${state.availableVersion} is available. Downloading update.` - : "A BenchLocal update is available. Downloading update."; - case "downloading": - return state.availableVersion - ? `Downloading BenchLocal ${state.availableVersion}.` - : "Downloading BenchLocal update."; - case "downloaded": - return state.downloadedVersion - ? `BenchLocal ${state.downloadedVersion} is ready to install.` - : "A BenchLocal update is ready to install."; - case "not_available": - return "BenchLocal is up to date."; - case "error": - return "BenchLocal could not complete the update request."; - default: - return "BenchLocal can check for updates."; - } + if (!state) { + return "Updater is initializing."; + } + + if (state.message?.trim()) { + return state.message.trim(); + } + + switch (state.status) { + case "unsupported": + return "Self-update is unavailable in this BenchLocal build."; + case "checking": + return "Checking for BenchLocal updates."; + case "available": + return state.availableVersion + ? `BenchLocal ${state.availableVersion} is available. Downloading update.` + : "A BenchLocal update is available. Downloading update."; + case "downloading": + return state.availableVersion + ? `Downloading BenchLocal ${state.availableVersion}.` + : "Downloading BenchLocal update."; + case "downloaded": + return state.downloadedVersion + ? 
`BenchLocal ${state.downloadedVersion} is ready to install.` + : "A BenchLocal update is ready to install."; + case "not_available": + return "BenchLocal is up to date."; + case "error": + return "BenchLocal could not complete the update request."; + default: + return "BenchLocal can check for updates."; + } } function formatAppUpdateCheckedAt(checkedAt?: string): string | null { - if (!checkedAt) { - return null; - } + if (!checkedAt) { + return null; + } - const date = new Date(checkedAt); - if (Number.isNaN(date.valueOf())) { - return null; - } + const date = new Date(checkedAt); + if (Number.isNaN(date.valueOf())) { + return null; + } - return date.toLocaleString(); + return date.toLocaleString(); } -type SettingsTab = "providers" | "models" | "benchPacks" | "verification" | "advanced"; +type SettingsTab = + | "providers" + | "models" + | "benchPacks" + | "verification" + | "advanced"; type LoadState = { - path: string; - created: boolean; - config: BenchLocalConfig; + path: string; + created: boolean; + config: BenchLocalConfig; }; type ProviderFormState = { - id: string; - kind: BenchLocalProviderKind; - name: string; - enabled: boolean; - base_url: string; - api_key: string; + id: string; + kind: BenchLocalProviderKind; + name: string; + enabled: boolean; + base_url: string; + api_key: string; }; type ProviderModalState = - | { - mode: "create"; - initialId?: undefined; - form: ProviderFormState; - } - | { - mode: "edit"; - initialId: string; - form: ProviderFormState; - }; + | { + mode: "create"; + initialId?: undefined; + form: ProviderFormState; + } + | { + mode: "edit"; + initialId: string; + form: ProviderFormState; + }; type ModelFormState = { - provider: string; - model: string; - label: string; - group: string; - enabled: boolean; + provider: string; + model: string; + label: string; + group: string; + enabled: boolean; }; type ModelModalState = - | { - mode: "create"; - index?: undefined; - form: ModelFormState; - } - | { - mode: "edit"; - index: 
number; - form: ModelFormState; - }; + | { + mode: "create"; + index?: undefined; + form: ModelFormState; + } + | { + mode: "edit"; + index: number; + form: ModelFormState; + }; type ModelBrowserModalState = { - providerId: string; - providerName: string; - entries: BenchLocalDiscoveredModel[]; - query: string; - selectedModelId: string | null; - loading: boolean; - error: string | null; + providerId: string; + providerName: string; + entries: BenchLocalDiscoveredModel[]; + query: string; + selectedModelId: string | null; + loading: boolean; + error: string | null; }; type DetailModalState = { - tabId: string; - runId: string | null; - benchPackId: string; - modelId: string; - scenarioId: string; - summary: string; - rawLog: string; - status: "pass" | "partial" | "fail"; + tabId: string; + runId: string | null; + benchPackId: string; + modelId: string; + scenarioId: string; + summary: string; + rawLog: string; + status: "pass" | "partial" | "fail"; }; type TabModelsModalState = { - tabId: string; - selections: BenchLocalWorkspaceTabModelSelection[]; + tabId: string; + selections: BenchLocalWorkspaceTabModelSelection[]; }; type SamplingFormState = { - temperature: string; - top_p: string; - top_k: string; - min_p: string; - repetition_penalty: string; - max_tokens: string; - request_timeout_seconds: string; + temperature: string; + top_p: string; + top_k: string; + min_p: string; + repetition_penalty: string; + max_tokens: string; + request_timeout_seconds: string; }; type SamplingModalState = { - tabId: string; - benchPackId: string; - benchPackName: string; - defaults: GenerationRequest; - form: SamplingFormState; + tabId: string; + benchPackId: string; + benchPackName: string; + defaults: GenerationRequest; + form: SamplingFormState; }; type ModelAliasModalState = { - tabId: string; - modelId: string; - baseLabel: string; - alias: string; + tabId: string; + modelId: string; + baseLabel: string; + alias: string; }; type HistoryModalState = { - benchPackId: string; 
- benchPackName: string; - entries: BenchPackRunHistoryEntry[]; + benchPackId: string; + benchPackName: string; + entries: BenchPackRunHistoryEntry[]; }; -type WorkspaceModalState = - | { - mode: "rename"; - workspaceId: string; - name: string; - } - | null; +type WorkspaceModalState = { + mode: "rename"; + workspaceId: string; + name: string; +} | null; type WorkspaceContextMenuState = { - workspaceId: string; - workspaceName: string; - x: number; - y: number; + workspaceId: string; + workspaceName: string; + x: number; + y: number; } | null; -type ConfirmDialogState = - | { - title: string; - subtitle: string; - confirmLabel: string; - tone?: "danger" | "neutral"; - onConfirm: () => void; - } - | null; +type ConfirmDialogState = { + title: string; + subtitle: string; + confirmLabel: string; + tone?: "danger" | "neutral"; + onConfirm: () => void; +} | null; type ResolvedTabModel = BenchLocalModelConfig & { - displayLabel: string; - alias?: string; + displayLabel: string; + alias?: string; }; type LiveRunState = { - runId?: string; - events: ProgressEvent[]; - resultsByModel: Record; - activeCellKeys: string[]; + runId?: string; + events: ProgressEvent[]; + resultsByModel: Record; + activeCellKeys: string[]; }; type ActiveRunEntry = { - benchPackId: string; - mode?: "host" | "replay"; + benchPackId: string; + mode?: "host" | "replay"; }; type LoadedHistoryEntry = { - runId: string; - startedAt: string; - mode?: "history" | "replay"; + runId: string; + startedAt: string; + mode?: "history" | "replay"; }; type LiveScenarioFocusState = { - liveScenarioId: string | null; - autoFollow: boolean; + liveScenarioId: string | null; + autoFollow: boolean; }; -type VerifierPreparingProgress = Extract; +type VerifierPreparingProgress = Extract< + ProgressEvent, + { type: "verifier_preparing" } +>; type VerifierPreparationModalState = { - tabId: string; - progress: VerifierPreparingProgress; + tabId: string; + progress: VerifierPreparingProgress; }; type 
SettingsVerifierPreparationModalState = { - benchPackId: string; - progress: VerifierPreparingProgress; + benchPackId: string; + progress: VerifierPreparingProgress; }; type BenchPackRunBlocker = { - title: string; - message: string; - actionLabel: string; + title: string; + message: string; + actionLabel: string; }; type BenchPackMutationState = BenchPackMutationProgress; const THIRD_PARTY_INSTALL_MUTATION_ID = "__third_party_install__"; -const DEFAULT_BENCHLOCAL_GENERATION: GenerationRequest = { max_tokens: 2048, request_timeout_seconds: 300 }; +const DEFAULT_BENCHLOCAL_GENERATION: GenerationRequest = { + max_tokens: 2048, + request_timeout_seconds: 300, +}; function isAbortLikeError(error: unknown): boolean { - return error instanceof Error && /abort|cancel/i.test(error.name + " " + error.message); + return ( + error instanceof Error && + /abort|cancel/i.test(error.name + " " + error.message) + ); } -function resolveThemeLabel(themeId: string, themes: BenchLocalThemeDescriptor[], prefersDark: boolean): string { - if (themeId === "system") { - return `System (${prefersDark ? "Dark" : "Light"})`; - } +function resolveThemeLabel( + themeId: string, + themes: BenchLocalThemeDescriptor[], + prefersDark: boolean, +): string { + if (themeId === "system") { + return `System (${prefersDark ? "Dark" : "Light"})`; + } - return themes.find((theme) => theme.id === themeId)?.name ?? themeId; + return themes.find((theme) => theme.id === themeId)?.name ?? 
themeId; } -const EXECUTION_MODE_OPTIONS: Array<{ value: BenchLocalExecutionMode; label: string }> = [ - { value: "serial", label: "Serial per Test Case" }, - { value: "serial_by_model", label: "Serial per Model" }, - { value: "parallel_by_model", label: "Parallel per Model" }, - { value: "parallel_by_test_case", label: "Parallel per Test Case" }, - { value: "full_parallel", label: "Parallel for All" } +const EXECUTION_MODE_OPTIONS: Array<{ + value: BenchLocalExecutionMode; + label: string; +}> = [ + { value: "serial", label: "Serial per Test Case" }, + { value: "serial_by_model", label: "Serial per Model" }, + { value: "parallel_by_model", label: "Parallel per Model" }, + { value: "parallel_by_test_case", label: "Parallel per Test Case" }, + { value: "full_parallel", label: "Parallel for All" }, ]; -function supportsLiveScenarioColumnFocus(executionMode: BenchLocalExecutionMode): boolean { - return executionMode !== "parallel_by_model" && executionMode !== "full_parallel"; +function supportsLiveScenarioColumnFocus( + executionMode: BenchLocalExecutionMode, +): boolean { + return ( + executionMode !== "parallel_by_model" && executionMode !== "full_parallel" + ); } const SIDEBAR_OPEN_STORAGE_KEY = "benchlocal.sidebar-open"; -const PROVIDER_KIND_OPTIONS: Array<{ value: BenchLocalProviderKind; label: string }> = [ - { value: "openai_compatible", label: "OpenAI Compatible" }, - { value: "openrouter", label: "OpenRouter" }, - { value: "huggingface", label: "Hugging Face" }, - { value: "ollama", label: "Ollama" }, - { value: "llamacpp", label: "llama.cpp" }, - { value: "mlx", label: "MLX" }, - { value: "lmstudio", label: "LM Studio" }, - { value: "pico", label: "Pico" } +const PROVIDER_KIND_OPTIONS: Array<{ + value: BenchLocalProviderKind; + label: string; +}> = [ + { value: "openai_compatible", label: "OpenAI Compatible" }, + { value: "openrouter", label: "OpenRouter" }, + { value: "huggingface", label: "Hugging Face" }, + { value: "ollama", label: "Ollama" }, + { 
value: "llamacpp", label: "llama.cpp" }, + { value: "mlx", label: "MLX" }, + { value: "lmstudio", label: "LM Studio" }, + { value: "pico", label: "Pico" }, ]; -const SETTINGS_TABS: Array<{ id: SettingsTab; label: string; blurb: string; icon: ReactNode }> = [ - { id: "providers", label: "Providers", blurb: "Provider endpoints and credentials.", icon: }, - { id: "models", label: "Models", blurb: "Shared model registry across Bench Packs.", icon: }, - { id: "benchPacks", label: "Bench Packs", blurb: "Browse, install, update, and remove official Bench Packs.", icon: }, - { id: "verification", label: "Verification", blurb: "Managed verifiers and dependency modes.", icon: } +const SETTINGS_TABS: Array<{ + id: SettingsTab; + label: string; + blurb: string; + icon: ReactNode; +}> = [ + { + id: "providers", + label: "Providers", + blurb: "Provider endpoints and credentials.", + icon: , + }, + { + id: "models", + label: "Models", + blurb: "Shared model registry across Bench Packs.", + icon: , + }, + { + id: "benchPacks", + label: "Bench Packs", + blurb: "Browse, install, update, and remove official Bench Packs.", + icon: , + }, + { + id: "verification", + label: "Verification", + blurb: "Managed verifiers and dependency modes.", + icon: , + }, ]; const SAMPLING_FIELDS: Array<{ - key: keyof SamplingFormState; - label: string; - placeholder: string; - integer?: boolean; + key: keyof SamplingFormState; + label: string; + placeholder: string; + integer?: boolean; }> = [ - { key: "temperature", label: "Temperature", placeholder: "Leave blank" }, - { key: "top_p", label: "Top P", placeholder: "Leave blank" }, - { key: "top_k", label: "Top K", placeholder: "Leave blank", integer: true }, - { key: "min_p", label: "Min P", placeholder: "Leave blank" }, - { key: "repetition_penalty", label: "Repetition Penalty", placeholder: "Leave blank" }, - { key: "max_tokens", label: "Max Tokens", placeholder: "Leave blank", integer: true }, - { key: "request_timeout_seconds", label: "Request 
Timeout Seconds", placeholder: "Leave blank", integer: true } + { key: "temperature", label: "Temperature", placeholder: "Leave blank" }, + { key: "top_p", label: "Top P", placeholder: "Leave blank" }, + { key: "top_k", label: "Top K", placeholder: "Leave blank", integer: true }, + { key: "min_p", label: "Min P", placeholder: "Leave blank" }, + { + key: "repetition_penalty", + label: "Repetition Penalty", + placeholder: "Leave blank", + }, + { + key: "max_tokens", + label: "Max Tokens", + placeholder: "Leave blank", + integer: true, + }, + { + key: "request_timeout_seconds", + label: "Request Timeout Seconds", + placeholder: "Leave blank", + integer: true, + }, ]; function cloneConfig(config: BenchLocalConfig): BenchLocalConfig { - return structuredClone(config); + return structuredClone(config); } const FILESYSTEM_CONFIG_KEYS = [ - "run_storage_dir", - "benchpack_storage_dir", - "log_storage_dir", - "cache_dir" + "run_storage_dir", + "benchpack_storage_dir", + "log_storage_dir", + "cache_dir", ] as const satisfies Array; function reapplyPendingFilesystemDraft( - baseConfig: BenchLocalConfig, - currentDraft: BenchLocalConfig, - persistedConfig: BenchLocalConfig + baseConfig: BenchLocalConfig, + currentDraft: BenchLocalConfig, + persistedConfig: BenchLocalConfig, ): BenchLocalConfig { - const nextConfig = cloneConfig(baseConfig); + const nextConfig = cloneConfig(baseConfig); - for (const key of FILESYSTEM_CONFIG_KEYS) { - if (currentDraft[key] !== persistedConfig[key]) { - nextConfig[key] = currentDraft[key]; - } - } + for (const key of FILESYSTEM_CONFIG_KEYS) { + if (currentDraft[key] !== persistedConfig[key]) { + nextConfig[key] = currentDraft[key]; + } + } - return nextConfig; + return nextConfig; } function providerKindLabel(kind: BenchLocalProviderKind): string { - return PROVIDER_KIND_OPTIONS.find((option) => option.value === kind)?.label ?? kind; + return ( + PROVIDER_KIND_OPTIONS.find((option) => option.value === kind)?.label ?? 
kind + ); } function defaultProviderName(kind: BenchLocalProviderKind): string { - return providerKindLabel(kind); + return providerKindLabel(kind); } -function defaultProviderApiKeyPlaceholder(kind: BenchLocalProviderKind): string { - switch (kind) { - case "huggingface": - return "hf_..."; - default: - return "sk-or-v1-..."; - } +function defaultProviderApiKeyPlaceholder( + kind: BenchLocalProviderKind, +): string { + switch (kind) { + case "huggingface": + return "hf_..."; + default: + return "sk-or-v1-..."; + } } function benchPackMutationLabel(mutation: BenchPackMutationState): string { - switch (mutation.action) { - case "install": - return mutation.phase === "complete" ? "Installed" : "Installing..."; - case "update": - return mutation.phase === "complete" ? "Updated" : "Updating..."; - case "uninstall": - return mutation.phase === "complete" ? "Removed" : "Removing..."; - default: - return mutation.message; - } + switch (mutation.action) { + case "install": + return mutation.phase === "complete" ? "Installed" : "Installing..."; + case "update": + return mutation.phase === "complete" ? "Updated" : "Updating..."; + case "uninstall": + return mutation.phase === "complete" ? 
"Removed" : "Removing..."; + default: + return mutation.message; + } } function defaultProviderBaseUrl(kind: BenchLocalProviderKind): string { - switch (kind) { - case "openrouter": - return "https://openrouter.ai/api/v1"; - case "huggingface": - return "https://router.huggingface.co/v1"; - case "ollama": - return "http://127.0.0.1:11434/v1"; - case "llamacpp": - return "http://127.0.0.1:8080/v1"; - case "mlx": - return "http://127.0.0.1:8082/v1"; - case "lmstudio": - return "http://127.0.0.1:1234/v1"; - case "pico": - return "http://127.0.0.1:7426/v1"; - case "openai_compatible": - default: - return "https://api.example.com/v1"; - } + switch (kind) { + case "openrouter": + return "https://openrouter.ai/api/v1"; + case "huggingface": + return "https://router.huggingface.co/v1"; + case "ollama": + return "http://127.0.0.1:11434/v1"; + case "llamacpp": + return "http://127.0.0.1:8080/v1"; + case "mlx": + return "http://127.0.0.1:8082/v1"; + case "lmstudio": + return "http://127.0.0.1:1234/v1"; + case "pico": + return "http://127.0.0.1:7426/v1"; + case "openai_compatible": + default: + return "https://api.example.com/v1"; + } } function createEmptyProvider(): ProviderFormState { - return { - id: `openai_compatible-${crypto.randomUUID()}`, - kind: "openai_compatible", - name: "", - enabled: true, - base_url: "https://api.example.com/v1", - api_key: "" - }; + return { + id: `openai_compatible-${crypto.randomUUID()}`, + kind: "openai_compatible", + name: "", + enabled: true, + base_url: "https://api.example.com/v1", + api_key: "", + }; } function createEmptyModel(providerId = "openrouter"): ModelFormState { - return { - provider: providerId, - model: "", - label: "", - group: "primary", - enabled: true - }; + return { + provider: providerId, + model: "", + label: "", + group: "primary", + enabled: true, + }; } -function providerSupportsModelDiscovery(provider?: BenchLocalProviderConfig | null): boolean { - return provider?.kind === "openrouter" || provider?.kind === 
"huggingface" || provider?.kind === "openai_compatible"; +function providerSupportsModelDiscovery( + provider?: BenchLocalProviderConfig | null, +): boolean { + return ( + provider?.kind === "openrouter" || + provider?.kind === "huggingface" || + provider?.kind === "openai_compatible" + ); } function defaultModelLabel( - providerName: string, - modelId: string, - discoveredName?: string + providerName: string, + modelId: string, + discoveredName?: string, ): string { - const trimmedDiscoveredName = discoveredName?.trim(); + const trimmedDiscoveredName = discoveredName?.trim(); - if (trimmedDiscoveredName) { - return trimmedDiscoveredName; - } + if (trimmedDiscoveredName) { + return trimmedDiscoveredName; + } - return `${modelId.trim()} via ${providerName}`.trim(); + return `${modelId.trim()} via ${providerName}`.trim(); } function createSamplingForm(input?: GenerationRequest): SamplingFormState { - return { - temperature: input?.temperature?.toString() ?? "", - top_p: input?.top_p?.toString() ?? "", - top_k: input?.top_k?.toString() ?? "", - min_p: input?.min_p?.toString() ?? "", - repetition_penalty: input?.repetition_penalty?.toString() ?? "", - max_tokens: input?.max_tokens?.toString() ?? "", - request_timeout_seconds: input?.request_timeout_seconds?.toString() ?? "" - }; + return { + temperature: input?.temperature?.toString() ?? "", + top_p: input?.top_p?.toString() ?? "", + top_k: input?.top_k?.toString() ?? "", + min_p: input?.min_p?.toString() ?? "", + repetition_penalty: input?.repetition_penalty?.toString() ?? "", + max_tokens: input?.max_tokens?.toString() ?? "", + request_timeout_seconds: input?.request_timeout_seconds?.toString() ?? 
"", + }; } -function parseSamplingForm(form: SamplingFormState): { value?: GenerationRequest; error?: string } { - const result: GenerationRequest = {}; +function parseSamplingForm(form: SamplingFormState): { + value?: GenerationRequest; + error?: string; +} { + const result: GenerationRequest = {}; - for (const field of SAMPLING_FIELDS) { - const rawValue = form[field.key].trim(); + for (const field of SAMPLING_FIELDS) { + const rawValue = form[field.key].trim(); - if (!rawValue) { - continue; - } + if (!rawValue) { + continue; + } - const parsed = field.integer ? Number.parseInt(rawValue, 10) : Number(rawValue); + const parsed = field.integer + ? Number.parseInt(rawValue, 10) + : Number(rawValue); - if (!Number.isFinite(parsed)) { - return { error: `${field.label} must be a valid number.` }; - } + if (!Number.isFinite(parsed)) { + return { error: `${field.label} must be a valid number.` }; + } - if (field.integer && parsed <= 0) { - return { error: `${field.label} must be greater than zero.` }; - } + if (field.integer && parsed <= 0) { + return { error: `${field.label} must be greater than zero.` }; + } - result[field.key as keyof GenerationRequest] = parsed; - } + result[field.key as keyof GenerationRequest] = parsed; + } - return { value: result }; + return { value: result }; } -function toProviderForm(id: string, provider: BenchLocalProviderConfig): ProviderFormState { - return { - id, - kind: provider.kind, - name: provider.name, - enabled: provider.enabled, - base_url: provider.base_url, - api_key: provider.api_key ?? "" - }; +function toProviderForm( + id: string, + provider: BenchLocalProviderConfig, +): ProviderFormState { + return { + id, + kind: provider.kind, + name: provider.name, + enabled: provider.enabled, + base_url: provider.base_url, + api_key: provider.api_key ?? 
"", + }; } function toModelForm(model: BenchLocalModelConfig): ModelFormState { - return { - provider: model.provider, - model: model.model, - label: model.label, - group: model.group, - enabled: model.enabled - }; + return { + provider: model.provider, + model: model.model, + label: model.label, + group: model.group, + enabled: model.enabled, + }; } function buildModelConfig( - form: ModelFormState, - providers: Record + form: ModelFormState, + providers: Record, ): BenchLocalModelConfig { - const provider = providers[form.provider.trim()]; - const providerLabel = provider?.name?.trim() || form.provider.trim(); - - return { - id: `${form.provider}:${form.model}`.trim(), - provider: form.provider.trim(), - model: form.model.trim(), - label: form.label.trim() || `${form.model.trim()} via ${providerLabel}`, - group: form.group.trim() || "primary", - enabled: form.enabled - }; + const provider = providers[form.provider.trim()]; + const providerLabel = provider?.name?.trim() || form.provider.trim(); + + return { + id: `${form.provider}:${form.model}`.trim(), + provider: form.provider.trim(), + model: form.model.trim(), + label: form.label.trim() || `${form.model.trim()} via ${providerLabel}`, + group: form.group.trim() || "primary", + enabled: form.enabled, + }; } function createWorkspaceName(existingCount: number): string { - return existingCount === 0 ? "My Workspace" : `Workspace ${existingCount + 1}`; + return existingCount === 0 + ? "My Workspace" + : `Workspace ${existingCount + 1}`; } -function createTabTitle(benchPackId: string, inspections: BenchPackInspection[]): string { - return inspections.find((inspection) => inspection.id === benchPackId)?.manifest?.name ?? benchPackId; +function createTabTitle( + benchPackId: string, + inspections: BenchPackInspection[], +): string { + return ( + inspections.find((inspection) => inspection.id === benchPackId)?.manifest + ?.name ?? 
benchPackId + ); } function normalizeTabModelSelections( - selections: BenchLocalWorkspaceTabModelSelection[] + selections: BenchLocalWorkspaceTabModelSelection[], ): BenchLocalWorkspaceTabModelSelection[] { - const seen = new Set(); - - return selections - .filter((selection) => { - const modelId = selection.modelId.trim(); - - if (!modelId || seen.has(modelId)) { - return false; - } - - seen.add(modelId); - return true; - }) - .map((selection) => ({ - modelId: selection.modelId.trim(), - alias: selection.alias?.trim() || undefined - })); + const seen = new Set(); + + return selections + .filter((selection) => { + const modelId = selection.modelId.trim(); + + if (!modelId || seen.has(modelId)) { + return false; + } + + seen.add(modelId); + return true; + }) + .map((selection) => ({ + modelId: selection.modelId.trim(), + alias: selection.alias?.trim() || undefined, + })); } function normalizeEditableTabModelSelections( - selections: BenchLocalWorkspaceTabModelSelection[] + selections: BenchLocalWorkspaceTabModelSelection[], ): BenchLocalWorkspaceTabModelSelection[] { - const seen = new Set(); - - return selections - .filter((selection) => { - const modelId = selection.modelId.trim(); - - if (!modelId || seen.has(modelId)) { - return false; - } - - seen.add(modelId); - return true; - }) - .map((selection) => ({ - modelId: selection.modelId.trim(), - alias: selection.alias - })); + const seen = new Set(); + + return selections + .filter((selection) => { + const modelId = selection.modelId.trim(); + + if (!modelId || seen.has(modelId)) { + return false; + } + + seen.add(modelId); + return true; + }) + .map((selection) => ({ + modelId: selection.modelId.trim(), + alias: selection.alias, + })); } function getTableScrollbarThumbWidth(metrics: { - clientWidth: number; - scrollWidth: number; - scrollLeft: number; + clientWidth: number; + scrollWidth: number; + scrollLeft: number; }): number { - if (metrics.scrollWidth <= 0 || metrics.clientWidth <= 0) { - return 0; - } + 
if (metrics.scrollWidth <= 0 || metrics.clientWidth <= 0) { + return 0; + } - const ratio = metrics.clientWidth / metrics.scrollWidth; - return Math.max(56, Math.round(metrics.clientWidth * ratio)); + const ratio = metrics.clientWidth / metrics.scrollWidth; + return Math.max(56, Math.round(metrics.clientWidth * ratio)); } function SettingsTableShell({ - children, - className + children, + className, }: { - children: ReactNode; - className?: string; + children: ReactNode; + className?: string; }) { - const viewportRef = useRef(null); - const scrollbarTrackRef = useRef(null); - const scrollbarDragRef = useRef<{ - startX: number; - startScrollLeft: number; - } | null>(null); - const [scrollMetrics, setScrollMetrics] = useState({ - clientWidth: 0, - scrollWidth: 0, - scrollLeft: 0 - }); - - const hasHorizontalOverflow = scrollMetrics.scrollWidth > scrollMetrics.clientWidth + 1; - const scrollbarThumbWidth = hasHorizontalOverflow ? getTableScrollbarThumbWidth(scrollMetrics) : 0; - const scrollbarThumbOffset = - hasHorizontalOverflow && scrollbarTrackRef.current - ? ((scrollMetrics.scrollLeft / Math.max(1, scrollMetrics.scrollWidth - scrollMetrics.clientWidth)) * - Math.max(0, scrollbarTrackRef.current.clientWidth - scrollbarThumbWidth)) - : 0; - const wrapClassName = [ - "settings-list-table-wrap", - className, - hasHorizontalOverflow ? "has-sticky-last-column-shadow" : "" - ] - .filter(Boolean) - .join(" "); - - useEffect(() => { - const viewport = viewportRef.current; - - if (!viewport) { - return; - } - - const updateMetrics = () => { - setScrollMetrics({ - clientWidth: viewport.clientWidth, - scrollWidth: viewport.scrollWidth, - scrollLeft: viewport.scrollLeft - }); - }; - - const syncFromViewport = () => { - updateMetrics(); - }; - - updateMetrics(); - viewport.addEventListener("scroll", syncFromViewport); - window.addEventListener("resize", updateMetrics); - - const resizeObserver = - typeof ResizeObserver !== "undefined" - ? 
new ResizeObserver(() => { - updateMetrics(); - }) - : null; - - resizeObserver?.observe(viewport); - - if (viewport.firstElementChild instanceof HTMLElement) { - resizeObserver?.observe(viewport.firstElementChild); - } - - return () => { - viewport.removeEventListener("scroll", syncFromViewport); - window.removeEventListener("resize", updateMetrics); - resizeObserver?.disconnect(); - }; - }, [children]); - - useEffect(() => { - const handleMove = (event: MouseEvent) => { - const viewport = viewportRef.current; - const track = scrollbarTrackRef.current; - const drag = scrollbarDragRef.current; - - if (!viewport || !track || !drag) { - return; - } - - const maxScrollLeft = Math.max(0, viewport.scrollWidth - viewport.clientWidth); - const maxThumbOffset = Math.max(1, track.clientWidth - getTableScrollbarThumbWidth(scrollMetrics)); - const deltaX = event.clientX - drag.startX; - const nextScrollLeft = Math.min( - maxScrollLeft, - Math.max(0, drag.startScrollLeft + (deltaX / maxThumbOffset) * maxScrollLeft) - ); - viewport.scrollLeft = nextScrollLeft; - }; - - const handleUp = () => { - scrollbarDragRef.current = null; - document.body.style.userSelect = ""; - }; - - window.addEventListener("mousemove", handleMove); - window.addEventListener("mouseup", handleUp); - - return () => { - window.removeEventListener("mousemove", handleMove); - window.removeEventListener("mouseup", handleUp); - }; - }, [scrollMetrics]); - - return ( -
-
- {children} -
- {hasHorizontalOverflow ? ( - - ); + const viewportRef = useRef(null); + const scrollbarTrackRef = useRef(null); + const scrollbarDragRef = useRef<{ + startX: number; + startScrollLeft: number; + } | null>(null); + const [scrollMetrics, setScrollMetrics] = useState({ + clientWidth: 0, + scrollWidth: 0, + scrollLeft: 0, + }); + + const hasHorizontalOverflow = + scrollMetrics.scrollWidth > scrollMetrics.clientWidth + 1; + const scrollbarThumbWidth = hasHorizontalOverflow + ? getTableScrollbarThumbWidth(scrollMetrics) + : 0; + const scrollbarThumbOffset = + hasHorizontalOverflow && scrollbarTrackRef.current + ? (scrollMetrics.scrollLeft / + Math.max(1, scrollMetrics.scrollWidth - scrollMetrics.clientWidth)) * + Math.max(0, scrollbarTrackRef.current.clientWidth - scrollbarThumbWidth) + : 0; + const wrapClassName = [ + "settings-list-table-wrap", + className, + hasHorizontalOverflow ? "has-sticky-last-column-shadow" : "", + ] + .filter(Boolean) + .join(" "); + + useEffect(() => { + const viewport = viewportRef.current; + + if (!viewport) { + return; + } + + const updateMetrics = () => { + setScrollMetrics({ + clientWidth: viewport.clientWidth, + scrollWidth: viewport.scrollWidth, + scrollLeft: viewport.scrollLeft, + }); + }; + + const syncFromViewport = () => { + updateMetrics(); + }; + + updateMetrics(); + viewport.addEventListener("scroll", syncFromViewport); + window.addEventListener("resize", updateMetrics); + + const resizeObserver = + typeof ResizeObserver !== "undefined" + ? 
new ResizeObserver(() => { + updateMetrics(); + }) + : null; + + resizeObserver?.observe(viewport); + + if (viewport.firstElementChild instanceof HTMLElement) { + resizeObserver?.observe(viewport.firstElementChild); + } + + return () => { + viewport.removeEventListener("scroll", syncFromViewport); + window.removeEventListener("resize", updateMetrics); + resizeObserver?.disconnect(); + }; + }, [children]); + + useEffect(() => { + const handleMove = (event: MouseEvent) => { + const viewport = viewportRef.current; + const track = scrollbarTrackRef.current; + const drag = scrollbarDragRef.current; + + if (!viewport || !track || !drag) { + return; + } + + const maxScrollLeft = Math.max( + 0, + viewport.scrollWidth - viewport.clientWidth, + ); + const maxThumbOffset = Math.max( + 1, + track.clientWidth - getTableScrollbarThumbWidth(scrollMetrics), + ); + const deltaX = event.clientX - drag.startX; + const nextScrollLeft = Math.min( + maxScrollLeft, + Math.max( + 0, + drag.startScrollLeft + (deltaX / maxThumbOffset) * maxScrollLeft, + ), + ); + viewport.scrollLeft = nextScrollLeft; + }; + + const handleUp = () => { + scrollbarDragRef.current = null; + document.body.style.userSelect = ""; + }; + + window.addEventListener("mousemove", handleMove); + window.addEventListener("mouseup", handleUp); + + return () => { + window.removeEventListener("mousemove", handleMove); + window.removeEventListener("mouseup", handleUp); + }; + }, [scrollMetrics]); + + return ( +
+
+ {children} +
+ {hasHorizontalOverflow ? ( + + ); } -function resolveTabModels(tab: BenchLocalWorkspaceTab | null, models: BenchLocalModelConfig[]): ResolvedTabModel[] { - const enabledModels = models.filter((model) => model.enabled); - const modelMap = new Map(enabledModels.map((model) => [model.id, model])); - - return normalizeTabModelSelections(tab?.modelSelections ?? []).reduce((resolved, selection) => { - const model = modelMap.get(selection.modelId); - - if (!model) { - return resolved; - } - - resolved.push({ - ...model, - alias: selection.alias, - displayLabel: selection.alias || model.label - }); - - return resolved; - }, []); +function resolveTabModels( + tab: BenchLocalWorkspaceTab | null, + models: BenchLocalModelConfig[], +): ResolvedTabModel[] { + const enabledModels = models.filter((model) => model.enabled); + const modelMap = new Map(enabledModels.map((model) => [model.id, model])); + + return normalizeTabModelSelections(tab?.modelSelections ?? []).reduce< + ResolvedTabModel[] + >((resolved, selection) => { + const model = modelMap.get(selection.modelId); + + if (!model) { + return resolved; + } + + resolved.push({ + ...model, + alias: selection.alias, + displayLabel: selection.alias || model.label, + }); + + return resolved; + }, []); } function resolveHistoryModels( - runSummary: BenchPackRunSummary | null, - models: BenchLocalModelConfig[] + runSummary: BenchPackRunSummary | null, + models: BenchLocalModelConfig[], ): ResolvedTabModel[] { - if (!runSummary) { - return []; - } - - const modelMap = new Map(models.map((model) => [model.id, model])); - const runStartedEvent = runSummary.events.find( - (event): event is Extract => event.type === "run_started" - ); - const orderedModelIds = [ - ...(runStartedEvent?.models.map((model) => model.id) ?? 
[]), - ...Object.keys(runSummary.resultsByModel) - ].filter((modelId, index, all) => modelId && all.indexOf(modelId) === index); - - return orderedModelIds.map((modelId) => { - const currentModel = modelMap.get(modelId); - const historicalLabel = runStartedEvent?.models.find((model) => model.id === modelId)?.label; - const label = currentModel?.label ?? historicalLabel ?? modelId; - - return { - id: modelId, - provider: currentModel?.provider ?? "history", - model: currentModel?.model ?? modelId, - label, - group: currentModel?.group ?? "history", - enabled: currentModel?.enabled ?? false, - displayLabel: label - }; - }); + if (!runSummary) { + return []; + } + + const modelMap = new Map(models.map((model) => [model.id, model])); + const runStartedEvent = runSummary.events.find( + (event): event is Extract => + event.type === "run_started", + ); + const orderedModelIds = [ + ...(runStartedEvent?.models.map((model) => model.id) ?? []), + ...Object.keys(runSummary.resultsByModel), + ].filter((modelId, index, all) => modelId && all.indexOf(modelId) === index); + + return orderedModelIds.map((modelId) => { + const currentModel = modelMap.get(modelId); + const historicalLabel = runStartedEvent?.models.find( + (model) => model.id === modelId, + )?.label; + const label = currentModel?.label ?? historicalLabel ?? modelId; + + return { + id: modelId, + provider: currentModel?.provider ?? "history", + model: currentModel?.model ?? modelId, + label, + group: currentModel?.group ?? "history", + enabled: currentModel?.enabled ?? 
false, + displayLabel: label, + }; + }); } function countStoredRunResults(summary: BenchPackRunSummary | null): number { - if (!summary) { - return 0; - } - - return Object.values(summary.resultsByModel).reduce((total, results) => total + results.length, 0); + if (!summary) { + return 0; + } + + return Object.values(summary.resultsByModel).reduce( + (total, results) => total + results.length, + 0, + ); } function isRunSummaryComplete(summary: BenchPackRunSummary | null): boolean { - if (!summary) { - return false; - } + if (!summary) { + return false; + } - return countStoredRunResults(summary) >= summary.modelCount * summary.scenarioCount; + return ( + countStoredRunResults(summary) >= summary.modelCount * summary.scenarioCount + ); } function buildHistoryModelSelections( - runSummary: BenchPackRunSummary | null, - models: BenchLocalModelConfig[] + runSummary: BenchPackRunSummary | null, + models: BenchLocalModelConfig[], ): BenchLocalWorkspaceTabModelSelection[] { - return resolveHistoryModels(runSummary, models).map((model) => ({ - modelId: model.id, - alias: model.displayLabel !== model.label ? model.displayLabel : undefined - })); + return resolveHistoryModels(runSummary, models).map((model) => ({ + modelId: model.id, + alias: model.displayLabel !== model.label ? 
model.displayLabel : undefined, + })); } type ReplayCell = { - modelId: string; - scenarioId: string; - result: ScenarioResult; + modelId: string; + scenarioId: string; + result: ScenarioResult; }; function buildReplayGroups( - summary: BenchPackRunSummary, - scenarios: ScenarioMeta[], - modelIds: string[] + summary: BenchPackRunSummary, + scenarios: ScenarioMeta[], + modelIds: string[], ): ReplayCell[][] { - const scenarioOrder = scenarios.map((scenario) => scenario.id); - const resultMap = new Map(); - - for (const [modelId, results] of Object.entries(summary.resultsByModel)) { - for (const result of results) { - resultMap.set(`${modelId}::${result.scenarioId}`, result); - } - } - - const singletonCellsByScenarioThenModel = scenarioOrder.flatMap((scenarioId) => - modelIds.flatMap((modelId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result ? [[{ modelId, scenarioId, result } satisfies ReplayCell]] : []; - }) - ); - - switch (summary.executionMode ?? "parallel_by_test_case") { - case "serial": - return singletonCellsByScenarioThenModel; - case "serial_by_model": - return modelIds.flatMap((modelId) => - scenarioOrder.flatMap((scenarioId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result ? [[{ modelId, scenarioId, result } satisfies ReplayCell]] : []; - }) - ); - case "parallel_by_test_case": - return scenarioOrder - .map((scenarioId) => - modelIds.flatMap((modelId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result ? [{ modelId, scenarioId, result } satisfies ReplayCell] : []; - }) - ) - .filter((group) => group.length > 0); - case "parallel_by_model": - return modelIds - .map((modelId) => - scenarioOrder.flatMap((scenarioId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result ? 
[{ modelId, scenarioId, result } satisfies ReplayCell] : []; - }) - ) - .filter((group) => group.length > 0); - case "full_parallel": - return [ - scenarioOrder.flatMap((scenarioId) => - modelIds.flatMap((modelId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result ? [{ modelId, scenarioId, result } satisfies ReplayCell] : []; - }) - ) - ].filter((group) => group.length > 0); - default: - return singletonCellsByScenarioThenModel; - } + const scenarioOrder = scenarios.map((scenario) => scenario.id); + const resultMap = new Map(); + + for (const [modelId, results] of Object.entries(summary.resultsByModel)) { + for (const result of results) { + resultMap.set(`${modelId}::${result.scenarioId}`, result); + } + } + + const singletonCellsByScenarioThenModel = scenarioOrder.flatMap( + (scenarioId) => + modelIds.flatMap((modelId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? [[{ modelId, scenarioId, result } satisfies ReplayCell]] + : []; + }), + ); + + switch (summary.executionMode ?? "parallel_by_test_case") { + case "serial": + return singletonCellsByScenarioThenModel; + case "serial_by_model": + return modelIds.flatMap((modelId) => + scenarioOrder.flatMap((scenarioId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? [[{ modelId, scenarioId, result } satisfies ReplayCell]] + : []; + }), + ); + case "parallel_by_test_case": + return scenarioOrder + .map((scenarioId) => + modelIds.flatMap((modelId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? [{ modelId, scenarioId, result } satisfies ReplayCell] + : []; + }), + ) + .filter((group) => group.length > 0); + case "parallel_by_model": + return modelIds + .map((modelId) => + scenarioOrder.flatMap((scenarioId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? 
[{ modelId, scenarioId, result } satisfies ReplayCell] + : []; + }), + ) + .filter((group) => group.length > 0); + case "full_parallel": + return [ + scenarioOrder.flatMap((scenarioId) => + modelIds.flatMap((modelId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? [{ modelId, scenarioId, result } satisfies ReplayCell] + : []; + }), + ), + ].filter((group) => group.length > 0); + default: + return singletonCellsByScenarioThenModel; + } } function upsertTabModelAlias( - tab: BenchLocalWorkspaceTab, - models: BenchLocalModelConfig[], - modelId: string, - alias: string + tab: BenchLocalWorkspaceTab, + models: BenchLocalModelConfig[], + modelId: string, + alias: string, ): BenchLocalWorkspaceTabModelSelection[] { - const normalized = normalizeTabModelSelections(tab.modelSelections); - const nextAlias = alias.trim() || undefined; - let found = false; - - const next = normalized.map((selection) => { - if (selection.modelId !== modelId) { - return selection; - } - - found = true; - return { - ...selection, - alias: nextAlias - }; - }); - - if (!found) { - next.push({ - modelId, - alias: nextAlias - }); - } - - return next; + const normalized = normalizeTabModelSelections(tab.modelSelections); + const nextAlias = alias.trim() || undefined; + let found = false; + + const next = normalized.map((selection) => { + if (selection.modelId !== modelId) { + return selection; + } + + found = true; + return { + ...selection, + alias: nextAlias, + }; + }); + + if (!found) { + next.push({ + modelId, + alias: nextAlias, + }); + } + + return next; } function pushScenarioResult( - current: Record, - modelId: string, - result: ScenarioResult + current: Record, + modelId: string, + result: ScenarioResult, ): Record { - return { - ...current, - [modelId]: [...(current[modelId] ?? []).filter((candidate) => candidate.scenarioId !== result.scenarioId), result] - }; + return { + ...current, + [modelId]: [ + ...(current[modelId] ?? 
[]).filter( + (candidate) => candidate.scenarioId !== result.scenarioId, + ), + result, + ], + }; } function updateLiveRunState( - current: LiveRunState | undefined, - event: ProgressEvent + current: LiveRunState | undefined, + event: ProgressEvent, ): LiveRunState { - const next: LiveRunState = current ?? { - events: [], - resultsByModel: {}, - activeCellKeys: [] - }; - - const eventKey = - "modelId" in event && "scenarioId" in event ? `${event.modelId}::${event.scenarioId}` : null; - - next.events = [...next.events, event]; - - if (event.type === "run_started") { - next.runId = event.runId; - } - - if (event.type === "model_progress" && eventKey && !next.activeCellKeys.includes(eventKey)) { - next.activeCellKeys = [...next.activeCellKeys, eventKey]; - } - - if (event.type === "scenario_result" && eventKey) { - next.resultsByModel = pushScenarioResult(next.resultsByModel, event.modelId, event.result); - next.activeCellKeys = next.activeCellKeys.filter((key) => key !== eventKey); - } - - if (event.type === "run_finished" || event.type === "run_error") { - next.activeCellKeys = []; - } - - return next; + const next: LiveRunState = current ?? { + events: [], + resultsByModel: {}, + activeCellKeys: [], + }; + + const eventKey = + "modelId" in event && "scenarioId" in event + ? 
`${event.modelId}::${event.scenarioId}` + : null; + + next.events = [...next.events, event]; + + if (event.type === "run_started") { + next.runId = event.runId; + } + + if ( + event.type === "model_progress" && + eventKey && + !next.activeCellKeys.includes(eventKey) + ) { + next.activeCellKeys = [...next.activeCellKeys, eventKey]; + } + + if (event.type === "scenario_result" && eventKey) { + next.resultsByModel = pushScenarioResult( + next.resultsByModel, + event.modelId, + event.result, + ); + next.activeCellKeys = next.activeCellKeys.filter((key) => key !== eventKey); + } + + if (event.type === "run_finished" || event.type === "run_error") { + next.activeCellKeys = []; + } + + return next; } -function detailModalKey(detail: Pick): string { - return `${detail.tabId}::${detail.modelId}::${detail.scenarioId}`; +function detailModalKey( + detail: Pick, +): string { + return `${detail.tabId}::${detail.modelId}::${detail.scenarioId}`; } function getCellKey(modelId: string, scenarioId: string): string { - return `${modelId}::${scenarioId}`; + return `${modelId}::${scenarioId}`; } const REGISTRY_UNAVAILABLE_MESSAGE = - "Official Bench Pack registry is unavailable right now. Installed Bench Packs remain usable."; + "Official Bench Pack registry is unavailable right now. 
Installed Bench Packs remain usable."; function formatDesktopErrorMessage(error: unknown): string { - if (!(error instanceof Error)) { - return ""; - } + if (!(error instanceof Error)) { + return ""; + } - return error.message.replace(/^Error invoking remote method '[^']+':\s*/u, "").trim(); + return error.message + .replace(/^Error invoking remote method '[^']+':\s*/u, "") + .trim(); } function isRegistryConnectivityError(error: unknown): boolean { - const message = formatDesktopErrorMessage(error); - return /fetch failed/i.test(message); + const message = formatDesktopErrorMessage(error); + return /fetch failed/i.test(message); } function formatRegistryWarning(error: unknown): string { - const message = formatDesktopErrorMessage(error); + const message = formatDesktopErrorMessage(error); - if (!message) { - return REGISTRY_UNAVAILABLE_MESSAGE; - } + if (!message) { + return REGISTRY_UNAVAILABLE_MESSAGE; + } - if (!message || /fetch failed/i.test(message)) { - return REGISTRY_UNAVAILABLE_MESSAGE; - } + if (!message || /fetch failed/i.test(message)) { + return REGISTRY_UNAVAILABLE_MESSAGE; + } - return `${REGISTRY_UNAVAILABLE_MESSAGE} ${message}`; + return `${REGISTRY_UNAVAILABLE_MESSAGE} ${message}`; } function formatRegistryMutationError( - action: "install" | "update", - benchPackId: string, - error: unknown + action: "install" | "update", + benchPackId: string, + error: unknown, ): string { - if (isRegistryConnectivityError(error)) { - return `Failed to ${action} ${benchPackId}. Official Bench Pack registry is unavailable right now.`; - } + if (isRegistryConnectivityError(error)) { + return `Failed to ${action} ${benchPackId}. 
Official Bench Pack registry is unavailable right now.`; + } - return formatDesktopErrorMessage(error) || `Failed to ${action} ${benchPackId}.`; + return ( + formatDesktopErrorMessage(error) || `Failed to ${action} ${benchPackId}.` + ); } function getRequiredVerifierRunBlocker( - manifest: BenchPackManifest | undefined, - benchPackConfig: BenchLocalConfig["benchpacks"][string] | undefined, - verifierStatus: BenchPackVerifierStatus | undefined + manifest: BenchPackManifest | undefined, + benchPackConfig: BenchLocalConfig["benchpacks"][string] | undefined, + verifierStatus: BenchPackVerifierStatus | undefined, ): BenchPackRunBlocker | null { - const requiredVerifierSpecs = (manifest?.verifiers ?? manifest?.sidecars ?? []).filter((spec) => spec.required); - - if (requiredVerifierSpecs.length === 0) { - return null; - } - - if (verifierStatus?.docker.state === "not_installed") { - return { - title: "Docker Required", - message: "This Bench Pack needs a local verifier runtime. Install Docker Desktop before starting the test run.", - actionLabel: "Open Verification" - }; - } - - if (verifierStatus?.docker.state === "not_running") { - return { - title: "Docker Not Running", - message: "This Bench Pack needs a local verifier runtime. Start Docker Desktop, then try the run again.", - actionLabel: "Open Verification" - }; - } - - for (const spec of requiredVerifierSpecs) { - const runtimeConfig = benchPackConfig?.verifiers?.[spec.id] ?? benchPackConfig?.sidecars?.[spec.id]; - const runtimeStatus = verifierStatus?.verifiers.find((entry) => entry.id === spec.id); - - if ((runtimeConfig?.mode ?? spec.defaultMode) === "docker" && runtimeConfig?.auto_start === false && runtimeStatus?.status !== "running") { - return { - title: "Verifier Not Started", - message: "Auto Start is disabled for this required verifier. 
Start it from Verification settings before running the Bench Pack.", - actionLabel: "Open Verification" - }; - } - - if (runtimeStatus?.status === "missing_dependency") { - return { - title: "Docker Required", - message: runtimeStatus.details ?? "This Bench Pack needs Local Docker before it can run.", - actionLabel: "Open Verification" - }; - } - - if (runtimeStatus?.status === "dependency_not_running") { - return { - title: "Docker Not Running", - message: runtimeStatus.details ?? "This Bench Pack needs Local Docker to be running before it can run.", - actionLabel: "Open Verification" - }; - } - } - - return null; + const requiredVerifierSpecs = ( + manifest?.verifiers ?? + manifest?.sidecars ?? + [] + ).filter((spec) => spec.required); + + if (requiredVerifierSpecs.length === 0) { + return null; + } + + if (verifierStatus?.docker.state === "not_installed") { + return { + title: "Docker Required", + message: + "This Bench Pack needs a local verifier runtime. Install Docker Desktop before starting the test run.", + actionLabel: "Open Verification", + }; + } + + if (verifierStatus?.docker.state === "not_running") { + return { + title: "Docker Not Running", + message: + "This Bench Pack needs a local verifier runtime. Start Docker Desktop, then try the run again.", + actionLabel: "Open Verification", + }; + } + + for (const spec of requiredVerifierSpecs) { + const runtimeConfig = + benchPackConfig?.verifiers?.[spec.id] ?? + benchPackConfig?.sidecars?.[spec.id]; + const runtimeStatus = verifierStatus?.verifiers.find( + (entry) => entry.id === spec.id, + ); + + if ( + (runtimeConfig?.mode ?? spec.defaultMode) === "docker" && + runtimeConfig?.auto_start === false && + runtimeStatus?.status !== "running" + ) { + return { + title: "Verifier Not Started", + message: + "Auto Start is disabled for this required verifier. 
Start it from Verification settings before running the Bench Pack.", + actionLabel: "Open Verification", + }; + } + + if (runtimeStatus?.status === "missing_dependency") { + return { + title: "Docker Required", + message: + runtimeStatus.details ?? + "This Bench Pack needs Local Docker before it can run.", + actionLabel: "Open Verification", + }; + } + + if (runtimeStatus?.status === "dependency_not_running") { + return { + title: "Docker Not Running", + message: + runtimeStatus.details ?? + "This Bench Pack needs Local Docker to be running before it can run.", + actionLabel: "Open Verification", + }; + } + } + + return null; } -function getVerifierStatusTone(status: BenchPackVerifierStatus["verifiers"][number]["status"] | undefined): string { - switch (status) { - case "running": - return "status-ready"; - case "missing_dependency": - return "status-not-installed"; - case "dependency_not_running": - case "failed": - return "status-danger"; - default: - return "status-idle"; - } +function getVerifierStatusTone( + status: BenchPackVerifierStatus["verifiers"][number]["status"] | undefined, +): string { + switch (status) { + case "running": + return "status-ready"; + case "missing_dependency": + return "status-not-installed"; + case "dependency_not_running": + case "failed": + return "status-danger"; + default: + return "status-idle"; + } } -function formatVerifierRuntimeStatus(status: BenchPackVerifierStatus["verifiers"][number]["status"] | undefined): string { - switch (status) { - case "missing_dependency": - return "docker required"; - case "dependency_not_running": - return "docker not running"; - default: - return (status ?? "stopped").replaceAll("_", " "); - } +function formatVerifierRuntimeStatus( + status: BenchPackVerifierStatus["verifiers"][number]["status"] | undefined, +): string { + switch (status) { + case "missing_dependency": + return "docker required"; + case "dependency_not_running": + return "docker not running"; + default: + return (status ?? 
"stopped").replaceAll("_", " "); + } } export function App() { - if (DETACHED_LOGS_VIEW) { - return ; - } - - const isMacPlatform = typeof navigator !== "undefined" && navigator.userAgent.includes("Mac"); - const [loadState, setLoadState] = useState(null); - const [draft, setDraft] = useState(null); - const [workspaceState, setWorkspaceState] = useState(null); - const [benchPackInspections, setBenchPackInspections] = useState([]); - const [registryEntries, setRegistryEntries] = useState([]); - const [registryWarning, setRegistryWarning] = useState(null); - const [availableThemes, setAvailableThemes] = useState([]); - const [activeThemeDefinition, setActiveThemeDefinition] = useState(null); - const [systemPrefersDark, setSystemPrefersDark] = useState( - typeof window !== "undefined" ? window.matchMedia("(prefers-color-scheme: dark)").matches : false - ); - const [verifierStatuses, setVerifierStatuses] = useState>({}); - const [tabMenuOpen, setTabMenuOpen] = useState(false); - const [themeMenuOpen, setThemeMenuOpen] = useState(false); - const [sidebarOpen, setSidebarOpen] = useState(() => { - if (typeof window === "undefined") { - return true; - } - - return window.localStorage.getItem(SIDEBAR_OPEN_STORAGE_KEY) !== "false"; - }); - const [settingsOpen, setSettingsOpen] = useState(false); - const [settingsTab, setSettingsTab] = useState("providers"); - const [aboutDialogOpen, setAboutDialogOpen] = useState(false); - const [appMetadata, setAppMetadata] = useState(null); - const [appUpdateState, setAppUpdateState] = useState(null); - const [dismissedDownloadedUpdateVersion, setDismissedDownloadedUpdateVersion] = useState(null); - const [providerModal, setProviderModal] = useState(null); - const [modelModal, setModelModal] = useState(null); - const [modelBrowserModal, setModelBrowserModal] = useState(null); - const [tabModelsModal, setTabModelsModal] = useState(null); - const [samplingModal, setSamplingModal] = useState(null); - const [modelAliasModal, 
setModelAliasModal] = useState(null); - const [workspaceModal, setWorkspaceModal] = useState(null); - const [workspaceContextMenu, setWorkspaceContextMenu] = useState(null); - const [historyModal, setHistoryModal] = useState(null); - const [confirmDialog, setConfirmDialog] = useState(null); - const [verifierPreparationModal, setVerifierPreparationModal] = useState(null); - const [settingsVerifierPreparationModal, setSettingsVerifierPreparationModal] = useState(null); - const [stoppingVerifierStarts, setStoppingVerifierStarts] = useState>({}); - const [draggedTabId, setDraggedTabId] = useState(null); - const [editingTab, setEditingTab] = useState<{ tabId: string; value: string; width: number } | null>(null); - const [activeRuns, setActiveRuns] = useState>({}); - const [stoppingRuns, setStoppingRuns] = useState>({}); - const [runSummaries, setRunSummaries] = useState>({}); - const [runHistories, setRunHistories] = useState>({}); - const [liveRuns, setLiveRuns] = useState>({}); - const [liveScenarioFocus, setLiveScenarioFocus] = useState>({}); - const [loadedHistoryRuns, setLoadedHistoryRuns] = useState>({}); - const [logsOpen, setLogsOpen] = useState(false); - const [logsAutoScroll, setLogsAutoScroll] = useState(true); - const [logsDetached, setLogsDetached] = useState(false); - const [logDrawerHeight, setLogDrawerHeight] = useState(240); - const [detailModal, setDetailModal] = useState(null); - const [isBusy, setIsBusy] = useState(true); - const [error, setError] = useState(null); - const [appNotice, setAppNotice] = useState(null); - const [settingsNotice, setSettingsNotice] = useState(null); - const [benchPackMutations, setBenchPackMutations] = useState>({}); - const themeMenuRef = useRef(null); - const settingsOpenRef = useRef(false); - - const providerIds = useMemo(() => Object.keys(draft?.providers ?? 
{}), [draft]); - const themeOptions = useMemo(() => ["system", ...availableThemes.map((theme) => theme.id)], [availableThemes]); - const currentThemeLabel = useMemo( - () => resolveThemeLabel(draft?.ui.theme ?? "system", availableThemes, systemPrefersDark), - [draft?.ui.theme, availableThemes, systemPrefersDark] - ); - const readyInspections = useMemo(() => benchPackInspections.filter((inspection) => inspection.status === "ready"), [benchPackInspections]); - const activeWorkspace = useMemo( - () => (workspaceState?.activeWorkspaceId ? workspaceState.workspaces[workspaceState.activeWorkspaceId] ?? null : null), - [workspaceState] - ); - const workspaceTabs = useMemo( - () => - activeWorkspace?.tabIds - .map((tabId) => workspaceState?.tabs[tabId]) - .filter((tab): tab is BenchLocalWorkspaceTab => Boolean(tab)) ?? [], - [activeWorkspace, workspaceState] - ); - const activeTab = useMemo( - () => (activeWorkspace?.activeTabId ? workspaceState?.tabs[activeWorkspace.activeTabId] ?? null : workspaceTabs[0] ?? null), - [activeWorkspace, workspaceState, workspaceTabs] - ); - const activeInspection = useMemo( - () => benchPackInspections.find((inspection) => inspection.id === activeTab?.benchPackId) ?? null, - [benchPackInspections, activeTab] - ); - const activeVerifierStatus = useMemo( - () => (activeInspection ? verifierStatuses[activeInspection.id] ?? null : null), - [activeInspection, verifierStatuses] - ); - const activeTabModels = useMemo(() => (draft ? resolveTabModels(activeTab, draft.models) : []), [draft, activeTab]); - const activeRunSummary = useMemo(() => (activeTab ? runSummaries[activeTab.id] ?? null : null), [runSummaries, activeTab]); - const activeLiveRun = useMemo(() => (activeTab ? liveRuns[activeTab.id] ?? null : null), [liveRuns, activeTab]); - const activeLiveScenarioFocus = useMemo( - () => (activeTab ? liveScenarioFocus[activeTab.id] ?? 
null : null), - [liveScenarioFocus, activeTab] - ); - const activeRunBlocker = useMemo( - () => - activeInspection && draft - ? getRequiredVerifierRunBlocker(activeInspection.manifest, draft.benchpacks[activeInspection.id], activeVerifierStatus ?? undefined) - : null, - [activeInspection, activeVerifierStatus, draft] - ); - const activeLoadedHistory = useMemo( - () => (activeTab ? loadedHistoryRuns[activeTab.id] ?? null : null), - [loadedHistoryRuns, activeTab] - ); - const activeDisplayModels = useMemo(() => { - if (!draft) { - return []; - } - - if (activeLoadedHistory) { - return resolveHistoryModels(activeRunSummary, draft.models); - } - - return activeTabModels; - }, [draft, activeLoadedHistory, activeRunSummary, activeTabModels]); - const downloadedUpdateVersion = appUpdateState?.downloadedVersion ?? appUpdateState?.availableVersion ?? null; - const showDownloadedUpdateBanner = - appUpdateState?.status === "downloaded" && downloadedUpdateVersion !== dismissedDownloadedUpdateVersion; - const activeLogEvents = activeLiveRun?.events ?? activeRunSummary?.events ?? []; - const logContainerRef = useRef(null); - const tabStripShellRef = useRef(null); - const tabStripRef = useRef(null); - const tabChipRefs = useRef(new Map()); - const modelDiscoveryCacheRef = useRef>({}); - const replayRunTokensRef = useRef(new Map()); - const appliedThemeKeysRef = useRef([]); - const [tabStripOverflow, setTabStripOverflow] = useState(false); - const [activeTabMask, setActiveTabMask] = useState<{ left: number; width: number } | null>(null); - - const hasUnsavedChanges = - loadState && draft ? JSON.stringify(loadState.config) !== JSON.stringify(draft) : false; - const effectiveThemeId = useMemo(() => { - const requested = draft?.ui.theme ?? "system"; - - if (requested === "system") { - return systemPrefersDark ? 
"dark" : "light"; - } - - return requested; - }, [draft?.ui.theme, systemPrefersDark]); - - const updateDraft = (updater: (current: BenchLocalConfig) => BenchLocalConfig) => { - setDraft((current) => { - if (!current) { - return current; - } - - return updater(cloneConfig(current)); - }); - }; - - const persistWorkspaceState = async (nextState: BenchLocalWorkspaceState) => { - setWorkspaceState(nextState); - - try { - const saved = await window.benchlocal.workspaces.save(nextState); - setWorkspaceState(saved.state); - } catch (workspaceError) { - setError(workspaceError instanceof Error ? workspaceError.message : "Failed to save workspace state."); - } - }; - - const updateWorkspaceState = (updater: (current: BenchLocalWorkspaceState) => BenchLocalWorkspaceState) => { - setWorkspaceState((current) => { - if (!current) { - return current; - } - - const next = updater(structuredClone(current)); - void persistWorkspaceState(next); - return next; - }); - }; - - const loadBenchPackInspections = async () => { - try { - const inspections = await window.benchlocal.benchPacks.list(); - setBenchPackInspections(inspections); - } catch (pluginError) { - setError(pluginError instanceof Error ? pluginError.message : "Failed to inspect configured Bench Packs."); - } - }; - - const loadRegistryEntries = async () => { - try { - const entries = await window.benchlocal.benchPacks.registry(); - setRegistryEntries(entries); - setRegistryWarning(null); - } catch (registryError) { - setRegistryWarning(formatRegistryWarning(registryError)); - } - }; - - const loadVerifierStatuses = async () => { - try { - const statuses = await window.benchlocal.verifiers.list(); - setVerifierStatuses(Object.fromEntries(statuses.map((status) => [status.benchPackId, status]))); - } catch (verifierError) { - setError(verifierError instanceof Error ? 
verifierError.message : "Failed to load verifier status."); - } - }; - - const loadThemes = async () => { - try { - const themes = await window.benchlocal.themes.list(); - setAvailableThemes(themes); - } catch (themeError) { - setError(themeError instanceof Error ? themeError.message : "Failed to load available themes."); - } - }; - - const checkForAppUpdates = async () => { - try { - const nextState = await window.benchlocal.updates.check(); - setAppUpdateState(nextState); - } catch (updateError) { - setError(formatDesktopErrorMessage(updateError) || "Failed to check for BenchLocal updates."); - } - }; - - const installDownloadedAppUpdate = async () => { - try { - await window.benchlocal.updates.install(); - } catch (updateError) { - setError(formatDesktopErrorMessage(updateError) || "Failed to install the downloaded BenchLocal update."); - } - }; - - const loadHistoryForBenchPack = async (benchPackId: string) => { - try { - const history = await window.benchlocal.benchPacks.history({ benchPackId }); - setRunHistories((current) => ({ - ...current, - [benchPackId]: history - })); - } catch (historyError) { - setError(historyError instanceof Error ? 
historyError.message : "Failed to load Bench Pack history."); - } - }; - - useEffect(() => { - let cancelled = false; - - const load = async () => { - setIsBusy(true); - setError(null); - setRegistryWarning(null); - - try { - const [ - result, - workspaceResult, - inspections, - themes, - verifierStatusList, - activeRunsResult - ] = await Promise.all([ - window.benchlocal.config.load(), - window.benchlocal.workspaces.load(), - window.benchlocal.benchPacks.list(), - window.benchlocal.themes.list(), - window.benchlocal.verifiers.list(), - window.benchlocal.benchPacks.activeRuns() - ]); - - let registry: BenchPackRegistryEntry[] = []; - let nextRegistryWarning: string | null = null; - - try { - registry = await window.benchlocal.benchPacks.registry(); - } catch (registryError) { - nextRegistryWarning = formatRegistryWarning(registryError); - } - - if (cancelled) { - return; - } - - const persistedRunEntries = await Promise.all( - Object.values(workspaceResult.state.tabs) - .filter((tab) => tab.benchPackId && tab.loadedRunId) - .map(async (tab) => { - try { - const summary = await window.benchlocal.benchPacks.loadHistory({ - benchPackId: tab.benchPackId as string, - runId: tab.loadedRunId as string - }); - return [tab.id, summary] as const; - } catch { - return null; - } - }) - ); - - setLoadState(result); - setDraft(cloneConfig(result.config)); - setWorkspaceState(workspaceResult.state); - setRunSummaries( - Object.fromEntries( - persistedRunEntries.filter( - (entry): entry is readonly [string, BenchPackRunSummary] => entry !== null - ) - ) - ); - setLoadedHistoryRuns( - Object.fromEntries( - persistedRunEntries - .filter((entry): entry is readonly [string, BenchPackRunSummary] => entry !== null) - .map(([tabId, summary]) => [ - tabId, - { - runId: summary.runId, - startedAt: summary.startedAt, - mode: "history" - } - ]) - ) - ); - setBenchPackInspections(inspections); - setRegistryEntries(registry); - setRegistryWarning(nextRegistryWarning); - 
setAvailableThemes(themes); - setVerifierStatuses(Object.fromEntries(verifierStatusList.map((status) => [status.benchPackId, status]))); - setActiveRuns( - Object.fromEntries(activeRunsResult.map((run) => [run.tabId, { benchPackId: run.benchPackId }])) - ); - setAppNotice(result.created ? "Created a fresh ~/.benchlocal/config.toml bootstrap." : null); - } catch (loadError) { - if (!cancelled) { - setError(loadError instanceof Error ? loadError.message : "Failed to load BenchLocal config."); - } - } finally { - if (!cancelled) { - setIsBusy(false); - } - } - }; - - void load(); - - return () => { - cancelled = true; - }; - }, []); - - useEffect(() => { - if (typeof window === "undefined") { - return; - } - - const media = window.matchMedia("(prefers-color-scheme: dark)"); - const handleChange = () => { - setSystemPrefersDark(media.matches); - }; - - handleChange(); - media.addEventListener("change", handleChange); - - return () => { - media.removeEventListener("change", handleChange); - }; - }, []); - - useEffect(() => { - let cancelled = false; - - void window.benchlocal.updates - .state() - .then((state) => { - if (!cancelled) { - setAppUpdateState(state); - } - }) - .catch(() => undefined); - - const unsubscribe = window.benchlocal.updates.onState((state) => { - setAppUpdateState(state); - - if (state.status !== "downloaded") { - setDismissedDownloadedUpdateVersion(null); - } - }); - - return () => { - cancelled = true; - unsubscribe(); - }; - }, []); - - useEffect(() => { - let cancelled = false; - - const loadTheme = async () => { - const theme = await window.benchlocal.themes.load({ themeId: effectiveThemeId }); - - if (!cancelled) { - setActiveThemeDefinition(theme); - } - }; - - void loadTheme(); - - return () => { - cancelled = true; - }; - }, [effectiveThemeId]); - - useEffect(() => { - if (!activeThemeDefinition || typeof document === "undefined") { - return; - } - - const root = document.documentElement; - - for (const key of appliedThemeKeysRef.current) 
{ - root.style.removeProperty(key); - } - - for (const [key, value] of Object.entries(activeThemeDefinition.variables)) { - root.style.setProperty(key, value); - } - - appliedThemeKeysRef.current = Object.keys(activeThemeDefinition.variables); - root.style.setProperty("color-scheme", activeThemeDefinition.colorScheme); - root.dataset.theme = activeThemeDefinition.id; - }, [activeThemeDefinition]); - - useEffect(() => { - return window.benchlocal.benchPacks.onRunEvent(({ tabId, event }) => { - if (event.type === "verifier_preparing") { - setVerifierPreparationModal({ - tabId, - progress: event - }); - } else { - setVerifierPreparationModal((current) => (current?.tabId === tabId ? null : current)); - } - - if (event.type === "run_finished" || event.type === "run_error") { - setActiveRuns((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - setStoppingRuns((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - } - - setLiveRuns((current) => ({ - ...current, - [tabId]: updateLiveRunState(current[tabId], event) - })); - - if (event.type === "run_started") { - setLiveScenarioFocus((current) => ({ - ...current, - [tabId]: { - liveScenarioId: null, - autoFollow: true - } - })); - } else if ( - event.type === "scenario_started" || - event.type === "model_progress" || - event.type === "scenario_result" || - event.type === "scenario_finished" - ) { - setLiveScenarioFocus((current) => { - const existing = current[tabId]; - return { - ...current, - [tabId]: { - liveScenarioId: event.scenarioId, - autoFollow: existing?.autoFollow ?? 
true - } - }; - }); - } - }); - }, []); - - useEffect(() => { - return window.benchlocal.benchPacks.onMutationProgress((payload) => { - setBenchPackMutations((current) => ({ - ...current, - [payload.benchPackId]: payload - })); - }); - }, []); - - useEffect(() => { - return window.benchlocal.verifiers.onProgress(({ benchPackId, event }) => { - setSettingsVerifierPreparationModal((current) => - current?.benchPackId === benchPackId || current === null - ? { - benchPackId, - progress: event - } - : current - ); - }); - }, []); - - useEffect(() => { - if (!settingsOpen || settingsTab !== "verification") { - return; - } - - void loadVerifierStatuses(); - }, [settingsOpen, settingsTab]); - - useEffect(() => { - if (!settingsOpen || settingsTab !== "advanced") { - return; - } - - setSettingsTab("providers"); - }, [settingsOpen, settingsTab]); - - useEffect(() => { - if (!logsOpen || !logsAutoScroll || !logContainerRef.current) { - return; - } - - logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight; - }, [activeLogEvents, logsOpen, logsAutoScroll]); - - useEffect(() => { - if (!activeInspection?.id || activeInspection.status !== "ready") { - return; - } - - void loadHistoryForBenchPack(activeInspection.id); - }, [activeInspection?.id, activeInspection?.status]); - - useEffect(() => { - const dispose = window.benchlocal.logs.onDetachedWindowClosed(() => { - setLogsDetached(false); - }); - - return dispose; - }, []); - - useEffect(() => { - void window.benchlocal.logs.publishDetachedState({ - workspaceName: activeWorkspace?.name ?? "No Workspace", - tabTitle: activeTab?.title ?? 
"No Active Tab", - eventCount: activeLogEvents.length, - events: activeLogEvents - }); - }, [activeWorkspace?.name, activeTab?.title, activeLogEvents]); - - useEffect(() => { - const handleMove = (event: MouseEvent) => { - const shell = document.querySelector(".desktop-shell"); - - if (!shell || !document.body.dataset.logResizeActive) { - return; - } - - const shellRect = shell.getBoundingClientRect(); - const nextHeight = Math.min(420, Math.max(160, shellRect.bottom - event.clientY - 30)); - setLogDrawerHeight(nextHeight); - }; - - const handleUp = () => { - delete document.body.dataset.logResizeActive; - }; - - window.addEventListener("mousemove", handleMove); - window.addEventListener("mouseup", handleUp); - - return () => { - window.removeEventListener("mousemove", handleMove); - window.removeEventListener("mouseup", handleUp); - }; - }, []); - - useEffect(() => { - if (!workspaceContextMenu) { - return; - } - - const closeMenu = () => { - setWorkspaceContextMenu(null); - }; - - const handleKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape") { - closeMenu(); - } - }; - - window.addEventListener("mousedown", closeMenu); - window.addEventListener("scroll", closeMenu, true); - window.addEventListener("resize", closeMenu); - window.addEventListener("keydown", handleKeyDown); - - return () => { - window.removeEventListener("mousedown", closeMenu); - window.removeEventListener("scroll", closeMenu, true); - window.removeEventListener("resize", closeMenu); - window.removeEventListener("keydown", handleKeyDown); - }; - }, [workspaceContextMenu]); - - useEffect(() => { - if (!themeMenuOpen) { - return; - } - - const handlePointerDown = (event: MouseEvent) => { - const target = event.target as Node; - if (!themeMenuRef.current?.contains(target)) { - setThemeMenuOpen(false); - } - }; - - const handleEscape = (event: KeyboardEvent) => { - if (event.key === "Escape") { - setThemeMenuOpen(false); - } - }; - - window.addEventListener("mousedown", 
handlePointerDown); - window.addEventListener("keydown", handleEscape); - - return () => { - window.removeEventListener("mousedown", handlePointerDown); - window.removeEventListener("keydown", handleEscape); - }; - }, [themeMenuOpen]); - - useEffect(() => { - return window.benchlocal.app.onOpenAbout(() => { - setAboutDialogOpen(true); - - if (!appMetadata) { - void window.benchlocal.app - .metadata() - .then((metadata) => { - setAppMetadata(metadata); - }) - .catch(() => undefined); - } - }); - }, [appMetadata]); - - useEffect(() => { - return window.benchlocal.app.onOpenSettings(() => { - setSettingsOpen(true); - }); - }, []); - - useEffect(() => { - settingsOpenRef.current = settingsOpen; - - if (!settingsOpen) { - setSettingsNotice(null); - } - }, [settingsOpen]); - - useEffect(() => { - if (typeof window === "undefined") { - return; - } - - window.localStorage.setItem(SIDEBAR_OPEN_STORAGE_KEY, String(sidebarOpen)); - }, [sidebarOpen]); - - useEffect(() => { - const updateOverflow = () => { - const element = tabStripRef.current; - - if (!element) { - setTabStripOverflow(false); - return; - } - - setTabStripOverflow(element.scrollWidth > element.clientWidth + 4); - }; - - updateOverflow(); - window.addEventListener("resize", updateOverflow); - - return () => { - window.removeEventListener("resize", updateOverflow); - }; - }, [workspaceTabs.length, activeWorkspace?.id, sidebarOpen]); - - useEffect(() => { - const shell = tabStripShellRef.current; - const strip = tabStripRef.current; - const activeTabId = activeTab?.id; - - if (!shell || !strip || !activeTabId) { - setActiveTabMask(null); - return; - } - - const updateMask = () => { - const activeElement = tabChipRefs.current.get(activeTabId); - - if (!activeElement) { - setActiveTabMask(null); - return; - } - - const shellRect = shell.getBoundingClientRect(); - const tabRect = activeElement.getBoundingClientRect(); - - setActiveTabMask({ - left: Math.round(tabRect.left - shellRect.left), - width: 
Math.round(tabRect.width) - }); - }; - - const frameId = window.requestAnimationFrame(updateMask); - window.addEventListener("resize", updateMask); - strip.addEventListener("scroll", updateMask, { passive: true }); - - return () => { - window.cancelAnimationFrame(frameId); - window.removeEventListener("resize", updateMask); - strip.removeEventListener("scroll", updateMask); - }; - }, [activeTab?.id, workspaceTabs, sidebarOpen, tabStripOverflow]); - - const persistConfig = async ( - nextConfig: BenchLocalConfig, - options?: { - notice?: string | null; - preserveFilesystemDraft?: boolean; - previousDraft?: BenchLocalConfig | null; - previousLoadConfig?: BenchLocalConfig | null; - } - ): Promise => { - if (!nextConfig) { - return false; - } - - setIsBusy(true); - setError(null); - - try { - const result = await window.benchlocal.config.save(nextConfig); - setLoadState(result); - setDraft( - options?.preserveFilesystemDraft && options.previousDraft && options.previousLoadConfig - ? reapplyPendingFilesystemDraft(result.config, options.previousDraft, options.previousLoadConfig) - : cloneConfig(result.config) - ); - await loadBenchPackInspections(); - await loadRegistryEntries(); - if (settingsOpenRef.current && options?.notice) { - setSettingsNotice(options.notice); - } - return true; - } catch (saveError) { - setError(saveError instanceof Error ? saveError.message : "Failed to save BenchLocal config."); - return false; - } finally { - setIsBusy(false); - } - }; - - const save = async (): Promise => { - if (!draft) { - return false; - } - - return persistConfig(draft, { notice: "Saved ~/.benchlocal/config.toml" }); - }; - - const refreshBenchPackState = async (result?: LoadState) => { - const nextLoadState = result ?? 
(await window.benchlocal.config.load()); - const inspections = await window.benchlocal.benchPacks.list(); - const verifierStatusList = await window.benchlocal.verifiers.list(); - let registry = registryEntries; - - try { - registry = await window.benchlocal.benchPacks.registry(); - setRegistryWarning(null); - } catch (registryError) { - setRegistryWarning(formatRegistryWarning(registryError)); - } - - setLoadState(nextLoadState); - setDraft(cloneConfig(nextLoadState.config)); - setBenchPackInspections(inspections); - setRegistryEntries(registry); - setVerifierStatuses(Object.fromEntries(verifierStatusList.map((status) => [status.benchPackId, status]))); - }; - - const ensureBenchPackMutationReady = async (): Promise => { - if (!hasUnsavedChanges) { - return true; - } - - return save(); - }; - - const installBenchPack = async (benchPackId: string) => { - if (!(await ensureBenchPackMutationReady())) { - return; - } - - setIsBusy(true); - setError(null); - setBenchPackMutations((current) => ({ - ...current, - [benchPackId]: { - benchPackId, - action: "install", - phase: "resolving", - message: "Resolving Bench Pack from registry." 
- } - })); - - try { - const result = await window.benchlocal.benchPacks.install({ benchPackId }); - await refreshBenchPackState(result); - if (settingsOpenRef.current) { - setSettingsNotice(`Installed ${benchPackId}.`); - } - } catch (installError) { - setError(formatRegistryMutationError("install", benchPackId, installError)); - } finally { - setIsBusy(false); - setBenchPackMutations((current) => { - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - }; - - const installBenchPackFromUrl = async (url: string) => { - if (!(await ensureBenchPackMutationReady())) { - return; - } - - const normalizedUrl = url.trim(); - - if (!normalizedUrl) { - setError("Bench Pack URL is required."); - return; - } - - setIsBusy(true); - setError(null); - let installedBenchPackId: string | null = null; - setBenchPackMutations((current) => ({ - ...current, - [THIRD_PARTY_INSTALL_MUTATION_ID]: { - benchPackId: THIRD_PARTY_INSTALL_MUTATION_ID, - action: "install", - phase: "resolving", - message: "Resolving Bench Pack from URL." - } - })); - - try { - const result = await window.benchlocal.benchPacks.installFromUrl({ url: normalizedUrl }); - await refreshBenchPackState(result); - installedBenchPackId = - Object.entries(result.config.benchpacks).find(([, benchPack]) => benchPack.source === "archive" && benchPack.url === normalizedUrl)?.[0] ?? - null; - if (settingsOpenRef.current) { - setSettingsNotice(installedBenchPackId ? 
`Installed ${installedBenchPackId}.` : "Installed third-party Bench Pack."); - } - return true; - } catch (installError) { - setError(formatDesktopErrorMessage(installError) || "Failed to install Bench Pack from URL."); - return false; - } finally { - setIsBusy(false); - setBenchPackMutations((current) => { - const next = { ...current }; - delete next[THIRD_PARTY_INSTALL_MUTATION_ID]; - delete next["third-party"]; - if (installedBenchPackId) { - delete next[installedBenchPackId]; - } - return next; - }); - } - }; - - const updateBenchPack = async (benchPackId: string) => { - if (!(await ensureBenchPackMutationReady())) { - return; - } - - setIsBusy(true); - setError(null); - setBenchPackMutations((current) => ({ - ...current, - [benchPackId]: { - benchPackId, - action: "update", - phase: "resolving", - message: "Resolving Bench Pack update." - } - })); - - try { - const result = await window.benchlocal.benchPacks.update({ benchPackId }); - await refreshBenchPackState(result); - if (settingsOpenRef.current) { - setSettingsNotice(`Updated ${benchPackId}.`); - } - } catch (updateError) { - setError(formatRegistryMutationError("update", benchPackId, updateError)); - } finally { - setIsBusy(false); - setBenchPackMutations((current) => { - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - }; - - const uninstallInstalledBenchPack = async (benchPackId: string) => { - if (!(await ensureBenchPackMutationReady())) { - return; - } - - if (Object.values(activeRuns).some((run) => run.benchPackId === benchPackId)) { - setError("Stop active Bench Pack runs before uninstalling this pack."); - return; - } - - setIsBusy(true); - setError(null); - setBenchPackMutations((current) => ({ - ...current, - [benchPackId]: { - benchPackId, - action: "uninstall", - phase: "removing", - message: "Removing Bench Pack." 
- } - })); - - try { - const result = await window.benchlocal.benchPacks.uninstall({ benchPackId }); - await refreshBenchPackState(result); - if (settingsOpenRef.current) { - setSettingsNotice(`Uninstalled ${benchPackId}.`); - } - } catch (uninstallError) { - setError(uninstallError instanceof Error ? uninstallError.message : `Failed to uninstall ${benchPackId}.`); - } finally { - setIsBusy(false); - setBenchPackMutations((current) => { - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - }; - - const reset = () => { - if (!loadState) { - return; - } - - setDraft(cloneConfig(loadState.config)); - setProviderModal(null); - setModelModal(null); - if (settingsOpenRef.current) { - setSettingsNotice("Reverted unsaved changes."); - } - setError(null); - }; - - const saveThemeSelection = async (themeId: string) => { - if (!draft) { - return; - } - - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig ? cloneConfig(previousLoadConfig) : cloneConfig(draft); - nextConfig.ui.theme = themeId; - setDraft(nextConfig); - - const saved = await persistConfig(nextConfig, { - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig - }); - if (!saved) { - setDraft(previousDraft); - } - }; - - const saveVerifierConfig = async ( - benchPackId: string, - verifierId: string, - updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig - ) => { - if (!draft) { - return; - } - - const currentVerifier = draft.benchpacks[benchPackId]?.verifiers?.[verifierId]; - if (!currentVerifier) { - return; - } - - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig ? 
cloneConfig(previousLoadConfig) : cloneConfig(draft); - nextConfig.benchpacks[benchPackId].verifiers![verifierId] = updater(currentVerifier); - setDraft(nextConfig); - - const saved = await persistConfig(nextConfig, { - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig - }); - if (!saved) { - setDraft(previousDraft); - } - }; - - const scrollTabStrip = (delta: number) => { - tabStripRef.current?.scrollBy({ - left: delta, - behavior: "smooth" - }); - }; - - const handleTabStripWheel = (event: React.WheelEvent) => { - const strip = tabStripRef.current; - - if (!strip || !tabStripOverflow) { - return; - } - - const horizontalDelta = Math.abs(event.deltaX) > Math.abs(event.deltaY) ? event.deltaX : event.deltaY; - - if (Math.abs(horizontalDelta) < 1) { - return; - } - - event.preventDefault(); - strip.scrollBy({ - left: horizontalDelta, - behavior: "auto" - }); - }; - - const runTab = async (tab: BenchLocalWorkspaceTab) => { - setError(null); - setAppNotice(null); - - if (!tab.benchPackId || !draft) { - setError("Select a Bench Pack for this tab first."); - return; - } - - const benchPackId = tab.benchPackId; - const selectedModels = resolveTabModels(tab, draft.models); - const inspection = benchPackInspections.find((candidate) => candidate.id === benchPackId); - - if (inspection?.manifest) { - try { - const verifierStatusList = await window.benchlocal.verifiers.list(); - const nextVerifierStatuses = Object.fromEntries(verifierStatusList.map((status) => [status.benchPackId, status])); - setVerifierStatuses(nextVerifierStatuses); - - const runBlocker = getRequiredVerifierRunBlocker( - inspection.manifest, - draft.benchpacks[benchPackId], - nextVerifierStatuses[benchPackId] - ); - - if (runBlocker) { - setConfirmDialog({ - title: runBlocker.title, - subtitle: runBlocker.message, - confirmLabel: runBlocker.actionLabel, - onConfirm: () => { - setSettingsTab("verification"); - setSettingsOpen(true); - } - }); - return; - } - } catch (verifierError) { - 
setError(verifierError instanceof Error ? verifierError.message : "Failed to refresh verifier status."); - return; - } - } - - if (selectedModels.length === 0) { - setError("Select at least one enabled model for this tab before running the Bench Pack."); - return; - } - - if (hasUnsavedChanges) { - const saved = await save(); - - if (!saved) { - return; - } - } - - setActiveRuns((current) => ({ - ...current, - [tab.id]: { benchPackId, mode: "host" } - })); - setStoppingRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => ({ - ...current, - [tab.id]: { - events: [], - resultsByModel: {}, - activeCellKeys: [] - } - })); - setRunSummaries((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLoadedHistoryRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - - try { - const result = await window.benchlocal.benchPacks.run({ - tabId: tab.id, - benchPackId, - modelIds: selectedModels.map((model) => model.id), - executionMode: tab.executionMode, - generation: tab.samplingOverrides - }); - setRunSummaries((current) => ({ - ...current, - [tab.id]: result - })); - updateWorkspaceState((current) => { - const nextTab = current.tabs[tab.id]; - - if (!nextTab) { - return current; - } - - nextTab.loadedRunId = result.runId; - nextTab.updatedAt = new Date().toISOString(); - return current; - }); - if (result.cancelled) { - setAppNotice(`Stopped ${result.benchPackName}.`); - } else { - setAppNotice(`Completed ${result.benchPackName} across ${result.scenarioCount} scenarios and ${result.modelCount} model${result.modelCount === 1 ? 
"" : "s"}.`); - } - await loadBenchPackInspections(); - await loadHistoryForBenchPack(benchPackId); - } catch (runError) { - setError(runError instanceof Error ? runError.message : `Failed to run Bench Pack for ${benchPackId}.`); - } finally { - setVerifierPreparationModal((current) => (current?.tabId === tab.id ? null : current)); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLoadedHistoryRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - } - }; - - const resumeTabRun = async (tab: BenchLocalWorkspaceTab, runSummary: BenchPackRunSummary) => { - setError(null); - setAppNotice(null); - - if (!tab.benchPackId || !draft) { - setError("Select a Bench Pack for this tab first."); - return; - } - - if (isRunSummaryComplete(runSummary)) { - setError("This saved run is already complete."); - return; - } - - const benchPackId = tab.benchPackId; - const previousLoadedHistory = loadedHistoryRuns[tab.id] ?? null; - const previousTabModelSelections = structuredClone(tab.modelSelections); - const previousExecutionMode = tab.executionMode; - - if (hasUnsavedChanges) { - const saved = await save(); - - if (!saved) { - return; - } - } - - const historicalSelections = buildHistoryModelSelections(runSummary, draft.models); - updateWorkspaceState((current) => { - const nextTab = current.tabs[tab.id]; - - if (!nextTab) { - return current; - } - - nextTab.modelSelections = normalizeTabModelSelections(historicalSelections); - nextTab.executionMode = runSummary.executionMode ?? 
nextTab.executionMode; - nextTab.updatedAt = new Date().toISOString(); - return current; - }); - - setLoadedHistoryRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setActiveRuns((current) => ({ - ...current, - [tab.id]: { benchPackId, mode: "host" } - })); - setStoppingRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => ({ - ...current, - [tab.id]: { - runId: runSummary.runId, - events: [], - resultsByModel: {}, - activeCellKeys: [] - } - })); - - try { - const result = await window.benchlocal.benchPacks.resumeRun({ - tabId: tab.id, - benchPackId, - runId: runSummary.runId, - executionMode: runSummary.executionMode ?? tab.executionMode, - generation: tab.samplingOverrides - }); - setRunSummaries((current) => ({ - ...current, - [tab.id]: result - })); - updateWorkspaceState((current) => { - const nextTab = current.tabs[tab.id]; - - if (!nextTab) { - return current; - } - - nextTab.loadedRunId = result.runId; - nextTab.updatedAt = new Date().toISOString(); - return current; - }); - if (result.cancelled) { - setAppNotice(`Stopped ${result.benchPackName}.`); - } else { - setAppNotice( - isRunSummaryComplete(result) - ? `Completed ${result.benchPackName} across ${result.scenarioCount} scenarios and ${result.modelCount} model${result.modelCount === 1 ? 
"" : "s"}.` - : `Resumed ${result.benchPackName}, but the run is still incomplete.` - ); - } - await loadBenchPackInspections(); - await loadHistoryForBenchPack(benchPackId); - } catch (runError) { - updateWorkspaceState((current) => { - const nextTab = current.tabs[tab.id]; - - if (!nextTab) { - return current; - } - - nextTab.modelSelections = structuredClone(previousTabModelSelections); - nextTab.executionMode = previousExecutionMode; - nextTab.updatedAt = new Date().toISOString(); - return current; - }); - if (previousLoadedHistory) { - setLoadedHistoryRuns((current) => ({ - ...current, - [tab.id]: previousLoadedHistory - })); - } - setError(runError instanceof Error ? runError.message : `Failed to resume Bench Pack for ${benchPackId}.`); - } finally { - setVerifierPreparationModal((current) => (current?.tabId === tab.id ? null : current)); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - } - }; - - const replayTabRun = async (tab: BenchLocalWorkspaceTab, runSummary: BenchPackRunSummary) => { - if (!tab.benchPackId) { - setError("Select a Bench Pack for this tab first."); - return; - } - - if (!isRunSummaryComplete(runSummary)) { - setError("Replay is only available for completed test runs."); - return; - } - - const inspection = benchPackInspections.find((candidate) => candidate.id === tab.benchPackId); - const scenarios = inspection?.scenarios ?? []; - const modelIds = resolveHistoryModels(runSummary, draft?.models ?? 
[]).map((model) => model.id); - const replayGroups = buildReplayGroups(runSummary, scenarios, modelIds); - const token = Symbol(`replay:${tab.id}`); - replayRunTokensRef.current.set(tab.id, token); - - setError(null); - setAppNotice(null); - setActiveRuns((current) => ({ - ...current, - [tab.id]: { benchPackId: tab.benchPackId as string, mode: "replay" } - })); - setStoppingRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => ({ - ...current, - [tab.id]: { - runId: runSummary.runId, - events: [], - resultsByModel: {}, - activeCellKeys: [] - } - })); - setLiveScenarioFocus((current) => ({ - ...current, - [tab.id]: { - liveScenarioId: null, - autoFollow: supportsLiveScenarioColumnFocus(runSummary.executionMode ?? tab.executionMode) - } - })); - - const wait = async (ms: number) => { - await new Promise((resolve) => setTimeout(resolve, ms)); - }; - - try { - for (const group of replayGroups) { - if (replayRunTokensRef.current.get(tab.id) !== token) { - return; - } - - const nextActiveCellKeys = group.map((cell) => getCellKey(cell.modelId, cell.scenarioId)); - const leadScenarioId = group[0]?.scenarioId ?? null; - - setLiveRuns((current) => { - const existing = current[tab.id]; - return { - ...current, - [tab.id]: { - runId: runSummary.runId, - events: existing?.events ?? [], - resultsByModel: existing?.resultsByModel ?? {}, - activeCellKeys: nextActiveCellKeys - } - }; - }); - if (leadScenarioId && supportsLiveScenarioColumnFocus(runSummary.executionMode ?? tab.executionMode)) { - setLiveScenarioFocus((current) => ({ - ...current, - [tab.id]: { - liveScenarioId: leadScenarioId, - autoFollow: true - } - })); - } - - await wait(1000); - - if (replayRunTokensRef.current.get(tab.id) !== token) { - return; - } - - setLiveRuns((current) => { - const existing = current[tab.id]; - const nextResultsByModel = { ...(existing?.resultsByModel ?? 
{}) }; - - for (const cell of group) { - nextResultsByModel[cell.modelId] = [ - ...(nextResultsByModel[cell.modelId] ?? []).filter((candidate) => candidate.scenarioId !== cell.scenarioId), - cell.result - ]; - } - - return { - ...current, - [tab.id]: { - runId: runSummary.runId, - events: existing?.events ?? [], - resultsByModel: nextResultsByModel, - activeCellKeys: [] - } - }; - }); - } - - setAppNotice(`Replayed ${runSummary.benchPackName}.`); - } finally { - if (replayRunTokensRef.current.get(tab.id) === token) { - replayRunTokensRef.current.delete(tab.id); - } - - setActiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - } - }; - - const stopTabRun = async (tabId: string) => { - const activeRun = activeRuns[tabId]; - - if (activeRun?.mode === "replay") { - replayRunTokensRef.current.delete(tabId); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setLiveRuns((current) => ({ - ...current, - [tabId]: { - ...(current[tabId] ?? 
{ - events: [], - resultsByModel: {}, - activeCellKeys: [] - }), - activeCellKeys: [] - } - })); - setAppNotice("Stopped replay."); - return; - } - - setStoppingRuns((current) => ({ - ...current, - [tabId]: true - })); - - try { - const result = await window.benchlocal.benchPacks.stop({ tabId }); - - if (!result.stopped) { - setAppNotice("That Bench Pack run was no longer active."); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - return; - } - - setAppNotice("Stopping Bench Pack run..."); - } catch (stopError) { - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setError(stopError instanceof Error ? stopError.message : "Failed to stop Bench Pack run."); - } - }; - - const cancelSettingsVerifierStart = async (benchPackId: string) => { - setStoppingVerifierStarts((current) => ({ - ...current, - [benchPackId]: true - })); - - try { - const result = await window.benchlocal.verifiers.cancelStart({ benchPackId }); - - if (!result.cancelled) { - setSettingsVerifierPreparationModal((current) => (current?.benchPackId === benchPackId ? null : current)); - setStoppingVerifierStarts((current) => { - if (!current[benchPackId]) { - return current; - } - - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - } catch (cancelError) { - setStoppingVerifierStarts((current) => { - if (!current[benchPackId]) { - return current; - } - - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - setError(cancelError instanceof Error ? 
cancelError.message : "Failed to cancel verifier start."); - } - }; - - const createWorkspace = () => { - updateWorkspaceState((current) => { - const now = new Date().toISOString(); - const workspaceId = `workspace-${crypto.randomUUID()}`; - const tabId = `tab-${crypto.randomUUID()}`; - - current.workspaceOrder.push(workspaceId); - current.activeWorkspaceId = workspaceId; - current.workspaces[workspaceId] = { - id: workspaceId, - name: createWorkspaceName(current.workspaceOrder.length - 1), - tabIds: [tabId], - activeTabId: tabId, - createdAt: now, - updatedAt: now - }; - current.tabs[tabId] = { - id: tabId, - title: "New Tab", - benchPackId: null, - loadedRunId: null, - focusedScenarioId: null, - modelSelections: [], - samplingOverrides: {}, - executionMode: "parallel_by_test_case", - createdAt: now, - updatedAt: now - }; - - return current; - }); - }; - - const renameWorkspace = (workspaceId: string, name: string) => { - updateWorkspaceState((current) => { - const workspace = current.workspaces[workspaceId]; - - if (!workspace) { - return current; - } - - workspace.name = name.trim(); - workspace.updatedAt = new Date().toISOString(); - return current; - }); - }; - - const deleteWorkspace = (workspaceId: string) => { - const removedTabIds = new Set(workspaceState?.workspaces[workspaceId]?.tabIds ?? 
[]); - - if (Array.from(removedTabIds).some((tabId) => activeRuns[tabId])) { - setError("Stop active Bench Pack runs before deleting this workspace."); - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[workspaceId]; - - if (!workspace) { - return current; - } - - for (const tabId of workspace.tabIds) { - delete current.tabs[tabId]; - } - - delete current.workspaces[workspaceId]; - current.workspaceOrder = current.workspaceOrder.filter((id) => id !== workspaceId); - - if (current.workspaceOrder.length === 0) { - const now = new Date().toISOString(); - const nextWorkspaceId = `workspace-${crypto.randomUUID()}`; - const nextTabId = `tab-${crypto.randomUUID()}`; - - current.workspaceOrder = [nextWorkspaceId]; - current.activeWorkspaceId = nextWorkspaceId; - current.workspaces[nextWorkspaceId] = { - id: nextWorkspaceId, - name: "My Workspace", - tabIds: [nextTabId], - activeTabId: nextTabId, - createdAt: now, - updatedAt: now - }; - current.tabs[nextTabId] = { - id: nextTabId, - title: "New Tab", - benchPackId: null, - loadedRunId: null, - focusedScenarioId: null, - modelSelections: [], - samplingOverrides: {}, - executionMode: "parallel_by_test_case", - createdAt: now, - updatedAt: now - }; - } else if (current.activeWorkspaceId === workspaceId) { - current.activeWorkspaceId = current.workspaceOrder[0] ?? 
null; - } - - return current; - }); - - if (removedTabIds.size > 0) { - setRunSummaries((current) => - Object.fromEntries(Object.entries(current).filter(([tabId]) => !removedTabIds.has(tabId))) - ); - setLiveRuns((current) => - Object.fromEntries(Object.entries(current).filter(([tabId]) => !removedTabIds.has(tabId))) - ); - setActiveRuns((current) => - Object.fromEntries(Object.entries(current).filter(([tabId]) => !removedTabIds.has(tabId))) - ); - setStoppingRuns((current) => - Object.fromEntries(Object.entries(current).filter(([tabId]) => !removedTabIds.has(tabId))) as Record - ); - } - }; - - const exportWorkspace = async (workspaceId: string) => { - if (!workspaceState) { - return; - } - - try { - const result = await window.benchlocal.workspaces.export({ - workspaceId, - state: workspaceState - }); - - if (result.exported) { - setAppNotice(`Exported workspace to ${result.filePath}.`); - } - } catch (workspaceError) { - setError(workspaceError instanceof Error ? workspaceError.message : "Failed to export workspace."); - } - }; - - const importWorkspace = async () => { - try { - const result = await window.benchlocal.workspaces.import(); - - if (!result.imported || !result.workspace || !result.tabs) { - return; - } - - const importedWorkspace = result.workspace; - const importedTabs = result.tabs; - const workspaceIdMap = new Map(); - const tabIdMap = new Map(); - const newWorkspaceId = `workspace-${crypto.randomUUID()}`; - workspaceIdMap.set(importedWorkspace.id, newWorkspaceId); - - updateWorkspaceState((current) => { - const now = new Date().toISOString(); - const nextTabIds = importedWorkspace.tabIds.map((tabId) => { - const nextTabId = `tab-${crypto.randomUUID()}`; - tabIdMap.set(tabId, nextTabId); - const importedTab = importedTabs[tabId]; - - if (importedTab) { - const importedTabRecord = importedTab as typeof importedTab & { - pluginId?: string | null; - }; - current.tabs[nextTabId] = { - ...importedTabRecord, - id: nextTabId, - benchPackId: 
importedTabRecord.benchPackId ?? importedTabRecord.pluginId ?? null, - samplingOverrides: importedTab.samplingOverrides ?? {}, - createdAt: importedTab.createdAt ?? now, - updatedAt: now - }; - } - - return nextTabId; - }); - - current.workspaceOrder.push(newWorkspaceId); - current.activeWorkspaceId = newWorkspaceId; - current.workspaces[newWorkspaceId] = { - ...importedWorkspace, - id: newWorkspaceId, - name: - Object.values(current.workspaces).some((workspace) => workspace.name === importedWorkspace.name) - ? `${importedWorkspace.name} Imported` - : importedWorkspace.name, - tabIds: nextTabIds, - activeTabId: importedWorkspace.activeTabId ? tabIdMap.get(importedWorkspace.activeTabId) ?? nextTabIds[0] ?? null : nextTabIds[0] ?? null, - createdAt: importedWorkspace.createdAt ?? now, - updatedAt: now - }; - - return current; - }); - - setAppNotice(`Imported workspace "${importedWorkspace.name}".`); - } catch (workspaceError) { - setError(workspaceError instanceof Error ? workspaceError.message : "Failed to import workspace."); - } - }; - - const activateWorkspace = (workspaceId: string) => { - setWorkspaceContextMenu(null); - updateWorkspaceState((current) => { - current.activeWorkspaceId = workspaceId; - return current; - }); - }; - - const createTab = (benchPackId: string) => { - if (!activeWorkspace) { - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[activeWorkspace.id]; - - if (!workspace) { - return current; - } - - const now = new Date().toISOString(); - const tabId = `tab-${crypto.randomUUID()}`; - current.tabs[tabId] = { - id: tabId, - title: createTabTitle(benchPackId, benchPackInspections), - benchPackId, - loadedRunId: null, - focusedScenarioId: null, - modelSelections: [], - samplingOverrides: {}, - executionMode: "parallel_by_test_case", - createdAt: now, - updatedAt: now - }; - workspace.tabIds.push(tabId); - workspace.activeTabId = tabId; - workspace.updatedAt = now; - return current; - }); - 
setTabMenuOpen(false); - }; - - const assignBenchPackToTab = (tabId: string, benchPackId: string) => { - updateWorkspaceState((current) => { - const tab = current.tabs[tabId]; - - if (!tab) { - return current; - } - - tab.title = createTabTitle(benchPackId, benchPackInspections); - tab.benchPackId = benchPackId; - tab.loadedRunId = null; - tab.focusedScenarioId = null; - tab.samplingOverrides = {}; - tab.updatedAt = new Date().toISOString(); - - return current; - }); - setTabMenuOpen(false); - }; - - const activateTab = (tabId: string) => { - if (!activeWorkspace) { - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[activeWorkspace.id]; - - if (!workspace) { - return current; - } - - workspace.activeTabId = tabId; - workspace.updatedAt = new Date().toISOString(); - return current; - }); - }; - - const startEditingTab = (tabId: string, currentTitle: string) => { - const width = tabChipRefs.current.get(tabId)?.offsetWidth ?? 180; - setEditingTab({ - tabId, - value: currentTitle, - width - }); - }; - - const commitEditingTab = () => { - if (!editingTab) { - return; - } - - const nextTitle = editingTab.value.trim() || "New Tab"; - - updateWorkspaceState((current) => { - const tab = current.tabs[editingTab.tabId]; - - if (!tab) { - return current; - } - - tab.title = nextTitle; - tab.updatedAt = new Date().toISOString(); - return current; - }); - - setEditingTab(null); - }; - - const cancelEditingTab = () => { - setEditingTab(null); - }; - - const reorderTab = (draggedId: string, targetId: string) => { - if (!activeWorkspace || draggedId === targetId) { - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[activeWorkspace.id]; - - if (!workspace) { - return current; - } - - const nextTabIds = [...workspace.tabIds]; - const fromIndex = nextTabIds.indexOf(draggedId); - const toIndex = nextTabIds.indexOf(targetId); - - if (fromIndex < 0 || toIndex < 0) { - return current; - } - - const [moved] 
= nextTabIds.splice(fromIndex, 1); - nextTabIds.splice(toIndex, 0, moved); - workspace.tabIds = nextTabIds; - workspace.updatedAt = new Date().toISOString(); - return current; - }); - }; - - const closeTab = (tabId: string) => { - if (!activeWorkspace) { - return; - } - - if (activeRuns[tabId]) { - setError("Stop the Bench Pack run before closing this tab."); - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[activeWorkspace.id]; - - if (!workspace) { - return current; - } - - workspace.tabIds = workspace.tabIds.filter((id) => id !== tabId); - delete current.tabs[tabId]; - - workspace.activeTabId = - workspace.activeTabId === tabId ? workspace.tabIds[workspace.tabIds.length - 1] ?? null : workspace.activeTabId; - workspace.updatedAt = new Date().toISOString(); - - if (workspace.tabIds.length === 0) { - const replacementTabId = `tab-${crypto.randomUUID()}`; - current.tabs[replacementTabId] = { - id: replacementTabId, - title: "New Tab", - benchPackId: null, - loadedRunId: null, - focusedScenarioId: null, - modelSelections: [], - samplingOverrides: {}, - executionMode: "parallel_by_test_case", - createdAt: workspace.updatedAt, - updatedAt: workspace.updatedAt - }; - workspace.tabIds = [replacementTabId]; - workspace.activeTabId = replacementTabId; - } - - return current; - }); - setRunSummaries((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setLiveRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - }; - - const restoreHistoryRun = async (benchPackId: string, runId: string, mode: "history" | "replay" = "history") => { - if (!activeTab) { - return; - } - - try { - const summary = await window.benchlocal.benchPacks.loadHistory({ benchPackId, runId }); - setRunSummaries((current) => ({ - ...current, - [activeTab.id]: summary - })); - 
updateWorkspaceState((current) => { - const tab = current.tabs[activeTab.id]; - - if (!tab) { - return current; - } - - tab.loadedRunId = summary.runId; - tab.updatedAt = new Date().toISOString(); - return current; - }); - setLiveRuns((current) => { - const next = { ...current }; - delete next[activeTab.id]; - return next; - }); - setLoadedHistoryRuns((current) => ({ - ...current, - [activeTab.id]: { - runId, - startedAt: summary.startedAt, - mode - } - })); - if (summary.executionMode) { - updateWorkspaceState((current) => { - const tab = current.tabs[activeTab.id]; - - if (!tab) { - return current; - } - - tab.executionMode = summary.executionMode ?? tab.executionMode; - tab.updatedAt = new Date().toISOString(); - return current; - }); - } - } catch (historyError) { - setError(historyError instanceof Error ? historyError.message : "Failed to load Bench Pack history."); - } - }; - - const retryScenarioFromDetail = async (detail: DetailModalState) => { - if (!workspaceState) { - return; - } - - if (!detail.runId) { - setError("This scenario does not belong to a saved test run yet."); - return; - } - - const tab = workspaceState.tabs[detail.tabId]; - - if (!tab || tab.benchPackId !== detail.benchPackId) { - setError("The original tab for this test is no longer available."); - return; - } - - if (hasUnsavedChanges) { - const saved = await save(); - - if (!saved) { - return; - } - } - - const retryKey = detailModalKey(detail); - const retryCellKey = getCellKey(detail.modelId, detail.scenarioId); - setDetailModal((current) => (current && detailModalKey(current) === retryKey ? null : current)); - setLiveRuns((current) => { - const existing = current[detail.tabId]; - - if (existing) { - return { - ...current, - [detail.tabId]: { - ...existing, - runId: existing.runId ?? detail.runId ?? undefined, - activeCellKeys: existing.activeCellKeys.includes(retryCellKey) - ? 
existing.activeCellKeys - : [...existing.activeCellKeys, retryCellKey] - } - }; - } - - return { - ...current, - [detail.tabId]: { - runId: detail.runId ?? undefined, - events: [], - resultsByModel: {}, - activeCellKeys: [retryCellKey] - } - }; - }); - - try { - await window.benchlocal.benchPacks.retryScenario({ - tabId: detail.tabId, - benchPackId: detail.benchPackId, - runId: detail.runId, - scenarioId: detail.scenarioId, - modelId: detail.modelId, - generation: tab.samplingOverrides - }); - const refreshedSummary = await window.benchlocal.benchPacks.loadHistory({ - benchPackId: detail.benchPackId, - runId: detail.runId - }); - - if (!activeRuns[detail.tabId]) { - setRunSummaries((current) => ({ - ...current, - [detail.tabId]: refreshedSummary - })); - } - await loadHistoryForBenchPack(detail.benchPackId); - setAppNotice(`Retested ${detail.scenarioId} for ${detail.modelId}.`); - } catch (retryError) { - setLiveRuns((current) => { - const existing = current[detail.tabId]; - - if (!existing || !existing.activeCellKeys.includes(retryCellKey)) { - return current; - } - - return { - ...current, - [detail.tabId]: { - ...existing, - activeCellKeys: existing.activeCellKeys.filter((key) => key !== retryCellKey) - } - }; - }); - setError(retryError instanceof Error ? 
retryError.message : "Failed to retry the selected test."); - } - }; - - const clearLoadedHistoryRun = (tabId: string) => { - updateWorkspaceState((current) => { - const tab = current.tabs[tabId]; - - if (!tab) { - return current; - } - - tab.loadedRunId = null; - tab.updatedAt = new Date().toISOString(); - return current; - }); - setLoadedHistoryRuns((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - setRunSummaries((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - setLiveRuns((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - }; - - const clearLoadedHistoryForBenchPack = (benchPackId: string) => { - const affectedTabIds = - workspaceState - ? Object.values(workspaceState.tabs) - .filter((tab) => tab.benchPackId === benchPackId && Boolean(loadedHistoryRuns[tab.id])) - .map((tab) => tab.id) - : []; - - if (affectedTabIds.length === 0) { - return; - } - - updateWorkspaceState((current) => { - for (const tabId of affectedTabIds) { - const tab = current.tabs[tabId]; - - if (!tab) { - continue; - } - - tab.loadedRunId = null; - tab.updatedAt = new Date().toISOString(); - } - - return current; - }); - - setLoadedHistoryRuns((current) => { - const next = { ...current }; - for (const tabId of affectedTabIds) { - delete next[tabId]; - } - return next; - }); - - setRunSummaries((current) => { - const next = { ...current }; - for (const tabId of affectedTabIds) { - delete next[tabId]; - } - return next; - }); - - setLiveRuns((current) => { - const next = { ...current }; - for (const tabId of affectedTabIds) { - delete next[tabId]; - } - return next; - }); - }; - - const removeAllHistoryForBenchPack = async (benchPackId: string, benchPackName: string) => { - try { - await 
window.benchlocal.benchPacks.clearHistory({ benchPackId }); - setRunHistories((current) => ({ - ...current, - [benchPackId]: [] - })); - clearLoadedHistoryForBenchPack(benchPackId); - setHistoryModal(null); - setAppNotice(`Removed all test histories for ${benchPackName}.`); - } catch (historyError) { - setError(historyError instanceof Error ? historyError.message : "Failed to remove Bench Pack history."); - } - }; - - const saveProviderModal = async () => { - if (!providerModal || !draft) { - return; - } - - const providerId = providerModal.form.id.trim(); - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig ? cloneConfig(previousLoadConfig) : cloneConfig(draft); - - nextConfig.providers[providerId] = { - kind: providerModal.form.kind, - name: providerModal.form.name.trim() || defaultProviderName(providerModal.form.kind), - enabled: providerModal.form.enabled, - base_url: providerModal.form.base_url.trim(), - api_key: providerModal.form.api_key.trim() || undefined - }; - - const saved = await persistConfig(nextConfig, { - notice: providerModal.mode === "create" ? "Added provider." : "Updated provider.", - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig - }); - - if (!saved) { - return; - } - - setProviderModal(null); - }; - - const deleteProvider = async (providerId: string): Promise => { - if (!draft) { - return false; - } - - const removedModelIds = new Set((draft?.models ?? []).filter((model) => model.provider === providerId).map((model) => model.id)); - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig ? 
cloneConfig(previousLoadConfig) : cloneConfig(draft); - - delete nextConfig.providers[providerId]; - nextConfig.models = nextConfig.models.filter((model) => model.provider !== providerId); - - const saved = await persistConfig(nextConfig, { - notice: `Deleted provider "${providerId}".`, - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig - }); - - if (!saved) { - return false; - } - - if (removedModelIds.size > 0) { - updateWorkspaceState((current) => { - for (const tab of Object.values(current.tabs)) { - tab.modelSelections = tab.modelSelections.filter((selection) => !removedModelIds.has(selection.modelId)); - } - return current; - }); - } - - return true; - }; - - const confirmDeleteProvider = (providerId: string) => { - const provider = draft?.providers[providerId]; - const linkedModelCount = (draft?.models ?? []).filter((model) => model.provider === providerId).length; - - setConfirmDialog({ - title: "Delete Provider", - subtitle: - linkedModelCount > 0 - ? `Delete ${provider?.name ?? "this provider"}? This will also delete ${linkedModelCount} linked ${linkedModelCount === 1 ? "model" : "models"} and remove them from any tab selections.` - : `Delete ${provider?.name ?? 
"this provider"}?`, - confirmLabel: "Delete Provider", - tone: "danger", - onConfirm: () => { - void deleteProvider(providerId).then((deleted) => { - if (deleted) { - setProviderModal(null); - } - }); - } - }); - }; - - const openModelBrowser = async () => { - if (!modelModal || !draft) { - return; - } - - const provider = draft.providers[modelModal.form.provider]; - - if (!provider) { - setError("Select a provider first."); - return; - } - - if (!providerSupportsModelDiscovery(provider)) { - setError(`${provider.name} does not support model browsing yet.`); - return; - } - - const cacheKey = `${provider.kind}::${provider.base_url}`; - const cachedEntries = modelDiscoveryCacheRef.current[cacheKey]; - - setModelBrowserModal({ - providerId: modelModal.form.provider, - providerName: provider.name, - entries: cachedEntries ?? [], - query: "", - selectedModelId: modelModal.form.model.trim() || cachedEntries?.[0]?.id || null, - loading: !cachedEntries, - error: null - }); - - if (cachedEntries) { - return; - } - - try { - const entries = await window.benchlocal.models.discover({ provider }); - modelDiscoveryCacheRef.current[cacheKey] = entries; - setModelBrowserModal((current) => - current && current.providerId === modelModal.form.provider - ? { - ...current, - entries, - selectedModelId: current.selectedModelId ?? entries[0]?.id ?? null, - loading: false - } - : current - ); - } catch (discoverError) { - setModelBrowserModal((current) => - current && current.providerId === modelModal.form.provider - ? { - ...current, - loading: false, - error: - discoverError instanceof Error - ? discoverError.message - : `Failed to load models from ${provider.name}.` - } - : current - ); - } - }; - - const saveModelModal = async () => { - if (!modelModal || !draft) { - return; - } - - const modelConfig = buildModelConfig(modelModal.form, draft?.providers ?? 
{}); - - if (!modelConfig.provider || !modelConfig.model) { - setError("Model provider and model identifier are required."); - return; - } - - if (!draft?.providers[modelConfig.provider]) { - setError(`Model provider "${modelConfig.provider}" does not exist yet.`); - return; - } - - const previousModelId = modelModal.mode === "edit" ? draft?.models[modelModal.index]?.id ?? null : null; - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig ? cloneConfig(previousLoadConfig) : cloneConfig(draft); - - if (modelModal.mode === "create") { - nextConfig.models.push(modelConfig); - } else { - nextConfig.models[modelModal.index] = modelConfig; - } - - const saved = await persistConfig(nextConfig, { - notice: modelModal.mode === "create" ? "Added model." : "Updated model.", - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig - }); - - if (!saved) { - return; - } - - if (previousModelId && previousModelId !== modelConfig.id) { - updateWorkspaceState((current) => { - for (const tab of Object.values(current.tabs)) { - tab.modelSelections = tab.modelSelections.map((selection) => - selection.modelId === previousModelId ? { ...selection, modelId: modelConfig.id } : selection - ); - } - return current; - }); - } - - setModelModal(null); - }; - - const deleteModel = async (index: number): Promise => { - if (!draft) { - return false; - } - - const removedModelId = draft?.models[index]?.id ?? null; - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig ? 
cloneConfig(previousLoadConfig) : cloneConfig(draft); - nextConfig.models.splice(index, 1); - - const saved = await persistConfig(nextConfig, { - notice: "Deleted model.", - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig - }); - - if (!saved) { - return false; - } - - if (removedModelId) { - updateWorkspaceState((current) => { - for (const tab of Object.values(current.tabs)) { - tab.modelSelections = tab.modelSelections.filter((selection) => selection.modelId !== removedModelId); - } - return current; - }); - } - - return true; - }; - - const confirmDeleteModel = (index: number) => { - const model = draft?.models[index]; - if (!model) { - return; - } - - const linkedTabCount = workspaceState - ? Object.values(workspaceState.tabs).filter((tab) => - tab.modelSelections.some((selection) => selection.modelId === model.id) - ).length - : 0; - - setConfirmDialog({ - title: "Delete Model", - subtitle: - linkedTabCount > 0 - ? `Delete ${model.label}? This will also remove it from ${linkedTabCount} tab ${linkedTabCount === 1 ? "selection" : "selections"}.` - : `Delete ${model.label}?`, - confirmLabel: "Delete Model", - tone: "danger", - onConfirm: () => { - void deleteModel(index).then((deleted) => { - if (deleted) { - setModelModal(null); - } - }); - } - }); - }; - - return ( -
-
-
-
-
- - {!isMacPlatform ? ( -
-

BenchLocal

-
- ) : null} -
- -
- {isMacPlatform ? ( -
-

BenchLocal

-
- ) : null} - - {!settingsOpen ? ( -
- { - if (activeTab && !activeTab.benchPackId) { - assignBenchPackToTab(activeTab.id, benchPackId); - return; - } - - createTab(benchPackId); - }} - disabled={!activeWorkspace} - /> - - {appUpdateState?.status === "downloaded" ? ( - - ) : null} -
- ) : draft ? ( -
-
- - {themeMenuOpen ? ( -
- {themeOptions.map((themeId) => ( - - ))} -
- ) : null} -
-
- ) : null} -
-
- - {settingsOpen && draft ? ( - { - setSettingsNotice(null); - setSettingsOpen(false); - }} - onDismissNotice={() => setSettingsNotice(null)} - onDismissError={() => setError(null)} - onSaveAdvanced={() => void save()} - onResetAdvanced={reset} - onCreateProvider={() => setProviderModal({ mode: "create", form: createEmptyProvider() })} - onEditProvider={(providerId) => - setProviderModal({ - mode: "edit", - initialId: providerId, - form: toProviderForm(providerId, draft.providers[providerId]) - }) - } - onCreateModel={() => setModelModal({ mode: "create", form: createEmptyModel(providerIds[0] ?? "openrouter") })} - onEditModel={(index) => setModelModal({ mode: "edit", index, form: toModelForm(draft.models[index]) })} - onStartVerifier={async (benchPackId, benchPackName, verifierId) => { - setError(null); - setStoppingVerifierStarts((current) => { - if (!current[benchPackId]) { - return current; - } - - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - setSettingsVerifierPreparationModal({ - benchPackId, - progress: { - type: "verifier_preparing", - benchPackId, - benchPackName, - verifierId, - phase: "checking_docker", - message: "Checking Local Docker availability." - } - }); - - try { - const status = await window.benchlocal.verifiers.start({ benchPackId }); - setVerifierStatuses((current) => ({ ...current, [benchPackId]: status })); - } catch (verifierError) { - if (isAbortLikeError(verifierError)) { - if (settingsOpenRef.current) { - setSettingsNotice(`Cancelled preparing ${verifierId}.`); - } - } else { - setError(verifierError instanceof Error ? verifierError.message : "Failed to start verifier."); - } - } finally { - setSettingsVerifierPreparationModal((current) => (current?.benchPackId === benchPackId ? 
null : current)); - setStoppingVerifierStarts((current) => { - if (!current[benchPackId]) { - return current; - } - - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - }} - onStopVerifier={async (benchPackId) => { - try { - const status = await window.benchlocal.verifiers.stop({ benchPackId }); - setVerifierStatuses((current) => ({ ...current, [benchPackId]: status })); - } catch (verifierError) { - setError(verifierError instanceof Error ? verifierError.message : "Failed to stop verifier."); - } - }} - onDeleteVerifierImage={(benchPackId, benchPackName, verifierId) => { - setConfirmDialog({ - title: "Delete Verifier Image", - subtitle: `Delete the Local Docker image for verifier "${verifierId}" in ${benchPackName}? BenchLocal will pull or rebuild it again the next time this verifier starts.`, - confirmLabel: "Delete Image", - tone: "danger", - onConfirm: () => { - void (async () => { - setIsBusy(true); - setError(null); - - try { - const result = await window.benchlocal.verifiers.deleteImage({ benchPackId, verifierId }); - setVerifierStatuses((current) => ({ ...current, [benchPackId]: result.status })); - if (settingsOpenRef.current) { - setSettingsNotice( - result.removed - ? `Deleted Docker image ${result.image}.` - : `Docker image ${result.image} was already absent.` - ); - } - } catch (verifierError) { - setError(verifierError instanceof Error ? 
verifierError.message : "Failed to delete verifier image."); - } finally { - setIsBusy(false); - } - })(); - } - }); - }} - onRefreshRegistry={() => void loadRegistryEntries()} - onInstallBenchPack={(benchPackId) => void installBenchPack(benchPackId)} - onInstallBenchPackFromUrl={(url) => installBenchPackFromUrl(url)} - onUpdateBenchPack={(benchPackId) => void updateBenchPack(benchPackId)} - onUninstallBenchPack={(benchPackId) => void uninstallInstalledBenchPack(benchPackId)} - updateDraft={updateDraft} - onUpdateVerifier={(benchPackId, verifierId, updater) => { - void saveVerifierConfig(benchPackId, verifierId, updater); - }} - /> - ) : ( -
- - -
- {appNotice ? ( - -
- {appNotice} - -
-
- ) : null} - {showDownloadedUpdateBanner ? ( - -
- {describeAppUpdateState(appUpdateState)} - -
-
- ) : null} - {error ? {error} : null} - {isBusy && !draft ? Loading BenchLocal config... : null} - -
- {draft ? ( - activeWorkspace ? ( -
-
- {activeTabMask ? ( - - ) : null} -
- {workspaceTabs.map((tab) => { - const inspection = benchPackInspections.find((candidate) => candidate.id === tab.benchPackId); - const isTabRunning = Boolean(activeRuns[tab.id]); - const hasTabRetryActivity = (liveRuns[tab.id]?.activeCellKeys.length ?? 0) > 0; - const showTabSpinner = isTabRunning || hasTabRetryActivity; - const showWarning = !isTabRunning && inspection && inspection.status !== "ready"; - const isEditingTab = editingTab?.tabId === tab.id; - - return ( - - ); - })} - -
-
- - -
-
-
- {activeInspection && activeTab ? ( - { - if (activeRuns[activeTab.id] && supportsLiveScenarioColumnFocus(activeTab.executionMode)) { - setLiveScenarioFocus((current) => { - const existing = current[activeTab.id]; - const liveScenarioId = existing?.liveScenarioId ?? null; - - return { - ...current, - [activeTab.id]: { - liveScenarioId, - autoFollow: liveScenarioId === scenarioId - } - }; - }); - } - - updateWorkspaceState((current) => { - const tab = activeTab ? current.tabs[activeTab.id] : null; - if (!tab) { - return current; - } - tab.focusedScenarioId = scenarioId; - tab.updatedAt = new Date().toISOString(); - return current; - }); - }} - onEditModels={() => - setTabModelsModal({ - tabId: activeTab.id, - selections: structuredClone(activeTab.modelSelections) - }) - } - onEditSampling={() => - setSamplingModal({ - tabId: activeTab.id, - benchPackId: activeInspection.id, - benchPackName: activeInspection.manifest?.name ?? activeInspection.id, - defaults: { - ...DEFAULT_BENCHLOCAL_GENERATION, - ...(activeInspection.manifest?.samplingDefaults ?? {}) - }, - form: createSamplingForm(activeTab.samplingOverrides) - }) - } - executionMode={activeTab.executionMode} - isViewingHistory={Boolean(activeLoadedHistory)} - onOpenHistory={() => - setHistoryModal({ - benchPackId: activeInspection.id, - benchPackName: activeInspection.manifest?.name ?? activeInspection.id, - entries: runHistories[activeInspection.id] ?? [] - }) - } - onEditModelAlias={(model) => - setModelAliasModal({ - tabId: activeTab.id, - modelId: model.id, - baseLabel: model.label, - alias: model.alias ?? "" - }) - } - onChangeExecutionMode={(executionMode) => - updateWorkspaceState((current) => { - const tab = activeTab ? 
current.tabs[activeTab.id] : null; - if (!tab) { - return current; - } - tab.executionMode = executionMode; - tab.updatedAt = new Date().toISOString(); - return current; - }) - } - isRunning={Boolean(activeRuns[activeTab.id])} - isStopping={Boolean(stoppingRuns[activeTab.id])} - onOpenVerification={() => { - setSettingsTab("verification"); - setSettingsOpen(true); - }} - onRefreshVerification={() => void loadVerifierStatuses()} - onClearHistory={() => clearLoadedHistoryRun(activeTab.id)} - onRun={() => - void ( - activeLoadedHistory?.mode === "replay" && activeRunSummary - ? replayTabRun(activeTab, activeRunSummary) - : activeRunSummary && !isRunSummaryComplete(activeRunSummary) - ? resumeTabRun(activeTab, activeRunSummary) - : runTab(activeTab) - ) - } - onStop={() => void stopTabRun(activeTab.id)} - onOpenDetail={setDetailModal} - /> - ) : ( - { - setSettingsTab("providers"); - setSettingsOpen(true); - }} - onOpenModels={() => { - setSettingsTab("models"); - setSettingsOpen(true); - }} - onOpenBenchPacks={() => { - setSettingsTab("benchPacks"); - setSettingsOpen(true); - }} - onSelectBenchPack={ - activeTab ? () => setTabMenuOpen(true) : undefined - } - /> - )} -
-
- ) : ( - { - setSettingsTab("providers"); - setSettingsOpen(true); - }} - onOpenModels={() => { - setSettingsTab("models"); - setSettingsOpen(true); - }} - onOpenBenchPacks={() => { - setSettingsTab("benchPacks"); - setSettingsOpen(true); - }} - /> - ) - ) : null} -
- {logsOpen && !logsDetached ? ( -
-
{ - document.body.dataset.logResizeActive = "true"; - }} - /> -
-
-

Run Logs

-
- {activeTab ? activeTab.title : "No Active Tab"} -
-
-
- - {activeLogEvents.length} events - -
-
- {activeLogEvents.length > 0 ? ( -
- {activeLogEvents.map((event, index) => ( -
- {event.type} - {JSON.stringify(event)} -
- ))} -
- ) : ( -
No run logs yet for the active tab.
- )} -
- ) : null} -
-
- )} - {!settingsOpen ? ( -
-
- - {activeWorkspace?.name ?? "No Workspace"} - - - - {activeTab?.title ?? "No Tab"} - -
-
- - - {activeLogEvents.length} events -
-
- ) : null} -
- -
- - {providerModal ? ( - setProviderModal(null)} - onSubmit={saveProviderModal} - submitLabel={providerModal.mode === "create" ? "Create Provider" : "Save Provider"} - leadingActions={ - providerModal.mode === "edit" ? ( - - ) : undefined - } - > -
- option.value)} - getOptionLabel={(value) => providerKindLabel(value as BenchLocalProviderKind)} - onChange={(value) => - setProviderModal((current) => - current - ? { - ...current, - form: { - ...current.form, - id: - current.mode === "create" - ? `${value as BenchLocalProviderKind}-${crypto.randomUUID()}` - : current.form.id, - kind: value as BenchLocalProviderKind, - name: - current.form.name.trim() === "" || current.form.name === defaultProviderName(current.form.kind) - ? defaultProviderName(value as BenchLocalProviderKind) - : current.form.name, - base_url: - current.form.base_url === defaultProviderBaseUrl(current.form.kind) - ? defaultProviderBaseUrl(value as BenchLocalProviderKind) - : current.form.base_url - } - } - : current - ) - } - /> - - setProviderModal((current) => current ? { ...current, form: { ...current.form, name: value } } : current) - } - /> - setProviderModal((current) => current ? { ...current, form: { ...current.form, api_key: value } } : current)} - /> - setProviderModal((current) => current ? { ...current, form: { ...current.form, enabled: checked } } : current)} - /> -
- setProviderModal((current) => current ? { ...current, form: { ...current.form, base_url: value } } : current)} /> -
- ) : null} - - {modelModal ? ( - (() => { - const selectedProvider = draft?.providers[modelModal.form.provider]; - const canBrowseModels = providerSupportsModelDiscovery(selectedProvider); - - return ( - setModelModal(null)} - onSubmit={saveModelModal} - submitLabel={modelModal.mode === "create" ? "Create Model" : "Save Model"} - leadingActions={ - modelModal.mode === "edit" ? ( - - ) : undefined - } - > -
- 0 ? providerIds : ["openrouter"]} - getOptionLabel={(value) => { - const provider = draft?.providers[value]; - return provider ? provider.name : value; - }} - onChange={(value) => setModelModal((current) => current ? { ...current, form: { ...current.form, provider: value } } : current)} - /> - setModelModal((current) => current ? { ...current, form: { ...current.form, group: value } } : current)} /> - - setModelModal((current) => current ? { ...current, form: { ...current.form, label: value } } : current)} /> - undefined} /> - setModelModal((current) => current ? { ...current, form: { ...current.form, enabled: checked } } : current)} - /> -
-
- ); - })() - ) : null} - - {modelBrowserModal ? ( - setModelBrowserModal(null)} - onQueryChange={(query) => - setModelBrowserModal((current) => (current ? { ...current, query } : current)) - } - onSelect={(modelId) => - setModelBrowserModal((current) => (current ? { ...current, selectedModelId: modelId } : current)) - } - onSubmit={() => { - if (!modelBrowserModal.selectedModelId) { - return; - } - - const selectedEntry = modelBrowserModal.entries.find( - (entry) => entry.id === modelBrowserModal.selectedModelId - ); - - if (!selectedEntry) { - return; - } - - setModelModal((current) => { - if (!current) { - return current; - } - - const providerName = - draft?.providers[current.form.provider]?.name ?? current.form.provider; - const currentDefaultLabel = current.form.model.trim() - ? defaultModelLabel(providerName, current.form.model, undefined) - : ""; - const nextLabel = defaultModelLabel(providerName, selectedEntry.id, selectedEntry.name); - const shouldAutofillLabel = - current.form.label.trim() === "" || current.form.label.trim() === currentDefaultLabel; - - return { - ...current, - form: { - ...current.form, - model: selectedEntry.id, - label: shouldAutofillLabel ? nextLabel : current.form.label - } - }; - }); - setModelBrowserModal(null); - }} - /> - ) : null} - - {tabModelsModal && draft ? ( - setTabModelsModal(null)} - onChange={(selections) => setTabModelsModal((current) => (current ? { ...current, selections } : current))} - onSubmit={() => { - const nextSelections = normalizeTabModelSelections(tabModelsModal.selections); - - updateWorkspaceState((current) => { - const tab = current.tabs[tabModelsModal.tabId]; - - if (!tab) { - return current; - } - - tab.modelSelections = nextSelections; - tab.updatedAt = new Date().toISOString(); - return current; - }); - - setTabModelsModal(null); - }} - /> - ) : null} - - {samplingModal ? ( - setSamplingModal(null)} - onChange={(form) => setSamplingModal((current) => (current ? 
{ ...current, form } : current))} - onSubmit={() => { - const parsed = parseSamplingForm(samplingModal.form); - - if (parsed.error) { - setError(parsed.error); - return; - } - - updateWorkspaceState((current) => { - const tab = current.tabs[samplingModal.tabId]; - - if (!tab) { - return current; - } - - tab.samplingOverrides = parsed.value ?? {}; - tab.updatedAt = new Date().toISOString(); - return current; - }); - - setSamplingModal(null); - }} - /> - ) : null} - - {modelAliasModal && draft ? ( - setModelAliasModal(null)} - onSubmit={() => { - updateWorkspaceState((current) => { - const tab = current.tabs[modelAliasModal.tabId]; - - if (!tab) { - return current; - } - - tab.modelSelections = upsertTabModelAlias( - tab, - draft.models, - modelAliasModal.modelId, - modelAliasModal.alias - ); - tab.updatedAt = new Date().toISOString(); - return current; - }); - - setModelAliasModal(null); - }} - submitLabel="Save Alias" - > - - setModelAliasModal((current) => (current ? { ...current, alias: value } : current)) - } - /> - - ) : null} - - {aboutDialogOpen ? ( - void checkForAppUpdates()} - onInstallUpdate={() => void installDownloadedAppUpdate()} - onClose={() => setAboutDialogOpen(false)} - /> - ) : null} - - {workspaceModal ? ( - setWorkspaceModal(null)} - onSubmit={() => { - if (!workspaceModal.name.trim()) { - setError("Workspace name is required."); - return; - } - - renameWorkspace(workspaceModal.workspaceId, workspaceModal.name); - setWorkspaceModal(null); - }} - submitLabel="Save Workspace" - > - setWorkspaceModal((current) => (current ? { ...current, name: value } : current))} - /> - - ) : null} - - {historyModal ? 
( - setHistoryModal(null)} - onOpenRun={(runId, mode) => { - void restoreHistoryRun(historyModal.benchPackId, runId, mode); - setHistoryModal(null); - }} - onRemoveAll={() => - setConfirmDialog({ - title: `Remove all histories for ${historyModal.benchPackName}?`, - subtitle: "This permanently deletes all saved test runs for this Bench Pack.", - confirmLabel: "Remove All Histories", - tone: "danger", - onConfirm: () => { - void removeAllHistoryForBenchPack(historyModal.benchPackId, historyModal.benchPackName); - } - }) - } - /> - ) : null} - - {confirmDialog ? ( - setConfirmDialog(null)} - onSubmit={() => { - confirmDialog.onConfirm(); - setConfirmDialog(null); - }} - submitLabel={confirmDialog.confirmLabel} - submitTone={confirmDialog.tone === "danger" ? "danger" : "primary"} - /> - ) : null} - - {settingsVerifierPreparationModal ? ( - void cancelSettingsVerifierStart(settingsVerifierPreparationModal.benchPackId)} - /> - ) : verifierPreparationModal ? ( - void stopTabRun(verifierPreparationModal.tabId)} - /> - ) : null} - - {workspaceContextMenu ? ( -
event.stopPropagation()} - > - - -
- ) : null} - - {detailModal ? ( - setDetailModal(null)} - onSubmit={() => setDetailModal(null)} - submitLabel="Close" - leadingActions={ - - } - > -
-
- Status - Validation Result -
- - {detailModal.status} - -
-
{detailModal.rawLog}
-
- ) : null} -
- ); + // Removed detached logs view in web version + + const isMacPlatform = + typeof navigator !== "undefined" && navigator.userAgent.includes("Mac"); + const [loadState, setLoadState] = useState(null); + const [draft, setDraft] = useState(null); + const [workspaceState, setWorkspaceState] = + useState(null); + const [benchPackInspections, setBenchPackInspections] = useState< + BenchPackInspection[] + >([]); + const [registryEntries, setRegistryEntries] = useState< + BenchPackRegistryEntry[] + >([]); + const [registryWarning, setRegistryWarning] = useState(null); + const [availableThemes, setAvailableThemes] = useState< + BenchLocalThemeDescriptor[] + >([]); + const [activeThemeDefinition, setActiveThemeDefinition] = + useState(null); + const [systemPrefersDark, setSystemPrefersDark] = useState( + typeof window !== "undefined" + ? window.matchMedia("(prefers-color-scheme: dark)").matches + : false, + ); + const [verifierStatuses, setVerifierStatuses] = useState< + Record + >({}); + const [tabMenuOpen, setTabMenuOpen] = useState(false); + const [themeMenuOpen, setThemeMenuOpen] = useState(false); + const [sidebarOpen, setSidebarOpen] = useState(() => { + if (typeof window === "undefined") { + return true; + } + + return window.localStorage.getItem(SIDEBAR_OPEN_STORAGE_KEY) !== "false"; + }); + const [settingsOpen, setSettingsOpen] = useState(false); + const [settingsTab, setSettingsTab] = useState("providers"); + const [aboutDialogOpen, setAboutDialogOpen] = useState(false); + const [appMetadata, setAppMetadata] = useState( + null, + ); + const [appUpdateState, setAppUpdateState] = + useState(null); + const [ + dismissedDownloadedUpdateVersion, + setDismissedDownloadedUpdateVersion, + ] = useState(null); + const [providerModal, setProviderModal] = useState( + null, + ); + const [modelModal, setModelModal] = useState(null); + const [modelBrowserModal, setModelBrowserModal] = + useState(null); + const [tabModelsModal, setTabModelsModal] = + useState(null); + const 
[samplingModal, setSamplingModal] = useState( + null, + ); + const [modelAliasModal, setModelAliasModal] = + useState(null); + const [workspaceModal, setWorkspaceModal] = + useState(null); + const [workspaceContextMenu, setWorkspaceContextMenu] = + useState(null); + const [historyModal, setHistoryModal] = useState( + null, + ); + const [confirmDialog, setConfirmDialog] = useState(null); + const [verifierPreparationModal, setVerifierPreparationModal] = + useState(null); + const [ + settingsVerifierPreparationModal, + setSettingsVerifierPreparationModal, + ] = useState(null); + const [stoppingVerifierStarts, setStoppingVerifierStarts] = useState< + Record + >({}); + const [draggedTabId, setDraggedTabId] = useState(null); + const [editingTab, setEditingTab] = useState<{ + tabId: string; + value: string; + width: number; + } | null>(null); + const [activeRuns, setActiveRuns] = useState>( + {}, + ); + const [stoppingRuns, setStoppingRuns] = useState>({}); + const [runSummaries, setRunSummaries] = useState< + Record + >({}); + const [runHistories, setRunHistories] = useState< + Record + >({}); + const [liveRuns, setLiveRuns] = useState>({}); + const [liveScenarioFocus, setLiveScenarioFocus] = useState< + Record + >({}); + const [loadedHistoryRuns, setLoadedHistoryRuns] = useState< + Record + >({}); + const [logsOpen, setLogsOpen] = useState(false); + const [logsAutoScroll, setLogsAutoScroll] = useState(true); + const [logsDetached, setLogsDetached] = useState(false); + const [logDrawerHeight, setLogDrawerHeight] = useState(240); + const [detailModal, setDetailModal] = useState(null); + const [isBusy, setIsBusy] = useState(true); + const [error, setError] = useState(null); + const [appNotice, setAppNotice] = useState(null); + const [settingsNotice, setSettingsNotice] = useState(null); + const [benchPackMutations, setBenchPackMutations] = useState< + Record + >({}); + const themeMenuRef = useRef(null); + const settingsOpenRef = useRef(false); + + const providerIds = 
useMemo( + () => Object.keys(draft?.providers ?? {}), + [draft], + ); + const themeOptions = useMemo( + () => ["system", ...availableThemes.map((theme) => theme.id)], + [availableThemes], + ); + const currentThemeLabel = useMemo( + () => + resolveThemeLabel( + draft?.ui.theme ?? "system", + availableThemes, + systemPrefersDark, + ), + [draft?.ui.theme, availableThemes, systemPrefersDark], + ); + const readyInspections = useMemo( + () => + benchPackInspections.filter( + (inspection) => inspection.status === "ready", + ), + [benchPackInspections], + ); + const activeWorkspace = useMemo( + () => + workspaceState?.activeWorkspaceId + ? (workspaceState.workspaces[workspaceState.activeWorkspaceId] ?? null) + : null, + [workspaceState], + ); + const workspaceTabs = useMemo( + () => + activeWorkspace?.tabIds + .map((tabId: any) => workspaceState?.tabs[tabId]) + .filter((tab): tab is BenchLocalWorkspaceTab => Boolean(tab)) ?? [], + [activeWorkspace, workspaceState], + ); + const activeTab = useMemo( + () => + activeWorkspace?.activeTabId + ? (workspaceState?.tabs[activeWorkspace.activeTabId] ?? null) + : (workspaceTabs[0] ?? null), + [activeWorkspace, workspaceState, workspaceTabs], + ); + const activeInspection = useMemo( + () => + benchPackInspections.find( + (inspection) => inspection.id === activeTab?.benchPackId, + ) ?? null, + [benchPackInspections, activeTab], + ); + const activeVerifierStatus = useMemo( + () => + activeInspection ? (verifierStatuses[activeInspection.id] ?? null) : null, + [activeInspection, verifierStatuses], + ); + const activeTabModels = useMemo( + () => (draft ? resolveTabModels(activeTab, draft.models) : []), + [draft, activeTab], + ); + const activeRunSummary = useMemo( + () => (activeTab ? (runSummaries[activeTab.id] ?? null) : null), + [runSummaries, activeTab], + ); + const activeLiveRun = useMemo( + () => (activeTab ? (liveRuns[activeTab.id] ?? 
null) : null), + [liveRuns, activeTab], + ); + const activeLiveScenarioFocus = useMemo( + () => (activeTab ? (liveScenarioFocus[activeTab.id] ?? null) : null), + [liveScenarioFocus, activeTab], + ); + const activeRunBlocker = useMemo( + () => + activeInspection && draft + ? getRequiredVerifierRunBlocker( + activeInspection.manifest, + draft.benchpacks[activeInspection.id], + activeVerifierStatus ?? undefined, + ) + : null, + [activeInspection, activeVerifierStatus, draft], + ); + const activeLoadedHistory = useMemo( + () => (activeTab ? (loadedHistoryRuns[activeTab.id] ?? null) : null), + [loadedHistoryRuns, activeTab], + ); + const activeDisplayModels = useMemo(() => { + if (!draft) { + return []; + } + + if (activeLoadedHistory) { + return resolveHistoryModels(activeRunSummary, draft.models); + } + + return activeTabModels; + }, [draft, activeLoadedHistory, activeRunSummary, activeTabModels]); + const downloadedUpdateVersion = + appUpdateState?.downloadedVersion ?? + appUpdateState?.availableVersion ?? + null; + const showDownloadedUpdateBanner = + appUpdateState?.status === "downloaded" && + downloadedUpdateVersion !== dismissedDownloadedUpdateVersion; + const activeLogEvents = + activeLiveRun?.events ?? activeRunSummary?.events ?? []; + const logContainerRef = useRef(null); + const tabStripShellRef = useRef(null); + const tabStripRef = useRef(null); + const tabChipRefs = useRef(new Map()); + const modelDiscoveryCacheRef = useRef< + Record + >({}); + const replayRunTokensRef = useRef(new Map()); + const appliedThemeKeysRef = useRef([]); + const [tabStripOverflow, setTabStripOverflow] = useState(false); + const [activeTabMask, setActiveTabMask] = useState<{ + left: number; + width: number; + } | null>(null); + + const hasUnsavedChanges = + loadState && draft + ? JSON.stringify(loadState.config) !== JSON.stringify(draft) + : false; + const effectiveThemeId = useMemo(() => { + const requested = draft?.ui.theme ?? 
"system"; + + if (requested === "system") { + return systemPrefersDark ? "dark" : "light"; + } + + return requested; + }, [draft?.ui.theme, systemPrefersDark]); + + const updateDraft = ( + updater: (current: BenchLocalConfig) => BenchLocalConfig, + ) => { + setDraft((current) => { + if (!current) { + return current; + } + + return updater(cloneConfig(current)); + }); + }; + + const persistWorkspaceState = async (nextState: BenchLocalWorkspaceState) => { + setWorkspaceState(nextState); + + try { + const saved = await bl.workspaces.save({ state: nextState }); + setWorkspaceState(saved.state); + } catch (workspaceError) { + setError( + workspaceError instanceof Error + ? workspaceError.message + : "Failed to save workspace state.", + ); + } + }; + + const updateWorkspaceState = ( + updater: (current: BenchLocalWorkspaceState) => BenchLocalWorkspaceState, + ) => { + setWorkspaceState((current) => { + if (!current) { + return current; + } + + const next = updater(structuredClone(current)); + void persistWorkspaceState(next); + return next; + }); + }; + + const loadBenchPackInspections = async () => { + try { + const inspections = await bl.benchPacks.list(); + setBenchPackInspections(inspections); + } catch (pluginError) { + setError( + pluginError instanceof Error + ? pluginError.message + : "Failed to inspect configured Bench Packs.", + ); + } + }; + + const loadRegistryEntries = async () => { + try { + const entries = await bl.benchPacks.registry(); + setRegistryEntries(entries); + setRegistryWarning(null); + } catch (registryError) { + setRegistryWarning(formatRegistryWarning(registryError)); + } + }; + + const loadVerifierStatuses = async () => { + try { + const statuses = await bl.verifiers.list(); + setVerifierStatuses( + Object.fromEntries( + statuses.map((status: any) => [status.benchPackId, status]), + ), + ); + } catch (verifierError) { + setError( + verifierError instanceof Error + ? 
verifierError.message + : "Failed to load verifier status.", + ); + } + }; + + const loadThemes = async () => { + try { + const themes = await bl.themes.list(); + setAvailableThemes(themes); + } catch (themeError) { + setError( + themeError instanceof Error + ? themeError.message + : "Failed to load available themes.", + ); + } + }; + + // Updates removed in web version + const checkForAppUpdates = async () => {}; + const installDownloadedAppUpdate = async () => {}; + + const loadHistoryForBenchPack = async (benchPackId: string) => { + try { + const history = await bl.benchPacks.history(benchPackId); + setRunHistories((current) => ({ + ...current, + [benchPackId]: history, + })); + } catch (historyError) { + setError( + historyError instanceof Error + ? historyError.message + : "Failed to load Bench Pack history.", + ); + } + }; + + useEffect(() => { + let cancelled = false; + + const load = async () => { + setIsBusy(true); + setError(null); + setRegistryWarning(null); + + try { + const [ + result, + workspaceResult, + inspections, + themes, + verifierStatusList, + activeRunsResult, + ] = await Promise.all([ + bl.config.load(), + bl.workspaces.load(), + bl.benchPacks.list(), + bl.themes.list(), + bl.verifiers.list(), + bl.benchPacks.activeRuns(), + ]); + + let registry: BenchPackRegistryEntry[] = []; + let nextRegistryWarning: string | null = null; + + try { + registry = await bl.benchPacks.registry(); + } catch (registryError) { + nextRegistryWarning = formatRegistryWarning(registryError); + } + + if (cancelled) { + return; + } + + const persistedRunEntries = await Promise.all( + Object.values(workspaceResult.state.tabs) + .filter((tab: any) => tab.benchPackId && tab.loadedRunId) + .map(async (tab: any) => { + try { + const summary = await bl.benchPacks.loadHistory( + tab.benchPackId as string, + tab.loadedRunId as string, + ); + return [tab.id, summary] as const; + } catch { + return null; + } + }), + ); + + setLoadState(result); + 
setDraft(cloneConfig(result.config)); + setWorkspaceState(workspaceResult.state); + setRunSummaries( + Object.fromEntries( + persistedRunEntries.filter( + (entry): entry is readonly [string, BenchPackRunSummary] => + entry !== null, + ), + ), + ); + setLoadedHistoryRuns( + Object.fromEntries( + persistedRunEntries + .filter( + (entry): entry is readonly [string, BenchPackRunSummary] => + entry !== null, + ) + .map(([tabId, summary]) => [ + tabId, + { + runId: summary.runId, + startedAt: summary.startedAt, + mode: "history", + }, + ]), + ), + ); + setBenchPackInspections(inspections); + setRegistryEntries(registry); + setRegistryWarning(nextRegistryWarning); + setAvailableThemes(themes); + setVerifierStatuses( + Object.fromEntries( + verifierStatusList.map((status: any) => [status.benchPackId, status]), + ), + ); + setActiveRuns( + Object.fromEntries( + activeRunsResult.map((run: any) => [ + run.tabId, + { benchPackId: run.benchPackId }, + ]), + ), + ); + setAppNotice( + result.created + ? "Created a fresh ~/.benchlocal/config.toml bootstrap." + : null, + ); + } catch (loadError) { + if (!cancelled) { + setError( + loadError instanceof Error + ? 
loadError.message + : "Failed to load BenchLocal config.", + ); + } + } finally { + if (!cancelled) { + setIsBusy(false); + } + } + }; + + void load(); + + return () => { + cancelled = true; + }; + }, []); + + useEffect(() => { + if (typeof window === "undefined") { + return; + } + + const media = window.matchMedia("(prefers-color-scheme: dark)"); + const handleChange = () => { + setSystemPrefersDark(media.matches); + }; + + handleChange(); + media.addEventListener("change", handleChange); + + return () => { + media.removeEventListener("change", handleChange); + }; + }, []); + + useEffect(() => { + let cancelled = false; + + void bl.updates + .state() + .then((state) => { + if (!cancelled) { + setAppUpdateState(state); + } + }) + .catch(() => undefined); + + const unsubscribe = bl.updates.onState((state) => { + setAppUpdateState(state); + + if (state.status !== "downloaded") { + setDismissedDownloadedUpdateVersion(null); + } + }); + + return () => { + cancelled = true; + unsubscribe(); + }; + }, []); + + useEffect(() => { + let cancelled = false; + + const loadTheme = async () => { + const theme = await bl.themes.load(effectiveThemeId); + + if (!cancelled) { + setActiveThemeDefinition(theme); + } + }; + + void loadTheme(); + + return () => { + cancelled = true; + }; + }, [effectiveThemeId]); + + useEffect(() => { + if (!activeThemeDefinition || typeof document === "undefined") { + return; + } + + const root = document.documentElement; + + for (const key of appliedThemeKeysRef.current) { + root.style.removeProperty(key); + } + + for (const [key, value] of Object.entries( + activeThemeDefinition.variables, + )) { + root.style.setProperty(key, value); + } + + appliedThemeKeysRef.current = Object.keys(activeThemeDefinition.variables); + root.style.setProperty("color-scheme", activeThemeDefinition.colorScheme); + root.dataset.theme = activeThemeDefinition.id; + }, [activeThemeDefinition]); + + useEffect(() => { + const sse = bl.sse(); + const handleRunEvent = (e: 
MessageEvent) => { + const { tabId, event } = JSON.parse(e.data) as { + tabId: string; + event: ProgressEvent; + }; + + if (event.type === "verifier_preparing") { + setVerifierPreparationModal({ + tabId, + progress: event, + }); + } else { + setVerifierPreparationModal((current) => + current?.tabId === tabId ? null : current, + ); + } + + if (event.type === "run_finished" || event.type === "run_error") { + setActiveRuns((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + setStoppingRuns((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + } + + setLiveRuns((current) => ({ + ...current, + [tabId]: updateLiveRunState(current[tabId], event), + })); + + if (event.type === "run_started") { + setLiveScenarioFocus((current) => ({ + ...current, + [tabId]: { + liveScenarioId: null, + autoFollow: true, + }, + })); + } else if ( + event.type === "scenario_started" || + event.type === "model_progress" || + event.type === "scenario_result" || + event.type === "scenario_finished" + ) { + setLiveScenarioFocus((current) => { + const existing = current[tabId]; + return { + ...current, + [tabId]: { + liveScenarioId: event.scenarioId, + autoFollow: existing?.autoFollow ?? true, + }, + }; + }); + } + }; + const handleMutationProgress = (e: MessageEvent) => { + const payload = JSON.parse(e.data) as BenchPackMutationProgress; + setBenchPackMutations((current) => ({ + ...current, + [payload.benchPackId]: payload, + })); + }; + const handleVerifierProgress = (e: MessageEvent) => { + const { benchPackId, event } = JSON.parse(e.data) as { + benchPackId: string; + event: ProgressEvent; + }; + setSettingsVerifierPreparationModal( + (current) => + current?.benchPackId === benchPackId || current === null + ? 
({ benchPackId, progress: event } as any) + : current, + ); + }; + sse.addEventListener("run-event", handleRunEvent); + sse.addEventListener("benchpack-mutation-progress", handleMutationProgress); + sse.addEventListener("verifier-progress", handleVerifierProgress); + return () => { + sse.removeEventListener("run-event", handleRunEvent); + sse.removeEventListener( + "benchpack-mutation-progress", + handleMutationProgress, + ); + sse.removeEventListener("verifier-progress", handleVerifierProgress); + sse.close(); + }; + }, []); + + useEffect(() => { + return bl.benchPacks.onMutationProgress((payload) => { + setBenchPackMutations((current) => ({ + ...current, + [payload.benchPackId]: payload, + })); + }); + }, []); + + useEffect(() => { + return bl.verifiers.onProgress(({ benchPackId, event }) => { + setSettingsVerifierPreparationModal((current) => + current?.benchPackId === benchPackId || current === null + ? { + benchPackId, + progress: event, + } + : current, + ); + }); + }, []); + + useEffect(() => { + if (!settingsOpen || settingsTab !== "verification") { + return; + } + + void loadVerifierStatuses(); + }, [settingsOpen, settingsTab]); + + useEffect(() => { + if (!settingsOpen || settingsTab !== "advanced") { + return; + } + + setSettingsTab("providers"); + }, [settingsOpen, settingsTab]); + + useEffect(() => { + if (!logsOpen || !logsAutoScroll || !logContainerRef.current) { + return; + } + + logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight; + }, [activeLogEvents, logsOpen, logsAutoScroll]); + + useEffect(() => { + if (!activeInspection?.id || activeInspection.status !== "ready") { + return; + } + + void loadHistoryForBenchPack(activeInspection.id); + }, [activeInspection?.id, activeInspection?.status]); + + useEffect(() => { + const dispose = bl.logs.onDetachedWindowClosed(() => { + setLogsDetached(false); + }); + + return dispose; + }, []); + + useEffect(() => { + void bl.logs.publishDetachedState({ + workspaceName: 
activeWorkspace?.name ?? "No Workspace", + tabTitle: activeTab?.title ?? "No Active Tab", + eventCount: activeLogEvents.length, + events: activeLogEvents, + }); + }, [activeWorkspace?.name, activeTab?.title, activeLogEvents]); + + useEffect(() => { + const handleMove = (event: MouseEvent) => { + const shell = document.querySelector(".desktop-shell"); + + if (!shell || !document.body.dataset.logResizeActive) { + return; + } + + const shellRect = shell.getBoundingClientRect(); + const nextHeight = Math.min( + 420, + Math.max(160, shellRect.bottom - event.clientY - 30), + ); + setLogDrawerHeight(nextHeight); + }; + + const handleUp = () => { + delete document.body.dataset.logResizeActive; + }; + + window.addEventListener("mousemove", handleMove); + window.addEventListener("mouseup", handleUp); + + return () => { + window.removeEventListener("mousemove", handleMove); + window.removeEventListener("mouseup", handleUp); + }; + }, []); + + useEffect(() => { + if (!workspaceContextMenu) { + return; + } + + const closeMenu = () => { + setWorkspaceContextMenu(null); + }; + + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === "Escape") { + closeMenu(); + } + }; + + window.addEventListener("mousedown", closeMenu); + window.addEventListener("scroll", closeMenu, true); + window.addEventListener("resize", closeMenu); + window.addEventListener("keydown", handleKeyDown); + + return () => { + window.removeEventListener("mousedown", closeMenu); + window.removeEventListener("scroll", closeMenu, true); + window.removeEventListener("resize", closeMenu); + window.removeEventListener("keydown", handleKeyDown); + }; + }, [workspaceContextMenu]); + + useEffect(() => { + if (!themeMenuOpen) { + return; + } + + const handlePointerDown = (event: MouseEvent) => { + const target = event.target as Node; + if (!themeMenuRef.current?.contains(target)) { + setThemeMenuOpen(false); + } + }; + + const handleEscape = (event: KeyboardEvent) => { + if (event.key === "Escape") { + 
setThemeMenuOpen(false); + } + }; + + window.addEventListener("mousedown", handlePointerDown); + window.addEventListener("keydown", handleEscape); + + return () => { + window.removeEventListener("mousedown", handlePointerDown); + window.removeEventListener("keydown", handleEscape); + }; + }, [themeMenuOpen]); + + useEffect(() => { + return bl.app.onOpenAbout(() => { + setAboutDialogOpen(true); + + if (!appMetadata) { + void bl.app + .metadata() + .then((metadata) => { + setAppMetadata(metadata); + }) + .catch(() => undefined); + } + }); + }, [appMetadata]); + + useEffect(() => { + return bl.app.onOpenSettings(() => { + setSettingsOpen(true); + }); + }, []); + + useEffect(() => { + settingsOpenRef.current = settingsOpen; + + if (!settingsOpen) { + setSettingsNotice(null); + } + }, [settingsOpen]); + + useEffect(() => { + if (typeof window === "undefined") { + return; + } + + window.localStorage.setItem(SIDEBAR_OPEN_STORAGE_KEY, String(sidebarOpen)); + }, [sidebarOpen]); + + useEffect(() => { + const updateOverflow = () => { + const element = tabStripRef.current; + + if (!element) { + setTabStripOverflow(false); + return; + } + + setTabStripOverflow(element.scrollWidth > element.clientWidth + 4); + }; + + updateOverflow(); + window.addEventListener("resize", updateOverflow); + + return () => { + window.removeEventListener("resize", updateOverflow); + }; + }, [workspaceTabs.length, activeWorkspace?.id, sidebarOpen]); + + useEffect(() => { + const shell = tabStripShellRef.current; + const strip = tabStripRef.current; + const activeTabId = activeTab?.id; + + if (!shell || !strip || !activeTabId) { + setActiveTabMask(null); + return; + } + + const updateMask = () => { + const activeElement = tabChipRefs.current.get(activeTabId); + + if (!activeElement) { + setActiveTabMask(null); + return; + } + + const shellRect = shell.getBoundingClientRect(); + const tabRect = activeElement.getBoundingClientRect(); + + setActiveTabMask({ + left: Math.round(tabRect.left - 
shellRect.left), + width: Math.round(tabRect.width), + }); + }; + + const frameId = window.requestAnimationFrame(updateMask); + window.addEventListener("resize", updateMask); + strip.addEventListener("scroll", updateMask, { passive: true }); + + return () => { + window.cancelAnimationFrame(frameId); + window.removeEventListener("resize", updateMask); + strip.removeEventListener("scroll", updateMask); + }; + }, [activeTab?.id, workspaceTabs, sidebarOpen, tabStripOverflow]); + + const persistConfig = async ( + nextConfig: BenchLocalConfig, + options?: { + notice?: string | null; + preserveFilesystemDraft?: boolean; + previousDraft?: BenchLocalConfig | null; + previousLoadConfig?: BenchLocalConfig | null; + }, + ): Promise => { + if (!nextConfig) { + return false; + } + + setIsBusy(true); + setError(null); + + try { + const result = await bl.config.save(nextConfig); + setLoadState(result); + setDraft( + options?.preserveFilesystemDraft && + options.previousDraft && + options.previousLoadConfig + ? reapplyPendingFilesystemDraft( + result.config, + options.previousDraft, + options.previousLoadConfig, + ) + : cloneConfig(result.config), + ); + await loadBenchPackInspections(); + await loadRegistryEntries(); + if (settingsOpenRef.current && options?.notice) { + setSettingsNotice(options.notice); + } + return true; + } catch (saveError) { + setError( + saveError instanceof Error + ? saveError.message + : "Failed to save BenchLocal config.", + ); + return false; + } finally { + setIsBusy(false); + } + }; + + const save = async (): Promise => { + if (!draft) { + return false; + } + + return persistConfig(draft, { notice: "Saved ~/.benchlocal/config.toml" }); + }; + + const refreshBenchPackState = async (result?: LoadState) => { + const nextLoadState = result ?? 
(await bl.config.load()); + const inspections = await bl.benchPacks.list(); + const verifierStatusList = await bl.verifiers.list(); + let registry = registryEntries; + + try { + registry = await bl.benchPacks.registry(); + setRegistryWarning(null); + } catch (registryError) { + setRegistryWarning(formatRegistryWarning(registryError)); + } + + setLoadState(nextLoadState); + setDraft(cloneConfig(nextLoadState.config)); + setBenchPackInspections(inspections); + setRegistryEntries(registry); + setVerifierStatuses( + Object.fromEntries( + verifierStatusList.map((status: any) => [status.benchPackId, status]), + ), + ); + }; + + const ensureBenchPackMutationReady = async (): Promise => { + if (!hasUnsavedChanges) { + return true; + } + + return save(); + }; + + const installBenchPack = async (benchPackId: string) => { + if (!(await ensureBenchPackMutationReady())) { + return; + } + + setIsBusy(true); + setError(null); + setBenchPackMutations((current) => ({ + ...current, + [benchPackId]: { + benchPackId, + action: "install", + phase: "resolving", + message: "Resolving Bench Pack from registry.", + }, + })); + + try { + const result = await bl.benchPacks.install(benchPackId); + await refreshBenchPackState(result); + if (settingsOpenRef.current) { + setSettingsNotice(`Installed ${benchPackId}.`); + } + } catch (installError) { + setError( + formatRegistryMutationError("install", benchPackId, installError), + ); + } finally { + setIsBusy(false); + setBenchPackMutations((current) => { + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + }; + + const installBenchPackFromUrl = async (url: string) => { + if (!(await ensureBenchPackMutationReady())) { + return; + } + + const normalizedUrl = url.trim(); + + if (!normalizedUrl) { + setError("Bench Pack URL is required."); + return; + } + + setIsBusy(true); + setError(null); + let installedBenchPackId: string | null = null; + setBenchPackMutations((current) => ({ + ...current, + 
[THIRD_PARTY_INSTALL_MUTATION_ID]: { + benchPackId: THIRD_PARTY_INSTALL_MUTATION_ID, + action: "install", + phase: "resolving", + message: "Resolving Bench Pack from URL.", + }, + })); + + try { + const result = await bl.benchPacks.installFromUrl(normalizedUrl); + await refreshBenchPackState(result); + installedBenchPackId = + Object.entries(result.config.benchpacks).find( + ([, benchPack]: any) => + benchPack.source === "archive" && benchPack.url === normalizedUrl, + )?.[0] ?? null; + if (settingsOpenRef.current) { + setSettingsNotice( + installedBenchPackId + ? `Installed ${installedBenchPackId}.` + : "Installed third-party Bench Pack.", + ); + } + return true; + } catch (installError) { + setError( + formatDesktopErrorMessage(installError) || + "Failed to install Bench Pack from URL.", + ); + return false; + } finally { + setIsBusy(false); + setBenchPackMutations((current) => { + const next = { ...current }; + delete next[THIRD_PARTY_INSTALL_MUTATION_ID]; + delete next["third-party"]; + if (installedBenchPackId) { + delete next[installedBenchPackId]; + } + return next; + }); + } + }; + + const updateBenchPack = async (benchPackId: string) => { + if (!(await ensureBenchPackMutationReady())) { + return; + } + + setIsBusy(true); + setError(null); + setBenchPackMutations((current) => ({ + ...current, + [benchPackId]: { + benchPackId, + action: "update", + phase: "resolving", + message: "Resolving Bench Pack update.", + }, + })); + + try { + const result = await bl.benchPacks.update(benchPackId); + await refreshBenchPackState(result); + if (settingsOpenRef.current) { + setSettingsNotice(`Updated ${benchPackId}.`); + } + } catch (updateError) { + setError(formatRegistryMutationError("update", benchPackId, updateError)); + } finally { + setIsBusy(false); + setBenchPackMutations((current) => { + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + }; + + const uninstallInstalledBenchPack = async (benchPackId: string) => { + if (!(await 
ensureBenchPackMutationReady())) { + return; + } + + if ( + Object.values(activeRuns).some((run) => run.benchPackId === benchPackId) + ) { + setError("Stop active Bench Pack runs before uninstalling this pack."); + return; + } + + setIsBusy(true); + setError(null); + setBenchPackMutations((current) => ({ + ...current, + [benchPackId]: { + benchPackId, + action: "uninstall", + phase: "removing", + message: "Removing Bench Pack.", + }, + })); + + try { + const result = await bl.benchPacks.uninstall(benchPackId); + await refreshBenchPackState(result); + if (settingsOpenRef.current) { + setSettingsNotice(`Uninstalled ${benchPackId}.`); + } + } catch (uninstallError) { + setError( + uninstallError instanceof Error + ? uninstallError.message + : `Failed to uninstall ${benchPackId}.`, + ); + } finally { + setIsBusy(false); + setBenchPackMutations((current) => { + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + }; + + const reset = () => { + if (!loadState) { + return; + } + + setDraft(cloneConfig(loadState.config)); + setProviderModal(null); + setModelModal(null); + if (settingsOpenRef.current) { + setSettingsNotice("Reverted unsaved changes."); + } + setError(null); + }; + + const saveThemeSelection = async (themeId: string) => { + if (!draft) { + return; + } + + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? 
cloneConfig(previousLoadConfig) + : cloneConfig(draft); + nextConfig.ui.theme = themeId; + setDraft(nextConfig); + + const saved = await persistConfig(nextConfig, { + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + if (!saved) { + setDraft(previousDraft); + } + }; + + const saveVerifierConfig = async ( + benchPackId: string, + verifierId: string, + updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig, + ) => { + if (!draft) { + return; + } + + const currentVerifier = + draft.benchpacks[benchPackId]?.verifiers?.[verifierId]; + if (!currentVerifier) { + return; + } + + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? cloneConfig(previousLoadConfig) + : cloneConfig(draft); + nextConfig.benchpacks[benchPackId].verifiers![verifierId] = + updater(currentVerifier); + setDraft(nextConfig); + + const saved = await persistConfig(nextConfig, { + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + if (!saved) { + setDraft(previousDraft); + } + }; + + const scrollTabStrip = (delta: number) => { + tabStripRef.current?.scrollBy({ + left: delta, + behavior: "smooth", + }); + }; + + const handleTabStripWheel = (event: React.WheelEvent) => { + const strip = tabStripRef.current; + + if (!strip || !tabStripOverflow) { + return; + } + + const horizontalDelta = + Math.abs(event.deltaX) > Math.abs(event.deltaY) + ? 
event.deltaX + : event.deltaY; + + if (Math.abs(horizontalDelta) < 1) { + return; + } + + event.preventDefault(); + strip.scrollBy({ + left: horizontalDelta, + behavior: "auto", + }); + }; + + const runTab = async (tab: BenchLocalWorkspaceTab) => { + setError(null); + setAppNotice(null); + + if (!tab.benchPackId || !draft) { + setError("Select a Bench Pack for this tab first."); + return; + } + + const benchPackId = tab.benchPackId; + const selectedModels = resolveTabModels(tab, draft.models); + const inspection = benchPackInspections.find( + (candidate) => candidate.id === benchPackId, + ); + + if (inspection?.manifest) { + try { + const verifierStatusList = await bl.verifiers.list(); + const nextVerifierStatuses = Object.fromEntries( + verifierStatusList.map((status: any) => [status.benchPackId, status]), + ); + setVerifierStatuses(nextVerifierStatuses); + + const runBlocker = getRequiredVerifierRunBlocker( + inspection.manifest, + draft.benchpacks[benchPackId], + nextVerifierStatuses[benchPackId], + ); + + if (runBlocker) { + setConfirmDialog({ + title: runBlocker.title, + subtitle: runBlocker.message, + confirmLabel: runBlocker.actionLabel, + onConfirm: () => { + setSettingsTab("verification"); + setSettingsOpen(true); + }, + }); + return; + } + } catch (verifierError) { + setError( + verifierError instanceof Error + ? 
verifierError.message + : "Failed to refresh verifier status.", + ); + return; + } + } + + if (selectedModels.length === 0) { + setError( + "Select at least one enabled model for this tab before running the Bench Pack.", + ); + return; + } + + if (hasUnsavedChanges) { + const saved = await save(); + + if (!saved) { + return; + } + } + + setActiveRuns((current) => ({ + ...current, + [tab.id]: { benchPackId, mode: "host" }, + })); + setStoppingRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => ({ + ...current, + [tab.id]: { + events: [], + resultsByModel: {}, + activeCellKeys: [], + }, + })); + setRunSummaries((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLoadedHistoryRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + + try { + const result = await bl.benchPacks.run({ + tabId: tab.id, + benchPackId, + modelIds: selectedModels.map((model) => model.id), + executionMode: tab.executionMode, + generation: tab.samplingOverrides, + }); + setRunSummaries((current) => ({ + ...current, + [tab.id]: result, + })); + updateWorkspaceState((current) => { + const nextTab = current.tabs[tab.id]; + + if (!nextTab) { + return current; + } + + nextTab.loadedRunId = result.runId; + nextTab.updatedAt = new Date().toISOString(); + return current; + }); + if (result.cancelled) { + setAppNotice(`Stopped ${result.benchPackName}.`); + } else { + setAppNotice( + `Completed ${result.benchPackName} across ${result.scenarioCount} scenarios and ${result.modelCount} model${result.modelCount === 1 ? "" : "s"}.`, + ); + } + await loadBenchPackInspections(); + await loadHistoryForBenchPack(benchPackId); + } catch (runError) { + setError( + runError instanceof Error + ? 
runError.message + : `Failed to run Bench Pack for ${benchPackId}.`, + ); + } finally { + setVerifierPreparationModal((current) => + current?.tabId === tab.id ? null : current, + ); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLoadedHistoryRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + } + }; + + const resumeTabRun = async ( + tab: BenchLocalWorkspaceTab, + runSummary: BenchPackRunSummary, + ) => { + setError(null); + setAppNotice(null); + + if (!tab.benchPackId || !draft) { + setError("Select a Bench Pack for this tab first."); + return; + } + + if (isRunSummaryComplete(runSummary)) { + setError("This saved run is already complete."); + return; + } + + const benchPackId = tab.benchPackId; + const previousLoadedHistory = loadedHistoryRuns[tab.id] ?? null; + const previousTabModelSelections = structuredClone(tab.modelSelections); + const previousExecutionMode = tab.executionMode; + + if (hasUnsavedChanges) { + const saved = await save(); + + if (!saved) { + return; + } + } + + const historicalSelections = buildHistoryModelSelections( + runSummary, + draft.models, + ); + updateWorkspaceState((current) => { + const nextTab = current.tabs[tab.id]; + + if (!nextTab) { + return current; + } + + nextTab.modelSelections = + normalizeTabModelSelections(historicalSelections); + nextTab.executionMode = runSummary.executionMode ?? 
nextTab.executionMode; + nextTab.updatedAt = new Date().toISOString(); + return current; + }); + + setLoadedHistoryRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setActiveRuns((current) => ({ + ...current, + [tab.id]: { benchPackId, mode: "host" }, + })); + setStoppingRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => ({ + ...current, + [tab.id]: { + runId: runSummary.runId, + events: [], + resultsByModel: {}, + activeCellKeys: [], + }, + })); + + try { + const result = await bl.benchPacks.resumeRun({ + tabId: tab.id, + benchPackId, + runId: runSummary.runId, + executionMode: runSummary.executionMode ?? tab.executionMode, + generation: tab.samplingOverrides, + }); + setRunSummaries((current) => ({ + ...current, + [tab.id]: result, + })); + updateWorkspaceState((current) => { + const nextTab = current.tabs[tab.id]; + + if (!nextTab) { + return current; + } + + nextTab.loadedRunId = result.runId; + nextTab.updatedAt = new Date().toISOString(); + return current; + }); + if (result.cancelled) { + setAppNotice(`Stopped ${result.benchPackName}.`); + } else { + setAppNotice( + isRunSummaryComplete(result) + ? `Completed ${result.benchPackName} across ${result.scenarioCount} scenarios and ${result.modelCount} model${result.modelCount === 1 ? 
"" : "s"}.` + : `Resumed ${result.benchPackName}, but the run is still incomplete.`, + ); + } + await loadBenchPackInspections(); + await loadHistoryForBenchPack(benchPackId); + } catch (runError) { + updateWorkspaceState((current) => { + const nextTab = current.tabs[tab.id]; + + if (!nextTab) { + return current; + } + + nextTab.modelSelections = structuredClone(previousTabModelSelections); + nextTab.executionMode = previousExecutionMode; + nextTab.updatedAt = new Date().toISOString(); + return current; + }); + if (previousLoadedHistory) { + setLoadedHistoryRuns((current) => ({ + ...current, + [tab.id]: previousLoadedHistory, + })); + } + setError( + runError instanceof Error + ? runError.message + : `Failed to resume Bench Pack for ${benchPackId}.`, + ); + } finally { + setVerifierPreparationModal((current) => + current?.tabId === tab.id ? null : current, + ); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + } + }; + + const replayTabRun = async ( + tab: BenchLocalWorkspaceTab, + runSummary: BenchPackRunSummary, + ) => { + if (!tab.benchPackId) { + setError("Select a Bench Pack for this tab first."); + return; + } + + if (!isRunSummaryComplete(runSummary)) { + setError("Replay is only available for completed test runs."); + return; + } + + const inspection = benchPackInspections.find( + (candidate) => candidate.id === tab.benchPackId, + ); + const scenarios = inspection?.scenarios ?? []; + const modelIds = resolveHistoryModels(runSummary, draft?.models ?? 
[]).map( + (model) => model.id, + ); + const replayGroups = buildReplayGroups(runSummary, scenarios, modelIds); + const token = Symbol(`replay:${tab.id}`); + replayRunTokensRef.current.set(tab.id, token); + + setError(null); + setAppNotice(null); + setActiveRuns((current) => ({ + ...current, + [tab.id]: { benchPackId: tab.benchPackId as string, mode: "replay" }, + })); + setStoppingRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => ({ + ...current, + [tab.id]: { + runId: runSummary.runId, + events: [], + resultsByModel: {}, + activeCellKeys: [], + }, + })); + setLiveScenarioFocus((current) => ({ + ...current, + [tab.id]: { + liveScenarioId: null, + autoFollow: supportsLiveScenarioColumnFocus( + runSummary.executionMode ?? tab.executionMode, + ), + }, + })); + + const wait = async (ms: number) => { + await new Promise((resolve) => setTimeout(resolve, ms)); + }; + + try { + for (const group of replayGroups) { + if (replayRunTokensRef.current.get(tab.id) !== token) { + return; + } + + const nextActiveCellKeys = group.map((cell) => + getCellKey(cell.modelId, cell.scenarioId), + ); + const leadScenarioId = group[0]?.scenarioId ?? null; + + setLiveRuns((current) => { + const existing = current[tab.id]; + return { + ...current, + [tab.id]: { + runId: runSummary.runId, + events: existing?.events ?? [], + resultsByModel: existing?.resultsByModel ?? {}, + activeCellKeys: nextActiveCellKeys, + }, + }; + }); + if ( + leadScenarioId && + supportsLiveScenarioColumnFocus( + runSummary.executionMode ?? 
tab.executionMode, + ) + ) { + setLiveScenarioFocus((current) => ({ + ...current, + [tab.id]: { + liveScenarioId: leadScenarioId, + autoFollow: true, + }, + })); + } + + await wait(1000); + + if (replayRunTokensRef.current.get(tab.id) !== token) { + return; + } + + setLiveRuns((current) => { + const existing = current[tab.id]; + const nextResultsByModel = { ...(existing?.resultsByModel ?? {}) }; + + for (const cell of group) { + nextResultsByModel[cell.modelId] = [ + ...(nextResultsByModel[cell.modelId] ?? []).filter( + (candidate) => candidate.scenarioId !== cell.scenarioId, + ), + cell.result, + ]; + } + + return { + ...current, + [tab.id]: { + runId: runSummary.runId, + events: existing?.events ?? [], + resultsByModel: nextResultsByModel, + activeCellKeys: [], + }, + }; + }); + } + + setAppNotice(`Replayed ${runSummary.benchPackName}.`); + } finally { + if (replayRunTokensRef.current.get(tab.id) === token) { + replayRunTokensRef.current.delete(tab.id); + } + + setActiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + } + }; + + const stopTabRun = async (tabId: string) => { + const activeRun = activeRuns[tabId]; + + if (activeRun?.mode === "replay") { + replayRunTokensRef.current.delete(tabId); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setLiveRuns((current) => ({ + ...current, + [tabId]: { + ...(current[tabId] ?? 
{ + events: [], + resultsByModel: {}, + activeCellKeys: [], + }), + activeCellKeys: [], + }, + })); + setAppNotice("Stopped replay."); + return; + } + + setStoppingRuns((current) => ({ + ...current, + [tabId]: true, + })); + + try { + const result = await bl.benchPacks.stop(tabId); + + if (!result.stopped) { + setAppNotice("That Bench Pack run was no longer active."); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + return; + } + + setAppNotice("Stopping Bench Pack run..."); + } catch (stopError) { + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setError( + stopError instanceof Error + ? stopError.message + : "Failed to stop Bench Pack run.", + ); + } + }; + + const cancelSettingsVerifierStart = async (benchPackId: string) => { + setStoppingVerifierStarts((current) => ({ + ...current, + [benchPackId]: true, + })); + + try { + const result = await bl.verifiers.cancelStart(benchPackId); + + if (!result.cancelled) { + setSettingsVerifierPreparationModal((current) => + current?.benchPackId === benchPackId ? null : current, + ); + setStoppingVerifierStarts((current) => { + if (!current[benchPackId]) { + return current; + } + + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + } catch (cancelError) { + setStoppingVerifierStarts((current) => { + if (!current[benchPackId]) { + return current; + } + + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + setError( + cancelError instanceof Error + ? 
cancelError.message + : "Failed to cancel verifier start.", + ); + } + }; + + const createWorkspace = () => { + updateWorkspaceState((current) => { + const now = new Date().toISOString(); + const workspaceId = `workspace-${crypto.randomUUID()}`; + const tabId = `tab-${crypto.randomUUID()}`; + + current.workspaceOrder.push(workspaceId); + current.activeWorkspaceId = workspaceId; + current.workspaces[workspaceId] = { + id: workspaceId, + name: createWorkspaceName(current.workspaceOrder.length - 1), + tabIds: [tabId], + activeTabId: tabId, + createdAt: now, + updatedAt: now, + }; + current.tabs[tabId] = { + id: tabId, + title: "New Tab", + benchPackId: null, + loadedRunId: null, + focusedScenarioId: null, + modelSelections: [], + samplingOverrides: {}, + executionMode: "parallel_by_test_case", + createdAt: now, + updatedAt: now, + }; + + return current; + }); + }; + + const renameWorkspace = (workspaceId: string, name: string) => { + updateWorkspaceState((current) => { + const workspace = current.workspaces[workspaceId]; + + if (!workspace) { + return current; + } + + workspace.name = name.trim(); + workspace.updatedAt = new Date().toISOString(); + return current; + }); + }; + + const deleteWorkspace = (workspaceId: string) => { + const removedTabIds = new Set( + workspaceState?.workspaces[workspaceId]?.tabIds ?? 
[], + ); + + if (Array.from(removedTabIds).some((tabId) => activeRuns[tabId])) { + setError("Stop active Bench Pack runs before deleting this workspace."); + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[workspaceId]; + + if (!workspace) { + return current; + } + + for (const tabId of workspace.tabIds) { + delete current.tabs[tabId]; + } + + delete current.workspaces[workspaceId]; + current.workspaceOrder = current.workspaceOrder.filter( + (id) => id !== workspaceId, + ); + + if (current.workspaceOrder.length === 0) { + const now = new Date().toISOString(); + const nextWorkspaceId = `workspace-${crypto.randomUUID()}`; + const nextTabId = `tab-${crypto.randomUUID()}`; + + current.workspaceOrder = [nextWorkspaceId]; + current.activeWorkspaceId = nextWorkspaceId; + current.workspaces[nextWorkspaceId] = { + id: nextWorkspaceId, + name: "My Workspace", + tabIds: [nextTabId], + activeTabId: nextTabId, + createdAt: now, + updatedAt: now, + }; + current.tabs[nextTabId] = { + id: nextTabId, + title: "New Tab", + benchPackId: null, + loadedRunId: null, + focusedScenarioId: null, + modelSelections: [], + samplingOverrides: {}, + executionMode: "parallel_by_test_case", + createdAt: now, + updatedAt: now, + }; + } else if (current.activeWorkspaceId === workspaceId) { + current.activeWorkspaceId = current.workspaceOrder[0] ?? 
null; + } + + return current; + }); + + if (removedTabIds.size > 0) { + setRunSummaries((current) => + Object.fromEntries( + Object.entries(current).filter( + ([tabId]) => !removedTabIds.has(tabId), + ), + ), + ); + setLiveRuns((current) => + Object.fromEntries( + Object.entries(current).filter( + ([tabId]) => !removedTabIds.has(tabId), + ), + ), + ); + setActiveRuns((current) => + Object.fromEntries( + Object.entries(current).filter( + ([tabId]) => !removedTabIds.has(tabId), + ), + ), + ); + setStoppingRuns( + (current) => + Object.fromEntries( + Object.entries(current).filter( + ([tabId]) => !removedTabIds.has(tabId), + ), + ) as Record, + ); + } + }; + + const exportWorkspace = async (workspaceId: string) => { + if (!workspaceState) { + return; + } + + try { + const result = await bl.workspaces.export(workspaceId, workspaceState); + + if (result.exported) { + setAppNotice(`Exported workspace to ${result.filePath}.`); + } + } catch (workspaceError) { + setError( + workspaceError instanceof Error + ? 
workspaceError.message + : "Failed to export workspace.", + ); + } + }; + + const importWorkspace = async () => { + try { + const result = await bl.workspaces.import(null); + + if (!result.imported || !result.workspace || !result.tabs) { + return; + } + + const importedWorkspace = result.workspace; + const importedTabs = result.tabs; + const workspaceIdMap = new Map(); + const tabIdMap = new Map(); + const newWorkspaceId = `workspace-${crypto.randomUUID()}`; + workspaceIdMap.set(importedWorkspace.id, newWorkspaceId); + + updateWorkspaceState((current) => { + const now = new Date().toISOString(); + const nextTabIds = importedWorkspace.tabIds.map((tabId: any) => { + const nextTabId = `tab-${crypto.randomUUID()}`; + tabIdMap.set(tabId, nextTabId); + const importedTab = importedTabs[tabId]; + + if (importedTab) { + const importedTabRecord = importedTab as typeof importedTab & { + pluginId?: string | null; + }; + current.tabs[nextTabId] = { + ...importedTabRecord, + id: nextTabId, + benchPackId: + importedTabRecord.benchPackId ?? + importedTabRecord.pluginId ?? + null, + samplingOverrides: importedTab.samplingOverrides ?? {}, + createdAt: importedTab.createdAt ?? now, + updatedAt: now, + }; + } + + return nextTabId; + }); + + current.workspaceOrder.push(newWorkspaceId); + current.activeWorkspaceId = newWorkspaceId; + current.workspaces[newWorkspaceId] = { + ...importedWorkspace, + id: newWorkspaceId, + name: Object.values(current.workspaces).some( + (workspace) => workspace.name === importedWorkspace.name, + ) + ? `${importedWorkspace.name} Imported` + : importedWorkspace.name, + tabIds: nextTabIds, + activeTabId: importedWorkspace.activeTabId + ? (tabIdMap.get(importedWorkspace.activeTabId) ?? + nextTabIds[0] ?? + null) + : (nextTabIds[0] ?? null), + createdAt: importedWorkspace.createdAt ?? 
now, + updatedAt: now, + }; + + return current; + }); + + setAppNotice(`Imported workspace "${importedWorkspace.name}".`); + } catch (workspaceError) { + setError( + workspaceError instanceof Error + ? workspaceError.message + : "Failed to import workspace.", + ); + } + }; + + const activateWorkspace = (workspaceId: string) => { + setWorkspaceContextMenu(null); + updateWorkspaceState((current) => { + current.activeWorkspaceId = workspaceId; + return current; + }); + }; + + const createTab = (benchPackId: string) => { + if (!activeWorkspace) { + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[activeWorkspace.id]; + + if (!workspace) { + return current; + } + + const now = new Date().toISOString(); + const tabId = `tab-${crypto.randomUUID()}`; + current.tabs[tabId] = { + id: tabId, + title: createTabTitle(benchPackId, benchPackInspections), + benchPackId, + loadedRunId: null, + focusedScenarioId: null, + modelSelections: [], + samplingOverrides: {}, + executionMode: "parallel_by_test_case", + createdAt: now, + updatedAt: now, + }; + workspace.tabIds.push(tabId); + workspace.activeTabId = tabId; + workspace.updatedAt = now; + return current; + }); + setTabMenuOpen(false); + }; + + const assignBenchPackToTab = (tabId: string, benchPackId: string) => { + updateWorkspaceState((current) => { + const tab = current.tabs[tabId]; + + if (!tab) { + return current; + } + + tab.title = createTabTitle(benchPackId, benchPackInspections); + tab.benchPackId = benchPackId; + tab.loadedRunId = null; + tab.focusedScenarioId = null; + tab.samplingOverrides = {}; + tab.updatedAt = new Date().toISOString(); + + return current; + }); + setTabMenuOpen(false); + }; + + const activateTab = (tabId: string) => { + if (!activeWorkspace) { + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[activeWorkspace.id]; + + if (!workspace) { + return current; + } + + workspace.activeTabId = tabId; + workspace.updatedAt = 
new Date().toISOString(); + return current; + }); + }; + + const startEditingTab = (tabId: string, currentTitle: string) => { + const width = tabChipRefs.current.get(tabId)?.offsetWidth ?? 180; + setEditingTab({ + tabId, + value: currentTitle, + width, + }); + }; + + const commitEditingTab = () => { + if (!editingTab) { + return; + } + + const nextTitle = editingTab.value.trim() || "New Tab"; + + updateWorkspaceState((current) => { + const tab = current.tabs[editingTab.tabId]; + + if (!tab) { + return current; + } + + tab.title = nextTitle; + tab.updatedAt = new Date().toISOString(); + return current; + }); + + setEditingTab(null); + }; + + const cancelEditingTab = () => { + setEditingTab(null); + }; + + const reorderTab = (draggedId: string, targetId: string) => { + if (!activeWorkspace || draggedId === targetId) { + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[activeWorkspace.id]; + + if (!workspace) { + return current; + } + + const nextTabIds = [...workspace.tabIds]; + const fromIndex = nextTabIds.indexOf(draggedId); + const toIndex = nextTabIds.indexOf(targetId); + + if (fromIndex < 0 || toIndex < 0) { + return current; + } + + const [moved] = nextTabIds.splice(fromIndex, 1); + nextTabIds.splice(toIndex, 0, moved); + workspace.tabIds = nextTabIds; + workspace.updatedAt = new Date().toISOString(); + return current; + }); + }; + + const closeTab = (tabId: string) => { + if (!activeWorkspace) { + return; + } + + if (activeRuns[tabId]) { + setError("Stop the Bench Pack run before closing this tab."); + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[activeWorkspace.id]; + + if (!workspace) { + return current; + } + + workspace.tabIds = workspace.tabIds.filter((id) => id !== tabId); + delete current.tabs[tabId]; + + workspace.activeTabId = + workspace.activeTabId === tabId + ? (workspace.tabIds[workspace.tabIds.length - 1] ?? 
null) + : workspace.activeTabId; + workspace.updatedAt = new Date().toISOString(); + + if (workspace.tabIds.length === 0) { + const replacementTabId = `tab-${crypto.randomUUID()}`; + current.tabs[replacementTabId] = { + id: replacementTabId, + title: "New Tab", + benchPackId: null, + loadedRunId: null, + focusedScenarioId: null, + modelSelections: [], + samplingOverrides: {}, + executionMode: "parallel_by_test_case", + createdAt: workspace.updatedAt, + updatedAt: workspace.updatedAt, + }; + workspace.tabIds = [replacementTabId]; + workspace.activeTabId = replacementTabId; + } + + return current; + }); + setRunSummaries((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setLiveRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + }; + + const restoreHistoryRun = async ( + benchPackId: string, + runId: string, + mode: "history" | "replay" = "history", + ) => { + if (!activeTab) { + return; + } + + try { + const summary = await bl.benchPacks.loadHistory(benchPackId, runId); + setRunSummaries((current) => ({ + ...current, + [activeTab.id]: summary, + })); + updateWorkspaceState((current) => { + const tab = current.tabs[activeTab.id]; + + if (!tab) { + return current; + } + + tab.loadedRunId = summary.runId; + tab.updatedAt = new Date().toISOString(); + return current; + }); + setLiveRuns((current) => { + const next = { ...current }; + delete next[activeTab.id]; + return next; + }); + setLoadedHistoryRuns((current) => ({ + ...current, + [activeTab.id]: { + runId, + startedAt: summary.startedAt, + mode, + }, + })); + if (summary.executionMode) { + updateWorkspaceState((current) => { + const tab = current.tabs[activeTab.id]; + + if (!tab) { + return current; + } + + tab.executionMode = summary.executionMode ?? 
tab.executionMode; + tab.updatedAt = new Date().toISOString(); + return current; + }); + } + } catch (historyError) { + setError( + historyError instanceof Error + ? historyError.message + : "Failed to load Bench Pack history.", + ); + } + }; + + const retryScenarioFromDetail = async (detail: DetailModalState) => { + if (!workspaceState) { + return; + } + + if (!detail.runId) { + setError("This scenario does not belong to a saved test run yet."); + return; + } + + const tab = workspaceState.tabs[detail.tabId]; + + if (!tab || tab.benchPackId !== detail.benchPackId) { + setError("The original tab for this test is no longer available."); + return; + } + + if (hasUnsavedChanges) { + const saved = await save(); + + if (!saved) { + return; + } + } + + const retryKey = detailModalKey(detail); + const retryCellKey = getCellKey(detail.modelId, detail.scenarioId); + setDetailModal((current) => + current && detailModalKey(current) === retryKey ? null : current, + ); + setLiveRuns((current) => { + const existing = current[detail.tabId]; + + if (existing) { + return { + ...current, + [detail.tabId]: { + ...existing, + runId: existing.runId ?? detail.runId ?? undefined, + activeCellKeys: existing.activeCellKeys.includes(retryCellKey) + ? existing.activeCellKeys + : [...existing.activeCellKeys, retryCellKey], + }, + }; + } + + return { + ...current, + [detail.tabId]: { + runId: detail.runId ?? 
undefined, + events: [], + resultsByModel: {}, + activeCellKeys: [retryCellKey], + }, + }; + }); + + try { + await bl.benchPacks.retryScenario({ + tabId: detail.tabId, + benchPackId: detail.benchPackId, + runId: detail.runId, + scenarioId: detail.scenarioId, + modelId: detail.modelId, + generation: tab.samplingOverrides, + }); + const refreshedSummary = await bl.benchPacks.loadHistory( + detail.benchPackId, + detail.runId, + ); + + if (!activeRuns[detail.tabId]) { + setRunSummaries((current) => ({ + ...current, + [detail.tabId]: refreshedSummary, + })); + } + await loadHistoryForBenchPack(detail.benchPackId); + setAppNotice(`Retested ${detail.scenarioId} for ${detail.modelId}.`); + } catch (retryError) { + setLiveRuns((current) => { + const existing = current[detail.tabId]; + + if (!existing || !existing.activeCellKeys.includes(retryCellKey)) { + return current; + } + + return { + ...current, + [detail.tabId]: { + ...existing, + activeCellKeys: existing.activeCellKeys.filter( + (key) => key !== retryCellKey, + ), + }, + }; + }); + setError( + retryError instanceof Error + ? 
retryError.message + : "Failed to retry the selected test.", + ); + } + }; + + const clearLoadedHistoryRun = (tabId: string) => { + updateWorkspaceState((current) => { + const tab = current.tabs[tabId]; + + if (!tab) { + return current; + } + + tab.loadedRunId = null; + tab.updatedAt = new Date().toISOString(); + return current; + }); + setLoadedHistoryRuns((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + setRunSummaries((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + setLiveRuns((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + }; + + const clearLoadedHistoryForBenchPack = (benchPackId: string) => { + const affectedTabIds = workspaceState + ? Object.values(workspaceState.tabs) + .filter( + (tab) => + tab.benchPackId === benchPackId && + Boolean(loadedHistoryRuns[tab.id]), + ) + .map((tab) => tab.id) + : []; + + if (affectedTabIds.length === 0) { + return; + } + + updateWorkspaceState((current) => { + for (const tabId of affectedTabIds) { + const tab = current.tabs[tabId]; + + if (!tab) { + continue; + } + + tab.loadedRunId = null; + tab.updatedAt = new Date().toISOString(); + } + + return current; + }); + + setLoadedHistoryRuns((current) => { + const next = { ...current }; + for (const tabId of affectedTabIds) { + delete next[tabId]; + } + return next; + }); + + setRunSummaries((current) => { + const next = { ...current }; + for (const tabId of affectedTabIds) { + delete next[tabId]; + } + return next; + }); + + setLiveRuns((current) => { + const next = { ...current }; + for (const tabId of affectedTabIds) { + delete next[tabId]; + } + return next; + }); + }; + + const removeAllHistoryForBenchPack = async ( + benchPackId: string, + benchPackName: string, + ) => { + try { + await 
bl.benchPacks.clearHistory(benchPackId); + setRunHistories((current) => ({ + ...current, + [benchPackId]: [], + })); + clearLoadedHistoryForBenchPack(benchPackId); + setHistoryModal(null); + setAppNotice(`Removed all test histories for ${benchPackName}.`); + } catch (historyError) { + setError( + historyError instanceof Error + ? historyError.message + : "Failed to remove Bench Pack history.", + ); + } + }; + + const saveProviderModal = async () => { + if (!providerModal || !draft) { + return; + } + + const providerId = providerModal.form.id.trim(); + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? cloneConfig(previousLoadConfig) + : cloneConfig(draft); + + nextConfig.providers[providerId] = { + kind: providerModal.form.kind, + name: + providerModal.form.name.trim() || + defaultProviderName(providerModal.form.kind), + enabled: providerModal.form.enabled, + base_url: providerModal.form.base_url.trim(), + api_key: providerModal.form.api_key.trim() || undefined, + }; + + const saved = await persistConfig(nextConfig, { + notice: + providerModal.mode === "create" + ? "Added provider." + : "Updated provider.", + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + + if (!saved) { + return; + } + + setProviderModal(null); + }; + + const deleteProvider = async (providerId: string): Promise => { + if (!draft) { + return false; + } + + const removedModelIds = new Set( + (draft?.models ?? []) + .filter((model) => model.provider === providerId) + .map((model) => model.id), + ); + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? 
cloneConfig(previousLoadConfig) + : cloneConfig(draft); + + delete nextConfig.providers[providerId]; + nextConfig.models = nextConfig.models.filter( + (model) => model.provider !== providerId, + ); + + const saved = await persistConfig(nextConfig, { + notice: `Deleted provider "${providerId}".`, + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + + if (!saved) { + return false; + } + + if (removedModelIds.size > 0) { + updateWorkspaceState((current) => { + for (const tab of Object.values(current.tabs)) { + tab.modelSelections = tab.modelSelections.filter( + (selection) => !removedModelIds.has(selection.modelId), + ); + } + return current; + }); + } + + return true; + }; + + const confirmDeleteProvider = (providerId: string) => { + const provider = draft?.providers[providerId]; + const linkedModelCount = (draft?.models ?? []).filter( + (model) => model.provider === providerId, + ).length; + + setConfirmDialog({ + title: "Delete Provider", + subtitle: + linkedModelCount > 0 + ? `Delete ${provider?.name ?? "this provider"}? This will also delete ${linkedModelCount} linked ${linkedModelCount === 1 ? "model" : "models"} and remove them from any tab selections.` + : `Delete ${provider?.name ?? 
"this provider"}?`, + confirmLabel: "Delete Provider", + tone: "danger", + onConfirm: () => { + void deleteProvider(providerId).then((deleted) => { + if (deleted) { + setProviderModal(null); + } + }); + }, + }); + }; + + const openModelBrowser = async () => { + if (!modelModal || !draft) { + return; + } + + const provider = draft.providers[modelModal.form.provider]; + + if (!provider) { + setError("Select a provider first."); + return; + } + + if (!providerSupportsModelDiscovery(provider)) { + setError(`${provider.name} does not support model browsing yet.`); + return; + } + + const cacheKey = `${provider.kind}::${provider.base_url}`; + const cachedEntries = modelDiscoveryCacheRef.current[cacheKey]; + + setModelBrowserModal({ + providerId: modelModal.form.provider, + providerName: provider.name, + entries: cachedEntries ?? [], + query: "", + selectedModelId: + modelModal.form.model.trim() || cachedEntries?.[0]?.id || null, + loading: !cachedEntries, + error: null, + }); + + if (cachedEntries) { + return; + } + + try { + const entries = await bl.models.discover(provider); + modelDiscoveryCacheRef.current[cacheKey] = entries; + setModelBrowserModal((current) => + current && current.providerId === modelModal.form.provider + ? { + ...current, + entries, + selectedModelId: + current.selectedModelId ?? entries[0]?.id ?? null, + loading: false, + } + : current, + ); + } catch (discoverError) { + setModelBrowserModal((current) => + current && current.providerId === modelModal.form.provider + ? { + ...current, + loading: false, + error: + discoverError instanceof Error + ? discoverError.message + : `Failed to load models from ${provider.name}.`, + } + : current, + ); + } + }; + + const saveModelModal = async () => { + if (!modelModal || !draft) { + return; + } + + const modelConfig = buildModelConfig( + modelModal.form, + draft?.providers ?? 
{}, + ); + + if (!modelConfig.provider || !modelConfig.model) { + setError("Model provider and model identifier are required."); + return; + } + + if (!draft?.providers[modelConfig.provider]) { + setError(`Model provider "${modelConfig.provider}" does not exist yet.`); + return; + } + + const previousModelId = + modelModal.mode === "edit" + ? (draft?.models[modelModal.index]?.id ?? null) + : null; + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? cloneConfig(previousLoadConfig) + : cloneConfig(draft); + + if (modelModal.mode === "create") { + nextConfig.models.push(modelConfig); + } else { + nextConfig.models[modelModal.index] = modelConfig; + } + + const saved = await persistConfig(nextConfig, { + notice: modelModal.mode === "create" ? "Added model." : "Updated model.", + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + + if (!saved) { + return; + } + + if (previousModelId && previousModelId !== modelConfig.id) { + updateWorkspaceState((current) => { + for (const tab of Object.values(current.tabs)) { + tab.modelSelections = tab.modelSelections.map((selection) => + selection.modelId === previousModelId + ? { ...selection, modelId: modelConfig.id } + : selection, + ); + } + return current; + }); + } + + setModelModal(null); + }; + + const deleteModel = async (index: number): Promise => { + if (!draft) { + return false; + } + + const removedModelId = draft?.models[index]?.id ?? null; + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? 
cloneConfig(previousLoadConfig) + : cloneConfig(draft); + nextConfig.models.splice(index, 1); + + const saved = await persistConfig(nextConfig, { + notice: "Deleted model.", + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + + if (!saved) { + return false; + } + + if (removedModelId) { + updateWorkspaceState((current) => { + for (const tab of Object.values(current.tabs)) { + tab.modelSelections = tab.modelSelections.filter( + (selection) => selection.modelId !== removedModelId, + ); + } + return current; + }); + } + + return true; + }; + + const confirmDeleteModel = (index: number) => { + const model = draft?.models[index]; + if (!model) { + return; + } + + const linkedTabCount = workspaceState + ? Object.values(workspaceState.tabs).filter((tab) => + tab.modelSelections.some( + (selection) => selection.modelId === model.id, + ), + ).length + : 0; + + setConfirmDialog({ + title: "Delete Model", + subtitle: + linkedTabCount > 0 + ? `Delete ${model.label}? This will also remove it from ${linkedTabCount} tab ${linkedTabCount === 1 ? "selection" : "selections"}.` + : `Delete ${model.label}?`, + confirmLabel: "Delete Model", + tone: "danger", + onConfirm: () => { + void deleteModel(index).then((deleted) => { + if (deleted) { + setModelModal(null); + } + }); + }, + }); + }; + + return ( +
+
+
+
+
+ + {!isMacPlatform ? ( +
+

BenchLocal

+
+ ) : null} +
+ +
+ {isMacPlatform ? ( +
+

BenchLocal

+
+ ) : null} + + {!settingsOpen ? ( +
+ { + if (activeTab && !activeTab.benchPackId) { + assignBenchPackToTab(activeTab.id, benchPackId); + return; + } + + createTab(benchPackId); + }} + disabled={!activeWorkspace} + /> + + {appUpdateState?.status === "downloaded" ? ( + + ) : null} +
+ ) : draft ? ( +
+
+ + {themeMenuOpen ? ( +
+ {themeOptions.map((themeId) => ( + + ))} +
+ ) : null} +
+
+ ) : null} +
+
+ + {settingsOpen && draft ? ( + { + setSettingsNotice(null); + setSettingsOpen(false); + }} + onDismissNotice={() => setSettingsNotice(null)} + onDismissError={() => setError(null)} + onSaveAdvanced={() => void save()} + onResetAdvanced={reset} + onCreateProvider={() => + setProviderModal({ + mode: "create", + form: createEmptyProvider(), + }) + } + onEditProvider={(providerId) => + setProviderModal({ + mode: "edit", + initialId: providerId, + form: toProviderForm(providerId, draft.providers[providerId]), + }) + } + onCreateModel={() => + setModelModal({ + mode: "create", + form: createEmptyModel(providerIds[0] ?? "openrouter"), + }) + } + onEditModel={(index) => + setModelModal({ + mode: "edit", + index, + form: toModelForm(draft.models[index]), + }) + } + onStartVerifier={async ( + benchPackId, + benchPackName, + verifierId, + ) => { + setError(null); + setStoppingVerifierStarts((current) => { + if (!current[benchPackId]) { + return current; + } + + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + setSettingsVerifierPreparationModal({ + benchPackId, + progress: { + type: "verifier_preparing", + benchPackId, + benchPackName, + verifierId, + phase: "checking_docker", + message: "Checking Local Docker availability.", + }, + }); + + try { + const status = await bl.verifiers.start(benchPackId); + setVerifierStatuses((current) => ({ + ...current, + [benchPackId]: status, + })); + } catch (verifierError) { + if (isAbortLikeError(verifierError)) { + if (settingsOpenRef.current) { + setSettingsNotice(`Cancelled preparing ${verifierId}.`); + } + } else { + setError( + verifierError instanceof Error + ? verifierError.message + : "Failed to start verifier.", + ); + } + } finally { + setSettingsVerifierPreparationModal((current) => + current?.benchPackId === benchPackId ? 
null : current, + ); + setStoppingVerifierStarts((current) => { + if (!current[benchPackId]) { + return current; + } + + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + }} + onStopVerifier={async (benchPackId) => { + try { + const status = await bl.verifiers.stop(benchPackId); + setVerifierStatuses((current) => ({ + ...current, + [benchPackId]: status, + })); + } catch (verifierError) { + setError( + verifierError instanceof Error + ? verifierError.message + : "Failed to stop verifier.", + ); + } + }} + onDeleteVerifierImage={( + benchPackId, + benchPackName, + verifierId, + ) => { + setConfirmDialog({ + title: "Delete Verifier Image", + subtitle: `Delete the Local Docker image for verifier "${verifierId}" in ${benchPackName}? BenchLocal will pull or rebuild it again the next time this verifier starts.`, + confirmLabel: "Delete Image", + tone: "danger", + onConfirm: () => { + void (async () => { + setIsBusy(true); + setError(null); + + try { + const result = await bl.verifiers.deleteImage( + benchPackId, + verifierId, + ); + setVerifierStatuses((current) => ({ + ...current, + [benchPackId]: result.status, + })); + if (settingsOpenRef.current) { + setSettingsNotice( + result.removed + ? `Deleted Docker image ${result.image}.` + : `Docker image ${result.image} was already absent.`, + ); + } + } catch (verifierError) { + setError( + verifierError instanceof Error + ? 
verifierError.message + : "Failed to delete verifier image.", + ); + } finally { + setIsBusy(false); + } + })(); + }, + }); + }} + onRefreshRegistry={() => void loadRegistryEntries()} + onInstallBenchPack={(benchPackId) => + void installBenchPack(benchPackId) + } + onInstallBenchPackFromUrl={(url) => installBenchPackFromUrl(url)} + onUpdateBenchPack={(benchPackId) => + void updateBenchPack(benchPackId) + } + onUninstallBenchPack={(benchPackId) => + void uninstallInstalledBenchPack(benchPackId) + } + updateDraft={updateDraft} + onUpdateVerifier={(benchPackId, verifierId, updater) => { + void saveVerifierConfig(benchPackId, verifierId, updater); + }} + /> + ) : ( +
+ + +
+ {appNotice ? ( + +
+ {appNotice} + +
+
+ ) : null} + {showDownloadedUpdateBanner ? ( + +
+ {describeAppUpdateState(appUpdateState)} + +
+
+ ) : null} + {error ? {error} : null} + {isBusy && !draft ? ( + Loading BenchLocal config... + ) : null} + +
+ {draft ? ( + activeWorkspace ? ( +
+
+ {activeTabMask ? ( + + ) : null} +
+ {workspaceTabs.map((tab) => { + const inspection = benchPackInspections.find( + (candidate) => candidate.id === tab.benchPackId, + ); + const isTabRunning = Boolean(activeRuns[tab.id]); + const hasTabRetryActivity = + (liveRuns[tab.id]?.activeCellKeys.length ?? 0) > + 0; + const showTabSpinner = + isTabRunning || hasTabRetryActivity; + const showWarning = + !isTabRunning && + inspection && + inspection.status !== "ready"; + const isEditingTab = editingTab?.tabId === tab.id; + + return ( + + ); + })} + +
+
+ + +
+
+
+ {activeInspection && activeTab ? ( + { + if ( + activeRuns[activeTab.id] && + supportsLiveScenarioColumnFocus( + activeTab.executionMode, + ) + ) { + setLiveScenarioFocus((current) => { + const existing = current[activeTab.id]; + const liveScenarioId = + existing?.liveScenarioId ?? null; + + return { + ...current, + [activeTab.id]: { + liveScenarioId, + autoFollow: + liveScenarioId === scenarioId, + }, + }; + }); + } + + updateWorkspaceState((current) => { + const tab = activeTab + ? current.tabs[activeTab.id] + : null; + if (!tab) { + return current; + } + tab.focusedScenarioId = scenarioId; + tab.updatedAt = new Date().toISOString(); + return current; + }); + }} + onEditModels={() => + setTabModelsModal({ + tabId: activeTab.id, + selections: structuredClone( + activeTab.modelSelections, + ), + }) + } + onEditSampling={() => + setSamplingModal({ + tabId: activeTab.id, + benchPackId: activeInspection.id, + benchPackName: + activeInspection.manifest?.name ?? + activeInspection.id, + defaults: { + ...DEFAULT_BENCHLOCAL_GENERATION, + ...(activeInspection.manifest + ?.samplingDefaults ?? {}), + }, + form: createSamplingForm( + activeTab.samplingOverrides, + ), + }) + } + executionMode={activeTab.executionMode} + isViewingHistory={Boolean(activeLoadedHistory)} + onOpenHistory={() => + setHistoryModal({ + benchPackId: activeInspection.id, + benchPackName: + activeInspection.manifest?.name ?? + activeInspection.id, + entries: + runHistories[activeInspection.id] ?? [], + }) + } + onEditModelAlias={(model) => + setModelAliasModal({ + tabId: activeTab.id, + modelId: model.id, + baseLabel: model.label, + alias: model.alias ?? "", + }) + } + onChangeExecutionMode={(executionMode) => + updateWorkspaceState((current) => { + const tab = activeTab + ? 
current.tabs[activeTab.id] + : null; + if (!tab) { + return current; + } + tab.executionMode = executionMode; + tab.updatedAt = new Date().toISOString(); + return current; + }) + } + isRunning={Boolean(activeRuns[activeTab.id])} + isStopping={Boolean(stoppingRuns[activeTab.id])} + onOpenVerification={() => { + setSettingsTab("verification"); + setSettingsOpen(true); + }} + onRefreshVerification={() => + void loadVerifierStatuses() + } + onClearHistory={() => + clearLoadedHistoryRun(activeTab.id) + } + onRun={() => + void (activeLoadedHistory?.mode === "replay" && + activeRunSummary + ? replayTabRun(activeTab, activeRunSummary) + : activeRunSummary && + !isRunSummaryComplete(activeRunSummary) + ? resumeTabRun(activeTab, activeRunSummary) + : runTab(activeTab)) + } + onStop={() => void stopTabRun(activeTab.id)} + onOpenDetail={setDetailModal} + /> + ) : ( + { + setSettingsTab("providers"); + setSettingsOpen(true); + }} + onOpenModels={() => { + setSettingsTab("models"); + setSettingsOpen(true); + }} + onOpenBenchPacks={() => { + setSettingsTab("benchPacks"); + setSettingsOpen(true); + }} + onSelectBenchPack={ + activeTab + ? () => setTabMenuOpen(true) + : undefined + } + /> + )} +
+
+ ) : ( + { + setSettingsTab("providers"); + setSettingsOpen(true); + }} + onOpenModels={() => { + setSettingsTab("models"); + setSettingsOpen(true); + }} + onOpenBenchPacks={() => { + setSettingsTab("benchPacks"); + setSettingsOpen(true); + }} + /> + ) + ) : null} +
+ {logsOpen && !logsDetached ? ( +
+
{ + document.body.dataset.logResizeActive = "true"; + }} + /> +
+
+

Run Logs

+
+ {activeTab ? activeTab.title : "No Active Tab"} +
+
+
+ + + {activeLogEvents.length} events + + +
+
+ {activeLogEvents.length > 0 ? ( +
+ {activeLogEvents.map((event, index) => ( +
+ {event.type} + + {" "} + {JSON.stringify(event)} + +
+ ))} +
+ ) : ( +
+ No run logs yet for the active tab. +
+ )} +
+ ) : null} +
+
+ )} + {!settingsOpen ? ( +
+
+ + {activeWorkspace?.name ?? "No Workspace"} + + + + {activeTab?.title ?? "No Tab"} + +
+
+ + + + {activeLogEvents.length} events + +
+
+ ) : null} +
+
+ + {providerModal ? ( + setProviderModal(null)} + onSubmit={saveProviderModal} + submitLabel={ + providerModal.mode === "create" + ? "Create Provider" + : "Save Provider" + } + leadingActions={ + providerModal.mode === "edit" ? ( + + ) : undefined + } + > +
+ option.value)} + getOptionLabel={(value) => + providerKindLabel(value as BenchLocalProviderKind) + } + onChange={(value) => + setProviderModal((current) => + current + ? { + ...current, + form: { + ...current.form, + id: + current.mode === "create" + ? `${value as BenchLocalProviderKind}-${crypto.randomUUID()}` + : current.form.id, + kind: value as BenchLocalProviderKind, + name: + current.form.name.trim() === "" || + current.form.name === + defaultProviderName(current.form.kind) + ? defaultProviderName( + value as BenchLocalProviderKind, + ) + : current.form.name, + base_url: + current.form.base_url === + defaultProviderBaseUrl(current.form.kind) + ? defaultProviderBaseUrl( + value as BenchLocalProviderKind, + ) + : current.form.base_url, + }, + } + : current, + ) + } + /> + + setProviderModal((current) => + current + ? { ...current, form: { ...current.form, name: value } } + : current, + ) + } + /> + + setProviderModal((current) => + current + ? { ...current, form: { ...current.form, api_key: value } } + : current, + ) + } + /> + + setProviderModal((current) => + current + ? { + ...current, + form: { ...current.form, enabled: checked }, + } + : current, + ) + } + /> +
+ + setProviderModal((current) => + current + ? { ...current, form: { ...current.form, base_url: value } } + : current, + ) + } + /> +
+ ) : null} + + {modelModal + ? (() => { + const selectedProvider = draft?.providers[modelModal.form.provider]; + const canBrowseModels = + providerSupportsModelDiscovery(selectedProvider); + + return ( + setModelModal(null)} + onSubmit={saveModelModal} + submitLabel={ + modelModal.mode === "create" ? "Create Model" : "Save Model" + } + leadingActions={ + modelModal.mode === "edit" ? ( + + ) : undefined + } + > +
+ 0 ? providerIds : ["openrouter"] + } + getOptionLabel={(value) => { + const provider = draft?.providers[value]; + return provider ? provider.name : value; + }} + onChange={(value) => + setModelModal((current) => + current + ? { + ...current, + form: { ...current.form, provider: value }, + } + : current, + ) + } + /> + + setModelModal((current) => + current + ? { + ...current, + form: { ...current.form, group: value }, + } + : current, + ) + } + /> + + + setModelModal((current) => + current + ? { + ...current, + form: { ...current.form, label: value }, + } + : current, + ) + } + /> + undefined} + /> + + setModelModal((current) => + current + ? { + ...current, + form: { ...current.form, enabled: checked }, + } + : current, + ) + } + /> +
+
+ ); + })() + : null} + + {modelBrowserModal ? ( + setModelBrowserModal(null)} + onQueryChange={(query) => + setModelBrowserModal((current) => + current ? { ...current, query } : current, + ) + } + onSelect={(modelId) => + setModelBrowserModal((current) => + current ? { ...current, selectedModelId: modelId } : current, + ) + } + onSubmit={() => { + if (!modelBrowserModal.selectedModelId) { + return; + } + + const selectedEntry = modelBrowserModal.entries.find( + (entry) => entry.id === modelBrowserModal.selectedModelId, + ); + + if (!selectedEntry) { + return; + } + + setModelModal((current) => { + if (!current) { + return current; + } + + const providerName = + draft?.providers[current.form.provider]?.name ?? + current.form.provider; + const currentDefaultLabel = current.form.model.trim() + ? defaultModelLabel(providerName, current.form.model, undefined) + : ""; + const nextLabel = defaultModelLabel( + providerName, + selectedEntry.id, + selectedEntry.name, + ); + const shouldAutofillLabel = + current.form.label.trim() === "" || + current.form.label.trim() === currentDefaultLabel; + + return { + ...current, + form: { + ...current.form, + model: selectedEntry.id, + label: shouldAutofillLabel ? nextLabel : current.form.label, + }, + }; + }); + setModelBrowserModal(null); + }} + /> + ) : null} + + {tabModelsModal && draft ? ( + setTabModelsModal(null)} + onChange={(selections) => + setTabModelsModal((current) => + current ? { ...current, selections } : current, + ) + } + onSubmit={() => { + const nextSelections = normalizeTabModelSelections( + tabModelsModal.selections, + ); + + updateWorkspaceState((current) => { + const tab = current.tabs[tabModelsModal.tabId]; + + if (!tab) { + return current; + } + + tab.modelSelections = nextSelections; + tab.updatedAt = new Date().toISOString(); + return current; + }); + + setTabModelsModal(null); + }} + /> + ) : null} + + {samplingModal ? 
( + setSamplingModal(null)} + onChange={(form) => + setSamplingModal((current) => + current ? { ...current, form } : current, + ) + } + onSubmit={() => { + const parsed = parseSamplingForm(samplingModal.form); + + if (parsed.error) { + setError(parsed.error); + return; + } + + updateWorkspaceState((current) => { + const tab = current.tabs[samplingModal.tabId]; + + if (!tab) { + return current; + } + + tab.samplingOverrides = parsed.value ?? {}; + tab.updatedAt = new Date().toISOString(); + return current; + }); + + setSamplingModal(null); + }} + /> + ) : null} + + {modelAliasModal && draft ? ( + setModelAliasModal(null)} + onSubmit={() => { + updateWorkspaceState((current) => { + const tab = current.tabs[modelAliasModal.tabId]; + + if (!tab) { + return current; + } + + tab.modelSelections = upsertTabModelAlias( + tab, + draft.models, + modelAliasModal.modelId, + modelAliasModal.alias, + ); + tab.updatedAt = new Date().toISOString(); + return current; + }); + + setModelAliasModal(null); + }} + submitLabel="Save Alias" + > + + setModelAliasModal((current) => + current ? { ...current, alias: value } : current, + ) + } + /> + + ) : null} + + {aboutDialogOpen ? ( + void checkForAppUpdates()} + onInstallUpdate={() => void installDownloadedAppUpdate()} + onClose={() => setAboutDialogOpen(false)} + /> + ) : null} + + {workspaceModal ? ( + setWorkspaceModal(null)} + onSubmit={() => { + if (!workspaceModal.name.trim()) { + setError("Workspace name is required."); + return; + } + + renameWorkspace(workspaceModal.workspaceId, workspaceModal.name); + setWorkspaceModal(null); + }} + submitLabel="Save Workspace" + > + + setWorkspaceModal((current) => + current ? { ...current, name: value } : current, + ) + } + /> + + ) : null} + + {historyModal ? 
( + setHistoryModal(null)} + onOpenRun={(runId, mode) => { + void restoreHistoryRun(historyModal.benchPackId, runId, mode); + setHistoryModal(null); + }} + onRemoveAll={() => + setConfirmDialog({ + title: `Remove all histories for ${historyModal.benchPackName}?`, + subtitle: + "This permanently deletes all saved test runs for this Bench Pack.", + confirmLabel: "Remove All Histories", + tone: "danger", + onConfirm: () => { + void removeAllHistoryForBenchPack( + historyModal.benchPackId, + historyModal.benchPackName, + ); + }, + }) + } + /> + ) : null} + + {confirmDialog ? ( + setConfirmDialog(null)} + onSubmit={() => { + confirmDialog.onConfirm(); + setConfirmDialog(null); + }} + submitLabel={confirmDialog.confirmLabel} + submitTone={confirmDialog.tone === "danger" ? "danger" : "primary"} + /> + ) : null} + + {settingsVerifierPreparationModal ? ( + + void cancelSettingsVerifierStart( + settingsVerifierPreparationModal.benchPackId, + ) + } + /> + ) : verifierPreparationModal ? ( + void stopTabRun(verifierPreparationModal.tabId)} + /> + ) : null} + + {workspaceContextMenu ? ( +
event.stopPropagation()} + > + + +
+ ) : null} + + {detailModal ? ( + setDetailModal(null)} + onSubmit={() => setDetailModal(null)} + submitLabel="Close" + leadingActions={ + + } + > +
+
+ Status + Validation Result +
+ + {detailModal.status} + +
+
{detailModal.rawLog}
+
+ ) : null} +
+ ); } function BenchPackPickerDialog({ - inspections, - open, - setOpen, - onSelectBenchPack, - title = "New Tab", - subtitle = "Pick a Bench Pack to open in this workspace.", - actionLabel = "Open Bench Pack" + inspections, + open, + setOpen, + onSelectBenchPack, + title = "New Tab", + subtitle = "Pick a Bench Pack to open in this workspace.", + actionLabel = "Open Bench Pack", }: { - inspections: BenchPackInspection[]; - open: boolean; - setOpen: (open: boolean) => void; - onSelectBenchPack: (benchPackId: string) => void; - title?: string; - subtitle?: string; - actionLabel?: string; + inspections: BenchPackInspection[]; + open: boolean; + setOpen: (open: boolean) => void; + onSelectBenchPack: (benchPackId: string) => void; + title?: string; + subtitle?: string; + actionLabel?: string; }) { - const [query, setQuery] = useState(""); - const filteredInspections = inspections.filter((inspection) => { - const haystack = [ - inspection.manifest?.name, - inspection.id, - inspection.manifest?.description, - inspection.manifest?.author - ] - .filter(Boolean) - .join(" ") - .toLowerCase(); - - return haystack.includes(query.trim().toLowerCase()); - }); - const [selectedId, setSelectedId] = useState(null); - const selectedInspection = - filteredInspections.find((inspection) => inspection.id === selectedId) ?? - filteredInspections[0] ?? - null; - - useEffect(() => { - if (!open) { - return; - } - - setSelectedId((current) => { - if (current && filteredInspections.some((inspection) => inspection.id === current)) { - return current; - } - - return filteredInspections[0]?.id ?? null; - }); - }, [open, filteredInspections]); - - if (!open) { - return null; - } - - return ( -
-
-
-
-

{title}

-

{subtitle}

-
- -
- -
-
- - -
- {filteredInspections.map((inspection) => ( - - ))} - {filteredInspections.length === 0 ? ( -
No Bench Packs match your search.
- ) : null} -
-
- -
- {selectedInspection ? ( - <> -
-

Bench Pack

-

- {selectedInspection.manifest?.name ?? selectedInspection.id} -

-

- {selectedInspection.manifest?.description ?? "No description provided."} -

-
- -
-
- Author - - {selectedInspection.manifest?.author ?? "Unknown"} - -
-
- Tests - {selectedInspection.scenarioCount ?? 0} -
-
- Version - - {selectedInspection.manifest?.version ?? "n/a"} - -
-
- -
- - {selectedInspection.status.replaceAll("_", " ")} - - - {selectedInspection.manifest?.capabilities.tools ? "Supports tools" : "No tools"} - - - {selectedInspection.manifest?.capabilities.verification ? "Requires verifier" : "No extra dependencies"} - -
- -
- -
- - ) : ( -
-

No Installed Bench Packs

-

Install a Bench Pack from Settings

-

- BenchLocal now starts with zero installed Bench Packs. Open Settings, go to Bench Packs, and install one from the official registry. -

-
- )} -
-
-
-
- ); + const [query, setQuery] = useState(""); + const filteredInspections = inspections.filter((inspection) => { + const haystack = [ + inspection.manifest?.name, + inspection.id, + inspection.manifest?.description, + inspection.manifest?.author, + ] + .filter(Boolean) + .join(" ") + .toLowerCase(); + + return haystack.includes(query.trim().toLowerCase()); + }); + const [selectedId, setSelectedId] = useState(null); + const selectedInspection = + filteredInspections.find((inspection) => inspection.id === selectedId) ?? + filteredInspections[0] ?? + null; + + useEffect(() => { + if (!open) { + return; + } + + setSelectedId((current) => { + if ( + current && + filteredInspections.some((inspection) => inspection.id === current) + ) { + return current; + } + + return filteredInspections[0]?.id ?? null; + }); + }, [open, filteredInspections]); + + if (!open) { + return null; + } + + return ( +
+
+
+
+

{title}

+

+ {subtitle} +

+
+ +
+ +
+
+ + +
+ {filteredInspections.map((inspection) => ( + + ))} + {filteredInspections.length === 0 ? ( +
+ No Bench Packs match your search. +
+ ) : null} +
+
+ +
+ {selectedInspection ? ( + <> +
+

Bench Pack

+

+ {selectedInspection.manifest?.name ?? selectedInspection.id} +

+

+ {selectedInspection.manifest?.description ?? + "No description provided."} +

+
+ +
+
+ Author + + {selectedInspection.manifest?.author ?? "Unknown"} + +
+
+ Tests + + {selectedInspection.scenarioCount ?? 0} + +
+
+ Version + + {selectedInspection.manifest?.version ?? "n/a"} + +
+
+ +
+ + {selectedInspection.status.replaceAll("_", " ")} + + + {selectedInspection.manifest?.capabilities.tools + ? "Supports tools" + : "No tools"} + + + {selectedInspection.manifest?.capabilities.verification + ? "Requires verifier" + : "No extra dependencies"} + +
+ +
+ +
+ + ) : ( +
+

No Installed Bench Packs

+

+ Install a Bench Pack from Settings +

+

+ BenchLocal now starts with zero installed Bench Packs. Open + Settings, go to Bench Packs, and install one from the official + registry. +

+
+ )} +
+
+
+
+ ); } function BenchPackPickerTrigger({ - inspections, - open, - setOpen, - onCreateTab, - disabled + inspections, + open, + setOpen, + onCreateTab, + disabled, }: { - inspections: BenchPackInspection[]; - open: boolean; - setOpen: (open: boolean) => void; - onCreateTab: (benchPackId: string) => void; - disabled?: boolean; + inspections: BenchPackInspection[]; + open: boolean; + setOpen: (open: boolean) => void; + onCreateTab: (benchPackId: string) => void; + disabled?: boolean; }) { - return ( - <> - - - - - ); + return ( + <> + + + + + ); } function BenchmarkSection({ - tabId, - inspection, - verifierStatus, - runBlocker, - selectedModels, - runSummary, - historyEntries, - liveRun, - loadedHistory, - focusedScenarioId, - onFocusScenario, - onEditModels, - onEditSampling, - onEditModelAlias, - executionMode, - isViewingHistory, - onChangeExecutionMode, - onOpenHistory, - isRunning, - isStopping, - onOpenVerification, - onRefreshVerification, - onClearHistory, - onRun, - onStop, - onOpenDetail + tabId, + inspection, + verifierStatus, + runBlocker, + selectedModels, + runSummary, + historyEntries, + liveRun, + loadedHistory, + focusedScenarioId, + onFocusScenario, + onEditModels, + onEditSampling, + onEditModelAlias, + executionMode, + isViewingHistory, + onChangeExecutionMode, + onOpenHistory, + isRunning, + isStopping, + onOpenVerification, + onRefreshVerification, + onClearHistory, + onRun, + onStop, + onOpenDetail, }: { - tabId: string; - inspection: BenchPackInspection; - verifierStatus: BenchPackVerifierStatus | null; - runBlocker: BenchPackRunBlocker | null; - selectedModels: ResolvedTabModel[]; - runSummary: BenchPackRunSummary | null; - historyEntries: BenchPackRunHistoryEntry[]; - liveRun: LiveRunState | null; - loadedHistory: LoadedHistoryEntry | null; - focusedScenarioId: string | null; - onFocusScenario: (scenarioId: string) => void; - onEditModels: () => void; - onEditSampling: () => void; - onEditModelAlias: (model: ResolvedTabModel) => void; - 
executionMode: BenchLocalExecutionMode; - isViewingHistory: boolean; - onChangeExecutionMode: (executionMode: BenchLocalExecutionMode) => void; - onOpenHistory: () => void; - isRunning: boolean; - isStopping: boolean; - onOpenVerification: () => void; - onRefreshVerification: () => void; - onClearHistory: () => void; - onRun: () => void; - onStop: () => void; - onOpenDetail: (detail: DetailModalState) => void; + tabId: string; + inspection: BenchPackInspection; + verifierStatus: BenchPackVerifierStatus | null; + runBlocker: BenchPackRunBlocker | null; + selectedModels: ResolvedTabModel[]; + runSummary: BenchPackRunSummary | null; + historyEntries: BenchPackRunHistoryEntry[]; + liveRun: LiveRunState | null; + loadedHistory: LoadedHistoryEntry | null; + focusedScenarioId: string | null; + onFocusScenario: (scenarioId: string) => void; + onEditModels: () => void; + onEditSampling: () => void; + onEditModelAlias: (model: ResolvedTabModel) => void; + executionMode: BenchLocalExecutionMode; + isViewingHistory: boolean; + onChangeExecutionMode: (executionMode: BenchLocalExecutionMode) => void; + onOpenHistory: () => void; + isRunning: boolean; + isStopping: boolean; + onOpenVerification: () => void; + onRefreshVerification: () => void; + onClearHistory: () => void; + onRun: () => void; + onStop: () => void; + onOpenDetail: (detail: DetailModalState) => void; }) { - const [runModeOpen, setRunModeOpen] = useState(false); - const runModeRef = useRef(null); - const tableScrollViewportRef = useRef(null); - const tableScrollbarTrackRef = useRef(null); - const tableScrollbarDragRef = useRef<{ - startX: number; - startScrollLeft: number; - } | null>(null); - const [tableScrollMetrics, setTableScrollMetrics] = useState({ - clientWidth: 0, - scrollWidth: 0, - scrollLeft: 0 - }); - const scenarios = inspection.scenarios ?? []; - const currentScenario = scenarios.find((scenario) => scenario.id === focusedScenarioId) ?? scenarios[0] ?? 
null; - const highlightedScenarioId = supportsLiveScenarioColumnFocus(executionMode) - ? currentScenario?.id ?? null - : focusedScenarioId; - const hasRetryActivity = (liveRun?.activeCellKeys.length ?? 0) > 0; - const isReplayMode = loadedHistory?.mode === "replay"; - const isResumableRun = Boolean(runSummary) && !isRunSummaryComplete(runSummary) && !isRunning; - const replayRevealedCellCount = Object.values(liveRun?.resultsByModel ?? {}).reduce( - (total, results) => total + results.length, - 0 - ); - const replayTotalCellCount = Object.values(runSummary?.resultsByModel ?? {}).reduce( - (total, results) => total + results.length, - 0 - ); - const currentExecutionModeLabel = - EXECUTION_MODE_OPTIONS.find((option) => option.value === executionMode)?.label ?? "Run Mode"; - const canReplayRun = isReplayMode && Boolean(runSummary) && isRunSummaryComplete(runSummary); - const runButtonLabel = isRunning ? "Stop" : canReplayRun ? "Replay" : isResumableRun ? "Resume Test" : "Run"; - const hasLiveActivity = isRunning || hasRetryActivity; - const hasCompletedReplay = - isReplayMode && - !hasLiveActivity && - replayTotalCellCount > 0 && - replayRevealedCellCount >= replayTotalCellCount; - const canStartFreshRun = inspection.status === "ready" && selectedModels.length > 0; - const canResumeRun = Boolean(runSummary) && isResumableRun; - const isRunButtonDisabled = isRunning - ? false - : hasRetryActivity || isStopping || !(canReplayRun || canResumeRun || (!isViewingHistory && canStartFreshRun)); - const hasHorizontalOverflow = tableScrollMetrics.scrollWidth > tableScrollMetrics.clientWidth + 1; - const stickyColumnShadow = tableScrollMetrics.scrollLeft > 2; - const scrollbarThumbWidth = hasHorizontalOverflow ? getTableScrollbarThumbWidth(tableScrollMetrics) : 0; - const scrollbarThumbOffset = - hasHorizontalOverflow && tableScrollbarTrackRef.current - ? 
((tableScrollMetrics.scrollLeft / Math.max(1, tableScrollMetrics.scrollWidth - tableScrollMetrics.clientWidth)) * - Math.max(0, tableScrollbarTrackRef.current.clientWidth - scrollbarThumbWidth)) - : 0; - - useEffect(() => { - if (!runModeOpen) { - return; - } - - const handlePointerDown = (event: MouseEvent) => { - const target = event.target as Node; - const insideRunMode = runModeRef.current?.contains(target); - - if (!insideRunMode) { - setRunModeOpen(false); - } - }; - - const handleEscape = (event: KeyboardEvent) => { - if (event.key === "Escape") { - setRunModeOpen(false); - } - }; - - window.addEventListener("mousedown", handlePointerDown); - window.addEventListener("keydown", handleEscape); - - return () => { - window.removeEventListener("mousedown", handlePointerDown); - window.removeEventListener("keydown", handleEscape); - }; - }, [runModeOpen]); - - useEffect(() => { - const viewport = tableScrollViewportRef.current; - if (!viewport) { - return; - } - - const updateMetrics = () => { - setTableScrollMetrics({ - clientWidth: viewport.clientWidth, - scrollWidth: viewport.scrollWidth, - scrollLeft: viewport.scrollLeft - }); - }; - - const syncFromViewport = () => { - updateMetrics(); - }; - - updateMetrics(); - viewport.addEventListener("scroll", syncFromViewport); - window.addEventListener("resize", updateMetrics); - - return () => { - viewport.removeEventListener("scroll", syncFromViewport); - window.removeEventListener("resize", updateMetrics); - }; - }, [selectedModels.length, scenarios.length, runSummary, liveRun]); - - useEffect(() => { - const handleMove = (event: MouseEvent) => { - const viewport = tableScrollViewportRef.current; - const track = tableScrollbarTrackRef.current; - const drag = tableScrollbarDragRef.current; - - if (!viewport || !track || !drag) { - return; - } - - const maxScrollLeft = Math.max(0, viewport.scrollWidth - viewport.clientWidth); - const maxThumbOffset = Math.max(1, track.clientWidth - 
getTableScrollbarThumbWidth(tableScrollMetrics)); - const deltaX = event.clientX - drag.startX; - const nextScrollLeft = Math.min( - maxScrollLeft, - Math.max(0, drag.startScrollLeft + (deltaX / maxThumbOffset) * maxScrollLeft) - ); - viewport.scrollLeft = nextScrollLeft; - }; - - const handleUp = () => { - tableScrollbarDragRef.current = null; - document.body.style.userSelect = ""; - }; - - window.addEventListener("mousemove", handleMove); - window.addEventListener("mouseup", handleUp); - - return () => { - window.removeEventListener("mousemove", handleMove); - window.removeEventListener("mouseup", handleUp); - }; - }, [tableScrollMetrics]); - - if (inspection.status !== "ready") { - return ( -
-
-
-

Bench Pack Session

-
-
{inspection.manifest?.name ?? inspection.id}
-
- {inspection.scenarioCount ?? 0} scenarios - {selectedModels.length} models - Idle -
-
-
-
- - - {inspection.status.replaceAll("_", " ")} - -
-
- -
-
-
- -
-

Bench Pack Unavailable

-

- {inspection.manifest?.name ?? inspection.id} cannot run yet -

-

- {inspection.error ?? "This Bench Pack is not installed or is missing its BenchLocal runtime entry."} -

-
- - {inspection.status.replaceAll("_", " ")} - - {selectedModels.length} selected models -
-
-
-
- ); - } - - function renderResultCell(modelId: string, scenarioId: string) { - const liveResult = liveRun?.resultsByModel[modelId]?.find((candidate) => candidate.scenarioId === scenarioId); - const persistedResult = isReplayMode - ? undefined - : runSummary?.resultsByModel[modelId]?.find((candidate) => candidate.scenarioId === scenarioId); - const result = liveResult ?? persistedResult; - const isActive = liveRun?.activeCellKeys.includes(`${modelId}::${scenarioId}`) ?? false; - - if (isActive) { - return ( -
- -
- ); - } - - if (!result) { - return ( -
- {isActive ? : -} -
- ); - } - - const tone = - result.status === "pass" ? "result-pass" : result.status === "partial" ? "result-partial" : "result-fail"; - - return ( - - ); - } - - return ( -
- {loadedHistory && loadedHistory.mode !== "replay" ? ( -
-
- - Loaded test history from {new Date(loadedHistory.startedAt).toLocaleString()}. - - -
-
- ) : null} -
-
-

Bench Pack Session

-
-
{inspection.manifest?.name ?? inspection.id}
-
- {inspection.scenarioCount ?? 0} scenarios - {selectedModels.length} models - - {hasLiveActivity ? "Live" : runSummary ? "Done" : "Idle"} - -
-
-
-
- - -
-
- - {runBlocker ? ( -
-
- entry.required)?.status)}`}> - Verifier blocked - -
-
{runBlocker.title}
-
{runBlocker.message}
-
-
-
- - -
-
- ) : null} - -
-
-
- -
-

Scenario Detail

-

- {currentScenario ? `${currentScenario.id} · ${currentScenario.title}` : "No scenario selected"} -

-
-
- -
-
- -
- {(currentScenario?.detailCards?.length - ? currentScenario.detailCards - : [ - { - title: "What this tests", - content: - currentScenario?.description ?? - "Click a scenario column in the Bench Pack table below to inspect that scenario." - }, - { - title: "Prompt Contract", - content: - currentScenario?.description ?? - "The active scenario follows the selected table column. Richer prompt or methodology detail will appear here as Bench Pack metadata expands." - }, - { - title: "Run Notes", - content: runSummary - ? "Click a scenario column to switch context. Click any result cell to inspect the trace and summary for that model and scenario." - : "Run this Bench Pack, then use the scenario columns in the table below to switch the preview context." - } - ] - ).map((card) => ( - - ))} -
-
- -
-
- -
Test Results
-
-
-
- - {runModeOpen ? ( -
- {EXECUTION_MODE_OPTIONS.map((option) => ( - - ))} -
- ) : null} -
- - -
-
- -
- {selectedModels.length === 0 ? ( -
-
- -
-
-

No models selected

-

Add one or more models to start running this Bench Pack.

-
-
- - -
-
- ) : ( - <> -
- - - - - {scenarios.map((scenario) => ( - - ))} - - - - {selectedModels.map((model) => ( - - - {scenarios.map((scenario) => ( - - ))} - - ))} - -
- Model - -
- -
-
- {isViewingHistory ? ( -
- {model.displayLabel} -
- ) : ( - - )} -
- {renderResultCell(model.id, scenario.id)} -
-
- {hasHorizontalOverflow ? ( -
- - {runSummary && !hasLiveActivity && (!isReplayMode || hasCompletedReplay) ? ( -
- {Object.entries(runSummary.scores).map(([modelId, score]) => ( -
-
-

{selectedModels.find((model) => model.id === modelId)?.displayLabel ?? modelId}

-

{modelId}

-
-
- {score.totalScore} -
- {score.categories.map((category) => ( - - {category.id}: {category.score} - - ))} -
-
-
- ))} -
- ) : null} -
-
-
- ); + const [runModeOpen, setRunModeOpen] = useState(false); + const runModeRef = useRef(null); + const tableScrollViewportRef = useRef(null); + const tableScrollbarTrackRef = useRef(null); + const tableScrollbarDragRef = useRef<{ + startX: number; + startScrollLeft: number; + } | null>(null); + const [tableScrollMetrics, setTableScrollMetrics] = useState({ + clientWidth: 0, + scrollWidth: 0, + scrollLeft: 0, + }); + const scenarios = inspection.scenarios ?? []; + const currentScenario = + scenarios.find((scenario) => scenario.id === focusedScenarioId) ?? + scenarios[0] ?? + null; + const highlightedScenarioId = supportsLiveScenarioColumnFocus(executionMode) + ? (currentScenario?.id ?? null) + : focusedScenarioId; + const hasRetryActivity = (liveRun?.activeCellKeys.length ?? 0) > 0; + const isReplayMode = loadedHistory?.mode === "replay"; + const isResumableRun = + Boolean(runSummary) && !isRunSummaryComplete(runSummary) && !isRunning; + const replayRevealedCellCount = Object.values( + liveRun?.resultsByModel ?? {}, + ).reduce((total, results) => total + results.length, 0); + const replayTotalCellCount = Object.values( + runSummary?.resultsByModel ?? {}, + ).reduce((total, results) => total + results.length, 0); + const currentExecutionModeLabel = + EXECUTION_MODE_OPTIONS.find((option) => option.value === executionMode) + ?.label ?? "Run Mode"; + const canReplayRun = + isReplayMode && Boolean(runSummary) && isRunSummaryComplete(runSummary); + const runButtonLabel = isRunning + ? "Stop" + : canReplayRun + ? "Replay" + : isResumableRun + ? "Resume Test" + : "Run"; + const hasLiveActivity = isRunning || hasRetryActivity; + const hasCompletedReplay = + isReplayMode && + !hasLiveActivity && + replayTotalCellCount > 0 && + replayRevealedCellCount >= replayTotalCellCount; + const canStartFreshRun = + inspection.status === "ready" && selectedModels.length > 0; + const canResumeRun = Boolean(runSummary) && isResumableRun; + const isRunButtonDisabled = isRunning + ? 
false + : hasRetryActivity || + isStopping || + !( + canReplayRun || + canResumeRun || + (!isViewingHistory && canStartFreshRun) + ); + const hasHorizontalOverflow = + tableScrollMetrics.scrollWidth > tableScrollMetrics.clientWidth + 1; + const stickyColumnShadow = tableScrollMetrics.scrollLeft > 2; + const scrollbarThumbWidth = hasHorizontalOverflow + ? getTableScrollbarThumbWidth(tableScrollMetrics) + : 0; + const scrollbarThumbOffset = + hasHorizontalOverflow && tableScrollbarTrackRef.current + ? (tableScrollMetrics.scrollLeft / + Math.max( + 1, + tableScrollMetrics.scrollWidth - tableScrollMetrics.clientWidth, + )) * + Math.max( + 0, + tableScrollbarTrackRef.current.clientWidth - scrollbarThumbWidth, + ) + : 0; + + useEffect(() => { + if (!runModeOpen) { + return; + } + + const handlePointerDown = (event: MouseEvent) => { + const target = event.target as Node; + const insideRunMode = runModeRef.current?.contains(target); + + if (!insideRunMode) { + setRunModeOpen(false); + } + }; + + const handleEscape = (event: KeyboardEvent) => { + if (event.key === "Escape") { + setRunModeOpen(false); + } + }; + + window.addEventListener("mousedown", handlePointerDown); + window.addEventListener("keydown", handleEscape); + + return () => { + window.removeEventListener("mousedown", handlePointerDown); + window.removeEventListener("keydown", handleEscape); + }; + }, [runModeOpen]); + + useEffect(() => { + const viewport = tableScrollViewportRef.current; + if (!viewport) { + return; + } + + const updateMetrics = () => { + setTableScrollMetrics({ + clientWidth: viewport.clientWidth, + scrollWidth: viewport.scrollWidth, + scrollLeft: viewport.scrollLeft, + }); + }; + + const syncFromViewport = () => { + updateMetrics(); + }; + + updateMetrics(); + viewport.addEventListener("scroll", syncFromViewport); + window.addEventListener("resize", updateMetrics); + + return () => { + viewport.removeEventListener("scroll", syncFromViewport); + window.removeEventListener("resize", 
updateMetrics); + }; + }, [selectedModels.length, scenarios.length, runSummary, liveRun]); + + useEffect(() => { + const handleMove = (event: MouseEvent) => { + const viewport = tableScrollViewportRef.current; + const track = tableScrollbarTrackRef.current; + const drag = tableScrollbarDragRef.current; + + if (!viewport || !track || !drag) { + return; + } + + const maxScrollLeft = Math.max( + 0, + viewport.scrollWidth - viewport.clientWidth, + ); + const maxThumbOffset = Math.max( + 1, + track.clientWidth - getTableScrollbarThumbWidth(tableScrollMetrics), + ); + const deltaX = event.clientX - drag.startX; + const nextScrollLeft = Math.min( + maxScrollLeft, + Math.max( + 0, + drag.startScrollLeft + (deltaX / maxThumbOffset) * maxScrollLeft, + ), + ); + viewport.scrollLeft = nextScrollLeft; + }; + + const handleUp = () => { + tableScrollbarDragRef.current = null; + document.body.style.userSelect = ""; + }; + + window.addEventListener("mousemove", handleMove); + window.addEventListener("mouseup", handleUp); + + return () => { + window.removeEventListener("mousemove", handleMove); + window.removeEventListener("mouseup", handleUp); + }; + }, [tableScrollMetrics]); + + if (inspection.status !== "ready") { + return ( +
+
+
+

Bench Pack Session

+
+
+ {inspection.manifest?.name ?? inspection.id} +
+
+ + {inspection.scenarioCount ?? 0} scenarios + + + {selectedModels.length} models + + Idle +
+
+
+
+ + + {inspection.status.replaceAll("_", " ")} + +
+
+ +
+
+
+ +
+

Bench Pack Unavailable

+

+ {inspection.manifest?.name ?? inspection.id} cannot run yet +

+

+ {inspection.error ?? + "This Bench Pack is not installed or is missing its BenchLocal runtime entry."} +

+
+ + {inspection.status.replaceAll("_", " ")} + + + {selectedModels.length} selected models + +
+
+
+
+ ); + } + + function renderResultCell(modelId: string, scenarioId: string) { + const liveResult = liveRun?.resultsByModel[modelId]?.find( + (candidate) => candidate.scenarioId === scenarioId, + ); + const persistedResult = isReplayMode + ? undefined + : runSummary?.resultsByModel[modelId]?.find( + (candidate) => candidate.scenarioId === scenarioId, + ); + const result = liveResult ?? persistedResult; + const isActive = + liveRun?.activeCellKeys.includes(`${modelId}::${scenarioId}`) ?? false; + + if (isActive) { + return ( +
+ +
+ ); + } + + if (!result) { + return ( +
+ {isActive ? ( + + ) : ( + - + )} +
+ ); + } + + const tone = + result.status === "pass" + ? "result-pass" + : result.status === "partial" + ? "result-partial" + : "result-fail"; + + return ( + + ); + } + + return ( +
+ {loadedHistory && loadedHistory.mode !== "replay" ? ( +
+
+ + Loaded test history from{" "} + {new Date(loadedHistory.startedAt).toLocaleString()}. + + +
+
+ ) : null} +
+
+

Bench Pack Session

+
+
+ {inspection.manifest?.name ?? inspection.id} +
+
+ + {inspection.scenarioCount ?? 0} scenarios + + + {selectedModels.length} models + + + {hasLiveActivity ? "Live" : runSummary ? "Done" : "Idle"} + +
+
+
+
+ + +
+
+ + {runBlocker ? ( +
+
+ entry.required)?.status)}`} + > + Verifier blocked + +
+
+ {runBlocker.title} +
+
{runBlocker.message}
+
+
+
+ + +
+
+ ) : null} + +
+
+
+ +
+

Scenario Detail

+

+ {currentScenario + ? `${currentScenario.id} · ${currentScenario.title}` + : "No scenario selected"} +

+
+
+ +
+
+ +
+ {(currentScenario?.detailCards?.length + ? currentScenario.detailCards + : [ + { + title: "What this tests", + content: + currentScenario?.description ?? + "Click a scenario column in the Bench Pack table below to inspect that scenario.", + }, + { + title: "Prompt Contract", + content: + currentScenario?.description ?? + "The active scenario follows the selected table column. Richer prompt or methodology detail will appear here as Bench Pack metadata expands.", + }, + { + title: "Run Notes", + content: runSummary + ? "Click a scenario column to switch context. Click any result cell to inspect the trace and summary for that model and scenario." + : "Run this Bench Pack, then use the scenario columns in the table below to switch the preview context.", + }, + ] + ).map((card) => ( + + ))} +
+
+ +
+
+ +
Test Results
+
+
+
+ + {runModeOpen ? ( +
+ {EXECUTION_MODE_OPTIONS.map((option) => ( + + ))} +
+ ) : null} +
+ + +
+
+ +
+ {selectedModels.length === 0 ? ( +
+
+ +
+
+

+ No models selected +

+

+ Add one or more models to start running this Bench Pack. +

+
+
+ + +
+
+ ) : ( + <> +
+ + + + + {scenarios.map((scenario) => ( + + ))} + + + + {selectedModels.map((model) => ( + + + {scenarios.map((scenario) => ( + + ))} + + ))} + +
+ Model + +
+ +
+
+ {isViewingHistory ? ( +
+ {model.displayLabel} +
+ ) : ( + + )} +
+ {renderResultCell(model.id, scenario.id)} +
+
+ {hasHorizontalOverflow ? ( +
+ + {runSummary && + !hasLiveActivity && + (!isReplayMode || hasCompletedReplay) ? ( +
+ {Object.entries(runSummary.scores).map(([modelId, score]) => ( +
+
+

+ {selectedModels.find((model) => model.id === modelId) + ?.displayLabel ?? modelId} +

+

+ {modelId} +

+
+
+ {score.totalScore} +
+ {score.categories.map((category) => ( + + {category.id}: {category.score} + + ))} +
+
+
+ ))} +
+ ) : null} +
+
+
+ ); } function TabModelsModal({ - providers, - models, - selections, - onClose, - onChange, - onSubmit + providers, + models, + selections, + onClose, + onChange, + onSubmit, }: { - providers: Record; - models: BenchLocalModelConfig[]; - selections: BenchLocalWorkspaceTabModelSelection[]; - onClose: () => void; - onChange: (selections: BenchLocalWorkspaceTabModelSelection[]) => void; - onSubmit: () => void; + providers: Record; + models: BenchLocalModelConfig[]; + selections: BenchLocalWorkspaceTabModelSelection[]; + onClose: () => void; + onChange: (selections: BenchLocalWorkspaceTabModelSelection[]) => void; + onSubmit: () => void; }) { - const [providerFilter, setProviderFilter] = useState("all"); - const [groupFilter, setGroupFilter] = useState("all"); - const [searchQuery, setSearchQuery] = useState(""); - const enabledModels = models.filter((model) => model.enabled); - const editableSelections = normalizeEditableTabModelSelections(selections); - const selectionMap = new Map(editableSelections.map((selection) => [selection.modelId, selection])); - const availableIds = new Set(enabledModels.map((model) => model.id)); - const orderedSelectedIds = editableSelections.map((selection) => selection.modelId).filter((modelId) => availableIds.has(modelId)); - const selectedIdSet = new Set(orderedSelectedIds); - const providerOptions = [ - { value: "all", label: "All Providers" }, - ...Array.from(new Set(enabledModels.map((model) => model.provider))) - .sort((left, right) => (providers[left]?.name ?? left).localeCompare(providers[right]?.name ?? right)) - .map((providerId) => ({ - value: providerId, - label: providers[providerId]?.name ?? providerId - })) - ]; - const groupOptions = [ - { value: "all", label: "All Groups" }, - ...Array.from(new Set(enabledModels.map((model) => model.group.trim() || "__ungrouped__"))) - .sort((left, right) => left.localeCompare(right)) - .map((group) => ({ - value: group, - label: group === "__ungrouped__" ? 
"Ungrouped" : group - })) - ]; - const filteredAvailableModels = enabledModels.filter((model) => { - const normalizedGroup = model.group.trim() || "__ungrouped__"; - const normalizedQuery = searchQuery.trim().toLowerCase(); - const haystack = [ - model.label, - model.id, - model.group, - providers[model.provider]?.name ?? model.provider - ] - .filter(Boolean) - .join(" ") - .toLowerCase(); - - return ( - (providerFilter === "all" || model.provider === providerFilter) && - (groupFilter === "all" || normalizedGroup === groupFilter) && - (!normalizedQuery || haystack.includes(normalizedQuery)) - ); - }); - const selectedModels = orderedSelectedIds - .map((modelId) => enabledModels.find((model) => model.id === modelId)) - .filter((model): model is BenchLocalModelConfig => Boolean(model)); - - const toggleModel = (modelId: string, enabled: boolean) => { - if (enabled) { - const existing = selectionMap.get(modelId); - onChange([...editableSelections, { modelId, alias: existing?.alias }]); - return; - } - - onChange(editableSelections.filter((selection) => selection.modelId !== modelId)); - }; - - const updateAlias = (modelId: string, alias: string) => { - const next = editableSelections.map((selection) => - selection.modelId === modelId ? 
{ ...selection, alias: alias || undefined } : selection - ); - onChange(next); - }; - - const moveSelection = (draggedId: string, targetId: string) => { - if (draggedId === targetId) { - return; - } - - const next = [...editableSelections]; - const fromIndex = next.findIndex((selection) => selection.modelId === draggedId); - const toIndex = next.findIndex((selection) => selection.modelId === targetId); - - if (fromIndex < 0 || toIndex < 0) { - return; - } - - const [moved] = next.splice(fromIndex, 1); - next.splice(toIndex, 0, moved); - onChange(next); - }; - - useEffect(() => { - if (providerFilter !== "all" && !providerOptions.some((option) => option.value === providerFilter)) { - setProviderFilter("all"); - } - }, [providerFilter, providerOptions]); - - useEffect(() => { - if (groupFilter !== "all" && !groupOptions.some((option) => option.value === groupFilter)) { - setGroupFilter("all"); - } - }, [groupFilter, groupOptions]); - - return ( - -
-
-
-

Available Models

- {filteredAvailableModels.length} -
-
- - - -
-
- {filteredAvailableModels.length === 0 ? ( -
-

No models match the current filters.

-
- ) : filteredAvailableModels.map((model) => { - const isSelected = selectedIdSet.has(model.id); - - return ( -
- - -
- {model.group.trim() || "Ungrouped"} -
-
- ); - })} -
-
- -
-
-

Selected Models

- {selectedModels.length} -
-
- {selectedModels.length === 0 ? ( -
-

Select models from the left to add them to this tab.

-
- ) : selectedModels.map((model) => { - const selection = selectionMap.get(model.id); - - return ( -
{ - event.dataTransfer.setData("text/plain", model.id); - event.dataTransfer.effectAllowed = "move"; - }} - onDragOver={(event) => { - event.preventDefault(); - event.dataTransfer.dropEffect = "move"; - }} - onDrop={(event) => { - event.preventDefault(); - moveSelection(event.dataTransfer.getData("text/plain"), model.id); - }} - > - - -
- updateAlias(model.id, event.target.value)} - className="config-input tab-model-alias-input" - /> -
- -
-
-
- ); - })} -
-
-
-
- ); + const [providerFilter, setProviderFilter] = useState("all"); + const [groupFilter, setGroupFilter] = useState("all"); + const [searchQuery, setSearchQuery] = useState(""); + const enabledModels = models.filter((model) => model.enabled); + const editableSelections = normalizeEditableTabModelSelections(selections); + const selectionMap = new Map( + editableSelections.map((selection) => [selection.modelId, selection]), + ); + const availableIds = new Set(enabledModels.map((model) => model.id)); + const orderedSelectedIds = editableSelections + .map((selection) => selection.modelId) + .filter((modelId) => availableIds.has(modelId)); + const selectedIdSet = new Set(orderedSelectedIds); + const providerOptions = [ + { value: "all", label: "All Providers" }, + ...Array.from(new Set(enabledModels.map((model) => model.provider))) + .sort((left, right) => + (providers[left]?.name ?? left).localeCompare( + providers[right]?.name ?? right, + ), + ) + .map((providerId) => ({ + value: providerId, + label: providers[providerId]?.name ?? providerId, + })), + ]; + const groupOptions = [ + { value: "all", label: "All Groups" }, + ...Array.from( + new Set( + enabledModels.map((model) => model.group.trim() || "__ungrouped__"), + ), + ) + .sort((left, right) => left.localeCompare(right)) + .map((group) => ({ + value: group, + label: group === "__ungrouped__" ? "Ungrouped" : group, + })), + ]; + const filteredAvailableModels = enabledModels.filter((model) => { + const normalizedGroup = model.group.trim() || "__ungrouped__"; + const normalizedQuery = searchQuery.trim().toLowerCase(); + const haystack = [ + model.label, + model.id, + model.group, + providers[model.provider]?.name ?? 
model.provider, + ] + .filter(Boolean) + .join(" ") + .toLowerCase(); + + return ( + (providerFilter === "all" || model.provider === providerFilter) && + (groupFilter === "all" || normalizedGroup === groupFilter) && + (!normalizedQuery || haystack.includes(normalizedQuery)) + ); + }); + const selectedModels = orderedSelectedIds + .map((modelId) => enabledModels.find((model) => model.id === modelId)) + .filter((model): model is BenchLocalModelConfig => Boolean(model)); + + const toggleModel = (modelId: string, enabled: boolean) => { + if (enabled) { + const existing = selectionMap.get(modelId); + onChange([...editableSelections, { modelId, alias: existing?.alias }]); + return; + } + + onChange( + editableSelections.filter((selection) => selection.modelId !== modelId), + ); + }; + + const updateAlias = (modelId: string, alias: string) => { + const next = editableSelections.map((selection) => + selection.modelId === modelId + ? { ...selection, alias: alias || undefined } + : selection, + ); + onChange(next); + }; + + const moveSelection = (draggedId: string, targetId: string) => { + if (draggedId === targetId) { + return; + } + + const next = [...editableSelections]; + const fromIndex = next.findIndex( + (selection) => selection.modelId === draggedId, + ); + const toIndex = next.findIndex( + (selection) => selection.modelId === targetId, + ); + + if (fromIndex < 0 || toIndex < 0) { + return; + } + + const [moved] = next.splice(fromIndex, 1); + next.splice(toIndex, 0, moved); + onChange(next); + }; + + useEffect(() => { + if ( + providerFilter !== "all" && + !providerOptions.some((option) => option.value === providerFilter) + ) { + setProviderFilter("all"); + } + }, [providerFilter, providerOptions]); + + useEffect(() => { + if ( + groupFilter !== "all" && + !groupOptions.some((option) => option.value === groupFilter) + ) { + setGroupFilter("all"); + } + }, [groupFilter, groupOptions]); + + return ( + +
+
+
+

Available Models

+ + {filteredAvailableModels.length} + +
+
+ + + +
+
+ {filteredAvailableModels.length === 0 ? ( +
+

+ No models match the current filters. +

+
+ ) : ( + filteredAvailableModels.map((model) => { + const isSelected = selectedIdSet.has(model.id); + + return ( +
+ + +
+ + {model.group.trim() || "Ungrouped"} + +
+
+ ); + }) + )} +
+
+ +
+
+

Selected Models

+ + {selectedModels.length} + +
+
+ {selectedModels.length === 0 ? ( +
+

+ Select models from the left to add them to this tab. +

+
+ ) : ( + selectedModels.map((model) => { + const selection = selectionMap.get(model.id); + + return ( +
{ + event.dataTransfer.setData("text/plain", model.id); + event.dataTransfer.effectAllowed = "move"; + }} + onDragOver={(event) => { + event.preventDefault(); + event.dataTransfer.dropEffect = "move"; + }} + onDrop={(event) => { + event.preventDefault(); + moveSelection( + event.dataTransfer.getData("text/plain"), + model.id, + ); + }} + > + + +
+ + updateAlias(model.id, event.target.value) + } + className="config-input tab-model-alias-input" + /> +
+ +
+
+
+ ); + }) + )} +
+
+
+
+ ); } function ModelBrowserModal({ - state, - onClose, - onQueryChange, - onSelect, - onSubmit + state, + onClose, + onQueryChange, + onSelect, + onSubmit, }: { - state: ModelBrowserModalState; - onClose: () => void; - onQueryChange: (query: string) => void; - onSelect: (modelId: string) => void; - onSubmit: () => void; + state: ModelBrowserModalState; + onClose: () => void; + onQueryChange: (query: string) => void; + onSelect: (modelId: string) => void; + onSubmit: () => void; }) { - const normalizedQuery = state.query.trim().toLowerCase(); - const filteredEntries = state.entries.filter((entry) => { - const haystack = [entry.id, entry.name, entry.ownedBy, entry.modality, entry.pricing] - .filter(Boolean) - .join(" ") - .toLowerCase(); - - return !normalizedQuery || haystack.includes(normalizedQuery); - }); - - return ( - - - -
- {state.loading ? ( -
- -

Loading models from {state.providerName}...

-
- ) : state.error ? ( -
-

{state.error}

-
- ) : filteredEntries.length === 0 ? ( -
-

No models match the current search.

-
- ) : ( - filteredEntries.map((entry) => ( - - )) - )} -
-
- ); + const normalizedQuery = state.query.trim().toLowerCase(); + const filteredEntries = state.entries.filter((entry) => { + const haystack = [ + entry.id, + entry.name, + entry.ownedBy, + entry.modality, + entry.pricing, + ] + .filter(Boolean) + .join(" ") + .toLowerCase(); + + return !normalizedQuery || haystack.includes(normalizedQuery); + }); + + return ( + + + +
+ {state.loading ? ( +
+ +

+ Loading models from {state.providerName}... +

+
+ ) : state.error ? ( +
+

{state.error}

+
+ ) : filteredEntries.length === 0 ? ( +
+

No models match the current search.

+
+ ) : ( + filteredEntries.map((entry) => ( + + )) + )} +
+
+ ); } function SamplingModal({ - benchPackName, - defaults, - form, - onChange, - onClose, - onSubmit + benchPackName, + defaults, + form, + onChange, + onClose, + onSubmit, }: { - benchPackName: string; - defaults: GenerationRequest; - form: SamplingFormState; - onChange: (form: SamplingFormState) => void; - onClose: () => void; - onSubmit: () => void; + benchPackName: string; + defaults: GenerationRequest; + form: SamplingFormState; + onChange: (form: SamplingFormState) => void; + onClose: () => void; + onSubmit: () => void; }) { - const hasEffectiveDefaults = Object.values(defaults).some((value) => value !== undefined); - - return ( - onChange(createSamplingForm())} - className="ghost-button" - > - - Reset Overrides - - } - > - {hasEffectiveDefaults ? ( -
-

- Effective defaults: - {" "} - {SAMPLING_FIELDS.map((field) => { - const value = defaults[field.key as keyof GenerationRequest]; - return value === undefined ? null : ( - - {field.label}: {value} - - ); - }).filter(Boolean).reduce((items, item, index) => { - if (index > 0) { - items.push( · ); - } - items.push(item); - return items; - }, [])} -

-
- ) : ( -
-

This Bench Pack does not define recommended defaults yet. Blank fields mean BenchLocal will use its platform defaults and omit any values that are still unset.

-
- )} -
- {SAMPLING_FIELDS.map((field) => ( - onChange({ - ...form, - [field.key]: value - })} - /> - ))} -
-
- ); + const hasEffectiveDefaults = Object.values(defaults).some( + (value) => value !== undefined, + ); + + return ( + onChange(createSamplingForm())} + className="ghost-button" + > + + Reset Overrides + + } + > + {hasEffectiveDefaults ? ( +
+

+ Effective defaults:{" "} + {SAMPLING_FIELDS.map((field) => { + const value = defaults[field.key as keyof GenerationRequest]; + return value === undefined ? null : ( + + {field.label}: {value} + + ); + }) + .filter(Boolean) + .reduce((items, item, index) => { + if (index > 0) { + items.push( · ); + } + items.push(item); + return items; + }, [])} +

+
+ ) : ( +
+

+ This Bench Pack does not define recommended defaults yet. Blank + fields mean BenchLocal will use its platform defaults and omit any + values that are still unset. +

+
+ )} +
+ {SAMPLING_FIELDS.map((field) => ( + + onChange({ + ...form, + [field.key]: value, + }) + } + /> + ))} +
+
+ ); } function EmptyWorkspace({ - providerCount, - modelCount, - installedBenchPackCount, - onOpenProviders, - onOpenModels, - onOpenBenchPacks, - onSelectBenchPack + providerCount, + modelCount, + installedBenchPackCount, + onOpenProviders, + onOpenModels, + onOpenBenchPacks, + onSelectBenchPack, }: { - providerCount: number; - modelCount: number; - installedBenchPackCount: number; - onOpenProviders: () => void; - onOpenModels: () => void; - onOpenBenchPacks: () => void; - onSelectBenchPack?: () => void; + providerCount: number; + modelCount: number; + installedBenchPackCount: number; + onOpenProviders: () => void; + onOpenModels: () => void; + onOpenBenchPacks: () => void; + onSelectBenchPack?: () => void; }) { - const hasProviders = providerCount > 0; - const hasModels = modelCount > 0; - const hasInstalledBenchPacks = installedBenchPackCount > 0; - const checklist = [ - { - key: "providers", - complete: hasProviders, - title: "Set up providers", - detail: hasProviders ? `${providerCount} configured` : "Add at least one provider endpoint.", - actionLabel: "Providers", - onAction: onOpenProviders - }, - { - key: "models", - complete: hasModels, - title: "Add models", - detail: hasModels ? `${modelCount} configured` : "Create shared models that point to your providers.", - actionLabel: "Models", - onAction: onOpenModels - }, - { - key: "benchpacks", - complete: hasInstalledBenchPacks, - title: "Install Bench Packs", - detail: hasInstalledBenchPacks ? `${installedBenchPackCount} installed` : "Install at least one Bench Pack from the official registry.", - actionLabel: "Bench Packs", - onAction: onOpenBenchPacks - } - ]; - - return ( -
-
-
- -
-

No Active Bench Pack

-

Select a Bench Pack to open its workspace

-

- Complete the setup checklist below. BenchLocal keeps providers and models shared across the app, while each Bench Pack owns its own scenarios, sampling defaults, and scoring. -

- -
- {checklist.map((item) => ( -
- -
-
{item.title}
-
{item.detail}
-
- {item.complete ? ( - Done - ) : ( - - )} -
- ))} -
- - {hasInstalledBenchPacks && onSelectBenchPack ? ( - - ) : null} -
-
- ); + const hasProviders = providerCount > 0; + const hasModels = modelCount > 0; + const hasInstalledBenchPacks = installedBenchPackCount > 0; + const checklist = [ + { + key: "providers", + complete: hasProviders, + title: "Set up providers", + detail: hasProviders + ? `${providerCount} configured` + : "Add at least one provider endpoint.", + actionLabel: "Providers", + onAction: onOpenProviders, + }, + { + key: "models", + complete: hasModels, + title: "Add models", + detail: hasModels + ? `${modelCount} configured` + : "Create shared models that point to your providers.", + actionLabel: "Models", + onAction: onOpenModels, + }, + { + key: "benchpacks", + complete: hasInstalledBenchPacks, + title: "Install Bench Packs", + detail: hasInstalledBenchPacks + ? `${installedBenchPackCount} installed` + : "Install at least one Bench Pack from the official registry.", + actionLabel: "Bench Packs", + onAction: onOpenBenchPacks, + }, + ]; + + return ( +
+
+
+ +
+

No Active Bench Pack

+

+ Select a Bench Pack to open its workspace +

+

+ Complete the setup checklist below. BenchLocal keeps providers and + models shared across the app, while each Bench Pack owns its own + scenarios, sampling defaults, and scoring. +

+ +
+ {checklist.map((item) => ( +
+ +
+
{item.title}
+
{item.detail}
+
+ {item.complete ? ( + Done + ) : ( + + )} +
+ ))} +
+ + {hasInstalledBenchPacks && onSelectBenchPack ? ( + + ) : null} +
+
+ ); } function DetachedLogsWindow() { - const [state, setState] = useState({ - workspaceName: "No Workspace", - tabTitle: "No Active Tab", - eventCount: 0, - events: [] - }); - const [autoScroll, setAutoScroll] = useState(true); - const [systemPrefersDark, setSystemPrefersDark] = useState( - typeof window !== "undefined" ? window.matchMedia("(prefers-color-scheme: dark)").matches : false - ); - const [themeDefinition, setThemeDefinition] = useState(null); - const logContainerRef = useRef(null); - const appliedThemeKeysRef = useRef([]); - - useEffect(() => { - return window.benchlocal.logs.onDetachedState((nextState) => { - setState(nextState); - }); - }, []); - - useEffect(() => { - if (typeof window === "undefined") { - return; - } - - const media = window.matchMedia("(prefers-color-scheme: dark)"); - const handleChange = () => { - setSystemPrefersDark(media.matches); - }; - - handleChange(); - media.addEventListener("change", handleChange); - - return () => { - media.removeEventListener("change", handleChange); - }; - }, []); - - useEffect(() => { - let cancelled = false; - - const loadTheme = async () => { - const configResult = await window.benchlocal.config.load(); - const requestedThemeId = configResult.config.ui.theme === "system" - ? systemPrefersDark - ? 
"dark" - : "light" - : configResult.config.ui.theme; - const nextTheme = await window.benchlocal.themes.load({ themeId: requestedThemeId }); - - if (!cancelled) { - setThemeDefinition(nextTheme); - } - }; - - void loadTheme(); - - return () => { - cancelled = true; - }; - }, [systemPrefersDark]); - - useEffect(() => { - if (!themeDefinition || typeof document === "undefined") { - return; - } - - const root = document.documentElement; - - for (const key of appliedThemeKeysRef.current) { - root.style.removeProperty(key); - } - - for (const [key, value] of Object.entries(themeDefinition.variables)) { - root.style.setProperty(key, value); - } - - appliedThemeKeysRef.current = Object.keys(themeDefinition.variables); - root.style.setProperty("color-scheme", themeDefinition.colorScheme); - root.dataset.theme = themeDefinition.id; - }, [themeDefinition]); - - useEffect(() => { - if (!autoScroll || !logContainerRef.current) { - return; - } - - logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight; - }, [state, autoScroll]); - - useEffect(() => { - document.title = `Run Logs - ${state.workspaceName} - ${state.tabTitle}`; - }, [state.workspaceName, state.tabTitle]); - - return ( -
-
-
-

{state.workspaceName} · {state.tabTitle}

-
-
- - {state.eventCount} events - -
-
- - {state.events.length > 0 ? ( -
- {state.events.map((event, index) => ( -
- {event.type} - {JSON.stringify(event)} -
- ))} -
- ) : ( -
No run logs are being streamed yet.
- )} -
- ); + const [state, setState] = useState({ + workspaceName: "No Workspace", + tabTitle: "No Active Tab", + eventCount: 0, + events: [], + }); + const [autoScroll, setAutoScroll] = useState(true); + const [systemPrefersDark, setSystemPrefersDark] = useState( + typeof window !== "undefined" + ? window.matchMedia("(prefers-color-scheme: dark)").matches + : false, + ); + const [themeDefinition, setThemeDefinition] = + useState(null); + const logContainerRef = useRef(null); + const appliedThemeKeysRef = useRef([]); + + useEffect(() => { + // onDetachedState removed in web version + return () => {}; + }, []); + + useEffect(() => { + if (typeof window === "undefined") { + return; + } + + const media = window.matchMedia("(prefers-color-scheme: dark)"); + const handleChange = () => { + setSystemPrefersDark(media.matches); + }; + + handleChange(); + media.addEventListener("change", handleChange); + + return () => { + media.removeEventListener("change", handleChange); + }; + }, []); + + useEffect(() => { + let cancelled = false; + + const loadTheme = async () => { + const configResult = await bl.config.load(); + const requestedThemeId = + configResult.config.ui.theme === "system" + ? systemPrefersDark + ? 
"dark" + : "light" + : configResult.config.ui.theme; + const nextTheme = await bl.themes.load(requestedThemeId); + + if (!cancelled) { + setThemeDefinition(nextTheme); + } + }; + + void loadTheme(); + + return () => { + cancelled = true; + }; + }, [systemPrefersDark]); + + useEffect(() => { + if (!themeDefinition || typeof document === "undefined") { + return; + } + + const root = document.documentElement; + + for (const key of appliedThemeKeysRef.current) { + root.style.removeProperty(key); + } + + for (const [key, value] of Object.entries(themeDefinition.variables)) { + root.style.setProperty(key, value); + } + + appliedThemeKeysRef.current = Object.keys(themeDefinition.variables); + root.style.setProperty("color-scheme", themeDefinition.colorScheme); + root.dataset.theme = themeDefinition.id; + }, [themeDefinition]); + + useEffect(() => { + if (!autoScroll || !logContainerRef.current) { + return; + } + + logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight; + }, [state, autoScroll]); + + useEffect(() => { + document.title = `Run Logs - ${state.workspaceName} - ${state.tabTitle}`; + }, [state.workspaceName, state.tabTitle]); + + return ( +
+
+
+

+ {state.workspaceName} · {state.tabTitle} +

+
+
+ + + {state.eventCount} events + + +
+
+ + {state.events.length > 0 ? ( +
+ {state.events.map((event, index) => ( +
+ {event.type} + {JSON.stringify(event)} +
+ ))} +
+ ) : ( +
+ No run logs are being streamed yet. +
+ )} +
+ ); } function SettingsScene({ - settingsTab, - setSettingsTab, - settingsNotice, - error, - draft, - loadState, - hasUnsavedChanges, - isBusy, - providerIds, - benchPackInspections, - registryEntries, - registryWarning, - benchPackMutations, - verifierStatuses, - onBack, - onDismissNotice, - onDismissError, - onSaveAdvanced, - onResetAdvanced, - onCreateProvider, - onEditProvider, - onCreateModel, - onEditModel, - onStartVerifier, - onStopVerifier, - onDeleteVerifierImage, - onRefreshRegistry, - onInstallBenchPack, - onInstallBenchPackFromUrl, - onUpdateBenchPack, - onUninstallBenchPack, - updateDraft, - onUpdateVerifier + settingsTab, + setSettingsTab, + settingsNotice, + error, + draft, + loadState, + hasUnsavedChanges, + isBusy, + providerIds, + benchPackInspections, + registryEntries, + registryWarning, + benchPackMutations, + verifierStatuses, + onBack, + onDismissNotice, + onDismissError, + onSaveAdvanced, + onResetAdvanced, + onCreateProvider, + onEditProvider, + onCreateModel, + onEditModel, + onStartVerifier, + onStopVerifier, + onDeleteVerifierImage, + onRefreshRegistry, + onInstallBenchPack, + onInstallBenchPackFromUrl, + onUpdateBenchPack, + onUninstallBenchPack, + updateDraft, + onUpdateVerifier, }: { - settingsTab: SettingsTab; - setSettingsTab: (tab: SettingsTab) => void; - settingsNotice: string | null; - error: string | null; - draft: BenchLocalConfig; - loadState: LoadState | null; - hasUnsavedChanges: boolean; - isBusy: boolean; - providerIds: string[]; - benchPackInspections: BenchPackInspection[]; - registryEntries: BenchPackRegistryEntry[]; - registryWarning: string | null; - benchPackMutations: Record; - verifierStatuses: Record; - onBack: () => void; - onDismissNotice: () => void; - onDismissError: () => void; - onSaveAdvanced: () => void; - onResetAdvanced: () => void; - onCreateProvider: () => void; - onEditProvider: (providerId: string) => void; - onCreateModel: () => void; - onEditModel: (index: number) => void; - onStartVerifier: 
(benchPackId: string, benchPackName: string, verifierId: string) => Promise; - onStopVerifier: (benchPackId: string) => Promise; - onDeleteVerifierImage: (benchPackId: string, benchPackName: string, verifierId: string) => void; - onRefreshRegistry: () => void; - onInstallBenchPack: (benchPackId: string) => void; - onInstallBenchPackFromUrl: (url: string) => Promise; - onUpdateBenchPack: (benchPackId: string) => void; - onUninstallBenchPack: (benchPackId: string) => void; - updateDraft: (updater: (current: BenchLocalConfig) => BenchLocalConfig) => void; - onUpdateVerifier: ( - benchPackId: string, - verifierId: string, - updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig - ) => void; + settingsTab: SettingsTab; + setSettingsTab: (tab: SettingsTab) => void; + settingsNotice: string | null; + error: string | null; + draft: BenchLocalConfig; + loadState: LoadState | null; + hasUnsavedChanges: boolean; + isBusy: boolean; + providerIds: string[]; + benchPackInspections: BenchPackInspection[]; + registryEntries: BenchPackRegistryEntry[]; + registryWarning: string | null; + benchPackMutations: Record; + verifierStatuses: Record; + onBack: () => void; + onDismissNotice: () => void; + onDismissError: () => void; + onSaveAdvanced: () => void; + onResetAdvanced: () => void; + onCreateProvider: () => void; + onEditProvider: (providerId: string) => void; + onCreateModel: () => void; + onEditModel: (index: number) => void; + onStartVerifier: ( + benchPackId: string, + benchPackName: string, + verifierId: string, + ) => Promise; + onStopVerifier: (benchPackId: string) => Promise; + onDeleteVerifierImage: ( + benchPackId: string, + benchPackName: string, + verifierId: string, + ) => void; + onRefreshRegistry: () => void; + onInstallBenchPack: (benchPackId: string) => void; + onInstallBenchPackFromUrl: (url: string) => Promise; + onUpdateBenchPack: (benchPackId: string) => void; + onUninstallBenchPack: (benchPackId: string) => void; + updateDraft: ( + updater: 
(current: BenchLocalConfig) => BenchLocalConfig, + ) => void; + onUpdateVerifier: ( + benchPackId: string, + verifierId: string, + updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig, + ) => void; }) { - return ( -
- - -
- {settingsNotice ? ( - -
- {settingsNotice} - -
-
- ) : null} - {error ? ( - -
- {error} - -
-
- ) : null} -
- {settingsTab === "providers" ? ( - - ) : null} - - {settingsTab === "models" ? ( - - ) : null} - - {settingsTab === "benchPacks" ? ( - - ) : null} - - {settingsTab === "verification" ? ( - { - await onStartVerifier(benchPackId, benchPackName, verifierId); - }} - onStop={async (benchPackId) => { - await onStopVerifier(benchPackId); - }} - onDeleteImage={(benchPackId, benchPackName, verifierId) => { - onDeleteVerifierImage(benchPackId, benchPackName, verifierId); - }} - /> - ) : null} - - {settingsTab === "advanced" ? ( -
- }> - undefined} /> - updateDraft((current) => { - current.run_storage_dir = value; - return current; - })} /> - updateDraft((current) => { - current.benchpack_storage_dir = value; - return current; - })} /> - updateDraft((current) => { - current.log_storage_dir = value; - return current; - })} /> - updateDraft((current) => { - current.cache_dir = value; - return current; - })} /> -
-

These paths are saved to ~/.benchlocal/config.toml.

-
-
- - -
-
-
- ) : null} -
-
-
- ); + return ( +
+ + +
+ {settingsNotice ? ( + +
+ {settingsNotice} + +
+
+ ) : null} + {error ? ( + +
+ {error} + +
+
+ ) : null} +
+ {settingsTab === "providers" ? ( + + ) : null} + + {settingsTab === "models" ? ( + + ) : null} + + {settingsTab === "benchPacks" ? ( + + ) : null} + + {settingsTab === "verification" ? ( + { + await onStartVerifier(benchPackId, benchPackName, verifierId); + }} + onStop={async (benchPackId) => { + await onStopVerifier(benchPackId); + }} + onDeleteImage={(benchPackId, benchPackName, verifierId) => { + onDeleteVerifierImage(benchPackId, benchPackName, verifierId); + }} + /> + ) : null} + + {settingsTab === "advanced" ? ( +
+ } + > + undefined} + /> + + updateDraft((current) => { + current.run_storage_dir = value; + return current; + }) + } + /> + + updateDraft((current) => { + current.benchpack_storage_dir = value; + return current; + }) + } + /> + + updateDraft((current) => { + current.log_storage_dir = value; + return current; + }) + } + /> + + updateDraft((current) => { + current.cache_dir = value; + return current; + }) + } + /> +
+

+ These paths are saved to{" "} + ~/.benchlocal/config.toml. +

+
+
+ + +
+
+
+ ) : null} +
+
+
+ ); } function ProvidersView({ - providers, - models, - onCreate, - onEdit + providers, + models, + onCreate, + onEdit, }: { - providers: Record; - models: BenchLocalModelConfig[]; - onCreate: () => void; - onEdit: (providerId: string) => void; + providers: Record; + models: BenchLocalModelConfig[]; + onCreate: () => void; + onEdit: (providerId: string) => void; }) { - const providerIds = Object.keys(providers); - - return ( - } - actions={ - - } - > - - - - - - - - - - - - - - {providerIds.map((providerId) => { - const provider = providers[providerId]; - const linkedModels = models.filter((model) => model.provider === providerId).length; - - return ( - - - - - - - - - ); - })} - -
ProviderTypeStatusBase URLModelsActions
-
{provider.name}
-
-
{providerKindLabel(provider.kind)}
-
- - {provider.enabled ? "active" : "inactive"} - - {provider.base_url}{linkedModels} -
- -
-
-
-
- ); + const providerIds = Object.keys(providers); + + return ( + } + actions={ + + } + > + + + + + + + + + + + + + + {providerIds.map((providerId) => { + const provider = providers[providerId]; + const linkedModels = models.filter( + (model) => model.provider === providerId, + ).length; + + return ( + + + + + + + + + ); + })} + +
ProviderTypeStatusBase URLModelsActions
+
{provider.name}
+
+
+ {providerKindLabel(provider.kind)} +
+
+ + {provider.enabled ? "active" : "inactive"} + + {provider.base_url}{linkedModels} +
+ +
+
+
+
+ ); } function ModelsView({ - models, - providers, - providerIds, - onCreate, - onEdit + models, + providers, + providerIds, + onCreate, + onEdit, }: { - models: BenchLocalModelConfig[]; - providers: Record; - providerIds: string[]; - onCreate: () => void; - onEdit: (index: number) => void; + models: BenchLocalModelConfig[]; + providers: Record; + providerIds: string[]; + onCreate: () => void; + onEdit: (index: number) => void; }) { - const [providerFilter, setProviderFilter] = useState("all"); - const [groupFilter, setGroupFilter] = useState("all"); - const [searchQuery, setSearchQuery] = useState(""); - const providerOptions = [ - { value: "all", label: "All Providers" }, - ...Array.from(new Set(models.map((model) => model.provider))) - .sort((left, right) => (providers[left]?.name ?? left).localeCompare(providers[right]?.name ?? right)) - .map((providerId) => ({ - value: providerId, - label: providers[providerId]?.name ?? providerId - })) - ]; - const groupOptions = [ - { value: "all", label: "All Groups" }, - ...Array.from(new Set(models.map((model) => model.group.trim() || "__ungrouped__"))) - .sort((left, right) => left.localeCompare(right)) - .map((group) => ({ - value: group, - label: group === "__ungrouped__" ? "Ungrouped" : group - })) - ]; - const filteredModels = models - .map((model, index) => ({ model, index })) - .filter(({ model }) => { - const normalizedGroup = model.group.trim() || "__ungrouped__"; - const normalizedQuery = searchQuery.trim().toLowerCase(); - const providerName = providers[model.provider]?.name ?? 
model.provider; - const haystack = [model.label, model.id, model.model, model.group, providerName, model.provider] - .filter(Boolean) - .join(" ") - .toLowerCase(); - - return ( - (providerFilter === "all" || model.provider === providerFilter) && - (groupFilter === "all" || normalizedGroup === groupFilter) && - (!normalizedQuery || haystack.includes(normalizedQuery)) - ); - }); - - useEffect(() => { - if (providerFilter !== "all" && !providerOptions.some((option) => option.value === providerFilter)) { - setProviderFilter("all"); - } - }, [providerFilter, providerOptions]); - - useEffect(() => { - if (groupFilter !== "all" && !groupOptions.some((option) => option.value === groupFilter)) { - setGroupFilter("all"); - } - }, [groupFilter, groupOptions]); - - return ( - } - actions={ - - } - > -
- - - -
- - - - - - - - - - - - - - {filteredModels.length === 0 ? ( - - - - ) : ( - filteredModels.map(({ model, index }) => ( - - - - - - - - - )) - )} - -
LabelStatusProviderModelGroupActions
-
No models match the current filters.
-
-
{model.label}
-
{model.id}
-
- - {model.enabled ? "active" : "inactive"} - - {providers[model.provider]?.name ?? model.provider.split("-")[0] ?? model.provider}{model.model}{model.group} -
- -
-
-
-
- ); + const [providerFilter, setProviderFilter] = useState("all"); + const [groupFilter, setGroupFilter] = useState("all"); + const [searchQuery, setSearchQuery] = useState(""); + const providerOptions = [ + { value: "all", label: "All Providers" }, + ...Array.from(new Set(models.map((model) => model.provider))) + .sort((left, right) => + (providers[left]?.name ?? left).localeCompare( + providers[right]?.name ?? right, + ), + ) + .map((providerId) => ({ + value: providerId, + label: providers[providerId]?.name ?? providerId, + })), + ]; + const groupOptions = [ + { value: "all", label: "All Groups" }, + ...Array.from( + new Set(models.map((model) => model.group.trim() || "__ungrouped__")), + ) + .sort((left, right) => left.localeCompare(right)) + .map((group) => ({ + value: group, + label: group === "__ungrouped__" ? "Ungrouped" : group, + })), + ]; + const filteredModels = models + .map((model, index) => ({ model, index })) + .filter(({ model }) => { + const normalizedGroup = model.group.trim() || "__ungrouped__"; + const normalizedQuery = searchQuery.trim().toLowerCase(); + const providerName = providers[model.provider]?.name ?? model.provider; + const haystack = [ + model.label, + model.id, + model.model, + model.group, + providerName, + model.provider, + ] + .filter(Boolean) + .join(" ") + .toLowerCase(); + + return ( + (providerFilter === "all" || model.provider === providerFilter) && + (groupFilter === "all" || normalizedGroup === groupFilter) && + (!normalizedQuery || haystack.includes(normalizedQuery)) + ); + }); + + useEffect(() => { + if ( + providerFilter !== "all" && + !providerOptions.some((option) => option.value === providerFilter) + ) { + setProviderFilter("all"); + } + }, [providerFilter, providerOptions]); + + useEffect(() => { + if ( + groupFilter !== "all" && + !groupOptions.some((option) => option.value === groupFilter) + ) { + setGroupFilter("all"); + } + }, [groupFilter, groupOptions]); + + return ( + } + actions={ + + } + > +
+ + + +
+ + + + + + + + + + + + + + {filteredModels.length === 0 ? ( + + + + ) : ( + filteredModels.map(({ model, index }) => ( + + + + + + + + + )) + )} + +
LabelStatusProviderModelGroupActions
+
+ No models match the current filters. +
+
+
{model.label}
+
+ {model.id} +
+
+ + {model.enabled ? "active" : "inactive"} + + + {providers[model.provider]?.name ?? + model.provider.split("-")[0] ?? + model.provider} + {model.model}{model.group} +
+ +
+
+
+
+ ); } function BenchPackRegistryView({ - draft, - inspections, - registryEntries, - registryWarning, - benchPackMutations, - onRefresh, - onInstall, - onInstallFromUrl, - onUpdate, - onUninstall + draft, + inspections, + registryEntries, + registryWarning, + benchPackMutations, + onRefresh, + onInstall, + onInstallFromUrl, + onUpdate, + onUninstall, }: { - draft: BenchLocalConfig; - inspections: BenchPackInspection[]; - registryEntries: BenchPackRegistryEntry[]; - registryWarning: string | null; - benchPackMutations: Record; - onRefresh: () => void; - onInstall: (benchPackId: string) => void; - onInstallFromUrl: (url: string) => Promise; - onUpdate: (benchPackId: string) => void; - onUninstall: (benchPackId: string) => void; + draft: BenchLocalConfig; + inspections: BenchPackInspection[]; + registryEntries: BenchPackRegistryEntry[]; + registryWarning: string | null; + benchPackMutations: Record; + onRefresh: () => void; + onInstall: (benchPackId: string) => void; + onInstallFromUrl: (url: string) => Promise; + onUpdate: (benchPackId: string) => void; + onUninstall: (benchPackId: string) => void; }) { - const [manualUrl, setManualUrl] = useState(""); - const inspectionsById = Object.fromEntries(inspections.map((inspection) => [inspection.id, inspection])); - const hasActiveMutation = Object.keys(benchPackMutations).length > 0; - const officialRows = registryEntries.map((entry) => { - const installed = draft.benchpacks[entry.id]; - const inspection = inspectionsById[entry.id]; - const mutation = benchPackMutations[entry.id]; - const updateAvailable = - Boolean(installed) && - (installed?.version !== entry.version || - (entry.source.type === "github" ? installed?.ref !== entry.source.tag : false)); - - return { - id: entry.id, - name: entry.name, - description: entry.description ?? "No description provided.", - version: entry.version, - installedVersion: installed?.version, - installed: Boolean(installed), - status: installed ? inspection?.status ?? 
"not_installed" : "not_installed", - mutation, - updateAvailable, - isRegistryEntry: true - } as const; - }); - const thirdPartyRows = Object.entries(draft.benchpacks) - .filter(([, benchPack]) => benchPack.source !== "registry") - .map(([benchPackId, benchPack]) => { - const inspection = inspectionsById[benchPackId]; - const mutation = benchPackMutations[benchPackId]; - - return { - id: benchPackId, - name: inspection?.manifest?.name ?? benchPackId, - description: inspection?.manifest?.description ?? "Installed from a third-party source maintained outside BenchLocal.", - version: benchPack.version ?? inspection?.manifest?.version ?? "unknown", - status: inspection?.status ?? "not_installed", - sourceLabel: - benchPack.source === "archive" - ? benchPack.url ?? "Archive URL" - : benchPack.source === "github" - ? benchPack.repo ?? "GitHub" - : benchPack.source === "local" - ? benchPack.path ?? "Local path" - : benchPack.source, - mutation - } as const; - }); - - return ( -
- } - actions={} - > - {registryWarning ? {registryWarning} : null} - - - - - - - - - - - - - {officialRows.length === 0 ? ( - - - - ) : ( - officialRows.map((row) => { - const isMutating = Boolean(row.mutation); - const disableRowAction = hasActiveMutation && !isMutating; - - return ( - - - - - - - - ); - }) - )} - -
NameDescriptionVersionStatusActions
-
- {registryWarning - ? "The official registry is currently unavailable." - : "No Bench Packs are available in the official registry."} -
-
-
{row.name}
-
{row.description} -
-
- {row.installed && row.updateAvailable && row.installedVersion ? ( - <> - v{row.installedVersion} - - v{row.version} - - ) : ( - v{row.version} - )} -
- {row.installed && row.isRegistryEntry && row.updateAvailable ? ( - - ) : null} -
-
- - {row.mutation ? benchPackMutationLabel(row.mutation) : row.installed ? row.status.replaceAll("_", " ") : "available"} - - -
- {row.installed ? ( - - ) : ( - - )} -
-
-
-
- - } - > -
-

Third-party Bench Packs are maintained by their authors, not by BenchLocal. Only install packages from sources you trust.

-
-
- - -
- - - - - - - - - - - - - - - {thirdPartyRows.length === 0 ? ( - - - - ) : ( - thirdPartyRows.map((row) => { - const isMutating = Boolean(row.mutation); - const disableRowAction = hasActiveMutation && !isMutating; - - return ( - - - - - - - - - ); - }) - )} - -
NameDescriptionVersionSourceStatusActions
-
No third-party Bench Packs are installed.
-
-
{row.name}
-
{row.description}v{row.version}{row.sourceLabel} - - {row.mutation ? benchPackMutationLabel(row.mutation) : row.status.replaceAll("_", " ")} - - -
- -
-
-
-
-
- ); + const [manualUrl, setManualUrl] = useState(""); + const inspectionsById = Object.fromEntries( + inspections.map((inspection) => [inspection.id, inspection]), + ); + const hasActiveMutation = Object.keys(benchPackMutations).length > 0; + const officialRows = registryEntries.map((entry) => { + const installed = draft.benchpacks[entry.id]; + const inspection = inspectionsById[entry.id]; + const mutation = benchPackMutations[entry.id]; + const updateAvailable = + Boolean(installed) && + (installed?.version !== entry.version || + (entry.source.type === "github" + ? installed?.ref !== entry.source.tag + : false)); + + return { + id: entry.id, + name: entry.name, + description: entry.description ?? "No description provided.", + version: entry.version, + installedVersion: installed?.version, + installed: Boolean(installed), + status: installed + ? (inspection?.status ?? "not_installed") + : "not_installed", + mutation, + updateAvailable, + isRegistryEntry: true, + } as const; + }); + const thirdPartyRows = Object.entries(draft.benchpacks) + .filter(([, benchPack]: any) => benchPack.source !== "registry") + .map(([benchPackId, benchPack]) => { + const inspection = inspectionsById[benchPackId]; + const mutation = benchPackMutations[benchPackId]; + + return { + id: benchPackId, + name: inspection?.manifest?.name ?? benchPackId, + description: + inspection?.manifest?.description ?? + "Installed from a third-party source maintained outside BenchLocal.", + version: + benchPack.version ?? inspection?.manifest?.version ?? "unknown", + status: inspection?.status ?? "not_installed", + sourceLabel: + benchPack.source === "archive" + ? (benchPack.url ?? "Archive URL") + : benchPack.source === "github" + ? (benchPack.repo ?? "GitHub") + : benchPack.source === "local" + ? (benchPack.path ?? "Local path") + : benchPack.source, + mutation, + } as const; + }); + + return ( +
+ } + actions={ + + } + > + {registryWarning ? ( + {registryWarning} + ) : null} + + + + + + + + + + + + + {officialRows.length === 0 ? ( + + + + ) : ( + officialRows.map((row) => { + const isMutating = Boolean(row.mutation); + const disableRowAction = hasActiveMutation && !isMutating; + + return ( + + + + + + + + ); + }) + )} + +
NameDescriptionVersionStatusActions
+
+ {registryWarning + ? "The official registry is currently unavailable." + : "No Bench Packs are available in the official registry."} +
+
+
+ {row.name} +
+
{row.description} +
+
+ {row.installed && + row.updateAvailable && + row.installedVersion ? ( + <> + v{row.installedVersion} + + v{row.version} + + ) : ( + v{row.version} + )} +
+ {row.installed && + row.isRegistryEntry && + row.updateAvailable ? ( + + ) : null} +
+
+ + {row.mutation + ? benchPackMutationLabel(row.mutation) + : row.installed + ? row.status.replaceAll("_", " ") + : "available"} + + +
+ {row.installed ? ( + + ) : ( + + )} +
+
+
+
+ + } + > +
+

+ Third-party Bench Packs are maintained by their authors, not by + BenchLocal. Only install packages from sources you trust. +

+
+
+ + +
+ + + + + + + + + + + + + + + {thirdPartyRows.length === 0 ? ( + + + + ) : ( + thirdPartyRows.map((row) => { + const isMutating = Boolean(row.mutation); + const disableRowAction = hasActiveMutation && !isMutating; + + return ( + + + + + + + + + ); + }) + )} + +
NameDescriptionVersionSourceStatusActions
+
+ No third-party Bench Packs are installed. +
+
+
+ {row.name} +
+
{row.description}v{row.version}{row.sourceLabel} + + {row.mutation + ? benchPackMutationLabel(row.mutation) + : row.status.replaceAll("_", " ")} + + +
+ +
+
+
+
+
+ ); } function verifierModeLabel(mode: BenchLocalVerifierConfig["mode"]): string { - switch (mode) { - case "cloud": - return "BenchLocal Cloud"; - case "custom_url": - return "Custom URL"; - case "docker": - default: - return "Local Docker"; - } + switch (mode) { + case "cloud": + return "BenchLocal Cloud"; + case "custom_url": + return "Custom URL"; + case "docker": + default: + return "Local Docker"; + } } function VerificationView({ - draft, - statuses, - onUpdate, - onStart, - onStop, - onDeleteImage + draft, + statuses, + onUpdate, + onStart, + onStop, + onDeleteImage, }: { - draft: BenchLocalConfig; - statuses: Record; - onUpdate: (benchPackId: string, verifierId: string, updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig) => void; - onStart: (benchPackId: string, benchPackName: string, verifierId: string) => Promise; - onStop: (benchPackId: string) => Promise; - onDeleteImage: (benchPackId: string, benchPackName: string, verifierId: string) => void; + draft: BenchLocalConfig; + statuses: Record; + onUpdate: ( + benchPackId: string, + verifierId: string, + updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig, + ) => void; + onStart: ( + benchPackId: string, + benchPackName: string, + verifierId: string, + ) => Promise; + onStop: (benchPackId: string) => Promise; + onDeleteImage: ( + benchPackId: string, + benchPackName: string, + verifierId: string, + ) => void; }) { - const verificationEntries = Object.entries(draft.benchpacks).filter(([benchPackId]) => { - const status = statuses[benchPackId]; - return Boolean(status && status.verifiers.length > 0); - }); - - const rows = verificationEntries.flatMap(([benchPackId, benchPack]) => { - const status = statuses[benchPackId]; - const inspectionName = status?.benchPackName ?? benchPackId; - - return Object.entries(benchPack.verifiers ?? 
{}).map(([verifierId, verifier]) => { - const runtime = status?.verifiers.find((entry) => entry.id === verifierId); - return { - benchPackId, - benchPackName: inspectionName, - verifierId, - verifier, - runtime, - docker: status?.docker - }; - }); - }); - - return ( - } - > - - - - - - - - - - - - - - {rows.length === 0 ? ( - - - - ) : ( - rows.map(({ benchPackId, benchPackName, verifierId, verifier, runtime, docker }) => ( - - - - - - - - - )) - )} - -
Bench PackModeStatusEndpointAuto StartActions
-
No installed Bench Packs currently require a verifier.
-
-
{benchPackName}
-
- - onUpdate(benchPackId, verifierId, (current) => ({ - ...current, - mode: value as BenchLocalVerifierConfig["mode"] - })) - } - /> - - - {formatVerifierRuntimeStatus(runtime?.status)} - - -
- {runtime?.url ?? "Managed by BenchLocal"} -
-
- Docker: {docker?.state === "ready" - ? docker.details ?? "ready" - : docker?.state === "not_running" - ? docker.details ?? "not running" - : docker?.details ?? "not installed"} -
-
-
- - onUpdate(benchPackId, verifierId, (current) => ({ - ...current, - auto_start: event.target.checked - })) - } - /> -
-
-
- {runtime?.status === "running" ? ( - - ) : ( - - )} - {runtime?.dockerImagePresent ? ( - - ) : null} -
-
-
-
- ); + const verificationEntries = Object.entries(draft.benchpacks).filter( + ([benchPackId]) => { + const status = statuses[benchPackId]; + return Boolean(status && status.verifiers.length > 0); + }, + ); + + const rows = verificationEntries.flatMap(([benchPackId, benchPack]) => { + const status = statuses[benchPackId]; + const inspectionName = status?.benchPackName ?? benchPackId; + + return Object.entries(benchPack.verifiers ?? {}).map( + ([verifierId, verifier]) => { + const runtime = status?.verifiers.find( + (entry) => entry.id === verifierId, + ); + return { + benchPackId, + benchPackName: inspectionName, + verifierId, + verifier, + runtime, + docker: status?.docker, + }; + }, + ); + }); + + return ( + } + > + + + + + + + + + + + + + + {rows.length === 0 ? ( + + + + ) : ( + rows.map( + ({ + benchPackId, + benchPackName, + verifierId, + verifier, + runtime, + docker, + }) => ( + + + + + + + + + ), + ) + )} + +
Bench PackModeStatusEndpointAuto StartActions
+
+ No installed Bench Packs currently require a verifier. +
+
+
+ {benchPackName} +
+
+ + onUpdate(benchPackId, verifierId, (current) => ({ + ...current, + mode: value as BenchLocalVerifierConfig["mode"], + })) + } + /> + + + {formatVerifierRuntimeStatus(runtime?.status)} + + +
+ {runtime?.url ?? "Managed by BenchLocal"} +
+
+ Docker:{" "} + {docker?.state === "ready" + ? (docker.details ?? "ready") + : docker?.state === "not_running" + ? (docker.details ?? "not running") + : (docker?.details ?? "not installed")} +
+
+
+ + onUpdate(benchPackId, verifierId, (current) => ({ + ...current, + auto_start: event.target.checked, + })) + } + /> +
+
+
+ {runtime?.status === "running" ? ( + + ) : ( + + )} + {runtime?.dockerImagePresent ? ( + + ) : null} +
+
+
+
+ ); } function Panel({ - title, - subtitle, - tone, - icon, - actions, - children + title, + subtitle, + tone, + icon, + actions, + children, }: { - title: string; - subtitle: string; - tone: "sky" | "orange" | "slate"; - icon?: ReactNode; - actions?: ReactNode; - children: ReactNode; + title: string; + subtitle: string; + tone: "sky" | "orange" | "slate"; + icon?: ReactNode; + actions?: ReactNode; + children: ReactNode; }) { - return ( -
-
-
-
{icon}
-
-

{title}

-

{subtitle}

-
-
- {actions ?
{actions}
: null} -
-
{children}
-
- ); + return ( +
+
+
+
{icon}
+
+

{title}

+

{subtitle}

+
+
+ {actions ?
{actions}
: null} +
+
{children}
+
+ ); } function DetailCard({ title, content }: { title: string; content: string }) { - const toneClass = - title === "What this tests" - ? "is-blue" - : title === "Prompt Contract" - ? "is-amber" - : "is-slate"; - - const lines = content.split("\n"); - - return ( -
-
-

{title}

-
-

- {lines.map((line, lineIndex) => ( - - {line.split(/(`[^`]+`)/g).map((part, partIndex) => { - if (part.startsWith("`") && part.endsWith("`") && part.length >= 2) { - return ( - - {part.slice(1, -1)} - - ); - } - - return {part}; - })} - {lineIndex < lines.length - 1 ?
: null} -
- ))} -

-
- ); + const toneClass = + title === "What this tests" + ? "is-blue" + : title === "Prompt Contract" + ? "is-amber" + : "is-slate"; + + const lines = content.split("\n"); + + return ( +
+
+

{title}

+
+

+ {lines.map((line, lineIndex) => ( + + {line.split(/(`[^`]+`)/g).map((part, partIndex) => { + if ( + part.startsWith("`") && + part.endsWith("`") && + part.length >= 2 + ) { + return ( + + {part.slice(1, -1)} + + ); + } + + return ( + {part} + ); + })} + {lineIndex < lines.length - 1 ?
: null} +
+ ))} +

+
+ ); } function HistoryModal({ - benchPackName, - entries, - onClose, - onOpenRun, - onRemoveAll + benchPackName, + entries, + onClose, + onOpenRun, + onRemoveAll, }: { - benchPackName: string; - entries: BenchPackRunHistoryEntry[]; - onClose: () => void; - onOpenRun: (runId: string, mode: "history" | "replay") => void; - onRemoveAll: () => void; + benchPackName: string; + entries: BenchPackRunHistoryEntry[]; + onClose: () => void; + onOpenRun: (runId: string, mode: "history" | "replay") => void; + onRemoveAll: () => void; }) { - return ( -
-
-
-
-

Test Histories

-

{benchPackName}

-
- -
- -
- - - - - - - - - - - - - - {entries.map((entry) => { - const executionModeLabel = - EXECUTION_MODE_OPTIONS.find((option) => option.value === entry.executionMode)?.label ?? "Unknown"; - - return ( - - - - - - - - - ); - })} - -
Date TimeModeModelsCasesStatusAction
-
{new Date(entry.startedAt).toLocaleString()}
-
- {executionModeLabel} - - {entry.modelCount} - - {entry.scenarioCount} - - - {entry.error ? "error" : entry.cancelled ? "stopped" : "completed"} - - - -
-
-
- -
- -
-
-
- ); + return ( +
+
+
+
+

Test Histories

+

+ {benchPackName} +

+
+ +
+ +
+ + + + + + + + + + + + + + {entries.map((entry) => { + const executionModeLabel = + EXECUTION_MODE_OPTIONS.find( + (option) => option.value === entry.executionMode, + )?.label ?? "Unknown"; + + return ( + + + + + + + + + ); + })} + +
Date TimeModeModelsCasesStatusAction
+
+ {new Date(entry.startedAt).toLocaleString()} +
+
+ + {executionModeLabel} + + + + {entry.modelCount} + + + + {entry.scenarioCount} + + + + {entry.error + ? "error" + : entry.cancelled + ? "stopped" + : "completed"} + + + +
+
+
+ +
+ +
+
+
+ ); } function VerifierPreparationModal({ - benchPackName, - verifierId, - message, - isCancelling, - onCancel + benchPackName, + verifierId, + message, + isCancelling, + onCancel, }: { - benchPackName: string; - verifierId: string; - message: string; - isCancelling?: boolean; - onCancel?: () => void; + benchPackName: string; + verifierId: string; + message: string; + isCancelling?: boolean; + onCancel?: () => void; }) { - return ( -
-
-
-
- -
-
-

Preparing Verifier

-

{benchPackName}

-

- BenchLocal is preparing {verifierId} before the run can start. -

-
-
- -

{message}

- - {onCancel ? ( -
- -
- ) : null} -
-
- ); + return ( +
+
+
+
+ +
+
+

Preparing Verifier

+

{benchPackName}

+

+ BenchLocal is preparing{" "} + {verifierId} before + the run can start. +

+
+
+ +

+ {message} +

+ + {onCancel ? ( +
+ +
+ ) : null} +
+
+ ); } -function Banner({ tone, children }: { tone: "success" | "danger" | "neutral" | "warning"; children: ReactNode }) { - const toneClass = - tone === "success" - ? "banner-success" - : tone === "danger" - ? "banner-danger" - : tone === "warning" - ? "banner-warning" - : "banner-neutral"; - return
{children}
; +function Banner({ + tone, + children, +}: { + tone: "success" | "danger" | "neutral" | "warning"; + children: ReactNode; +}) { + const toneClass = + tone === "success" + ? "banner-success" + : tone === "danger" + ? "banner-danger" + : tone === "warning" + ? "banner-warning" + : "banner-neutral"; + return
{children}
; } function AboutDialog({ - metadata, - updateState, - onCheckForUpdates, - onInstallUpdate, - onClose + metadata, + updateState, + onCheckForUpdates, + onInstallUpdate, + onClose, }: { - metadata: BenchLocalAppMetadata | null; - updateState: BenchLocalUpdateState | null; - onCheckForUpdates: () => void; - onInstallUpdate: () => void; - onClose: () => void; + metadata: BenchLocalAppMetadata | null; + updateState: BenchLocalUpdateState | null; + onCheckForUpdates: () => void; + onInstallUpdate: () => void; + onClose: () => void; }) { - const dialogRef = useRef(null); - const productName = metadata?.productName ?? "BenchLocal"; - const version = metadata?.version?.trim(); - const updateMessage = describeAppUpdateState(updateState); - const checkedAtLabel = formatAppUpdateCheckedAt(updateState?.checkedAt); - const updateFeedLabel = updateState?.feedLabel?.trim() || "GitHub Releases"; - const updateFeedUrl = updateState?.feedUrl?.trim(); - const progressPercent = - typeof updateState?.progressPercent === "number" ? Math.max(0, Math.min(100, updateState.progressPercent)) : null; - const canCheckForUpdates = - updateState?.status !== "checking" && - updateState?.status !== "downloading" && - updateState?.status !== "available" && - updateState?.status !== "unsupported"; - const updateActionLabel = - updateState?.status === "downloaded" - ? "Restart to Update" - : updateState?.status === "checking" - ? "Checking..." - : updateState?.status === "downloading" || updateState?.status === "available" - ? progressPercent !== null - ? `Downloading ${Math.round(progressPercent)}%` - : "Downloading..." 
- : "Check for Updates"; - - useEffect(() => { - const frameId = window.requestAnimationFrame(() => { - dialogRef.current?.focus(); - }); - - return () => { - window.cancelAnimationFrame(frameId); - }; - }, []); - - useEffect(() => { - const handleKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape" || event.key === "Enter") { - event.preventDefault(); - onClose(); - } - }; - - window.addEventListener("keydown", handleKeyDown); - - return () => { - window.removeEventListener("keydown", handleKeyDown); - }; - }, [onClose]); - - return ( -
-
- -
- -

{productName}

- {version ?

Version {version}

: null} - {metadata?.copyright ?

{metadata.copyright}

: null} -
-
- Self Update - {updateState?.availableVersion ? v{updateState.availableVersion} : null} -
-

{updateMessage}

-

- Feed: {updateFeedUrl ? `${updateFeedLabel} (${updateFeedUrl})` : updateFeedLabel} -

- {progressPercent !== null ? ( -
-
- -
- {Math.round(progressPercent)}% -
- ) : null} - {checkedAtLabel ?

Last checked: {checkedAtLabel}

: null} - {updateState?.releaseNotes ?
{updateState.releaseNotes}
: null} -
- -
-
-
-
-
- ); + const dialogRef = useRef(null); + const productName = metadata?.productName ?? "BenchLocal"; + const version = metadata?.version?.trim(); + const updateMessage = describeAppUpdateState(updateState); + const checkedAtLabel = formatAppUpdateCheckedAt(updateState?.checkedAt); + const updateFeedLabel = updateState?.feedLabel?.trim() || "GitHub Releases"; + const updateFeedUrl = updateState?.feedUrl?.trim(); + const progressPercent = + typeof updateState?.progressPercent === "number" + ? Math.max(0, Math.min(100, updateState.progressPercent)) + : null; + const canCheckForUpdates = + updateState?.status !== "checking" && + updateState?.status !== "downloading" && + updateState?.status !== "available" && + updateState?.status !== "unsupported"; + const updateActionLabel = + updateState?.status === "downloaded" + ? "Restart to Update" + : updateState?.status === "checking" + ? "Checking..." + : updateState?.status === "downloading" || + updateState?.status === "available" + ? progressPercent !== null + ? `Downloading ${Math.round(progressPercent)}%` + : "Downloading..." + : "Check for Updates"; + + useEffect(() => { + const frameId = window.requestAnimationFrame(() => { + dialogRef.current?.focus(); + }); + + return () => { + window.cancelAnimationFrame(frameId); + }; + }, []); + + useEffect(() => { + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === "Escape" || event.key === "Enter") { + event.preventDefault(); + onClose(); + } + }; + + window.addEventListener("keydown", handleKeyDown); + + return () => { + window.removeEventListener("keydown", handleKeyDown); + }; + }, [onClose]); + + return ( +
+
+ +
+ +

{productName}

+ {version ? ( +

Version {version}

+ ) : null} + {metadata?.copyright ? ( +

{metadata.copyright}

+ ) : null} +
+
+ Self Update + {updateState?.availableVersion ? ( + + v{updateState.availableVersion} + + ) : null} +
+

{updateMessage}

+

+ Feed:{" "} + {updateFeedUrl + ? `${updateFeedLabel} (${updateFeedUrl})` + : updateFeedLabel} +

+ {progressPercent !== null ? ( +
+
+ +
+ + {Math.round(progressPercent)}% + +
+ ) : null} + {checkedAtLabel ? ( +

+ Last checked: {checkedAtLabel} +

+ ) : null} + {updateState?.releaseNotes ? ( +
+								{updateState.releaseNotes}
+							
+ ) : null} +
+ +
+
+
+
+
+ ); } function Modal({ - title, - subtitle, - onClose, - onSubmit, - submitLabel, - submitTone = "primary", - size = "default", - leadingActions, - children + title, + subtitle, + onClose, + onSubmit, + submitLabel, + submitTone = "primary", + size = "default", + leadingActions, + children, }: { - title: string; - subtitle?: string; - onClose: () => void; - onSubmit: () => void; - submitLabel: string; - submitTone?: "primary" | "danger"; - size?: "default" | "wide"; - leadingActions?: ReactNode; - children?: ReactNode; + title: string; + subtitle?: string; + onClose: () => void; + onSubmit: () => void; + submitLabel: string; + submitTone?: "primary" | "danger"; + size?: "default" | "wide"; + leadingActions?: ReactNode; + children?: ReactNode; }) { - const hasBody = Boolean(children); - const hasSubtitle = Boolean(subtitle?.trim()); - const dialogRef = useRef(null); - const submitButtonRef = useRef(null); - - useEffect(() => { - const frameId = window.requestAnimationFrame(() => { - const activeElement = document.activeElement; - const dialog = dialogRef.current; - - if (!dialog) { - return; - } - - if (activeElement instanceof HTMLElement && dialog.contains(activeElement)) { - return; - } - - submitButtonRef.current?.focus(); - }); - - return () => { - window.cancelAnimationFrame(frameId); - }; - }, []); - - useEffect(() => { - const handleKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape") { - event.preventDefault(); - onClose(); - return; - } - - if (event.key !== "Enter" || event.metaKey || event.ctrlKey || event.altKey || event.shiftKey || event.isComposing) { - return; - } - - const target = event.target; - - if (target instanceof HTMLElement && (target.tagName === "TEXTAREA" || target.isContentEditable)) { - return; - } - - event.preventDefault(); - onSubmit(); - }; - - window.addEventListener("keydown", handleKeyDown); - - return () => { - window.removeEventListener("keydown", handleKeyDown); - }; - }, [onClose, onSubmit]); - - return ( -
-
-
-
-

{title}

- {hasSubtitle ?

{subtitle}

: null} -
- -
- - {hasBody ?
{children}
: null} - -
-
{leadingActions}
- -
-
-
- ); + const hasBody = Boolean(children); + const hasSubtitle = Boolean(subtitle?.trim()); + const dialogRef = useRef(null); + const submitButtonRef = useRef(null); + + useEffect(() => { + const frameId = window.requestAnimationFrame(() => { + const activeElement = document.activeElement; + const dialog = dialogRef.current; + + if (!dialog) { + return; + } + + if ( + activeElement instanceof HTMLElement && + dialog.contains(activeElement) + ) { + return; + } + + submitButtonRef.current?.focus(); + }); + + return () => { + window.cancelAnimationFrame(frameId); + }; + }, []); + + useEffect(() => { + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === "Escape") { + event.preventDefault(); + onClose(); + return; + } + + if ( + event.key !== "Enter" || + event.metaKey || + event.ctrlKey || + event.altKey || + event.shiftKey || + event.isComposing + ) { + return; + } + + const target = event.target; + + if ( + target instanceof HTMLElement && + (target.tagName === "TEXTAREA" || target.isContentEditable) + ) { + return; + } + + event.preventDefault(); + onSubmit(); + }; + + window.addEventListener("keydown", handleKeyDown); + + return () => { + window.removeEventListener("keydown", handleKeyDown); + }; + }, [onClose, onSubmit]); + + return ( +
+
+
+
+

{title}

+ {hasSubtitle ? ( +

+ {subtitle} +

+ ) : null} +
+ +
+ + {hasBody ?
{children}
: null} + +
+
{leadingActions}
+ +
+
+
+ ); } function Field({ - label, - value, - onChange, - placeholder, - type = "text", - readOnly = false, - className = "" + label, + value, + onChange, + placeholder, + type = "text", + readOnly = false, + className = "", }: { - label?: string; - value: string; - onChange: (value: string) => void; - placeholder?: string; - type?: string; - readOnly?: boolean; - className?: string; + label?: string; + value: string; + onChange: (value: string) => void; + placeholder?: string; + type?: string; + readOnly?: boolean; + className?: string; }) { - return ( - - ); + return ( + + ); } function ToggleRow({ - label, - checked, - onChange + label, + checked, + onChange, }: { - label: string; - checked: boolean; - onChange: (checked: boolean) => void; + label: string; + checked: boolean; + onChange: (checked: boolean) => void; }) { - return ( - - ); + return ( + + ); } function FieldToggle({ - label, - checked, - onChange + label, + checked, + onChange, }: { - label: string; - checked: boolean; - onChange: (checked: boolean) => void; + label: string; + checked: boolean; + onChange: (checked: boolean) => void; }) { - return ( - - ); + return ( + + ); } function InlineSelectField({ - label, - value, - options, - getOptionLabel, - onChange + label, + value, + options, + getOptionLabel, + onChange, }: { - label: string; - value: string; - options: Array; - getOptionLabel?: (value: string) => string; - onChange: (value: string) => void; + label: string; + value: string; + options: Array< + string | { value: string; label?: string; disabled?: boolean } + >; + getOptionLabel?: (value: string) => string; + onChange: (value: string) => void; }) { - return ( - - ); + return ( + + ); } function statusClasses(status: BenchPackInspection["status"]): string { - switch (status) { - case "ready": - return "status-ready"; - case "not_installed": - return "status-not-installed"; - case "incompatible": - return "status-load-error"; - case "manifest_missing": - case "entry_missing": - return 
"status-entry-missing"; - case "invalid_manifest": - case "load_error": - return "status-load-error"; - } + switch (status) { + case "ready": + return "status-ready"; + case "not_installed": + return "status-not-installed"; + case "incompatible": + return "status-load-error"; + case "manifest_missing": + case "entry_missing": + return "status-entry-missing"; + case "invalid_manifest": + case "load_error": + return "status-load-error"; + } } diff --git a/app/src/renderer/src/api/client.ts b/app/src/renderer/src/api/client.ts new file mode 100644 index 0000000..163f3c7 --- /dev/null +++ b/app/src/renderer/src/api/client.ts @@ -0,0 +1,101 @@ +const BASE = "/api"; + +async function api( + method: string, + path: string, + body?: unknown, +): Promise { + const res = await fetch(`${BASE}${path}`, { + method, + headers: body ? { "Content-Type": "application/json" } : {}, + body: body ? JSON.stringify(body) : undefined, + }); + if (!res.ok) { + const text = await res.text(); + throw new Error(`API ${res.status}: ${text}`); + } + return res.json() as T; +} + +export const bl = { + config: { + load: () => api("GET", "/config"), + save: (c: any) => api("PUT", "/config", { config: c }), + }, + workspaces: { + load: () => api("GET", "/workspaces"), + save: (s: any) => api("PUT", "/workspaces", { state: s }), + export: (id: string, state: any) => + api("POST", "/workspaces/export", { workspaceId: id, state }), + import: (data: any) => api("POST", "/workspaces/import", data), + }, + benchPacks: { + list: () => api("GET", "/benchpacks"), + registry: () => api("GET", "/benchpacks/registry"), + install: (id: string) => api("POST", `/benchpacks/${id}/install`), + installFromUrl: (url: string) => + api("POST", "/benchpacks/install-from-url", { url }), + update: (id: string) => api("POST", `/benchpacks/${id}/update`), + uninstall: (id: string) => api("POST", `/benchpacks/${id}/uninstall`), + activeRuns: () => api("GET", "/benchpacks/active-runs"), + run: (input: any) => api("POST", 
"/benchpacks/run", input), + retryScenario: (input: any) => + api("POST", "/benchpacks/retry-scenario", input), + resumeRun: (input: any) => api("POST", "/benchpacks/resume-run", input), + stop: (tabId: string) => api("POST", "/benchpacks/stop", { tabId }), + history: (id: string) => api("GET", `/benchpacks/${id}/history`), + loadHistory: (id: string, runId: string) => + api("GET", `/benchpacks/${id}/history/${runId}`), + clearHistory: (id: string) => + api("POST", `/benchpacks/${id}/history/clear`), + // Stubs for Electron IPC event listeners (replaced by SSE in web mode) + onRunEvent: (listener: (payload: any) => void) => () => {}, + onMutationProgress: (listener: (payload: any) => void) => () => {}, + }, + verifiers: { + list: () => api("GET", "/verifiers"), + start: (id: string) => api("POST", "/verifiers/start", { benchPackId: id }), + stop: (id: string) => api("POST", "/verifiers/stop", { benchPackId: id }), + cancelStart: (id: string) => + api("POST", "/verifiers/cancel-start", { benchPackId: id }), + deleteImage: (benchPackId: string, verifierId: string) => + api("POST", "/verifiers/delete-image", { benchPackId, verifierId }), + onProgress: (listener: (payload: any) => void) => () => {}, + }, + themes: { + list: () => api("GET", "/themes"), + load: (id: string) => api("GET", `/themes/${id}`), + }, + models: { + discover: (provider: any) => api("POST", "/models/discover", { provider }), + }, + app: { + metadata: () => api("GET", "/metadata"), + // Stubs for Electron IPC (not available in web mode) + onOpenAbout: (listener: () => void) => () => {}, + onOpenSettings: (listener: () => void) => () => {}, + }, + updates: { + state: () => + Promise.resolve({ + status: "unsupported" as const, + currentVersion: "0.0.0", + } as any), + check: () => + Promise.resolve({ + status: "unsupported" as const, + currentVersion: "0.0.0", + } as any), + install: () => Promise.resolve({ started: false }), + onState: (listener: (state: any) => void) => () => {}, + }, + logs: { + 
closeDetachedWindow: () => Promise.resolve({ closed: false }), + openDetachedWindow: () => Promise.resolve({ opened: false }), + publishDetachedState: (_state: any) => Promise.resolve(), + onDetachedState: (listener: (state: any) => void) => () => {}, + onDetachedWindowClosed: (listener: () => void) => () => {}, + }, + // SSE connection + sse: () => new EventSource(`${BASE}/events/sse`), +} as const; diff --git a/app/src/renderer/src/vite-env-web.d.ts b/app/src/renderer/src/vite-env-web.d.ts new file mode 100644 index 0000000..9757262 --- /dev/null +++ b/app/src/renderer/src/vite-env-web.d.ts @@ -0,0 +1,20 @@ +declare module "*.png" { + const src: string; + export default src; +} +declare module "*.svg" { + const src: string; + export default src; +} +declare module "*.jpg" { + const src: string; + export default src; +} +declare module "*.jpeg" { + const src: string; + export default src; +} +declare module "*.gif" { + const src: string; + export default src; +} diff --git a/app/src/server/api-routes.ts b/app/src/server/api-routes.ts new file mode 100644 index 0000000..2a44c15 --- /dev/null +++ b/app/src/server/api-routes.ts @@ -0,0 +1,346 @@ +import { + clearRunHistoryForBenchPack, + deleteConfiguredBenchPackVerifierImage, + getConfiguredBenchPackVerifierStatus, + inspectConfiguredBenchPacks, + installBenchPackFromRegistry, + installBenchPackFromUrl, + listRunHistoryForBenchPack, + loadBenchPackRegistry, + loadRunSummaryForBenchPack, + resumeBenchPackRun, + retryScenarioForBenchPackRun, + runConfiguredBenchPack, + startConfiguredBenchPackVerifiers, + stopConfiguredBenchPackVerifiers, + uninstallBenchPack, + updateBenchPackFromRegistry, +} from "@benchlocal/benchpack-host"; +import { + getConfigPath, + getWorkspaceStatePath, + loadOrCreateConfig, + loadOrCreateWorkspaceState, + saveConfigFile, + saveWorkspaceStateFile, +} from "@benchlocal/core"; +import type { FastifyInstance } from "fastify"; +import { loadAppMetadata } from "./app-metadata"; +import { 
discoverProviderModels } from "./models"; +import { activeRunManager } from "./run-manager"; +import { sseBus } from "./sse-bus"; +import { listAvailableThemes, loadAvailableTheme } from "./themes"; + +async function compat() { + const meta = await loadAppMetadata(); + return { benchLocalVersion: meta.version }; +} + +export function registerApiRoutes(server: FastifyInstance) { + const api = server; + + // --- metadata --- + api.get("/api/metadata", () => loadAppMetadata()); + + // --- config --- + api.get("/api/config", async () => { + const r = await loadOrCreateConfig(); + return { path: r.path, created: r.created, config: r.config }; + }); + + api.put("/api/config", async (req: any) => { + const saved = await saveConfigFile( + (req.body as any).config, + getConfigPath(), + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + // --- workspaces --- + api.get("/api/workspaces", async () => { + await loadOrCreateConfig(); + const r = await loadOrCreateWorkspaceState(getWorkspaceStatePath()); + return { path: r.path, created: r.created, state: r.state }; + }); + + api.put("/api/workspaces", async (req: any) => { + await loadOrCreateConfig(); + const saved = await saveWorkspaceStateFile( + (req.body as any).state, + getWorkspaceStatePath(), + ); + return { path: getWorkspaceStatePath(), created: false, state: saved }; + }); + + // --- workspaces: export (file download) --- + api.post("/api/workspaces/export", async (req, reply) => { + const { workspaceId, state } = req.body as any; + const workspace = state.workspaces[workspaceId]; + if (!workspace) throw new Error(`Workspace "${workspaceId}" not found.`); + + const tabs = Object.fromEntries( + workspace.tabIds + .map((id: string) => state.tabs[id]) + .filter(Boolean) + .map((tab: any) => [tab.id, tab]), + ); + + const name = + (workspace.name.replace(/[^a-z0-9.-]/gi, "-") || "workspace") + + ".benchlocal-workspace.json"; + + reply.header("Content-Disposition", `attachment; 
filename="${name}"`); + reply.header("Content-Type", "application/json"); + return { + schemaVersion: 1, + exportedAt: new Date().toISOString(), + workspace, + tabs, + }; + }); + + // --- workspaces: import (file upload) --- + api.post("/api/workspaces/import", async (req: any) => { + const data = req.body as any; + if (!data.workspace || !data.tabs) { + throw new Error("Import file is missing workspace or tab data."); + } + return { imported: true, workspace: data.workspace, tabs: data.tabs }; + }); + + // --- bench packs --- + api.get("/api/benchpacks", async () => { + const { config } = await loadOrCreateConfig(); + return inspectConfiguredBenchPacks(config, await compat()); + }); + + api.get("/api/benchpacks/registry", async () => { + const { config } = await loadOrCreateConfig(); + return loadBenchPackRegistry(config); + }); + + api.post("/api/benchpacks/:benchPackId/install", async (req: any) => { + const { config } = await loadOrCreateConfig(); + const saved = await installBenchPackFromRegistry( + config, + (req.params as any).benchPackId, + (p) => sseBus.emit("benchpack-mutation-progress", p), + await compat(), + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + api.post("/api/benchpacks/install-from-url", async (req: any) => { + const { config } = await loadOrCreateConfig(); + const saved = await installBenchPackFromUrl( + config, + (req.body as any).url, + (p) => sseBus.emit("benchpack-mutation-progress", p), + await compat(), + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + api.post("/api/benchpacks/:benchPackId/update", async (req: any) => { + const { config } = await loadOrCreateConfig(); + const saved = await updateBenchPackFromRegistry( + config, + (req.params as any).benchPackId, + (p) => sseBus.emit("benchpack-mutation-progress", p), + await compat(), + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + api.post("/api/benchpacks/:benchPackId/uninstall", 
async (req: any) => { + const { config } = await loadOrCreateConfig(); + const saved = await uninstallBenchPack( + config, + (req.params as any).benchPackId, + (p) => sseBus.emit("benchpack-mutation-progress", p), + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + // --- active runs --- + api.get("/api/benchpacks/active-runs", () => activeRunManager.listActive()); + + // --- run --- + api.post("/api/benchpacks/run", async (req: any) => { + const input = req.body as any; + const { config } = await loadOrCreateConfig(); + const controller = new AbortController(); + activeRunManager.setActive(input.tabId, { + benchPackId: input.benchPackId, + controller, + }); + + try { + return await runConfiguredBenchPack( + config, + input.benchPackId, + { + modelIds: input.modelIds, + executionMode: input.executionMode, + generation: input.generation, + abortSignal: controller.signal, + onEvent: (event) => + sseBus.emit("run-event", { tabId: input.tabId, event }), + }, + await compat(), + ); + } finally { + activeRunManager.clearActive(input.tabId); + } + }); + + // --- retry scenario --- + api.post("/api/benchpacks/retry-scenario", async (req: any) => { + const input = req.body as any; + const { config } = await loadOrCreateConfig(); + return retryScenarioForBenchPackRun( + config, + input.benchPackId, + { + runId: input.runId, + scenarioId: input.scenarioId, + modelId: input.modelId, + generation: input.generation, + onEvent: (event) => + sseBus.emit("run-event", { tabId: input.tabId, event }), + }, + await compat(), + ); + }); + + // --- resume run --- + api.post("/api/benchpacks/resume-run", async (req: any) => { + const input = req.body as any; + const { config } = await loadOrCreateConfig(); + const controller = new AbortController(); + activeRunManager.setActive(input.tabId, { + benchPackId: input.benchPackId, + controller, + }); + + try { + return await resumeBenchPackRun( + config, + input.benchPackId, + { + runId: input.runId, + 
executionMode: input.executionMode, + generation: input.generation, + abortSignal: controller.signal, + onEvent: (event) => + sseBus.emit("run-event", { tabId: input.tabId, event }), + }, + await compat(), + ); + } finally { + activeRunManager.clearActive(input.tabId); + } + }); + + // --- stop --- + api.post("/api/benchpacks/stop", async (req: any) => { + const { tabId } = req.body as any; + const active = activeRunManager.getActive(tabId); + if (!active) return { stopped: false }; + active.controller.abort(new Error("Run cancelled by user.")); + return { stopped: true }; + }); + + // --- history --- + api.get("/api/benchpacks/:benchPackId/history", async (req: any) => { + const { config } = await loadOrCreateConfig(); + return listRunHistoryForBenchPack(config, (req.params as any).benchPackId); + }); + + api.get("/api/benchpacks/:benchPackId/history/:runId", async (req: any) => { + const { config } = await loadOrCreateConfig(); + return loadRunSummaryForBenchPack( + config, + (req.params as any).benchPackId, + (req.params as any).runId, + ); + }); + + api.post("/api/benchpacks/:benchPackId/history/clear", async (req: any) => { + const { config } = await loadOrCreateConfig(); + return clearRunHistoryForBenchPack(config, (req.params as any).benchPackId); + }); + + // --- verifiers --- + api.get("/api/verifiers", async () => { + const { config } = await loadOrCreateConfig(); + const inspections = await inspectConfiguredBenchPacks( + config, + await compat(), + ); + const relevant = inspections.filter( + (i) => + i.manifest?.capabilities.verification || + i.manifest?.capabilities.sidecars, + ); + return Promise.all( + relevant.map((i) => getConfiguredBenchPackVerifierStatus(config, i.id)), + ); + }); + + api.post("/api/verifiers/start", async (req: any) => { + const { config } = await loadOrCreateConfig(); + const status = await getConfiguredBenchPackVerifierStatus( + config, + (req.body as any).benchPackId, + ); + return startConfiguredBenchPackVerifiers( + config, 
+ (req.body as any).benchPackId, + { + onProgress: (p) => + sseBus.emit("verifier-progress", { + benchPackId: (req.body as any).benchPackId, + event: { + type: "verifier_preparing", + benchPackId: (req.body as any).benchPackId, + benchPackName: status.benchPackName, + verifierId: p.verifierId, + phase: p.phase, + message: p.message, + }, + }), + }, + ); + }); + + api.post("/api/verifiers/stop", async (req: any) => { + const { config } = await loadOrCreateConfig(); + return stopConfiguredBenchPackVerifiers( + config, + (req.body as any).benchPackId, + ); + }); + + api.post("/api/verifiers/cancel-start", async () => ({ cancelled: false })); + + api.post("/api/verifiers/delete-image", async (req: any) => { + const { config } = await loadOrCreateConfig(); + return deleteConfiguredBenchPackVerifierImage( + config, + (req.body as any).benchPackId, + (req.body as any).verifierId, + ); + }); + + // --- themes --- + api.get("/api/themes", () => listAvailableThemes()); + api.get("/api/themes/:themeId", async (req: any) => + loadAvailableTheme((req.params as any).themeId), + ); + + // --- models --- + api.post("/api/models/discover", async (req: any) => + discoverProviderModels((req.body as any).provider), + ); +} diff --git a/app/src/server/app-metadata.ts b/app/src/server/app-metadata.ts new file mode 100644 index 0000000..f82129e --- /dev/null +++ b/app/src/server/app-metadata.ts @@ -0,0 +1,83 @@ +import { promises as fs } from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +export type BenchLocalAppMetadata = { + productName: string; + description: string; + version: string; + author: string; + license?: string; + copyright?: string; +}; + +type AppPackageJson = { + productName?: string; + description?: string; + version?: string; + author?: string; + license?: string; +}; + +function getPackageJsonPath(): string { + return path.resolve( + path.dirname(fileURLToPath(import.meta.url)), + "../../package.json", + ); +} + +function 
getLicensePath(): string { + const licensePath = path.resolve( + path.dirname(fileURLToPath(import.meta.url)), + "../../../LICENSE", + ); + return licensePath; +} + +function parseCopyrightLine(licenseText: string): string | undefined { + const line = licenseText + .split(/\r?\n/) + .map((entry) => entry.trim()) + .find((entry) => /^copyright\s*\(c\)\s+/i.test(entry)); + + if (!line) { + return undefined; + } + + return line.replace(/^copyright\s*\(c\)\s+/i, "Copyright © "); +} + +function parseLicenseName(licenseText: string): string | undefined { + const firstLine = licenseText + .split(/\r?\n/) + .map((entry) => entry.trim()) + .find((entry) => entry.length > 0); + + return firstLine || undefined; +} + +export async function loadAppMetadata(): Promise { + const packageJsonRaw = await fs.readFile(getPackageJsonPath(), "utf8"); + const packageJson = JSON.parse(packageJsonRaw) as AppPackageJson; + + let license = packageJson.license; + let copyright: string | undefined; + + try { + const licenseRaw = await fs.readFile(getLicensePath(), "utf8"); + license = license ?? parseLicenseName(licenseRaw); + copyright = parseCopyrightLine(licenseRaw); + } catch { + license = packageJson.license; + copyright = undefined; + } + + return { + productName: packageJson.productName ?? "BenchLocal", + description: packageJson.description ?? "", + version: packageJson.version ?? "0.0.0", + author: packageJson.author ?? 
"", + license, + copyright, + }; +} diff --git a/app/src/server/index.ts b/app/src/server/index.ts new file mode 100644 index 0000000..994546c --- /dev/null +++ b/app/src/server/index.ts @@ -0,0 +1,47 @@ +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import fastifyStatic from "@fastify/static"; +import Fastify from "fastify"; +import { registerApiRoutes } from "./api-routes"; +import { activeRunManager } from "./run-manager"; +import { registerSseRoute } from "./sse-route"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +async function main() { + const server = Fastify({ logger: { level: "info" } }); + + registerApiRoutes(server); + registerSseRoute(server); + + // Serve the React SPA build output + const rendererOut = path.join(__dirname, "..", "renderer-out"); + server.register(fastifyStatic, { root: rendererOut, prefix: "/" }); + + // SPA fallback + server.setNotFoundHandler((req, reply) => { + if (req.url.startsWith("/api/")) { + return reply.code(404).send({ error: "Not found" }); + } + return reply.type("text/html").sendFile("index.html"); + }); + + const port = Number(process.env.BENCHLOCAL_PORT) || 4300; + const host = process.env.BENCHLOCAL_HOST || "0.0.0.0"; + + await server.listen({ port, host }); + console.log(`BenchLocal running at http://${host}:${port}`); +} + +// Graceful shutdown +process.on("SIGINT", async () => { + console.log("Shutting down..."); + await activeRunManager.shutdown(); + process.exit(0); +}); +process.on("SIGTERM", async () => { + await activeRunManager.shutdown(); + process.exit(0); +}); + +main(); diff --git a/app/src/server/models.ts b/app/src/server/models.ts new file mode 100644 index 0000000..227b727 --- /dev/null +++ b/app/src/server/models.ts @@ -0,0 +1,148 @@ +import type { + BenchLocalConfig, + BenchLocalProviderConfig, +} from "@benchlocal/core"; +import type { BenchLocalDiscoveredModel } from "../shared/desktop-api"; + +function providerSupportsModelDiscovery( + 
provider: BenchLocalProviderConfig, +): boolean { + return ( + provider.kind === "openrouter" || + provider.kind === "huggingface" || + provider.kind === "openai_compatible" + ); +} + +function providerModelsUrl(baseUrl: string): string { + const normalizedBaseUrl = baseUrl.endsWith("/") ? baseUrl : `${baseUrl}/`; + return new URL("models", normalizedBaseUrl).toString(); +} + +function formatModelPricing(value: unknown): string | undefined { + if (!value || typeof value !== "object") { + return undefined; + } + + const record = value as Record; + const prompt = + typeof record.prompt === "string" || typeof record.prompt === "number" + ? String(record.prompt) + : null; + const completion = + typeof record.completion === "string" || + typeof record.completion === "number" + ? String(record.completion) + : null; + + if (prompt && completion) { + return `In ${prompt} · Out ${completion}`; + } + + if (prompt) { + return `Prompt ${prompt}`; + } + + if (completion) { + return `Completion ${completion}`; + } + + return undefined; +} + +function mapDiscoveredModel(input: unknown): BenchLocalDiscoveredModel | null { + if (!input || typeof input !== "object") { + return null; + } + + const record = input as Record; + const id = typeof record.id === "string" ? record.id.trim() : ""; + + if (!id) { + return null; + } + + const name = typeof record.name === "string" ? record.name.trim() : undefined; + const ownedBy = + typeof record.owned_by === "string" ? record.owned_by.trim() : undefined; + const topProvider = + typeof record.top_provider === "object" && record.top_provider !== null + ? (record.top_provider as Record) + : null; + const architecture = + typeof record.architecture === "object" && record.architecture !== null + ? (record.architecture as Record) + : null; + const contextLength = + typeof record.context_length === "number" + ? record.context_length + : typeof topProvider?.context_length === "number" + ? 
(topProvider.context_length as number) + : undefined; + const modality = Array.isArray(architecture?.modality) + ? architecture.modality + .filter((value): value is string => typeof value === "string") + .join(", ") + : Array.isArray(record.input_modalities) + ? record.input_modalities + .filter((value): value is string => typeof value === "string") + .join(", ") + : Array.isArray(record.output_modalities) + ? record.output_modalities + .filter((value): value is string => typeof value === "string") + .join(", ") + : undefined; + + return { + id, + name, + ownedBy, + contextLength, + pricing: formatModelPricing(record.pricing), + modality, + }; +} + +export async function discoverProviderModels( + provider: BenchLocalProviderConfig, +): Promise { + if (!providerSupportsModelDiscovery(provider)) { + throw new Error(`${provider.name} does not support model browsing yet.`); + } + + const headers = new Headers({ + Accept: "application/json", + }); + const apiKey = + provider.api_key?.trim() || + (provider.api_key_env ? process.env[provider.api_key_env]?.trim() : ""); + + if (apiKey) { + headers.set("Authorization", `Bearer ${apiKey}`); + } + + const response = await fetch(providerModelsUrl(provider.base_url), { + method: "GET", + headers, + }); + + if (!response.ok) { + throw new Error( + `Failed to load models from ${provider.name}: ${response.status} ${response.statusText}`.trim(), + ); + } + + const payload = (await response.json()) as { data?: unknown[] } | unknown[]; + const entries = Array.isArray(payload) + ? payload + : Array.isArray(payload.data) + ? payload.data + : []; + + return entries + .map((entry) => mapDiscoveredModel(entry)) + .filter((entry): entry is BenchLocalDiscoveredModel => Boolean(entry)) + .sort((left, right) => + (left.name ?? left.id).localeCompare(right.name ?? 
right.id), + ); +} diff --git a/app/src/server/run-manager.ts b/app/src/server/run-manager.ts new file mode 100644 index 0000000..2d6d3f2 --- /dev/null +++ b/app/src/server/run-manager.ts @@ -0,0 +1,37 @@ +export class ActiveRunManager { + private runs = new Map< + string, + { benchPackId: string; controller: AbortController } + >(); + + setActive( + tabId: string, + run: { benchPackId: string; controller: AbortController }, + ) { + this.runs.set(tabId, run); + } + + getActive(tabId: string) { + return this.runs.get(tabId); + } + + clearActive(tabId: string) { + this.runs.delete(tabId); + } + + listActive() { + return Array.from(this.runs.entries()).map(([tabId, run]) => ({ + tabId, + benchPackId: run.benchPackId, + })); + } + + async shutdown() { + for (const run of this.runs.values()) { + run.controller.abort(new Error("Server shutting down.")); + } + this.runs.clear(); + } +} + +export const activeRunManager = new ActiveRunManager(); diff --git a/app/src/server/sse-bus.ts b/app/src/server/sse-bus.ts new file mode 100644 index 0000000..71bfcac --- /dev/null +++ b/app/src/server/sse-bus.ts @@ -0,0 +1,20 @@ +type Handler = (data: unknown) => void; + +export class SseBus { + private subs = new Map>(); + + on(channel: string, handler: Handler): () => void { + const set = this.subs.get(channel) || new Set(); + set.add(handler); + this.subs.set(channel, set); + return () => set.delete(handler); + } + + emit(channel: string, data: unknown) { + for (const handler of this.subs.get(channel) || []) { + handler(data); + } + } +} + +export const sseBus = new SseBus(); diff --git a/app/src/server/sse-route.ts b/app/src/server/sse-route.ts new file mode 100644 index 0000000..b987e2a --- /dev/null +++ b/app/src/server/sse-route.ts @@ -0,0 +1,39 @@ +import type { FastifyInstance, FastifyReply, FastifyRequest } from "fastify"; +import { sseBus } from "./sse-bus"; + +export function registerSseRoute(server: FastifyInstance) { + server.get( + "/api/events/sse", + { handlerTimeout: 0 
}, + async (req: FastifyRequest, reply: FastifyReply) => { + reply.header("Content-Type", "text/event-stream"); + reply.header("Cache-Control", "no-cache"); + reply.header("Connection", "keep-alive"); + reply.header("X-Accel-Buffering", "no"); + reply.raw.write(": connected\n\n"); + + const channels = [ + "run-event", + "benchpack-mutation-progress", + "verifier-progress", + ]; + + const unsubscribers = channels.map((ch) => + sseBus.on(ch, (data) => { + reply.raw.write(`event: ${ch}\ndata: ${JSON.stringify(data)}\n\n`); + }), + ); + + const keepAlive = setInterval(() => { + reply.raw.write(": heartbeat\n\n"); + }, 15000); + + req.raw.on("close", () => { + unsubscribers.forEach((u) => u()); + clearInterval(keepAlive); + }); + + return new Promise(() => {}); + }, + ); +} diff --git a/app/src/server/themes.ts b/app/src/server/themes.ts new file mode 100644 index 0000000..3432d51 --- /dev/null +++ b/app/src/server/themes.ts @@ -0,0 +1,116 @@ +import { accessSync, promises as fs } from "node:fs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import type { + BenchLocalThemeDefinition, + BenchLocalThemeDescriptor, +} from "@benchlocal/core"; +import { + getThemeStorageDir, + loadThemeDefinitionFromFile, +} from "@benchlocal/core"; + +function getBenchLocalWorkspaceRoot(): string { + return path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../.."); +} + +function getBuiltInThemesDir(): string { + // Check for a themes directory next to the project root + const workspaceRoot = getBenchLocalWorkspaceRoot(); + const workspaceThemes = path.join(workspaceRoot, "themes"); + + // For the server, we also check a bundled themes directory + const bundledThemes = path.join( + path.dirname(fileURLToPath(import.meta.url)), + "../../themes", + ); + + // Prefer workspace themes if they exist + try { + accessSync(workspaceThemes); + return workspaceThemes; + } catch { + // Fall back to bundled + } + + try { + accessSync(bundledThemes); + return 
bundledThemes; + } catch { + // Last resort: workspace + return workspaceThemes; + } +} + +async function listThemeFiles(targetDir: string): Promise { + try { + const entries = await fs.readdir(targetDir, { withFileTypes: true }); + return entries + .filter((entry) => entry.isFile() && entry.name.endsWith(".json")) + .map((entry) => path.join(targetDir, entry.name)); + } catch { + return []; + } +} + +export async function listAvailableThemes(): Promise< + BenchLocalThemeDescriptor[] +> { + const builtInFiles = await listThemeFiles(getBuiltInThemesDir()); + const userDir = getThemeStorageDir(); + await fs.mkdir(userDir, { recursive: true }); + const userFiles = await listThemeFiles(userDir); + + const themes: BenchLocalThemeDescriptor[] = []; + + for (const filePath of builtInFiles) { + try { + const theme = await loadThemeDefinitionFromFile(filePath); + themes.push({ + id: theme.id, + name: theme.name, + colorScheme: theme.colorScheme, + source: "builtin", + path: filePath, + }); + } catch { + // Skip invalid theme files. + } + } + + for (const filePath of userFiles) { + try { + const theme = await loadThemeDefinitionFromFile(filePath); + themes.push({ + id: theme.id, + name: theme.name, + colorScheme: theme.colorScheme, + source: "user", + path: filePath, + }); + } catch { + // Skip invalid theme files. + } + } + + return themes.sort((left, right) => { + if (left.source !== right.source) { + return left.source === "builtin" ? 
-1 : 1; + } + + return left.name.localeCompare(right.name); + }); +} + +export async function loadAvailableTheme( + themeId: string, +): Promise { + const themes = await listAvailableThemes(); + const match = themes.find((theme) => theme.id === themeId); + + if (!match?.path) { + return null; + } + + return loadThemeDefinitionFromFile(match.path); +} diff --git a/app/vite.config.web.ts b/app/vite.config.web.ts new file mode 100644 index 0000000..d990a8f --- /dev/null +++ b/app/vite.config.web.ts @@ -0,0 +1,31 @@ +import path from "node:path"; +import react from "@vitejs/plugin-react"; +import { defineConfig } from "vite"; + +export default defineConfig({ + plugins: [react()], + resolve: { + alias: { + "@": path.resolve(__dirname, "src"), + "@core": path.resolve(__dirname, "../packages/benchlocal-core/src"), + "@benchpack-host": path.resolve( + __dirname, + "../packages/benchpack-host/src", + ), + }, + }, + build: { + outDir: "out/renderer-out", + emptyOutDir: true, + }, + server: { + port: 4300, + host: "0.0.0.0", + proxy: { + "/api": { + target: "http://localhost:4300", + changeOrigin: true, + }, + }, + }, +}); diff --git a/package-lock.json b/package-lock.json index 264ed35..b1e5679 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,7 +19,11 @@ "name": "benchlocal-app", "version": "0.2.4", "dependencies": { + "@benchlocal/benchpack-host": "file:../packages/benchpack-host", + "@benchlocal/core": "file:../packages/benchlocal-core", + "@fastify/static": "^8.0.0", "electron-updater": "^6.6.2", + "fastify": "^5.0.0", "lucide-react": "^1.7.0", "react": "^19.0.0", "react-dom": "^19.0.0" @@ -31,11 +35,14 @@ "@types/react-dom": "^19.0.3", "@vitejs/plugin-react": "^4.4.1", "autoprefixer": "^10.4.20", + "concurrently": "^9.0.0", "electron": "37.2.0", "electron-builder": "^26.0.12", "electron-vite": "^3.1.0", + "esbuild": "^0.25.0", "postcss": "^8.5.1", "tailwindcss": "^4.1.11", + "tsx": "^4.0.0", "typescript": "^5.8.3", "vite": "^6.3.5" } @@ -1272,6 +1279,287 @@ 
"node": ">=18" } }, + "node_modules/@fastify/accept-negotiator": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@fastify/accept-negotiator/-/accept-negotiator-2.0.1.tgz", + "integrity": "sha512-/c/TW2bO/v9JeEgoD/g1G5GxGeCF1Hafdf79WPmUlgYiBXummY0oX3VVq4yFkKKVBKDNlaDUYoab7g38RpPqCQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, + "node_modules/@fastify/ajv-compiler": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/@fastify/ajv-compiler/-/ajv-compiler-4.0.5.tgz", + "integrity": "sha512-KoWKW+MhvfTRWL4qrhUwAAZoaChluo0m0vbiJlGMt2GXvL4LVPQEjt8kSpHI3IBq5Rez8fg+XeH3cneztq+C7A==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "ajv": "^8.12.0", + "ajv-formats": "^3.0.1", + "fast-uri": "^3.0.0" + } + }, + "node_modules/@fastify/ajv-compiler/node_modules/ajv": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz", + "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/@fastify/ajv-compiler/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, + "node_modules/@fastify/error": { + "version": "4.2.0", + 
"resolved": "https://registry.npmjs.org/@fastify/error/-/error-4.2.0.tgz", + "integrity": "sha512-RSo3sVDXfHskiBZKBPRgnQTtIqpi/7zhJOEmAxCiBcM7d0uwdGdxLlsCaLzGs8v8NnxIRlfG0N51p5yFaOentQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, + "node_modules/@fastify/fast-json-stringify-compiler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/@fastify/fast-json-stringify-compiler/-/fast-json-stringify-compiler-5.0.3.tgz", + "integrity": "sha512-uik7yYHkLr6fxd8hJSZ8c+xF4WafPK+XzneQDPU+D10r5X19GW8lJcom2YijX2+qtFF1ENJlHXKFM9ouXNJYgQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "fast-json-stringify": "^6.0.0" + } + }, + "node_modules/@fastify/forwarded": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/@fastify/forwarded/-/forwarded-3.0.1.tgz", + "integrity": "sha512-JqDochHFqXs3C3Ml3gOY58zM7OqO9ENqPo0UqAjAjH8L01fRZqwX9iLeX34//kiJubF7r2ZQHtBRU36vONbLlw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, + "node_modules/@fastify/merge-json-schemas": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/@fastify/merge-json-schemas/-/merge-json-schemas-0.2.1.tgz", + "integrity": "sha512-OA3KGBCy6KtIvLf8DINC5880o5iBlDX4SxzLQS8HorJAbqluzLRn80UXU0bxZn7UOFhFgpRJDasfwn9nG4FG4A==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "dequal": "^2.0.3" + } + }, + 
"node_modules/@fastify/proxy-addr": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/@fastify/proxy-addr/-/proxy-addr-5.1.0.tgz", + "integrity": "sha512-INS+6gh91cLUjB+PVHfu1UqcB76Sqtpyp7bnL+FYojhjygvOPA9ctiD/JDKsyD9Xgu4hUhCSJBPig/w7duNajw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@fastify/forwarded": "^3.0.0", + "ipaddr.js": "^2.1.0" + } + }, + "node_modules/@fastify/send": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/@fastify/send/-/send-4.1.0.tgz", + "integrity": "sha512-TMYeQLCBSy2TOFmV95hQWkiTYgC/SEx7vMdV+wnZVX4tt8VBLKzmH8vV9OzJehV0+XBfg+WxPMt5wp+JBUKsVw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@lukeed/ms": "^2.0.2", + "escape-html": "~1.0.3", + "fast-decode-uri-component": "^1.0.1", + "http-errors": "^2.0.0", + "mime": "^3" + } + }, + "node_modules/@fastify/send/node_modules/mime": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-3.0.0.tgz", + "integrity": "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A==", + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/@fastify/static": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/@fastify/static/-/static-8.3.0.tgz", + "integrity": "sha512-yKxviR5PH1OKNnisIzZKmgZSus0r2OZb8qCSbqmw34aolT4g3UlzYfeBRym+HJ1J471CR8e2ldNub4PubD1coA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + 
"@fastify/accept-negotiator": "^2.0.0", + "@fastify/send": "^4.0.0", + "content-disposition": "^0.5.4", + "fastify-plugin": "^5.0.0", + "fastq": "^1.17.1", + "glob": "^11.0.0" + } + }, + "node_modules/@fastify/static/node_modules/@isaacs/cliui": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-9.0.0.tgz", + "integrity": "sha512-AokJm4tuBHillT+FpMtxQ60n8ObyXBatq7jD2/JA9dxbDDokKQm8KMht5ibGzLVU9IJDIKK4TPKgMHEYMn3lMg==", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/@fastify/static/node_modules/glob": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/glob/-/glob-11.1.0.tgz", + "integrity": "sha512-vuNwKSaKiqm7g0THUBu2x7ckSs3XJLXE+2ssL7/MfTGPLLcrJQ/4Uq1CjPTtO5cCIiRxqvN6Twy1qOwhL0Xjcw==", + "deprecated": "Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me", + "license": "BlueOak-1.0.0", + "dependencies": { + "foreground-child": "^3.3.1", + "jackspeak": "^4.1.1", + "minimatch": "^10.1.1", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^2.0.0" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@fastify/static/node_modules/jackspeak": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-4.2.3.tgz", + "integrity": "sha512-ykkVRwrYvFm1nb2AJfKKYPr0emF6IiXDYUaFx4Zn9ZuIH7MrzEZ3sD5RlqGXNRpHtvUHJyOnCEFxOlNDtGo7wg==", + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^9.0.0" + }, + "engines": { + "node": "20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@fastify/static/node_modules/lru-cache": { + "version": "11.3.5", + "resolved": 
"https://registry.npmjs.org/lru-cache/-/lru-cache-11.3.5.tgz", + "integrity": "sha512-NxVFwLAnrd9i7KUBxC4DrUhmgjzOs+1Qm50D3oF1/oL+r1NpZ4gA7xvG0/zJ8evR7zIKn4vLf7qTNduWFtCrRw==", + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@fastify/static/node_modules/path-scurry": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.2.tgz", + "integrity": "sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==", + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^11.0.0", + "minipass": "^7.1.2" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/@isaacs/cliui": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", @@ -1438,6 +1726,15 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@lukeed/ms": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@lukeed/ms/-/ms-2.0.2.tgz", + "integrity": "sha512-9I2Zn6+NJLfaGoz9jN3lpwDgAYvfGeNYdbAIjJOqzs4Tpc+VU3Jqq4IofSUBKajiDS8k9fZIg18/z13mpk1bsA==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/@malept/cross-spawn-promise": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/@malept/cross-spawn-promise/-/cross-spawn-promise-2.0.0.tgz", @@ -1566,6 +1863,12 @@ "node": ">=10" } }, + "node_modules/@pinojs/redact": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@pinojs/redact/-/redact-0.4.0.tgz", + "integrity": "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==", + "license": "MIT" + }, "node_modules/@pkgjs/parseargs": { "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", @@ -2459,6 +2762,12 @@ "node": "^18.17.0 || >=20.5.0" } }, + "node_modules/abstract-logging": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/abstract-logging/-/abstract-logging-2.0.1.tgz", + "integrity": "sha512-2BjRTZxTPvheOvGbBslFSYOUkr+SjPtOnrLP33f+VIWLzezQpZcqVg7ja3L4dBXmzzgwT+a029jRx5PCi3JuiA==", + "license": "MIT" + }, "node_modules/agent-base": { "version": "7.1.4", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", @@ -2486,6 +2795,45 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/ajv-formats": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", + "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", + "license": "MIT", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/ajv-formats/node_modules/ajv": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz", + "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, "node_modules/ajv-keywords": { "version": "3.5.2", "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", @@ -2757,6 +3105,15 @@ "node": ">= 4.0.0" } }, + "node_modules/atomic-sleep": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/atomic-sleep/-/atomic-sleep-1.0.0.tgz", 
+ "integrity": "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ==", + "license": "MIT", + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/autoprefixer": { "version": "10.4.27", "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.27.tgz", @@ -2794,11 +3151,30 @@ "postcss": "^8.1.0" } }, + "node_modules/avvio": { + "version": "9.2.0", + "resolved": "https://registry.npmjs.org/avvio/-/avvio-9.2.0.tgz", + "integrity": "sha512-2t/sy01ArdHHE0vRH5Hsay+RtCZt3dLPji7W7/MMOCEgze5b7SNDC4j5H6FnVgPkI1MTNFGzHdHrVXDDl7QSSQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@fastify/error": "^4.0.0", + "fastq": "^1.17.1" + } + }, "node_modules/balanced-match": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", - "dev": true, "license": "MIT", "engines": { "node": "18 || 20 || >=22" @@ -2867,7 +3243,6 @@ "version": "5.0.5", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==", - "dev": true, "license": "MIT", "dependencies": { "balanced-match": "^4.0.2" @@ -3380,28 +3755,94 @@ "dev": true, "license": "MIT" }, - "node_modules/convert-source-map": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", - "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", - "dev": true, - "license": "MIT" - }, - "node_modules/core-util-is": { - "version": "1.0.2", - "resolved": 
"https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", - "integrity": "sha512-3lqz5YjWTYnW6dlDa5TLaTCcShfar1e40rmcJVwCBJC6mWlFuj0eCHIElmG1g5kyuJ/GD+8Wn4FFCcz4gJPfaQ==", + "node_modules/concurrently": { + "version": "9.2.1", + "resolved": "https://registry.npmjs.org/concurrently/-/concurrently-9.2.1.tgz", + "integrity": "sha512-fsfrO0MxV64Znoy8/l1vVIjjHa29SZyyqPgQBwhiDcaW8wJc2W3XWVOGx4M3oJBnv/zdUZIIp1gDeS98GzP8Ng==", "dev": true, "license": "MIT", - "optional": true + "dependencies": { + "chalk": "4.1.2", + "rxjs": "7.8.2", + "shell-quote": "1.8.3", + "supports-color": "8.1.1", + "tree-kill": "1.2.2", + "yargs": "17.7.2" + }, + "bin": { + "conc": "dist/bin/concurrently.js", + "concurrently": "dist/bin/concurrently.js" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/open-cli-tools/concurrently?sponsor=1" + } }, - "node_modules/crc": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/crc/-/crc-3.8.0.tgz", - "integrity": "sha512-iX3mfgcTMIq3ZKLIsVFAbv7+Mc10kxabAGQb8HvjA1o3T1PIYprbakQ65d3I+2HGHt6nSKkM9PYjgoJO2KcFBQ==", + "node_modules/concurrently/node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", "dev": true, "license": "MIT", - "optional": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/content-disposition": { + "version": "0.5.4", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", + "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==", + "license": "MIT", + "dependencies": { + "safe-buffer": "5.2.1" + }, + "engines": { + "node": ">= 0.6" + } + }, + 
"node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true, + "license": "MIT" + }, + "node_modules/cookie": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-1.1.1.tgz", + "integrity": "sha512-ei8Aos7ja0weRpFzJnEA9UHJ/7XQmqglbRwnf2ATjcB9Wq874VKH9kfjjirM6UhU2/E5fFYadylyhFldcqSidQ==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/core-util-is": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", + "integrity": "sha512-3lqz5YjWTYnW6dlDa5TLaTCcShfar1e40rmcJVwCBJC6mWlFuj0eCHIElmG1g5kyuJ/GD+8Wn4FFCcz4gJPfaQ==", + "dev": true, + "license": "MIT", + "optional": true + }, + "node_modules/crc": { + "version": "3.8.0", + "resolved": "https://registry.npmjs.org/crc/-/crc-3.8.0.tgz", + "integrity": "sha512-iX3mfgcTMIq3ZKLIsVFAbv7+Mc10kxabAGQb8HvjA1o3T1PIYprbakQ65d3I+2HGHt6nSKkM9PYjgoJO2KcFBQ==", + "dev": true, + "license": "MIT", + "optional": true, "dependencies": { "buffer": "^5.1.0" } @@ -3419,7 +3860,6 @@ "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "dev": true, "license": "MIT", "dependencies": { "path-key": "^3.1.0", @@ -3434,14 +3874,12 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true, "license": "ISC" }, "node_modules/cross-spawn/node_modules/which": { "version": "2.0.2", "resolved": 
"https://registry.npmjs.org/which/-/which-2.0.2.tgz", "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, "license": "ISC", "dependencies": { "isexe": "^2.0.0" @@ -3577,6 +4015,24 @@ "node": ">=0.4.0" } }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/dequal": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/detect-libc": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", @@ -4260,6 +4716,12 @@ "node": ">=6" } }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, "node_modules/escape-string-regexp": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", @@ -4313,11 +4775,16 @@ "license": "MIT", "optional": true }, + "node_modules/fast-decode-uri-component": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/fast-decode-uri-component/-/fast-decode-uri-component-1.0.1.tgz", + "integrity": "sha512-WKgKWg5eUxvRZGwW8FvfbaH7AXSh2cL+3j5fMGzUMCxWBJ3dV3a7Wz8y2f/uQ0e3B6WmodD3oS54jTQ9HVTIIg==", + "license": "MIT" + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", "integrity": 
"sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true, "license": "MIT" }, "node_modules/fast-json-stable-stringify": { @@ -4327,6 +4794,147 @@ "dev": true, "license": "MIT" }, + "node_modules/fast-json-stringify": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/fast-json-stringify/-/fast-json-stringify-6.3.0.tgz", + "integrity": "sha512-oRCntNDY/329HJPlmdNLIdogNtt6Vyjb1WuT01Soss3slIdyUp8kAcDU3saQTOquEK8KFVfwIIF7FebxUAu+yA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@fastify/merge-json-schemas": "^0.2.0", + "ajv": "^8.12.0", + "ajv-formats": "^3.0.1", + "fast-uri": "^3.0.0", + "json-schema-ref-resolver": "^3.0.0", + "rfdc": "^1.2.0" + } + }, + "node_modules/fast-json-stringify/node_modules/ajv": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz", + "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/fast-json-stringify/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, + "node_modules/fast-querystring": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/fast-querystring/-/fast-querystring-1.1.2.tgz", + "integrity": 
"sha512-g6KuKWmFXc0fID8WWH0jit4g0AGBoJhCkJMb1RmbsSEUNvQ+ZC8D6CUZ+GtF8nMzSPXnhiePyyqqipzNNEnHjg==", + "license": "MIT", + "dependencies": { + "fast-decode-uri-component": "^1.0.1" + } + }, + "node_modules/fast-uri": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", + "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/fastify": { + "version": "5.8.5", + "resolved": "https://registry.npmjs.org/fastify/-/fastify-5.8.5.tgz", + "integrity": "sha512-Yqptv59pQzPgQUSIm87hMqHJmdkb1+GPxdE6vW6FRyVE9G86mt7rOghitiU4JHRaTyDUk9pfeKmDeu70lAwM4Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "@fastify/ajv-compiler": "^4.0.5", + "@fastify/error": "^4.0.0", + "@fastify/fast-json-stringify-compiler": "^5.0.0", + "@fastify/proxy-addr": "^5.0.0", + "abstract-logging": "^2.0.1", + "avvio": "^9.0.0", + "fast-json-stringify": "^6.0.0", + "find-my-way": "^9.0.0", + "light-my-request": "^6.0.0", + "pino": "^9.14.0 || ^10.1.0", + "process-warning": "^5.0.0", + "rfdc": "^1.3.1", + "secure-json-parse": "^4.0.0", + "semver": "^7.6.0", + "toad-cache": "^3.7.0" + } + }, + "node_modules/fastify-plugin": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/fastify-plugin/-/fastify-plugin-5.1.0.tgz", + "integrity": "sha512-FAIDA8eovSt5qcDgcBvDuX/v0Cjz0ohGhENZ/wpc3y+oZCY2afZ9Baqql3g/lC+OHRnciQol4ww7tuthOb9idw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + 
], + "license": "MIT" + }, + "node_modules/fastify/node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/fastq": { + "version": "1.20.1", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz", + "integrity": "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==", + "license": "ISC", + "dependencies": { + "reusify": "^1.0.4" + } + }, "node_modules/fd-slicer": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", @@ -4395,11 +5003,24 @@ "node": ">=10" } }, + "node_modules/find-my-way": { + "version": "9.5.0", + "resolved": "https://registry.npmjs.org/find-my-way/-/find-my-way-9.5.0.tgz", + "integrity": "sha512-VW2RfnmscZO5KgBY5XVyKREMW5nMZcxDy+buTOsL+zIPnBlbKm+00sgzoQzq1EVh4aALZLfKdwv6atBGcjvjrQ==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-querystring": "^1.0.0", + "safe-regex2": "^5.0.0" + }, + "engines": { + "node": ">=20" + } + }, "node_modules/foreground-child": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", - "dev": true, "license": "ISC", "dependencies": { "cross-spawn": "^7.0.6", @@ -4416,7 +5037,6 @@ "version": "4.1.0", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "dev": true, "license": "ISC", "engines": { "node": ">=14" @@ -4591,6 +5211,19 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + 
"node_modules/get-tsconfig": { + "version": "4.14.0", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.14.0.tgz", + "integrity": "sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==", + "dev": true, + "license": "MIT", + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, "node_modules/glob": { "version": "7.2.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", @@ -4846,6 +5479,26 @@ "dev": true, "license": "BSD-2-Clause" }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/http-proxy-agent": { "version": "7.0.2", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", @@ -4966,7 +5619,6 @@ "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "dev": true, "license": "ISC" }, "node_modules/ip-address": { @@ -4979,6 +5631,15 @@ "node": ">= 12" } }, + "node_modules/ipaddr.js": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.3.0.tgz", + "integrity": "sha512-Zv/pA+ciVFbCSBBjGfaKUya/CcGmUHzTydLMaTwrUUEM2DIEO3iZvueGxmacvmN50fGpGVKeTXpb2LcYQxeVdg==", + "license": "MIT", + "engines": { + "node": ">= 10" + } + }, "node_modules/is-fullwidth-code-point": { "version": 
"3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", @@ -5118,6 +5779,25 @@ "dev": true, "license": "MIT" }, + "node_modules/json-schema-ref-resolver": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/json-schema-ref-resolver/-/json-schema-ref-resolver-3.0.0.tgz", + "integrity": "sha512-hOrZIVL5jyYFjzk7+y7n5JDzGlU8rfWDuYyHwGa2WA8/pcmMHezp2xsVwxrebD/Q9t8Nc5DboieySDpCp4WG4A==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "dequal": "^2.0.3" + } + }, "node_modules/json-schema-traverse": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", @@ -5172,6 +5852,43 @@ "integrity": "sha512-0/BnGCCfyUMkBpeDgWihanIAF9JmZhHBgUhEqzvf+adhNGLoP6TaiI5oF8oyb3I45P+PcnrqihSf01M0l0G5+Q==", "license": "MIT" }, + "node_modules/light-my-request": { + "version": "6.6.0", + "resolved": "https://registry.npmjs.org/light-my-request/-/light-my-request-6.6.0.tgz", + "integrity": "sha512-CHYbu8RtboSIoVsHZ6Ye4cj4Aw/yg2oAFimlF7mNvfDV192LR7nDiKtSIfCuLT7KokPSTn/9kfVLm5OGN0A28A==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause", + "dependencies": { + "cookie": "^1.0.1", + "process-warning": "^4.0.0", + "set-cookie-parser": "^2.6.0" + } + }, + "node_modules/light-my-request/node_modules/process-warning": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/process-warning/-/process-warning-4.0.1.tgz", + "integrity": "sha512-3c2LzQ3rY9d0hc1emcsHhfT9Jwz0cChib/QN89oME2R451w5fy3f0afAhERFZAwrbDU43wk12d0ORBpDVME50Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": 
"opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, "node_modules/lightningcss": { "version": "1.32.0", "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz", @@ -5616,7 +6333,6 @@ "version": "10.2.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz", "integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==", - "dev": true, "license": "BlueOak-1.0.0", "dependencies": { "brace-expansion": "^5.0.5" @@ -5642,7 +6358,6 @@ "version": "7.1.3", "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.3.tgz", "integrity": "sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==", - "dev": true, "license": "BlueOak-1.0.0", "engines": { "node": ">=16 || 14 >=14.17" @@ -5982,6 +6697,15 @@ "node": ">= 0.4" } }, + "node_modules/on-exit-leak-free": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/on-exit-leak-free/-/on-exit-leak-free-2.1.2.tgz", + "integrity": "sha512-0eJJY6hXLGf1udHwfNftBqH+g73EU4B504nZeKpz1sYRKafAghwxEJunB2O7rDZkL4PGfsMVnTXZ2EjibbqcsA==", + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -6075,7 +6799,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "dev": true, "license": "BlueOak-1.0.0" }, "node_modules/path-is-absolute": { @@ -6092,7 +6815,6 @@ "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -6164,6 +6886,43 @@ "url": 
"https://github.com/sponsors/jonschlinkert" } }, + "node_modules/pino": { + "version": "10.3.1", + "resolved": "https://registry.npmjs.org/pino/-/pino-10.3.1.tgz", + "integrity": "sha512-r34yH/GlQpKZbU1BvFFqOjhISRo1MNx1tWYsYvmj6KIRHSPMT2+yHOEb1SG6NMvRoHRF0a07kCOox/9yakl1vg==", + "license": "MIT", + "dependencies": { + "@pinojs/redact": "^0.4.0", + "atomic-sleep": "^1.0.0", + "on-exit-leak-free": "^2.1.0", + "pino-abstract-transport": "^3.0.0", + "pino-std-serializers": "^7.0.0", + "process-warning": "^5.0.0", + "quick-format-unescaped": "^4.0.3", + "real-require": "^0.2.0", + "safe-stable-stringify": "^2.3.1", + "sonic-boom": "^4.0.1", + "thread-stream": "^4.0.0" + }, + "bin": { + "pino": "bin.js" + } + }, + "node_modules/pino-abstract-transport": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pino-abstract-transport/-/pino-abstract-transport-3.0.0.tgz", + "integrity": "sha512-wlfUczU+n7Hy/Ha5j9a/gZNy7We5+cXp8YL+X+PG8S0KXxw7n/JXA3c46Y0zQznIJ83URJiwy7Lh56WLokNuxg==", + "license": "MIT", + "dependencies": { + "split2": "^4.0.0" + } + }, + "node_modules/pino-std-serializers": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/pino-std-serializers/-/pino-std-serializers-7.1.0.tgz", + "integrity": "sha512-BndPH67/JxGExRgiX1dX0w1FvZck5Wa4aal9198SrRhZjH3GxKQUKIBnYJTdj2HDN3UQAS06HlfcSbQj2OHmaw==", + "license": "MIT" + }, "node_modules/plist": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/plist/-/plist-3.1.0.tgz", @@ -6255,6 +7014,22 @@ "node": "^18.17.0 || >=20.5.0" } }, + "node_modules/process-warning": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/process-warning/-/process-warning-5.0.0.tgz", + "integrity": "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, 
"node_modules/progress": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", @@ -6312,6 +7087,12 @@ "node": ">=6" } }, + "node_modules/quick-format-unescaped": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz", + "integrity": "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==", + "license": "MIT" + }, "node_modules/quick-lru": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", @@ -6384,11 +7165,29 @@ "node": ">= 6" } }, - "node_modules/require-directory": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", - "dev": true, + "node_modules/real-require": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/real-require/-/real-require-0.2.0.tgz", + "integrity": "sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==", + "license": "MIT", + "engines": { + "node": ">= 12.13.0" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", "license": "MIT", "engines": { "node": ">=0.10.0" @@ -6419,6 +7218,16 @@ "dev": true, "license": "MIT" }, + "node_modules/resolve-pkg-maps": { + 
"version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, "node_modules/responselike": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/responselike/-/responselike-2.0.1.tgz", @@ -6446,6 +7255,15 @@ "node": ">=8" } }, + "node_modules/ret": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/ret/-/ret-0.5.0.tgz", + "integrity": "sha512-I1XxrZSQ+oErkRR4jYbAyEEu2I0avBvvMM5JN+6EBprOGRCs63ENqZ3vjavq8fBw2+62G5LF5XelKwuJpcvcxw==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, "node_modules/retry": { "version": "0.12.0", "resolved": "https://registry.npmjs.org/retry/-/retry-0.12.0.tgz", @@ -6456,6 +7274,22 @@ "node": ">= 4" } }, + "node_modules/reusify": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.1.0.tgz", + "integrity": "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==", + "license": "MIT", + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" + } + }, + "node_modules/rfdc": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/rfdc/-/rfdc-1.4.1.tgz", + "integrity": "sha512-q1b3N5QkRUWUl7iyylaaj3kOpIT0N2i9MqIEQXP73GVsN9cw3fdx8X63cEmWhJGi2PPCF23Ijp7ktmd39rawIA==", + "license": "MIT" + }, "node_modules/rimraf": { "version": "2.6.3", "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.3.tgz", @@ -6535,11 +7369,20 @@ "fsevents": "~2.3.2" } }, + "node_modules/rxjs": { + "version": "7.8.2", + "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-7.8.2.tgz", + "integrity": "sha512-dhKf903U/PQZY6boNNtAGdWbG85WAbjT/1xYoZIC7FAY0yWapOBQVsVrDl58W86//e1VpMNBtRV4MaXfdMySFA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + 
"tslib": "^2.1.0" + } + }, "node_modules/safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", - "dev": true, "funding": [ { "type": "github", @@ -6556,6 +7399,37 @@ ], "license": "MIT" }, + "node_modules/safe-regex2": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/safe-regex2/-/safe-regex2-5.1.1.tgz", + "integrity": "sha512-mOSBvHGDZMuIEZMdOz/aCEYDCv0E7nfcNsIhUF+/P+xC7Hyf3FkvymqgPbg9D1EdSGu+uKbJgy09K/RKKc7kJA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT", + "dependencies": { + "ret": "~0.5.0" + }, + "bin": { + "safe-regex2": "bin/safe-regex2.js" + } + }, + "node_modules/safe-stable-stringify": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz", + "integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", @@ -6588,6 +7462,22 @@ "integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==", "license": "MIT" }, + "node_modules/secure-json-parse": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-4.1.0.tgz", + "integrity": "sha512-l4KnYfEyqYJxDwlNVyRfO2E4NTHfMKAWdUuA8J0yve2Dz/E/PdBepY03RvyJpssIpRFwJoCD55wA+mEDs6ByWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": 
"BSD-3-Clause" + }, "node_modules/semver": { "version": "6.3.1", "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", @@ -6623,11 +7513,22 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/set-cookie-parser": { + "version": "2.7.2", + "resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-2.7.2.tgz", + "integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw==", + "license": "MIT" + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, "license": "MIT", "dependencies": { "shebang-regex": "^3.0.0" @@ -6640,12 +7541,24 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" } }, + "node_modules/shell-quote": { + "version": "1.8.3", + "resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.3.tgz", + "integrity": "sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/signal-exit": { "version": "3.0.7", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", @@ -6748,6 +7661,15 @@ "node": ">= 14" } }, + 
"node_modules/sonic-boom": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.1.tgz", + "integrity": "sha512-w6AxtubXa2wTXAUsZMMWERrsIRAdrK0Sc+FUytWvYAhBJLyuI4llrMIC1DtlNSdI99EI86KZum2MMq3EAZlF9Q==", + "license": "MIT", + "dependencies": { + "atomic-sleep": "^1.0.0" + } + }, "node_modules/source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", @@ -6779,6 +7701,15 @@ "source-map": "^0.6.0" } }, + "node_modules/split2": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", + "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", + "license": "ISC", + "engines": { + "node": ">= 10.x" + } + }, "node_modules/sprintf-js": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", @@ -6810,6 +7741,15 @@ "node": ">= 6" } }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/string_decoder": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", @@ -7016,6 +7956,18 @@ "node": ">= 10.0.0" } }, + "node_modules/thread-stream": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/thread-stream/-/thread-stream-4.0.0.tgz", + "integrity": "sha512-4iMVL6HAINXWf1ZKZjIPcz5wYaOdPhtO8ATvZ+Xqp3BTdaqtAwQkNmKORqcIo5YkQqGXq5cwfswDwMqqQNrpJA==", + "license": "MIT", + "dependencies": { + "real-require": "^0.2.0" + }, + "engines": { + "node": ">=20" + } + }, "node_modules/tiny-async-pool": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/tiny-async-pool/-/tiny-async-pool-1.3.0.tgz", @@ -7079,6 +8031,34 @@ "tmp": "^0.2.0" } }, + 
"node_modules/toad-cache": { + "version": "3.7.0", + "resolved": "https://registry.npmjs.org/toad-cache/-/toad-cache-3.7.0.tgz", + "integrity": "sha512-/m8M+2BJUpoJdgAHoG+baCwBT+tf2VraSfkBgl0Y00qIWt41DJ8R5B8nsEw0I58YwF5IZH6z24/2TobDKnqSWw==", + "license": "MIT", + "engines": { + "node": ">=12" + } + }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/tree-kill": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz", + "integrity": "sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==", + "dev": true, + "license": "MIT", + "bin": { + "tree-kill": "cli.js" + } + }, "node_modules/truncate-utf8-bytes": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz", @@ -7089,6 +8069,517 @@ "utf8-byte-length": "^1.0.1" } }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "dev": true, + "license": "0BSD" + }, + "node_modules/tsx": { + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", + "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "~0.27.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/tsx/node_modules/@esbuild/aix-ppc64": { + "version": "0.27.7", + "resolved": 
"https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz", + "integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz", + "integrity": "sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz", + "integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz", + "integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/darwin-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz", + "integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": 
true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/darwin-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz", + "integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz", + "integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/freebsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz", + "integrity": "sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz", + "integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz", + "integrity": 
"sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz", + "integrity": "sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-loong64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz", + "integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-mips64el": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz", + "integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz", + "integrity": "sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + 
"node_modules/tsx/node_modules/@esbuild/linux-riscv64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz", + "integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-s390x": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz", + "integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz", + "integrity": "sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz", + "integrity": "sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/netbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz", + "integrity": 
"sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz", + "integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/openbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz", + "integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz", + "integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/sunos-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz", + "integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + 
"node_modules/tsx/node_modules/@esbuild/win32-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz", + "integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz", + "integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz", + "integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/esbuild": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", + "integrity": "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.7", + "@esbuild/android-arm": "0.27.7", + "@esbuild/android-arm64": "0.27.7", + "@esbuild/android-x64": "0.27.7", + "@esbuild/darwin-arm64": "0.27.7", + "@esbuild/darwin-x64": "0.27.7", + "@esbuild/freebsd-arm64": "0.27.7", + "@esbuild/freebsd-x64": "0.27.7", + 
"@esbuild/linux-arm": "0.27.7", + "@esbuild/linux-arm64": "0.27.7", + "@esbuild/linux-ia32": "0.27.7", + "@esbuild/linux-loong64": "0.27.7", + "@esbuild/linux-mips64el": "0.27.7", + "@esbuild/linux-ppc64": "0.27.7", + "@esbuild/linux-riscv64": "0.27.7", + "@esbuild/linux-s390x": "0.27.7", + "@esbuild/linux-x64": "0.27.7", + "@esbuild/netbsd-arm64": "0.27.7", + "@esbuild/netbsd-x64": "0.27.7", + "@esbuild/openbsd-arm64": "0.27.7", + "@esbuild/openbsd-x64": "0.27.7", + "@esbuild/openharmony-arm64": "0.27.7", + "@esbuild/sunos-x64": "0.27.7", + "@esbuild/win32-arm64": "0.27.7", + "@esbuild/win32-ia32": "0.27.7", + "@esbuild/win32-x64": "0.27.7" + } + }, "node_modules/type-fest": { "version": "0.13.1", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz", From 61f04cdbd98e33951abfac0f69048baa2c49616e Mon Sep 17 00:00:00 2001 From: Tiberiu Ichim Date: Sun, 3 May 2026 13:50:31 +0300 Subject: [PATCH 3/4] Add webport plan --- webapp-plan.md | 1076 +++++++++++++++++++++++++++++++++++++ webapp-singleuser-plan.md | 1076 +++++++++++++++++++++++++++++++++++++ 2 files changed, 2152 insertions(+) create mode 100644 webapp-plan.md create mode 100644 webapp-singleuser-plan.md diff --git a/webapp-plan.md b/webapp-plan.md new file mode 100644 index 0000000..285baa6 --- /dev/null +++ b/webapp-plan.md @@ -0,0 +1,1076 @@ +# BenchLocal Web App — Detailed Plan + +## Executive Summary + +Transform BenchLocal from an Electron desktop application into a server-hosted web application. The user runs a Node.js server process on their machine (where LLM providers, Docker, and Bench Packs live), then connects via any browser — on the same machine or remotely. + +**Key insight**: The existing architecture already cleanly separates UI (`app/src/renderer/`) from orchestration (`packages/benchpack-host/`). The plan is to extract these into a web-server backend and a standalone React frontend, replacing Electron IPC with HTTP/SSE. + +--- + +## 1. 
Current Architecture (as-is) + +``` +┌─────────────────────────────────────────────────┐ +│ Electron Main Process (app/src/main/) │ +│ │ +│ ┌──────────────┐ ┌──────────────────────────┐ │ +│ │ IPC Handlers │ │ benchpack-host package │ │ +│ │ (ipc.ts) │──│ (run orchestration, │ │ +│ │ │ │ Docker, verifiers, │ │ +│ │ config, │ │ install/uninstall) │ │ +│ │ themes, │ └──────────────────────────┘ │ +│ │ workspaces │ │ +│ │ updates │ │ +│ └──────────────┘ │ +│ ▲ IPC bridge (preload) │ +│ │ │ +│ ┌──────────────┐ │ +│ │ React UI │ (app/src/renderer/src/) │ +│ │ (App.tsx ~7900│ │ +│ │ lines) │ │ +│ └──────────────┘ │ +│ │ +│ Storage: ~/.benchlocal/ │ +│ config.toml, state.json, │ +│ runs/, benchpacks/, logs/, cache/, themes/ │ +└─────────────────────────────────────────────────┘ +``` + +### Key packages + +| Package | Location | Role | +| ---------------------------- | --------------------------- | ------------------------------------------------------------------------------------------------- | +| `@benchlocal/core` | `packages/benchlocal-core/` | Shared types, config parsing, workspace state, themes (pure TypeScript, already published to npm) | +| `@benchlocal/benchpack-host` | `packages/benchpack-host/` | Run orchestration, Docker verifier lifecycle, Bench Pack install/inspect/run | +| `benchlocal-app` | `app/` | Electron shell: main process, IPC bridge, React renderer UI | + +### Data layer + +All user data lives under `~/.benchlocal/`: + +``` +~/.benchlocal/ + config.toml ← providers, models, benchpacks, UI theme (TOML) + state.json ← workspaces, tabs, per-tab models/sampling/execution mode (JSON) + runs/ ← per-run directories: summary.json, events.jsonl, host.log + benchpacks/ ← installed Bench Pack artifacts + logs/ ← host log files + cache/ ← cache directory + themes/ ← user-installed theme JSON files +``` + +### IPC API surface (what the renderer calls through the preload bridge) + +The preload bridge (`app/src/preload/index.ts`) exposes `window.benchlocal` with 
these namespaces: + +| Namespace | Methods | +| ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `app` | `metadata()`, `onOpenAbout()`, `onOpenSettings()` | +| `updates` | `state()`, `check()`, `install()`, `onState()` | +| `config` | `load()`, `save()` | +| `models` | `discover()` | +| `themes` | `list()`, `load()` | +| `workspaces` | `load()`, `save()`, `export()`, `import()` | +| `benchPacks` | `list()`, `registry()`, `install()`, `installFromUrl()`, `update()`, `uninstall()`, `onMutationProgress()`, `activeRuns()`, `run()`, `retryScenario()`, `resumeRun()`, `stop()`, `history()`, `loadHistory()`, `clearHistory()`, `onRunEvent()` | +| `verifiers` | `list()`, `start()`, `stop()`, `cancelStart()`, `deleteImage()`, `onProgress()` | +| `logs` | `openDetachedWindow()`, `closeDetachedWindow()`, `publishDetachedState()`, `onDetachedState()`, `onDetachedWindowClosed()` | + +### React renderer UI (App.tsx — single ~7900 line file) + +The UI is a single massive React component with: + +- **Sidebar** — workspace list, tab chips, drag-and-drop tab reordering +- **Top bar** — settings, theme switcher, about dialog, update notifications +- **Main content** — benchmark run controls, results grid (scenarios × models), scoring +- **Log drawer** — bottom panel showing real-time run events +- **Settings panels** — Providers, Models, Bench Packs (install/registry), Verification (Docker verifiers) +- **Modals** — detail view, sampling overrides, model selection, model aliases, run history, confirm dialogs, verifier preparation + +Styling: Tailwind CSS v4 + CSS custom properties (CSS variables) driven by theme JSON files. + +--- + +## 2. 
Target Architecture + +``` +┌───────────────────────────────────────────────────────┐ +│ Node.js HTTP Server (NEW: app/src/server/) │ +│ │ +│ ┌─────────────────────────┐ ┌────────────────────┐ │ +│ │ REST API + SSE │──│ benchpack-host pkg │ │ +│ │ (Fastify/Express) │ │ (run orchestration │ │ +│ │ │ │ Docker, verifiers │ │ +│ │ GET/POST endpoints for │ │ install/uninstall)│ │ +│ │ all IPC operations │ └────────────────────┘ │ +│ │ SSE endpoint for │ │ +│ │ run events (real-time) │ │ +│ └─────────────────────────┘ │ +│ │ +│ Storage: ~/.benchlocal/ (unchanged) │ +└───────────────────────────────────────────────────────┘ + ▲ HTTP + │ port 3540 (default) + │ +┌───────┴───────────────────────────────────────────────┐ +│ React SPA (EXTRACTED: app/src/renderer/) │ +│ │ +│ Served statically by the same server OR standalone │ +│ Single-page app: fetch() + EventSource instead of │ +│ window.benchlocal IPC │ +│ │ +│ Access via: http://server-host:3540 │ +└───────────────────────────────────────────────────────┘ +``` + +--- + +## 3. Option Analysis + +### Option A — Full web app (server + SPA) — **RECOMMENDED** + +Build a proper HTTP server backend and adapt the existing React renderer as a standalone SPA. + +**Pros:** + +- Full parity with the desktop app +- Can be accessed from any browser, any device +- Real-time streaming of run events via Server-Sent Events (SSE) +- Reuses existing React UI with minimal changes (only IPC → HTTP) +- Single deployment: one Node.js process serves both API and static files +- Docker verifier management works identically on the server + +**Cons:** + +- Requires building a new server layer +- Loses desktop features: auto-updater, system menu, window state persistence, file dialogs + +### Option B — CLI producer + standalone viewer + +CLI tool runs benchmarks and writes JSON output files; separate viewer HTML loads them. 
+ +**Pros:** + +- Simplest to build +- Viewer is a single HTML file +- Good for CI/CD pipelines + +**Cons:** + +- No real-time feedback during runs +- No remote access — must be on the same machine to read files +- No configuration management from the viewer +- Cannot start/stop runs from the viewer +- Loses the interactive benchmark experience + +### Option C — Desktop app with remote renderer + +Keep the Electron app but serve the renderer UI over HTTP. + +**Pros:** + +- Minimal code changes + +**Cons:** + +- Still requires Electron on the server +- Complex IPC-over-network bridge needed +- No advantage over Option A + +### Decision: **Option A** + +It provides the best balance of remote accessibility, real-time interaction, and code reuse. The existing codebase is already well-structured for this split — `benchpack-host` is pure Node.js and needs no Electron dependencies. + +--- + +## 4. Implementation Plan (Option A) + +### Phase 1: Server Backend (`app/src/server/`) + +#### 4.1 Server framework + +Use **Fastify** (or Express) for the HTTP server. Fastify is recommended for its speed, built-in validation, and SSE support. 
+ +**Dependencies to add to `app/package.json`:** + +```json +{ + "fastify": "^5.x", + "@fastify/static": "^8.x", + "@fastify/cors": "^10.x", + "@fastify/formbody": "^8.x" +} +``` + +#### 4.2 Server entry point + +New file: `app/src/server/index.ts` + +```typescript +// app/src/server/index.ts +import Fastify from "fastify"; +import cors from "@fastify/cors"; +import fastifyStatic from "@fastify/static"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { registerApiRoutes } from "./api-routes"; +import { registerSseRoutes } from "./sse-routes"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +const DEFAULT_PORT = 3540; +const DEFAULT_HOST = "127.0.0.1"; // secure by default + +async function startServer() { + const fastify = Fastify({ logger: true }); + + await fastify.register(cors, { + origin: process.env.BENCHLOCAL_CORS_ORIGIN || true, + }); + + // API routes + registerApiRoutes(fastify); + registerSseRoutes(fastify); + + // Static file serving (React SPA build output) + const rendererOut = path.join(__dirname, "..", "renderer-out"); + fastify.register(fastifyStatic, { + root: rendererOut, + prefix: "/", + }); + + // SPA fallback: serve index.html for all non-API routes + fastify.setNotFoundHandler((request, reply) => { + if (request.url.startsWith("/api/")) { + return reply.code(404).send({ error: "Not found" }); + } + return reply.type("text/html").sendFile("index.html"); + }); + + const port = Number(process.env.BENCHLOCAL_PORT) || DEFAULT_PORT; + const host = process.env.BENCHLOCAL_HOST || DEFAULT_HOST; + + await fastify.listen({ port, host }); + console.log(`BenchLocal web server running at http://${host}:${port}`); +} + +startServer(); +``` + +#### 4.3 API routes mapping (IPC → HTTP) + +Each IPC channel becomes an HTTP endpoint under `/api/`: + +| IPC Channel | HTTP Method | Endpoint | Notes | +| ----------------------------- | ----------- | --------------------------------------------- | 
------------------------------------------------------------------- | +| `config:load` | GET | `/api/config` | Returns `{ config }` | +| `config:save` | PUT | `/api/config` | Body: `{ config }` | +| `app:metadata` | GET | `/api/metadata` | App version info | +| `updates:get-state` | GET | `/api/updates/state` | | +| `updates:check` | POST | `/api/updates/check` | | +| `updates:install` | POST | `/api/updates/install` | | +| `models:discover` | POST | `/api/models/discover` | Body: `{ provider }` | +| `themes:list` | GET | `/api/themes` | | +| `themes:load` | GET | `/api/themes/:themeId` | | +| `workspaces:load` | GET | `/api/workspaces` | | +| `workspaces:save` | PUT | `/api/workspaces` | Body: `{ state }` | +| `workspaces:export` | POST | `/api/workspaces/export` | Body: `{ workspaceId, state }` → returns file stream | +| `workspaces:import` | POST | `/api/workspaces/import` | Multipart file upload | +| `benchpacks:list` | GET | `/api/benchpacks` | | +| `benchpacks:registry` | GET | `/api/benchpacks/registry` | | +| `benchpacks:install` | POST | `/api/benchpacks/:benchPackId/install` | SSE: mutation progress | +| `benchpacks:install-from-url` | POST | `/api/benchpacks/install-from-url` | Body: `{ url }` | +| `benchpacks:update` | POST | `/api/benchpacks/:benchPackId/update` | | +| `benchpacks:uninstall` | POST | `/api/benchpacks/:benchPackId/uninstall` | | +| `benchpacks:active-runs` | GET | `/api/benchpacks/active-runs` | | +| `benchpacks:run` | POST | `/api/benchpacks/run` | Body: `{ tabId, benchPackId, modelIds, executionMode, generation }` | +| `benchpacks:retry-scenario` | POST | `/api/benchpacks/retry-scenario` | | +| `benchpacks:resume-run` | POST | `/api/benchpacks/resume-run` | | +| `benchpacks:stop` | POST | `/api/benchpacks/stop` | Body: `{ tabId }` | +| `benchpacks:history` | GET | `/api/benchpacks/:benchPackId/history` | | +| `benchpacks:history-load` | GET | `/api/benchpacks/:benchPackId/history/:runId` | | +| `benchpacks:history-clear` | POST | 
`/api/benchpacks/:benchPackId/history/clear` | | +| `verifiers:list` | GET | `/api/verifiers` | | +| `verifiers:start` | POST | `/api/verifiers/start` | Body: `{ benchPackId }` | +| `verifiers:stop` | POST | `/api/verifiers/stop` | Body: `{ benchPackId }` | +| `verifiers:cancel-start` | POST | `/api/verifiers/cancel-start` | | +| `verifiers:delete-image` | POST | `/api/verifiers/delete-image` | Body: `{ benchPackId, verifierId }` | + +**File: `app/src/server/api-routes.ts`** + +```typescript +// app/src/server/api-routes.ts +import type { FastifyInstance } from "fastify"; +import { + loadOrCreateConfig, + saveConfigFile, + getConfigPath, + loadOrCreateWorkspaceState, + getWorkspaceStatePath, + saveWorkspaceStateFile, +} from "@benchlocal/core"; +import { + inspectConfiguredBenchPacks, + loadBenchPackRegistry, + installBenchPackFromRegistry, + installBenchPackFromUrl, + updateBenchPackFromRegistry, + uninstallBenchPack, + runConfiguredBenchPack, + resumeBenchPackRun, + retryScenarioForBenchPackRun, + listRunHistoryForBenchPack, + loadRunSummaryForBenchPack, + clearRunHistoryForBenchPack, + getConfiguredBenchPackVerifierStatus, + startConfiguredBenchPackVerifiers, + stopConfiguredBenchPackVerifiers, + deleteConfiguredBenchPackVerifierImage, +} from "@benchlocal/benchpack-host"; +import { listAvailableThemes, loadAvailableTheme } from "./themes"; +import { loadAppMetadata } from "./app-metadata"; +import { discoverProviderModels } from "./models"; +import { activeRunManager } from "./run-manager"; +import { sseBus } from "./sse-bus"; + +export function registerApiRoutes(fastify: FastifyInstance) { + const api = fastify.prefix("/api"); + + // --- App metadata --- + api.get("/metadata", async () => loadAppMetadata()); + + // --- Config --- + api.get("/config", async () => { + const result = await loadOrCreateConfig(); + return { + path: result.path, + created: result.created, + config: result.config, + }; + }); + + api.put("/config", async (request, reply) => { + const 
config = (request.body as any).config; + const saved = await saveConfigFile(config, getConfigPath()); + return { path: getConfigPath(), created: false, config: saved }; + }); + + // --- Workspaces --- + api.get("/workspaces", async () => { + await loadOrCreateConfig(); + const result = await loadOrCreateWorkspaceState(getWorkspaceStatePath()); + return { path: result.path, created: result.created, state: result.state }; + }); + + api.put("/workspaces", async (request, reply) => { + await loadOrCreateConfig(); + const state = (request.body as any).state; + const saved = await saveWorkspaceStateFile(state, getWorkspaceStatePath()); + return { path: getWorkspaceStatePath(), created: false, state: saved }; + }); + + // --- Bench Packs --- + api.get("/benchpacks", async () => { + const { config } = await loadOrCreateConfig(); + return inspectConfiguredBenchPacks(config, await getRuntimeCompatibility()); + }); + + api.get("/benchpacks/registry", async () => { + const { config } = await loadOrCreateConfig(); + return loadBenchPackRegistry(config); + }); + + api.post("/benchpacks/:benchPackId/install", async (request) => { + const { benchPackId } = request.params as any; + const { config } = await loadOrCreateConfig(); + const saved = await installBenchPackFromRegistry( + config, + benchPackId, + (progress) => sseBus.emit("benchpack-mutation-progress", progress), + await getRuntimeCompatibility() + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + // ... 
(all other bench pack routes follow the same pattern) + + // --- Run management --- + api.post("/benchpacks/run", async (request) => { + const input = request.body as any; + const { config } = await loadOrCreateConfig(); + const controller = new AbortController(); + activeRunManager.setActive(input.tabId, { + benchPackId: input.benchPackId, + controller, + }); + + try { + return await runConfiguredBenchPack( + config, + input.benchPackId, + { + modelIds: input.modelIds, + executionMode: input.executionMode, + generation: input.generation, + abortSignal: controller.signal, + onEvent: (event) => { + sseBus.emit("run-event", { tabId: input.tabId, event }); + }, + }, + await getRuntimeCompatibility() + ); + } finally { + activeRunManager.clearActive(input.tabId); + } + }); + + api.post("/benchpacks/stop", async (request) => { + const { tabId } = request.body as any; + const active = activeRunManager.getActive(tabId); + if (!active) return { stopped: false }; + active.controller.abort(new Error("Run cancelled by user.")); + return { stopped: true }; + }); + + // --- Verifiers --- + api.get("/verifiers", async () => { + const { config } = await loadOrCreateConfig(); + const inspections = await inspectConfiguredBenchPacks( + config, + await getRuntimeCompatibility() + ); + const relevant = inspections.filter( + (i) => + i.manifest?.capabilities.verification || + i.manifest?.capabilities.sidecars + ); + return Promise.all( + relevant.map((i) => getConfiguredBenchPackVerifierStatus(config, i.id)) + ); + }); + + // --- Themes --- + api.get("/themes", async () => listAvailableThemes()); + api.get("/themes/:themeId", async (request) => { + return loadAvailableTheme((request.params as any).themeId); + }); + + // --- Models --- + api.post("/models/discover", async (request) => { + const { provider } = request.body as any; + return discoverProviderModels(provider); + }); + + // --- Updates (can be no-op for web or forwarded) --- + api.get("/updates/state", async () => ({ + 
status: "unsupported", + currentVersion: (await loadAppMetadata()).version, + })); + api.post("/updates/check", async () => ({ status: "not_available" })); + api.post("/updates/install", async () => ({ started: false })); +} +``` + +#### 4.4 SSE event bus + +Replace Electron's `ipcRenderer.on()` pattern with Server-Sent Events. + +**File: `app/src/server/sse-bus.ts`** + +```typescript +// app/src/server/sse-bus.ts +type EventHandler = (data: any) => void; + +export class SseBus { + private handlers = new Map<string, Set<EventHandler>>(); + + on(channel: string, handler: EventHandler): () => void { + if (!this.handlers.has(channel)) { + this.handlers.set(channel, new Set()); + } + this.handlers.get(channel)!.add(handler); + return () => { + this.handlers.get(channel)?.delete(handler); + }; + } + + emit(channel: string, data: any): void { + for (const handler of this.handlers.get(channel) || []) { + handler(data); + } + } +} + +export const sseBus = new SseBus(); +``` + +**File: `app/src/server/sse-routes.ts`** + +```typescript +// app/src/server/sse-routes.ts +import type { FastifyInstance, FastifyReply } from "fastify"; +import { sseBus } from "./sse-bus"; + +export function registerSseRoutes(fastify: FastifyInstance) { + fastify.get( + "/api/events/sse", + { handlerTimeout: 0 }, + async (request, reply) => { + reply.header("Content-Type", "text/event-stream"); + reply.header("Cache-Control", "no-cache"); + reply.header("Connection", "keep-alive"); + reply.header("X-Accel-Buffering", "no"); // disable Nginx buffering + + // Send initial heartbeat + reply.raw.write(": connected\n\n"); + + // Register handlers for each event channel + const channels = [ + "run-event", + "benchpack-mutation-progress", + "verifier-progress", + "app-update-state", + ]; + + const unsubscribers = channels.map((channel) => + sseBus.on(channel, (data) => { + reply.raw.write( + `event: ${channel}\ndata: ${JSON.stringify(data)}\n\n` + ); + }) + ); + + // Cleanup on client disconnect + request.raw.on("close", () => { +
unsubscribers.forEach((unsub) => unsub()); + }); + + // Keep-alive + const keepAliveInterval = setInterval(() => { + reply.raw.write(": heartbeat\n\n"); + }, 15000); + + request.raw.on("close", () => clearInterval(keepAliveInterval)); + + // Hold the response open + return new Promise(() => {}); + } + ); +} +``` + +#### 4.5 Run manager (replaces activeBenchPackRuns map) + +**File: `app/src/server/run-manager.ts`** + +```typescript +// app/src/server/run-manager.ts +export class ActiveRunManager { + private runs = new Map< + string, + { benchPackId: string; controller: AbortController } + >(); + + setActive( + tabId: string, + run: { benchPackId: string; controller: AbortController } + ) { + this.runs.set(tabId, run); + } + + getActive(tabId: string) { + return this.runs.get(tabId); + } + + clearActive(tabId: string) { + this.runs.delete(tabId); + } + + listActive() { + return Array.from(this.runs.entries()).map(([tabId, run]) => ({ + tabId, + benchPackId: run.benchPackId, + })); + } + + async shutdown() { + for (const run of this.runs.values()) { + run.controller.abort(new Error("Server shutting down.")); + } + this.runs.clear(); + } +} + +export const activeRunManager = new ActiveRunManager(); +``` + +#### 4.6 Desktop-only features to stub out + +| Feature | Desktop behavior | Web replacement | +| ---------------------------- | ------------------------- | --------------------------------------------------------------------- | +| `workspaces:export` | `dialog.showSaveDialog()` | Return file as download stream with `Content-Disposition: attachment` | +| `workspaces:import` | `dialog.showOpenDialog()` | Accept multipart file upload | +| `logs:openDetachedWindow` | `new BrowserWindow()` | Open new browser tab with `?view=logs` | +| `logs:closeDetachedWindow` | `window.close()` | Close tab (browser handles this) | +| `app:updates` | `electron-updater` | Stub — no auto-update in web mode (or external update mechanism) | +| `app:metadata` | Read from `package.json` | 
Same, read from `package.json` | +| Window state persistence | `window-state.json` | Browser handles window size (localStorage for sidebar open state) | +| System menu (Cmd+,) | Electron `Menu` | Keyboard shortcut handled in React | +| macOS `app.showAboutPanel()` | Native dialog | Custom about modal in React | + +--- + +### Phase 2: React Renderer Adaptation + +#### 5.1 New API client (replaces `window.benchlocal`) + +**File: `app/src/renderer/src/api/client.ts`** + +This replaces the IPC bridge with fetch-based HTTP calls: + +```typescript +// app/src/renderer/src/api/client.ts +const API_BASE = "/api"; // Same-origin by default; configurable via env + +export async function fetchApi<T>( + method: string, + path: string, + body?: any +): Promise<T> { + const response = await fetch(`${API_BASE}${path}`, { + method, + headers: body ? { "Content-Type": "application/json" } : undefined, + body: body ? JSON.stringify(body) : undefined, + }); + + if (!response.ok) { + const errorBody = await response.text(); + throw new Error(`API error ${response.status}: ${errorBody}`); + } + + return response.json(); +} + +export function createSseConnection(): EventSource { + return new EventSource(`${API_BASE}/events/sse`); +} + +// Thin wrapper matching the existing desktop API shape +export const benchlocalApi = { + config: { + load: () => fetchApi("GET", "/config"), + save: (config: any) => fetchApi("PUT", "/config", { config }), + }, + workspaces: { + load: () => fetchApi("GET", "/workspaces"), + save: (state: any) => fetchApi("PUT", "/workspaces", { state }), + // export/import handled separately (file download/upload) + }, + benchPacks: { + list: () => fetchApi("GET", "/benchpacks"), + registry: () => fetchApi("GET", "/benchpacks/registry"), + install: ({ benchPackId }: { benchPackId: string }) => + fetchApi("POST", `/benchpacks/${benchPackId}/install`), + installFromUrl: ({ url }: { url: string }) => + fetchApi("POST", "/benchpacks/install-from-url", { url }), + update: ({ 
benchPackId }: { benchPackId: string }) => + fetchApi("POST", `/benchpacks/${benchPackId}/update`), + uninstall: ({ benchPackId }: { benchPackId: string }) => + fetchApi("POST", `/benchpacks/${benchPackId}/uninstall`), + run: (input: any) => fetchApi("POST", "/benchpacks/run", input), + stop: ({ tabId }: { tabId: string }) => + fetchApi("POST", "/benchpacks/stop", { tabId }), + history: ({ benchPackId }: { benchPackId: string }) => + fetchApi("GET", `/benchpacks/${benchPackId}/history`), + loadHistory: ({ + benchPackId, + runId, + }: { + benchPackId: string; + runId: string; + }) => fetchApi("GET", `/benchpacks/${benchPackId}/history/${runId}`), + clearHistory: ({ benchPackId }: { benchPackId: string }) => + fetchApi("POST", `/benchpacks/${benchPackId}/history/clear`), + }, + verifiers: { + list: () => fetchApi("GET", "/verifiers"), + start: ({ benchPackId }: { benchPackId: string }) => + fetchApi("POST", "/verifiers/start", { benchPackId }), + stop: ({ benchPackId }: { benchPackId: string }) => + fetchApi("POST", "/verifiers/stop", { benchPackId }), + }, + themes: { + list: () => fetchApi("GET", "/themes"), + load: ({ themeId }: { themeId: string }) => + fetchApi("GET", `/themes/${themeId}`), + }, + models: { + discover: ({ provider }: { provider: any }) => + fetchApi("POST", "/models/discover", { provider }), + }, + app: { + metadata: () => fetchApi("GET", "/metadata"), + }, + // SSE-based event subscription (replaces ipcRenderer.on) + sse: { + connect: () => createSseConnection(), + }, +}; +``` + +#### 5.2 Changes to App.tsx + +The changes to `App.tsx` are targeted replacements of `window.benchlocal` calls: + +| Desktop code | Web replacement | +| ----------------------------------------------------------- | ------------------------------------------------------------------------------ | +| `window.benchlocal.config.load()` | `benchlocalApi.config.load()` | +| `window.benchlocal.benchPacks.run(...)` | `benchlocalApi.benchPacks.run(...)` | +| 
`window.benchlocal.benchPacks.onRunEvent(listener)` | `sseSource.addEventListener('run-event', (e) => listener(JSON.parse(e.data)))` | +| `window.benchlocal.benchPacks.onMutationProgress(listener)` | `sseSource.addEventListener('benchpack-mutation-progress', ...)` | +| `window.benchlocal.workspaces.export(...)` | File download via `<a download>` + fetch with `Content-Disposition` | +| `window.benchlocal.workspaces.import()` | `<input type="file">` + POST multipart | +| `window.benchlocal.updates.*` | Stubbed (no-op) | +| `window.benchlocal.logs.openDetachedWindow()` | `window.open('?view=logs')` | + +The SSE connection is established once at app mount and torn down on unmount. All real-time events flow through the single `EventSource`. + +#### 5.3 Build configuration changes + +**File: `app/electron.vite.config.ts`** (or new `app/vite.config.web.ts`) + +The existing electron-vite config builds both the Electron main process and the renderer. For the web app, we need a separate Vite config that: + +1. Builds only the renderer as a standalone SPA +2. Outputs to `app/out/renderer-out/` (or similar) +3. Uses standard browser polyfills (no Electron) +4. No preload, no context bridge + +```typescript +// app/vite.config.web.ts (new) +import { defineConfig } from "vite"; +import react from "@vitejs/plugin-react"; +import path from "node:path"; + +export default defineConfig({ + plugins: [react()], + resolve: { + alias: { + "@": path.resolve(__dirname, "src"), + "@core": path.resolve(__dirname, "../packages/benchlocal-core/src"), + }, + }, + build: { + outDir: "out/renderer-out", + emptyOutDir: true, + }, +}); +``` + +#### 5.4 CSS and assets + +No changes needed. The existing Tailwind CSS, CSS custom properties (themes), and assets work identically in a browser. 
+ +--- + +### Phase 3: Build & Deployment + +#### 6.1 New scripts in `package.json` (root) + +```json +{ + "scripts": { + "web:dev": "concurrently \"npm run web:dev:server\" \"npm run web:dev:renderer\"", + "web:dev:server": "tsx watch app/src/server/index.ts", + "web:dev:renderer": "vite --config app/vite.config.web.ts", + "web:build": "npm run build --workspace @benchlocal/core && npm run build --workspace @benchlocal/benchpack-host && npm run web:build:renderer && npm run web:build:server", + "web:build:renderer": "vite build --config app/vite.config.web.ts", + "web:build:server": "esbuild app/src/server/index.ts --bundle --platform=node --target=node20 --format=esm --outfile=app/out/server/index.js --external:@benchlocal/* --external:fastify", + "web:start": "node app/out/server/index.js" + } +} +``` + +#### 6.2 Docker deployment (optional but recommended) + +**File: `Dockerfile.web`** + +```dockerfile +FROM node:20-alpine AS builder +WORKDIR /app +COPY package.json package-lock.json ./ +RUN npm ci +COPY . . 
+RUN npm run web:build + +FROM node:20-alpine +WORKDIR /app +COPY --from=builder /app/app/out/server ./server +COPY --from=builder /app/app/out/renderer-out ./renderer-out +COPY --from=builder /app/packages ./packages +COPY --from=builder /app/themes ./themes + +# Expose Docker socket for verifier containers (requires -v /var/run/docker.sock) +ENV BENCHLOCAL_PORT=3540 +ENV BENCHLOCAL_HOST=0.0.0.0 + +EXPOSE 3540 +CMD ["node", "server/index.js"] +``` + +Run: + +```bash +docker run -p 3540:3540 \ + -v ~/.benchlocal:/root/.benchlocal \ + -v /var/run/docker.sock:/var/run/docker.sock \ + benchlocal-web +``` + +#### 6.3 Reverse proxy (optional) + +For production behind Nginx/Caddy: + +```nginx +location / { + proxy_pass http://127.0.0.1:3540; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + # SSE needs no buffering + proxy_buffering off; +} +``` + +--- + +### Phase 4: Files to create/modify + +#### New files + +| File | Purpose | +| -------------------------------------------- | --------------------------------------------------------------------------------------- | +| `app/src/server/index.ts` | Fastify server entry point | +| `app/src/server/api-routes.ts` | REST API endpoint definitions | +| `app/src/server/sse-routes.ts` | SSE event streaming endpoint | +| `app/src/server/sse-bus.ts` | In-process event bus (replaces IPC event channels) | +| `app/src/server/run-manager.ts` | Active run tracking (replaces `activeBenchPackRuns` map) | +| `app/src/server/themes.ts` | Theme listing/loading (extracted from `app/src/main/themes.ts`, no Electron dependency) | +| `app/src/server/app-metadata.ts` | App metadata (extracted from `app/src/main/app-metadata.ts`, no Electron dependency) | +| `app/src/server/models.ts` | Model discovery (extracted from `app/src/main/ipc.ts`, no Electron dependency) | +| `app/src/server/updater.ts` | Stubbed update handler 
(no auto-update in web mode) | +| `app/src/renderer/src/api/client.ts` | HTTP/SSE API client (replaces `window.benchlocal` IPC bridge) | +| `app/src/renderer/src/api/sse-subscriber.ts` | SSE connection manager with reconnect logic | +| `app/vite.config.web.ts` | Vite config for renderer-only (no Electron) build | +| `app/tsconfig.server.json` | TypeScript config for server-side code | +| `Dockerfile.web` | Docker image for the web server | + +#### Modified files + +| File | Changes | +| ------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | +| `app/src/renderer/src/App.tsx` | Replace `window.benchlocal.*` with `benchlocalApi.*`; replace IPC event listeners with SSE listeners; stub desktop-only features (updates, dialogs) | +| `app/src/renderer/src/main.tsx` | Remove Electron preload expectations; initialize API client | +| `app/src/shared/desktop-api.ts` | Rename to `api.ts` (optional) or keep as reference; create web-compatible version | +| `app/package.json` | Add server dependencies (Fastify, etc.); add web build scripts | +| `package.json` (root) | Add `web:dev`, `web:build`, `web:start` scripts | +| `app/electron.vite.config.ts` | Add renderer-out build target (or keep separate via `vite.config.web.ts`) | + +#### Files to extract (copy + remove Electron deps) + +| Source | Target | What to remove | +| --------------------------------------- | -------------------------------- | ------------------------------------------------------------------------------------ | +| `app/src/main/themes.ts` | `app/src/server/themes.ts` | `import { app } from 'electron'`, `process.resourcesPath`, `__dirname` Electron path | +| `app/src/main/app-metadata.ts` | `app/src/server/app-metadata.ts` | Electron-specific `app.getVersion()` fallbacks | +| `app/src/main/ipc.ts` (model discovery) | `app/src/server/models.ts` | `ipcRenderer`, dialog imports | + +--- 
+ +## 5. Data flow comparison + +### Desktop (current) + +``` +React UI ──IPC──► Electron Main ──calls──► benchpack-host + ▲ │ + │ IPC event ▼ + └──── ipcRenderer.on ─── run events ◄─── runConfiguredBenchPack() +``` + +### Web app (target) + +``` +React SPA ──fetch()──► Fastify API ──calls──► benchpack-host + ▲ │ + │ SSE stream ▼ + └── EventSource.on ◄── /api/events ◄─── runConfiguredBenchPack() +``` + +The benchpack-host package is **unchanged**. It is pure Node.js with no Electron dependency. + +--- + +## 6. What stays the same + +- `~/.benchlocal/` directory structure (unchanged) +- `config.toml` format (unchanged) +- `state.json` format (unchanged) +- Bench Pack install artifacts (unchanged) +- Run storage format (unchanged) +- `@benchlocal/core` package (unchanged) +- `@benchlocal/benchpack-host` package (unchanged) +- Bench Pack registry (unchanged) +- Docker verifier management (unchanged) +- Theme JSON files (unchanged) +- React UI styling (Tailwind + CSS variables, unchanged) + +--- + +## 7. What changes + +| Area | Desktop | Web | +| ------------- | ---------------------------------- | ------------------------------------------------------ | +| Process model | Electron main + renderer | Single Node.js HTTP server | +| IPC | Electron IPC | HTTP REST + SSE | +| UI hosting | Embedded Chromium | Any browser | +| File dialogs | `dialog.showSaveDialog/OpenDialog` | Browser download/upload | +| App updates | `electron-updater` (auto) | Manual (git pull + restart, or container image update) | +| Window state | Persisted to JSON file | Browser localStorage | +| System menu | Electron Menu | Browser keyboard shortcuts | +| About dialog | Native `showAboutPanel()` | Custom React modal | +| Detached logs | New BrowserWindow | New browser tab (`?view=logs`) | + +--- + +## 8. 
Risk assessment + +| Risk | Impact | Mitigation | +| --------------------------------- | ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | +| SSE reconnect on network drop | Run events lost during disconnect | Server buffers recent events; client can poll `/api/benchpacks/active-runs` on reconnect | +| CORS when accessing remote server | Blocked requests | Server sets `Access-Control-Allow-Origin` | +| Docker not available on server | Verifier-dependent packs fail | Same as desktop — Docker required. Document clearly. | +| Long-running runs on server | Server process must stay alive | Use `pm2`, systemd, or Docker container with restart policy | +| Large SSE payloads | Memory pressure during fast runs | Batch events; limit SSE buffer size | +| Security: no authentication | Anyone who reaches the port can run benchmarks | Default bind to `127.0.0.1`. Document `--host 0.0.0.0` requires reverse proxy with auth. Optional: add basic auth middleware. | + +--- + +## 9. Migration path + +The web app and desktop app can coexist: + +1. **Phase 1**: Build the server backend alongside the existing Electron app. Both read/write the same `~/.benchlocal/` directory. +2. **Phase 2**: Build the web renderer alongside the Electron renderer. They share the same React codebase (same `App.tsx`, different API client). +3. **Phase 3**: Users can switch between desktop and web by running either `npm run dev` (Electron) or `npm run web:dev` (web). +4. **Phase 4**: When satisfied, the desktop build can be deprecated or kept as an alternative distribution. + +This means **no data migration** is needed — both apps use the same `~/.benchlocal/` directory. + +--- + +## 10. 
Effort estimate + +| Task | Estimated effort | +| ------------------------------------------------ | ---------------- | +| Server backend (Fastify, routes, SSE) | 2-3 days | +| Extract Electron-only modules (themes, metadata) | 0.5 day | +| React API client (fetch + SSE wrapper) | 1 day | +| Adapt App.tsx (replace IPC with HTTP calls) | 2-3 days | +| Build configuration (Vite web config, scripts) | 0.5 day | +| Docker deployment config | 0.5 day | +| Testing & polish | 1-2 days | +| **Total** | **~7-11 days** | + +--- + +## 11. API reference (target REST API) + +Complete list of endpoints for the web server: + +### App + +- `GET /api/metadata` — App version info +- `GET /api/updates/state` — Update state (stubbed) +- `POST /api/updates/check` — Check for updates (stubbed) +- `POST /api/updates/install` — Install update (stubbed) + +### Configuration + +- `GET /api/config` — Load current config +- `PUT /api/config` — Save config (body: `{ config: BenchLocalConfig }`) + +### Workspaces + +- `GET /api/workspaces` — Load workspace state +- `PUT /api/workspaces` — Save workspace state (body: `{ state: BenchLocalWorkspaceState }`) +- `POST /api/workspaces/export` — Export workspace (body: `{ workspaceId, state }`) → file download +- `POST /api/workspaces/import` — Import workspace (multipart: `.benchlocal-workspace.json`) + +### Bench Packs + +- `GET /api/benchpacks` — List installed Bench Packs +- `GET /api/benchpacks/registry` — Fetch official registry +- `POST /api/benchpacks/:benchPackId/install` — Install from registry +- `POST /api/benchpacks/install-from-url` — Install from URL (body: `{ url }`) +- `POST /api/benchpacks/:benchPackId/update` — Update from registry +- `POST /api/benchpacks/:benchPackId/uninstall` — Uninstall +- `GET /api/benchpacks/active-runs` — List active runs +- `POST /api/benchpacks/run` — Start a new run +- `POST /api/benchpacks/retry-scenario` — Retry a single scenario +- `POST /api/benchpacks/resume-run` — Resume an incomplete run +- `POST 
/api/benchpacks/stop` — Stop an active run (body: `{ tabId }`) +- `GET /api/benchpacks/:benchPackId/history` — List run history +- `GET /api/benchpacks/:benchPackId/history/:runId` — Load a specific run summary +- `POST /api/benchpacks/:benchPackId/history/clear` — Clear run history + +### Verifiers + +- `GET /api/verifiers` — List verifier statuses +- `POST /api/verifiers/start` — Start verifiers (body: `{ benchPackId }`) +- `POST /api/verifiers/stop` — Stop verifiers (body: `{ benchPackId }`) +- `POST /api/verifiers/cancel-start` — Cancel verifier startup +- `POST /api/verifiers/delete-image` — Delete Docker image (body: `{ benchPackId, verifierId }`) + +### Themes + +- `GET /api/themes` — List available themes +- `GET /api/themes/:themeId` — Load a specific theme + +### Models + +- `POST /api/models/discover` — Discover models from a provider (body: `{ provider }`) + +### Real-time events + +- `GET /api/events/sse` — Server-Sent Events stream + - `event: run-event` — Run progress events + - `event: benchpack-mutation-progress` — Install/update/uninstall progress + - `event: verifier-progress` — Verifier preparation progress + - `event: app-update-state` — Update state changes (stubbed) + +--- + +## 12. Environment variables + +| Variable | Default | Description | +| ------------------------ | --------------- | ------------------------------------------ | +| `BENCHLOCAL_PORT` | `3540` | HTTP server port | +| `BENCHLOCAL_HOST` | `127.0.0.1` | Bind address (`0.0.0.0` for remote access) | +| `BENCHLOCAL_CORS_ORIGIN` | `*` | CORS origin restriction | +| `BENCHLOCAL_BASIC_AUTH` | _(none)_ | Optional `user:password` for basic auth | +| `BENCHLOCAL_HOME` | `~/.benchlocal` | Override data directory | + +--- + +## 13. Future enhancements (post-MVP) + +1. **Authentication** — JWT-based auth for multi-user access +2. **WebSocket alternative to SSE** — For bidirectional communication (e.g., interactive logs) +3. **Run scheduling** — Queue and schedule benchmark runs +4. 
**Multi-user workspaces** — Separate workspace isolation per user +5. **Export to PDF/HTML** — Run result reports +6. **Comparison view** — Side-by-side comparison of multiple runs +7. **CI/CD integration** — Headless mode with JSON output only +8. **Plugin system** — Allow community web UI extensions diff --git a/webapp-singleuser-plan.md b/webapp-singleuser-plan.md new file mode 100644 index 0000000..9fa4915 --- /dev/null +++ b/webapp-singleuser-plan.md @@ -0,0 +1,1076 @@ +# BenchLocal Web App — Single-User Plan + +## Context + +This plan is for a **single-user, closed-environment** deployment. The server runs on your local machine. You connect from any browser on the same LAN. No authentication, no multi-user isolation, no public exposure. + +--- + +## 1. Current Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ Electron Main Process (app/src/main/) │ +│ │ +│ ┌──────────────┐ ┌─────────────────────────────┐ │ +│ │ IPC Handlers │ │ benchpack-host package │ │ +│ │ (ipc.ts) │──│ (run orchestration, │ │ +│ │ │ │ Docker, verifiers, │ │ +│ │ config, │ │ install/uninstall) │ │ +│ │ themes, │ └─────────────────────────────┘ │ +│ │ workspaces │ │ +│ └──────────────┘ │ +│ ▲ IPC bridge (preload) │ +│ │ │ +│ ┌──────────────┐ │ +│ │ React UI │ (app/src/renderer/src/App.tsx │ +│ │ (App.tsx) │ ~7900 lines, single file) │ +│ └──────────────┘ │ +│ │ +│ Storage: ~/.benchlocal/ │ +└─────────────────────────────────────────────────────┘ +``` + +### Key packages + +| Package | Role | +| ---------------------------- | ------------------------------------------------------ | +| `@benchlocal/core` | Types, config (TOML), workspace state (JSON), themes | +| `@benchlocal/benchpack-host` | Run orchestration, Docker verifiers, install/uninstall | +| `benchlocal-app` | Electron shell: main process, IPC, React renderer | + +### Data layout (`~/.benchlocal/`) + +``` +config.toml ← providers, models, benchpacks, theme +state.json ← workspaces, tabs, per-tab 
model/sampling settings +runs/ ← per-run: summary.json, events.jsonl, host.log +benchpacks/ ← installed Bench Pack artifacts +logs/ ← host log files +cache/ ← cache +themes/ ← user-installed theme JSON files +``` + +### IPC API surface + +The preload bridge (`app/src/preload/index.ts`) exposes `window.benchlocal`: + +| Namespace | Methods | +| ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `config` | `load()`, `save()` | +| `workspaces` | `load()`, `save()`, `export()`, `import()` | +| `benchPacks` | `list()`, `registry()`, `install()`, `installFromUrl()`, `update()`, `uninstall()`, `run()`, `retryScenario()`, `resumeRun()`, `stop()`, `history()`, `loadHistory()`, `clearHistory()`, `onRunEvent()`, `onMutationProgress()` | +| `verifiers` | `list()`, `start()`, `stop()`, `cancelStart()`, `deleteImage()`, `onProgress()` | +| `themes` | `list()`, `load()` | +| `models` | `discover()` | +| `app` | `metadata()`, `onOpenAbout()`, `onOpenSettings()` | +| `updates` | `state()`, `check()`, `install()`, `onState()` | +| `logs` | `openDetachedWindow()`, `closeDetachedWindow()`, `publishDetachedState()`, `onDetachedState()`, `onDetachedWindowClosed()` | + +--- + +## 2. 
Target Architecture + +``` +┌──────────────────────────────────────────────────┐ +│ Node.js HTTP Server (app/src/server/) │ +│ │ +│ ┌──────────────────┐ ┌─────────────────────┐ │ +│ │ REST + SSE │──│ benchpack-host pkg │ │ +│ │ (Fastify) │ │ (run orchestration │ │ +│ │ │ │ Docker, verifiers │ │ +│ └──────────────────┘ └─────────────────────┘ │ +│ │ +│ Storage: ~/.benchlocal/ (unchanged) │ +└──────────────────────────────────────────────────┘ + ▲ HTTP :3540 + │ +┌───────┴──────────────────────────────────────────┐ +│ React SPA (same App.tsx, fetch instead of IPC) │ +│ Served statically by the same server │ +│ Access: http://your-server:3540 │ +└──────────────────────────────────────────────────┘ +``` + +**The benchpack-host package is unchanged** — it's pure Node.js with no Electron dependency. + +--- + +## 3. Implementation + +### 3.1 Server entry point + +**`app/src/server/index.ts`** + +```typescript +import Fastify from "fastify"; +import fastifyStatic from "@fastify/static"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { registerApiRoutes } from "./api-routes"; +import { registerSseRoute } from "./sse-route"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +async function main() { + const server = Fastify({ logger: { level: "info" } }); + + registerApiRoutes(server); + registerSseRoute(server); + + // Serve the React SPA build output + const rendererOut = path.join(__dirname, "..", "renderer-out"); + server.register(fastifyStatic, { root: rendererOut, prefix: "/" }); + + // SPA fallback + server.setNotFoundHandler((req, reply) => { + if (req.url.startsWith("/api/")) { + return reply.code(404).send({ error: "Not found" }); + } + return reply.type("text/html").sendFile("index.html"); + }); + + const port = Number(process.env.BENCHLOCAL_PORT) || 3540; + const host = process.env.BENCHLOCAL_HOST || "0.0.0.0"; + + await server.listen({ port, host }); + console.log(`BenchLocal running at 
http://${host}:${port}`); +} + +// Graceful shutdown +process.on("SIGINT", async () => { + console.log("Shutting down..."); + await activeRunManager.shutdown(); + process.exit(0); +}); +process.on("SIGTERM", async () => { + await activeRunManager.shutdown(); + process.exit(0); +}); + +main(); +``` + +### 3.2 SSE event bus + +**`app/src/server/sse-bus.ts`** — in-process pub/sub replacing Electron IPC event channels. + +```typescript +type Handler = (data: unknown) => void; + +export class SseBus { + private subs = new Map<string, Set<Handler>>(); + + on(channel: string, handler: Handler): () => void { + const set = this.subs.get(channel) || new Set(); + set.add(handler); + this.subs.set(channel, set); + return () => set.delete(handler); + } + + emit(channel: string, data: unknown) { + for (const handler of this.subs.get(channel) || []) { + handler(data); + } + } +} + +export const sseBus = new SseBus(); +``` + +### 3.3 SSE endpoint + +**`app/src/server/sse-route.ts`** — single SSE stream for all real-time events. 
+ +```typescript +import type { FastifyInstance } from "fastify"; +import { sseBus } from "./sse-bus"; + +export function registerSseRoute(server: FastifyInstance) { + server.get("/api/events/sse", { handlerTimeout: 0 }, async (req, reply) => { + reply.header("Content-Type", "text/event-stream"); + reply.header("Cache-Control", "no-cache"); + reply.header("Connection", "keep-alive"); + reply.raw.write(": connected\n\n"); + + const channels = [ + "run-event", + "benchpack-mutation-progress", + "verifier-progress", + ]; + + const unsubscribers = channels.map((ch) => + sseBus.on(ch, (data) => { + reply.raw.write(`event: ${ch}\ndata: ${JSON.stringify(data)}\n\n`); + }) + ); + + const keepAlive = setInterval(() => { + reply.raw.write(": heartbeat\n\n"); + }, 15000); + + req.raw.on("close", () => { + unsubscribers.forEach((u) => u()); + clearInterval(keepAlive); + }); + + return new Promise(() => {}); + }); +} +``` + +### 3.4 Active run tracker + +**`app/src/server/run-manager.ts`** — replaces the `activeBenchPackRuns` Map in `ipc.ts`. + +```typescript +export class ActiveRunManager { + private runs = new Map< + string, + { benchPackId: string; controller: AbortController } + >(); + + setActive( + tabId: string, + run: { benchPackId: string; controller: AbortController } + ) { + this.runs.set(tabId, run); + } + + getActive(tabId: string) { + return this.runs.get(tabId); + } + + clearActive(tabId: string) { + this.runs.delete(tabId); + } + + listActive() { + return Array.from(this.runs.entries()).map(([tabId, run]) => ({ + tabId, + benchPackId: run.benchPackId, + })); + } + + async shutdown() { + for (const run of this.runs.values()) { + run.controller.abort(new Error("Server shutting down.")); + } + this.runs.clear(); + } +} + +export const activeRunManager = new ActiveRunManager(); +``` + +### 3.5 API routes + +**`app/src/server/api-routes.ts`** — one route per IPC channel. All routes delegate to `@benchlocal/core` or `@benchlocal/benchpack-host`. 
+ +```typescript +import type { FastifyInstance } from "fastify"; +import { + loadOrCreateConfig, + saveConfigFile, + getConfigPath, + loadOrCreateWorkspaceState, + getWorkspaceStatePath, + saveWorkspaceStateFile, +} from "@benchlocal/core"; +import { + inspectConfiguredBenchPacks, + loadBenchPackRegistry, + installBenchPackFromRegistry, + installBenchPackFromUrl, + updateBenchPackFromRegistry, + uninstallBenchPack, + runConfiguredBenchPack, + resumeBenchPackRun, + retryScenarioForBenchPackRun, + listRunHistoryForBenchPack, + loadRunSummaryForBenchPack, + clearRunHistoryForBenchPack, + getConfiguredBenchPackVerifierStatus, + startConfiguredBenchPackVerifiers, + stopConfiguredBenchPackVerifiers, + deleteConfiguredBenchPackVerifierImage, +} from "@benchlocal/benchpack-host"; +import { listAvailableThemes, loadAvailableTheme } from "./themes"; +import { loadAppMetadata } from "./app-metadata"; +import { discoverProviderModels } from "./models"; +import { activeRunManager } from "./run-manager"; +import { sseBus } from "./sse-bus"; + +async function compat() { + const meta = await loadAppMetadata(); + return { benchLocalVersion: meta.version }; +} + +export function registerApiRoutes(server: FastifyInstance) { + const api = server.prefix("/api"); + + // --- metadata --- + api.get("/metadata", () => loadAppMetadata()); + + // --- config --- + api.get("/config", async () => { + const r = await loadOrCreateConfig(); + return { path: r.path, created: r.created, config: r.config }; + }); + + api.put("/config", async (req) => { + const saved = await saveConfigFile( + (req.body as any).config, + getConfigPath() + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + // --- workspaces --- + api.get("/workspaces", async () => { + await loadOrCreateConfig(); + const r = await loadOrCreateWorkspaceState(getWorkspaceStatePath()); + return { path: r.path, created: r.created, state: r.state }; + }); + + api.put("/workspaces", async (req) => { + await 
loadOrCreateConfig(); + const saved = await saveWorkspaceStateFile( + (req.body as any).state, + getWorkspaceStatePath() + ); + return { path: getWorkspaceStatePath(), created: false, state: saved }; + }); + + // --- workspaces: export (file download) --- + api.post("/workspaces/export", async (req, reply) => { + const { workspaceId, state } = req.body as any; + const workspace = state.workspaces[workspaceId]; + if (!workspace) throw new Error(`Workspace "${workspaceId}" not found.`); + + const tabs = Object.fromEntries( + workspace.tabIds + .map((id: string) => state.tabs[id]) + .filter(Boolean) + .map((tab: any) => [tab.id, tab]) + ); + + const name = + (workspace.name.replace(/[^a-z0-9.-]/gi, "-") || "workspace") + + ".benchlocal-workspace.json"; + + reply.header("Content-Disposition", `attachment; filename="${name}"`); + reply.header("Content-Type", "application/json"); + return { + schemaVersion: 1, + exportedAt: new Date().toISOString(), + workspace, + tabs, + }; + }); + + // --- workspaces: import (file upload) --- + api.post("/workspaces/import", async (req) => { + // Accept JSON body directly (browser-side file reader) + const data = req.body as any; + if (!data.workspace || !data.tabs) { + throw new Error("Import file is missing workspace or tab data."); + } + return { imported: true, workspace: data.workspace, tabs: data.tabs }; + }); + + // --- bench packs --- + api.get("/benchpacks", async () => { + const { config } = await loadOrCreateConfig(); + return inspectConfiguredBenchPacks(config, await compat()); + }); + + api.get("/benchpacks/registry", async () => { + const { config } = await loadOrCreateConfig(); + return loadBenchPackRegistry(config); + }); + + api.post("/benchpacks/:benchPackId/install", async (req) => { + const { config } = await loadOrCreateConfig(); + const saved = await installBenchPackFromRegistry( + config, + (req.params as any).benchPackId, + (p) => sseBus.emit("benchpack-mutation-progress", p), + await compat() + ); + return { 
path: getConfigPath(), created: false, config: saved }; + }); + + api.post("/benchpacks/install-from-url", async (req) => { + const { config } = await loadOrCreateConfig(); + const saved = await installBenchPackFromUrl( + config, + (req.body as any).url, + (p) => sseBus.emit("benchpack-mutation-progress", p), + await compat() + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + api.post("/benchpacks/:benchPackId/update", async (req) => { + const { config } = await loadOrCreateConfig(); + const saved = await updateBenchPackFromRegistry( + config, + (req.params as any).benchPackId, + (p) => sseBus.emit("benchpack-mutation-progress", p), + await compat() + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + api.post("/benchpacks/:benchPackId/uninstall", async (req) => { + const { config } = await loadOrCreateConfig(); + const saved = await uninstallBenchPack( + config, + (req.params as any).benchPackId, + (p) => sseBus.emit("benchpack-mutation-progress", p) + ); + return { path: getConfigPath(), created: false, config: saved }; + }); + + // --- active runs --- + api.get("/benchpacks/active-runs", () => activeRunManager.listActive()); + + // --- run --- + api.post("/benchpacks/run", async (req) => { + const input = req.body as any; + const { config } = await loadOrCreateConfig(); + const controller = new AbortController(); + activeRunManager.setActive(input.tabId, { + benchPackId: input.benchPackId, + controller, + }); + + try { + return await runConfiguredBenchPack( + config, + input.benchPackId, + { + modelIds: input.modelIds, + executionMode: input.executionMode, + generation: input.generation, + abortSignal: controller.signal, + onEvent: (event) => + sseBus.emit("run-event", { tabId: input.tabId, event }), + }, + await compat() + ); + } finally { + activeRunManager.clearActive(input.tabId); + } + }); + + // --- retry scenario --- + api.post("/benchpacks/retry-scenario", async (req) => { + const input = 
req.body as any; + const { config } = await loadOrCreateConfig(); + return retryScenarioForBenchPackRun( + config, + input.benchPackId, + { + runId: input.runId, + scenarioId: input.scenarioId, + modelId: input.modelId, + generation: input.generation, + onEvent: (event) => + sseBus.emit("run-event", { tabId: input.tabId, event }), + }, + await compat() + ); + }); + + // --- resume run --- + api.post("/benchpacks/resume-run", async (req) => { + const input = req.body as any; + const { config } = await loadOrCreateConfig(); + const controller = new AbortController(); + activeRunManager.setActive(input.tabId, { + benchPackId: input.benchPackId, + controller, + }); + + try { + return await resumeBenchPackRun( + config, + input.benchPackId, + { + runId: input.runId, + executionMode: input.executionMode, + generation: input.generation, + abortSignal: controller.signal, + onEvent: (event) => + sseBus.emit("run-event", { tabId: input.tabId, event }), + }, + await compat() + ); + } finally { + activeRunManager.clearActive(input.tabId); + } + }); + + // --- stop --- + api.post("/benchpacks/stop", async (req) => { + const { tabId } = req.body as any; + const active = activeRunManager.getActive(tabId); + if (!active) return { stopped: false }; + active.controller.abort(new Error("Run cancelled by user.")); + return { stopped: true }; + }); + + // --- history --- + api.get("/benchpacks/:benchPackId/history", async (req) => { + const { config } = await loadOrCreateConfig(); + return listRunHistoryForBenchPack(config, (req.params as any).benchPackId); + }); + + api.get("/benchpacks/:benchPackId/history/:runId", async (req) => { + const { config } = await loadOrCreateConfig(); + return loadRunSummaryForBenchPack( + config, + (req.params as any).benchPackId, + (req.params as any).runId + ); + }); + + api.post("/benchpacks/:benchPackId/history/clear", async (req) => { + const { config } = await loadOrCreateConfig(); + return clearRunHistoryForBenchPack(config, (req.params as 
any).benchPackId); + }); + + // --- verifiers --- + api.get("/verifiers", async () => { + const { config } = await loadOrCreateConfig(); + const inspections = await inspectConfiguredBenchPacks( + config, + await compat() + ); + const relevant = inspections.filter( + (i) => + i.manifest?.capabilities.verification || + i.manifest?.capabilities.sidecars + ); + return Promise.all( + relevant.map((i) => getConfiguredBenchPackVerifierStatus(config, i.id)) + ); + }); + + api.post("/verifiers/start", async (req) => { + const { config } = await loadOrCreateConfig(); + const status = await getConfiguredBenchPackVerifierStatus( + config, + (req.body as any).benchPackId + ); + return startConfiguredBenchPackVerifiers( + config, + (req.body as any).benchPackId, + { + onProgress: (p) => + sseBus.emit("verifier-progress", { + benchPackId: (req.body as any).benchPackId, + event: { + type: "verifier_preparing", + benchPackId: (req.body as any).benchPackId, + benchPackName: status.benchPackName, + verifierId: p.verifierId, + phase: p.phase, + message: p.message, + }, + }), + } + ); + }); + + api.post("/verifiers/stop", async (req) => { + const { config } = await loadOrCreateConfig(); + return stopConfiguredBenchPackVerifiers( + config, + (req.body as any).benchPackId + ); + }); + + api.post("/verifiers/cancel-start", async () => ({ cancelled: false })); + + api.post("/verifiers/delete-image", async (req) => { + const { config } = await loadOrCreateConfig(); + return deleteConfiguredBenchPackVerifierImage( + config, + (req.body as any).benchPackId, + (req.body as any).verifierId + ); + }); + + // --- themes --- + api.get("/themes", () => listAvailableThemes()); + api.get("/themes/:themeId", async (req) => + loadAvailableTheme((req.params as any).themeId) + ); + + // --- models --- + api.post("/models/discover", async (req) => + discoverProviderModels((req.body as any).provider) + ); +} +``` + +### 3.6 Extracted helpers (no Electron) + +Three small files extracted from `app/src/main/` 
with Electron imports removed: + +| Source | Target | Change | +| ------------------------------------ | ------------------------ | ------------------------------------------------------------------- | +| `main/themes.ts` | `server/themes.ts` | Remove `app` import, use `import.meta.url` for theme dir resolution | +| `main/app-metadata.ts` | `server/app-metadata.ts` | Remove `app.getVersion()` fallback, read `package.json` directly | +| `main/ipc.ts` (model discovery only) | `server/models.ts` | Remove `ipcRenderer`, keep fetch-based model discovery logic | + +### 3.7 React API client + +**`app/src/renderer/src/api/client.ts`** — replaces `window.benchlocal` IPC bridge with fetch + EventSource. + +```typescript +const BASE = "/api"; + +async function api<T>( + method: string, + path: string, + body?: unknown +): Promise<T> { + const res = await fetch(`${BASE}${path}`, { + method, + headers: body ? { "Content-Type": "application/json" } : {}, + body: body ? JSON.stringify(body) : undefined, + }); + if (!res.ok) { + const text = await res.text(); + throw new Error(`API ${res.status}: ${text}`); + } + return res.json(); +} + +export const bl = { + config: { + load: () => api("GET", "/config"), + save: (c: any) => api("PUT", "/config", { config: c }), + }, + workspaces: { + load: () => api("GET", "/workspaces"), + save: (s: any) => api("PUT", "/workspaces", { state: s }), + export: (id: string, state: any) => + api("POST", "/workspaces/export", { workspaceId: id, state }), + import: (data: any) => api("POST", "/workspaces/import", data), + }, + benchPacks: { + list: () => api("GET", "/benchpacks"), + registry: () => api("GET", "/benchpacks/registry"), + install: (id: string) => api("POST", `/benchpacks/${id}/install`), + installFromUrl: (url: string) => + api("POST", "/benchpacks/install-from-url", { url }), + update: (id: string) => api("POST", `/benchpacks/${id}/update`), + uninstall: (id: string) => api("POST", `/benchpacks/${id}/uninstall`), + activeRuns: () => api("GET", 
"/benchpacks/active-runs"), + run: (input: any) => api("POST", "/benchpacks/run", input), + retryScenario: (input: any) => + api("POST", "/benchpacks/retry-scenario", input), + resumeRun: (input: any) => api("POST", "/benchpacks/resume-run", input), + stop: (tabId: string) => api("POST", "/benchpacks/stop", { tabId }), + history: (id: string) => api("GET", `/benchpacks/${id}/history`), + loadHistory: (id: string, runId: string) => + api("GET", `/benchpacks/${id}/history/${runId}`), + clearHistory: (id: string) => + api("POST", `/benchpacks/${id}/history/clear`), + }, + verifiers: { + list: () => api("GET", "/verifiers"), + start: (id: string) => api("POST", "/verifiers/start", { benchPackId: id }), + stop: (id: string) => api("POST", "/verifiers/stop", { benchPackId: id }), + cancelStart: (id: string) => + api("POST", "/verifiers/cancel-start", { benchPackId: id }), + deleteImage: (benchPackId: string, verifierId: string) => + api("POST", "/verifiers/delete-image", { benchPackId, verifierId }), + }, + themes: { + list: () => api("GET", "/themes"), + load: (id: string) => api("GET", `/themes/${id}`), + }, + models: { + discover: (provider: any) => api("POST", "/models/discover", { provider }), + }, + app: { + metadata: () => api("GET", "/metadata"), + }, + sse: () => new EventSource(`${BASE}/events/sse`), +}; +``` + +### 3.8 App.tsx changes + +In `App.tsx`, every `window.benchlocal` call maps to `bl.*`: + +| Desktop | Web | +| ---------------------------------------------------------------- | ----------------------------------------------- | +| `window.benchlocal.config.load()` | `bl.config.load()` | +| `window.benchlocal.config.save(c)` | `bl.config.save(c)` | +| `window.benchlocal.workspaces.load()` | `bl.workspaces.load()` | +| `window.benchlocal.workspaces.save(s)` | `bl.workspaces.save(s)` | +| `window.benchlocal.benchPacks.run(i)` | `bl.benchPacks.run(i)` | +| `window.benchlocal.benchPacks.stop({tabId})` | `bl.benchPacks.stop(tabId)` | +| 
`window.benchlocal.benchPacks.list()` | `bl.benchPacks.list()` | +| `window.benchlocal.benchPacks.registry()` | `bl.benchPacks.registry()` | +| `window.benchlocal.benchPacks.history({benchPackId})` | `bl.benchPacks.history(benchPackId)` | +| `window.benchlocal.benchPacks.loadHistory({benchPackId, runId})` | `bl.benchPacks.loadHistory(benchPackId, runId)` | +| `window.benchlocal.verifiers.list()` | `bl.verifiers.list()` | +| `window.benchlocal.verifiers.start({benchPackId})` | `bl.verifiers.start(benchPackId)` | +| `window.benchlocal.verifiers.stop({benchPackId})` | `bl.verifiers.stop(benchPackId)` | +| `window.benchlocal.themes.list()` | `bl.themes.list()` | +| `window.benchlocal.themes.load({themeId})` | `bl.themes.load(themeId)` | +| `window.benchlocal.models.discover({provider})` | `bl.models.discover(provider)` | +| `window.benchlocal.app.metadata()` | `bl.app.metadata()` | +| `window.benchlocal.updates.*` | removed (no-op) | +| `window.benchlocal.logs.*` | removed (no-op) | + +**SSE event listeners** replace `onRunEvent` / `onMutationProgress` / `onProgress`: + +```typescript +// In App.tsx useEffect, replace: +// window.benchlocal.benchPacks.onRunEvent(({ tabId, event }) => { ... }); +// window.benchlocal.benchPacks.onMutationProgress((p) => { ... }); +// window.benchlocal.verifiers.onProgress(({ benchPackId, event }) => { ... 
}); + +// With: +const sse = bl.sse(); +sse.addEventListener("run-event", (e: MessageEvent) => { + const { tabId, event } = JSON.parse(e.data); + // same handler logic as current App.tsx +}); +sse.addEventListener("benchpack-mutation-progress", (e: MessageEvent) => { + const progress = JSON.parse(e.data); + // same handler logic +}); +sse.addEventListener("verifier-progress", (e: MessageEvent) => { + const { benchPackId, event } = JSON.parse(e.data); + // same handler logic +}); + +// cleanup on unmount +return () => sse.close(); +``` + +**Removed UI features** (desktop-only, not needed in single-user web): + +- App update banner / check-for-updates button +- Detached logs window (keep the inline log drawer, remove the "open detached" button) +- `onOpenAbout` / `onOpenSettings` IPC channels (keep the modals, trigger via UI buttons only) +- System keyboard shortcut Cmd+, (Electron Menu → Settings) + +**Workspace import/export**: replace Electron file dialogs with browser-native equivalents: + +- Export: POST `/api/workspaces/export` → triggers browser download via `Content-Disposition` header +- Import: `<input type="file">` → read file with `FileReader` → POST `/api/workspaces/import` + +### 3.9 Build configuration + +**`app/vite.config.web.ts`** — standalone Vite config for the renderer (no Electron). 
+ +```typescript +import { defineConfig } from "vite"; +import react from "@vitejs/plugin-react"; +import path from "node:path"; + +export default defineConfig({ + plugins: [react()], + resolve: { + alias: { + "@": path.resolve(__dirname, "src"), + "@core": path.resolve(__dirname, "../packages/benchlocal-core/src"), + }, + }, + build: { + outDir: "out/renderer-out", + emptyOutDir: true, + }, +}); +``` + +**New scripts** in root `package.json`: + +```json +{ + "scripts": { + "web:dev:renderer": "vite --config app/vite.config.web.ts", + "web:dev:server": "tsx watch app/src/server/index.ts", + "web:dev": "concurrently \"npm run web:dev:renderer\" \"npm run web:dev:server\"", + "web:build": "npm run build:compile && vite build --config app/vite.config.web.ts && esbuild app/src/server/index.ts --bundle --platform=node --target=node20 --format=esm --outfile=dist/server.js --external:@benchlocal/*", + "web:start": "node dist/server.js" + } +} +``` + +New dependency in `app/package.json`: + +```json +{ + "dependencies": { + "fastify": "^5.0.0", + "@fastify/static": "^8.0.0" + }, + "devDependencies": { + "tsx": "^4.0.0", + "concurrently": "^9.0.0", + "esbuild": "^0.25.0" + } +} +``` + +--- + +## 4. REST API reference + +All endpoints prefixed with `/api/`. No auth required. 
+ +### Config + +| Method | Path | Body | Response | +| ------ | --------- | ------------ | --------------------------- | +| GET | `/config` | — | `{ path, created, config }` | +| PUT | `/config` | `{ config }` | `{ path, created, config }` | + +### Workspaces + +| Method | Path | Body | Response | +| ------ | -------------------- | ------------------------ | -------------------------- | +| GET | `/workspaces` | — | `{ path, created, state }` | +| PUT | `/workspaces` | `{ state }` | `{ path, created, state }` | +| POST | `/workspaces/export` | `{ workspaceId, state }` | JSON file download | +| POST | `/workspaces/import` | `{ workspace, tabs }` | `{ imported, workspace, tabs }` | + +### Bench Packs + +| Method | Path | Body | Notes | +| ------ | -------------------------------- | ----------------------------------------------------------------- | ----------------------- | +| GET | `/benchpacks` | — | Inspect installed packs | +| GET | `/benchpacks/registry` | — | Official registry | +| POST | `/benchpacks/:id/install` | — | SSE: mutation progress | +| POST | `/benchpacks/install-from-url` | `{ url }` | Third-party install | +| POST | `/benchpacks/:id/update` | — | SSE: mutation progress | +| POST | `/benchpacks/:id/uninstall` | — | SSE: mutation progress | +| GET | `/benchpacks/active-runs` | — | Active run list | +| POST | `/benchpacks/run` | `{ tabId, benchPackId, modelIds?, executionMode?, generation? }` | SSE: run events | +| POST | `/benchpacks/retry-scenario` | `{ tabId, benchPackId, runId, scenarioId, modelId, generation? }` | SSE: run events | +| POST | `/benchpacks/resume-run` | `{ tabId, benchPackId, runId, executionMode?, generation? 
}` | SSE: run events | +| POST | `/benchpacks/stop` | `{ tabId }` | — | +| GET | `/benchpacks/:id/history` | — | Run history list | +| GET | `/benchpacks/:id/history/:runId` | — | Run summary | +| POST | `/benchpacks/:id/history/clear` | — | — | + +### Verifiers + +| Method | Path | Body | Notes | +| ------ | ------------------------- | ----------------------------- | ---------------------- | +| GET | `/verifiers` | — | Status list | +| POST | `/verifiers/start` | `{ benchPackId }` | SSE: verifier progress | +| POST | `/verifiers/stop` | `{ benchPackId }` | — | +| POST | `/verifiers/cancel-start` | — | — | +| POST | `/verifiers/delete-image` | `{ benchPackId, verifierId }` | — | + +### Themes + +| Method | Path | Response | +| ------ | ------------------ | --------------------- | +| GET | `/themes` | Theme descriptor list | +| GET | `/themes/:themeId` | Theme definition | + +### Models + +| Method | Path | Body | Response | +| ------ | ------------------ | -------------- | --------------------- | +| POST | `/models/discover` | `{ provider }` | Discovered model list | + +### App + +| Method | Path | Response | +| ------ | ----------- | ------------------------------- | +| GET | `/metadata` | `{ productName, version, ... }` | + +### Events (SSE) + +| Path | Events | +| ----------------- | --------------------------------------------------------------- | +| GET `/events/sse` | `run-event`, `benchpack-mutation-progress`, `verifier-progress` | + +--- + +## 5. 
Files summary + +### New files (10) + +| File | Purpose | +| ------------------------------------ | ---------------------------------------------------- | +| `app/src/server/index.ts` | Fastify server entry, SPA serving, graceful shutdown | +| `app/src/server/api-routes.ts` | All REST endpoint handlers | +| `app/src/server/sse-route.ts` | SSE stream endpoint | +| `app/src/server/sse-bus.ts` | In-process event bus | +| `app/src/server/run-manager.ts` | Active run tracking + shutdown | +| `app/src/server/themes.ts` | Theme loader (Electron-free) | +| `app/src/server/app-metadata.ts` | App metadata (Electron-free) | +| `app/src/server/models.ts` | Model discovery (Electron-free) | +| `app/src/renderer/src/api/client.ts` | HTTP + SSE API client | +| `app/vite.config.web.ts` | Renderer-only Vite config | + +### Modified files (3) + +| File | Change | +| ------------------------------- | ----------------------------------------------------------------------------------- | +| `app/src/renderer/src/App.tsx` | `window.benchlocal.*` → `bl.*`; IPC listeners → SSE; remove update/detached-logs UI | +| `app/src/renderer/src/main.tsx` | Remove Electron preload check; no change to React bootstrap | +| `app/package.json` | Add `fastify`, `@fastify/static`, `tsx`, `esbuild`, `concurrently` | + +### Unchanged packages (3) + +| Package | Status | +| ---------------------------- | ---------------------------------- | +| `@benchlocal/core` | Unchanged — no Electron dependency | +| `@benchlocal/benchpack-host` | Unchanged — no Electron dependency | +| `@benchlocal/sdk` | Unchanged | + +--- + +## 6. Data flow + +``` +Browser SPA ── fetch() ──► Fastify routes ──► @benchlocal/benchpack-host + ▲ (run, install, Docker...) + │ SSE /api/events/sse │ + └──── EventSource ◄─────────────────────────┘ + │ + ▼ + ~/.benchlocal/ + (config.toml, state.json, runs/, benchpacks/) +``` + +--- + +## 7. 
What stays the same + +- `~/.benchlocal/` layout, `config.toml`, `state.json` +- All Bench Pack artifacts and registry +- Docker verifier management +- Run storage format (`summary.json`, `events.jsonl`, `host.log`) +- Theme JSON files +- React UI styling (Tailwind + CSS variables) +- Workspace/tab concept (organizational, not multi-user) +- `@benchlocal/core` and `@benchlocal/benchpack-host` packages + +--- + +## 8. What goes away + +| Feature | Reason | +| --------------------------------- | --------------------------------------------- | +| Auto-updater (`electron-updater`) | No desktop update mechanism in web mode | +| Detached logs window | No `BrowserWindow` — keep inline log drawer | +| Electron system menu | Not applicable | +| Native about panel | Custom React modal is sufficient | +| Window state persistence | Browser handles window size | +| File dialog (save/open) | Browser download/upload | +| `logs` IPC namespace | Removed entirely | +| `updates` IPC namespace | Removed (stubbed where App.tsx references it) | + +--- + +## 9. Deployment + +### Direct (npm) + +```bash +npm install +npm run web:build +npm run web:start +# → http://0.0.0.0:3540 +``` + +### Docker + +```dockerfile +FROM node:20-alpine AS builder +WORKDIR /app +COPY package.json package-lock.json ./ +RUN npm ci +COPY . . +RUN npm run web:build + +FROM node:20-alpine +WORKDIR /app +COPY --from=builder /app/dist/server.js ./ +COPY --from=builder /app/app/out/renderer-out ./renderer-out +COPY --from=builder /app/packages ./packages +COPY --from=builder /app/themes ./themes +EXPOSE 3540 +CMD ["node", "server.js"] +``` + +```bash +docker build -f Dockerfile.web -t benchlocal-web . 
+docker run -p 3540:3540 \ + -v ~/.benchlocal:/root/.benchlocal \ + -v /var/run/docker.sock:/var/run/docker.sock \ + benchlocal-web +``` + +### systemd service (Linux) + +```ini +[Unit] +Description=BenchLocal Web +After=network.target + +[Service] +Type=simple +User=benchlocal +WorkingDirectory=/opt/benchlocal +ExecStart=/usr/bin/node /opt/benchlocal/dist/server.js +Restart=on-failure +Environment=NODE_ENV=production +Environment=BENCHLOCAL_PORT=3540 +Environment=BENCHLOCAL_HOST=0.0.0.0 + +[Install] +WantedBy=multi-user.target +``` + +--- + +## 10. Environment variables + +| Variable | Default | Description | +| ----------------- | ------------- | --------------------------------- | +| `BENCHLOCAL_PORT` | `3540` | HTTP port | +| `BENCHLOCAL_HOST` | `0.0.0.0` | Bind address | +| `NODE_ENV` | `development` | `production` disables dev logging | + +--- + +## 11. Effort estimate + +| Task | Effort | +| ---------------------------------------------------------- | ----------- | +| Server backend (api-routes.ts, SSE, run-manager) | 2 days | +| Extract Electron-free helpers (themes, metadata, models) | 0.5 day | +| React API client (client.ts) | 0.5 day | +| Adapt App.tsx (IPC → fetch + SSE, remove desktop features) | 2 days | +| Build config (vite.config.web.ts, scripts) | 0.5 day | +| Dockerfile + systemd unit | 0.5 day | +| Testing & polish | 1 day | +| **Total** | **~7 days** | + +--- + +## 12. Known limitations (acceptable for single-user) + +- **No auto-update** — update by pulling new code and restarting the server +- **No file dialog** — workspace import uses `<input type="file">`, export uses browser download +- **No detached logs window** — the inline log drawer at the bottom of the page is sufficient +- **SSE reconnect** — on network drop, the client reconnects automatically via `EventSource`'s built-in reconnection. Any run events missed during the gap are recoverable by calling `/api/benchpacks/active-runs` and `/api/benchpacks/:id/history/:runId` after reconnect. 
From dd46e4aa6e1a58d4a9ab10284d11912ca380f9c2 Mon Sep 17 00:00:00 2001 From: David Ichim Date: Sun, 3 May 2026 19:38:55 +0300 Subject: [PATCH 4/4] refactor: reorganize app metadata loading and path resolution - Moved path resolution logic to a new module for better separation of concerns. - Updated app-metadata.ts to utilize new path resolution functions. - Enhanced error handling for license and package.json loading. - Improved server initialization in index.ts to check for renderer output directory. - Refactored SSE route handling for better response management. - Simplified theme loading logic in themes.ts by using path resolution functions. - Updated Vite configuration to use environment variables for ports and output directories. --- app/package.json | 5 +- app/src/renderer/src/App.tsx | 18276 ++++++++++++++-------------- app/src/server/app-metadata.ts | 112 +- app/src/server/index.ts | 89 +- app/src/server/path-resolution.ts | 180 + app/src/server/sse-route.ts | 65 +- app/src/server/themes.ts | 158 +- app/vite.config.web.ts | 62 +- 8 files changed, 9555 insertions(+), 9392 deletions(-) create mode 100644 app/src/server/path-resolution.ts diff --git a/app/package.json b/app/package.json index d31c211..6e054a7 100644 --- a/app/package.json +++ b/app/package.json @@ -24,8 +24,9 @@ "web:dev:renderer": "vite --config vite.config.web.ts", "web:dev:server": "tsx watch src/server/index.ts", "web:dev": "concurrently \"npm run web:dev:renderer\" \"npm run web:dev:server\"", - "web:build": "npm run build:compile && vite build --config vite.config.web.ts && esbuild src/server/index.ts --bundle --platform=node --target=node20 --format=esm --outdir=../dist/server --external:@benchlocal/*", - "web:start": "node ../dist/server/index.js" + "web:build": "npm run build:compile && vite build --config vite.config.web.ts && esbuild src/server/index.ts --bundle --platform=node --target=node20 --format=esm --packages=external --outfile=../dist/server/index.mjs 
--external:@benchlocal/*", + "preweb:start": "npm run web:build", + "web:start": "node ../dist/server/index.mjs" }, "dependencies": { "electron-updater": "^6.6.2", diff --git a/app/src/renderer/src/App.tsx b/app/src/renderer/src/App.tsx index 2ac32ac..fd33929 100644 --- a/app/src/renderer/src/App.tsx +++ b/app/src/renderer/src/App.tsx @@ -1,9548 +1,9550 @@ import type { - BenchLocalConfig, - BenchLocalExecutionMode, - BenchLocalModelConfig, - BenchLocalProviderConfig, - BenchLocalProviderKind, - BenchLocalThemeDefinition, - BenchLocalThemeDescriptor, - BenchLocalVerifierConfig, - BenchLocalWorkspace, - BenchLocalWorkspaceState, - BenchLocalWorkspaceTab, - BenchLocalWorkspaceTabModelSelection, - BenchPackInspection, - BenchPackManifest, - BenchPackRegistryEntry, - BenchPackRunHistoryEntry, - BenchPackRunSummary, - GenerationRequest, - ProgressEvent, - ScenarioMeta, - ScenarioResult, -} from "@core"; + BenchLocalConfig, + BenchLocalExecutionMode, + BenchLocalModelConfig, + BenchLocalProviderConfig, + BenchLocalProviderKind, + BenchLocalThemeDefinition, + BenchLocalThemeDescriptor, + BenchLocalVerifierConfig, + BenchLocalWorkspace, + BenchLocalWorkspaceState, + BenchLocalWorkspaceTab, + BenchLocalWorkspaceTabModelSelection, + BenchPackInspection, + BenchPackManifest, + BenchPackRegistryEntry, + BenchPackRunHistoryEntry, + BenchPackRunSummary, + GenerationRequest, + ProgressEvent, + ScenarioMeta, + ScenarioResult, +} from '@core'; import { - ArrowRight, - ArrowUp, - Bot, - Check, - ChevronDown, - ChevronLeft, - ChevronRight, - CircleAlert, - Cog, - FolderOpen, - GripVertical, - LayoutList, - Logs, - Palette, - Pencil, - Play, - PlugZap, - Plus, - RotateCcw, - Save, - Server, - Sidebar, - SlidersHorizontal, - Square, - Trash2, - Wrench, - X, -} from "lucide-react"; -import { type ReactNode, useEffect, useMemo, useRef, useState } from "react"; + ArrowRight, + ArrowUp, + Bot, + Check, + ChevronDown, + ChevronLeft, + ChevronRight, + CircleAlert, + Cog, + FolderOpen, + 
GripVertical, + LayoutList, + Logs, + Palette, + Pencil, + Play, + PlugZap, + Plus, + RotateCcw, + Save, + Server, + Sidebar, + SlidersHorizontal, + Square, + Trash2, + Wrench, + X, +} from 'lucide-react'; +import { type ReactNode, useEffect, useMemo, useRef, useState } from 'react'; import type { - BenchLocalAppMetadata, - BenchLocalDiscoveredModel, - BenchLocalUpdateState, - BenchPackMutationProgress, - BenchPackVerifierStatus, - DetachedLogsState, -} from "@/shared/desktop-api"; -import benchlocalIcon from "../../../assets/benchlocal-icon.png"; -import { bl } from "./api/client"; + BenchLocalAppMetadata, + BenchLocalDiscoveredModel, + BenchLocalUpdateState, + BenchPackMutationProgress, + BenchPackVerifierStatus, + DetachedLogsState, +} from '@/shared/desktop-api'; +import benchlocalIcon from '../../../assets/benchlocal-icon.png'; +import { bl } from './api/client'; const IS_IS_DETACHED_LOGS_VIEW = - typeof window !== "undefined" && - new URLSearchParams(window.location.search).get("view") === "logs"; + typeof window !== 'undefined' && + new URLSearchParams(window.location.search).get('view') === 'logs'; function describeAppUpdateState(state: BenchLocalUpdateState | null): string { - if (!state) { - return "Updater is initializing."; - } - - if (state.message?.trim()) { - return state.message.trim(); - } - - switch (state.status) { - case "unsupported": - return "Self-update is unavailable in this BenchLocal build."; - case "checking": - return "Checking for BenchLocal updates."; - case "available": - return state.availableVersion - ? `BenchLocal ${state.availableVersion} is available. Downloading update.` - : "A BenchLocal update is available. Downloading update."; - case "downloading": - return state.availableVersion - ? `Downloading BenchLocal ${state.availableVersion}.` - : "Downloading BenchLocal update."; - case "downloaded": - return state.downloadedVersion - ? 
`BenchLocal ${state.downloadedVersion} is ready to install.` - : "A BenchLocal update is ready to install."; - case "not_available": - return "BenchLocal is up to date."; - case "error": - return "BenchLocal could not complete the update request."; - default: - return "BenchLocal can check for updates."; - } + if (!state) { + return 'Updater is initializing.'; + } + + if (state.message?.trim()) { + return state.message.trim(); + } + + switch (state.status) { + case 'unsupported': + return 'Self-update is unavailable in this BenchLocal build.'; + case 'checking': + return 'Checking for BenchLocal updates.'; + case 'available': + return state.availableVersion + ? `BenchLocal ${state.availableVersion} is available. Downloading update.` + : 'A BenchLocal update is available. Downloading update.'; + case 'downloading': + return state.availableVersion + ? `Downloading BenchLocal ${state.availableVersion}.` + : 'Downloading BenchLocal update.'; + case 'downloaded': + return state.downloadedVersion + ? 
`BenchLocal ${state.downloadedVersion} is ready to install.` + : 'A BenchLocal update is ready to install.'; + case 'not_available': + return 'BenchLocal is up to date.'; + case 'error': + return 'BenchLocal could not complete the update request.'; + default: + return 'BenchLocal can check for updates.'; + } } function formatAppUpdateCheckedAt(checkedAt?: string): string | null { - if (!checkedAt) { - return null; - } + if (!checkedAt) { + return null; + } - const date = new Date(checkedAt); - if (Number.isNaN(date.valueOf())) { - return null; - } + const date = new Date(checkedAt); + if (Number.isNaN(date.valueOf())) { + return null; + } - return date.toLocaleString(); + return date.toLocaleString(); } type SettingsTab = - | "providers" - | "models" - | "benchPacks" - | "verification" - | "advanced"; + | 'providers' + | 'models' + | 'benchPacks' + | 'verification' + | 'advanced'; type LoadState = { - path: string; - created: boolean; - config: BenchLocalConfig; + path: string; + created: boolean; + config: BenchLocalConfig; }; type ProviderFormState = { - id: string; - kind: BenchLocalProviderKind; - name: string; - enabled: boolean; - base_url: string; - api_key: string; + id: string; + kind: BenchLocalProviderKind; + name: string; + enabled: boolean; + base_url: string; + api_key: string; }; type ProviderModalState = - | { - mode: "create"; - initialId?: undefined; - form: ProviderFormState; - } - | { - mode: "edit"; - initialId: string; - form: ProviderFormState; - }; + | { + mode: 'create'; + initialId?: undefined; + form: ProviderFormState; + } + | { + mode: 'edit'; + initialId: string; + form: ProviderFormState; + }; type ModelFormState = { - provider: string; - model: string; - label: string; - group: string; - enabled: boolean; + provider: string; + model: string; + label: string; + group: string; + enabled: boolean; }; type ModelModalState = - | { - mode: "create"; - index?: undefined; - form: ModelFormState; - } - | { - mode: "edit"; - index: number; - 
form: ModelFormState; - }; + | { + mode: 'create'; + index?: undefined; + form: ModelFormState; + } + | { + mode: 'edit'; + index: number; + form: ModelFormState; + }; type ModelBrowserModalState = { - providerId: string; - providerName: string; - entries: BenchLocalDiscoveredModel[]; - query: string; - selectedModelId: string | null; - loading: boolean; - error: string | null; + providerId: string; + providerName: string; + entries: BenchLocalDiscoveredModel[]; + query: string; + selectedModelId: string | null; + loading: boolean; + error: string | null; }; type DetailModalState = { - tabId: string; - runId: string | null; - benchPackId: string; - modelId: string; - scenarioId: string; - summary: string; - rawLog: string; - status: "pass" | "partial" | "fail"; + tabId: string; + runId: string | null; + benchPackId: string; + modelId: string; + scenarioId: string; + summary: string; + rawLog: string; + status: 'pass' | 'partial' | 'fail'; }; type TabModelsModalState = { - tabId: string; - selections: BenchLocalWorkspaceTabModelSelection[]; + tabId: string; + selections: BenchLocalWorkspaceTabModelSelection[]; }; type SamplingFormState = { - temperature: string; - top_p: string; - top_k: string; - min_p: string; - repetition_penalty: string; - max_tokens: string; - request_timeout_seconds: string; + temperature: string; + top_p: string; + top_k: string; + min_p: string; + repetition_penalty: string; + max_tokens: string; + request_timeout_seconds: string; }; type SamplingModalState = { - tabId: string; - benchPackId: string; - benchPackName: string; - defaults: GenerationRequest; - form: SamplingFormState; + tabId: string; + benchPackId: string; + benchPackName: string; + defaults: GenerationRequest; + form: SamplingFormState; }; type ModelAliasModalState = { - tabId: string; - modelId: string; - baseLabel: string; - alias: string; + tabId: string; + modelId: string; + baseLabel: string; + alias: string; }; type HistoryModalState = { - benchPackId: string; - 
benchPackName: string; - entries: BenchPackRunHistoryEntry[]; + benchPackId: string; + benchPackName: string; + entries: BenchPackRunHistoryEntry[]; }; type WorkspaceModalState = { - mode: "rename"; - workspaceId: string; - name: string; + mode: 'rename'; + workspaceId: string; + name: string; } | null; type WorkspaceContextMenuState = { - workspaceId: string; - workspaceName: string; - x: number; - y: number; + workspaceId: string; + workspaceName: string; + x: number; + y: number; } | null; type ConfirmDialogState = { - title: string; - subtitle: string; - confirmLabel: string; - tone?: "danger" | "neutral"; - onConfirm: () => void; + title: string; + subtitle: string; + confirmLabel: string; + tone?: 'danger' | 'neutral'; + onConfirm: () => void; } | null; type ResolvedTabModel = BenchLocalModelConfig & { - displayLabel: string; - alias?: string; + displayLabel: string; + alias?: string; }; type LiveRunState = { - runId?: string; - events: ProgressEvent[]; - resultsByModel: Record; - activeCellKeys: string[]; + runId?: string; + events: ProgressEvent[]; + resultsByModel: Record; + activeCellKeys: string[]; }; type ActiveRunEntry = { - benchPackId: string; - mode?: "host" | "replay"; + benchPackId: string; + mode?: 'host' | 'replay'; }; type LoadedHistoryEntry = { - runId: string; - startedAt: string; - mode?: "history" | "replay"; + runId: string; + startedAt: string; + mode?: 'history' | 'replay'; }; type LiveScenarioFocusState = { - liveScenarioId: string | null; - autoFollow: boolean; + liveScenarioId: string | null; + autoFollow: boolean; }; type VerifierPreparingProgress = Extract< - ProgressEvent, - { type: "verifier_preparing" } + ProgressEvent, + { type: 'verifier_preparing' } >; type VerifierPreparationModalState = { - tabId: string; - progress: VerifierPreparingProgress; + tabId: string; + progress: VerifierPreparingProgress; }; type SettingsVerifierPreparationModalState = { - benchPackId: string; - progress: VerifierPreparingProgress; + benchPackId: 
string; + progress: VerifierPreparingProgress; }; type BenchPackRunBlocker = { - title: string; - message: string; - actionLabel: string; + title: string; + message: string; + actionLabel: string; }; type BenchPackMutationState = BenchPackMutationProgress; -const THIRD_PARTY_INSTALL_MUTATION_ID = "__third_party_install__"; +const THIRD_PARTY_INSTALL_MUTATION_ID = '__third_party_install__'; const DEFAULT_BENCHLOCAL_GENERATION: GenerationRequest = { - max_tokens: 2048, - request_timeout_seconds: 300, + max_tokens: 2048, + request_timeout_seconds: 300, }; function isAbortLikeError(error: unknown): boolean { - return ( - error instanceof Error && - /abort|cancel/i.test(error.name + " " + error.message) - ); + return ( + error instanceof Error && + /abort|cancel/i.test(error.name + ' ' + error.message) + ); } function resolveThemeLabel( - themeId: string, - themes: BenchLocalThemeDescriptor[], - prefersDark: boolean, + themeId: string, + themes: BenchLocalThemeDescriptor[], + prefersDark: boolean, ): string { - if (themeId === "system") { - return `System (${prefersDark ? "Dark" : "Light"})`; - } + if (themeId === 'system') { + return `System (${prefersDark ? 'Dark' : 'Light'})`; + } - return themes.find((theme) => theme.id === themeId)?.name ?? themeId; + return themes.find((theme) => theme.id === themeId)?.name ?? 
themeId; } const EXECUTION_MODE_OPTIONS: Array<{ - value: BenchLocalExecutionMode; - label: string; + value: BenchLocalExecutionMode; + label: string; }> = [ - { value: "serial", label: "Serial per Test Case" }, - { value: "serial_by_model", label: "Serial per Model" }, - { value: "parallel_by_model", label: "Parallel per Model" }, - { value: "parallel_by_test_case", label: "Parallel per Test Case" }, - { value: "full_parallel", label: "Parallel for All" }, + { value: 'serial', label: 'Serial per Test Case' }, + { value: 'serial_by_model', label: 'Serial per Model' }, + { value: 'parallel_by_model', label: 'Parallel per Model' }, + { value: 'parallel_by_test_case', label: 'Parallel per Test Case' }, + { value: 'full_parallel', label: 'Parallel for All' }, ]; function supportsLiveScenarioColumnFocus( - executionMode: BenchLocalExecutionMode, + executionMode: BenchLocalExecutionMode, ): boolean { - return ( - executionMode !== "parallel_by_model" && executionMode !== "full_parallel" - ); + return ( + executionMode !== 'parallel_by_model' && executionMode !== 'full_parallel' + ); } -const SIDEBAR_OPEN_STORAGE_KEY = "benchlocal.sidebar-open"; +const SIDEBAR_OPEN_STORAGE_KEY = 'benchlocal.sidebar-open'; const PROVIDER_KIND_OPTIONS: Array<{ - value: BenchLocalProviderKind; - label: string; + value: BenchLocalProviderKind; + label: string; }> = [ - { value: "openai_compatible", label: "OpenAI Compatible" }, - { value: "openrouter", label: "OpenRouter" }, - { value: "huggingface", label: "Hugging Face" }, - { value: "ollama", label: "Ollama" }, - { value: "llamacpp", label: "llama.cpp" }, - { value: "mlx", label: "MLX" }, - { value: "lmstudio", label: "LM Studio" }, - { value: "pico", label: "Pico" }, + { value: 'openai_compatible', label: 'OpenAI Compatible' }, + { value: 'openrouter', label: 'OpenRouter' }, + { value: 'huggingface', label: 'Hugging Face' }, + { value: 'ollama', label: 'Ollama' }, + { value: 'llamacpp', label: 'llama.cpp' }, + { value: 'mlx', label: 'MLX' 
}, + { value: 'lmstudio', label: 'LM Studio' }, + { value: 'pico', label: 'Pico' }, ]; const SETTINGS_TABS: Array<{ - id: SettingsTab; - label: string; - blurb: string; - icon: ReactNode; + id: SettingsTab; + label: string; + blurb: string; + icon: ReactNode; }> = [ - { - id: "providers", - label: "Providers", - blurb: "Provider endpoints and credentials.", - icon: , - }, - { - id: "models", - label: "Models", - blurb: "Shared model registry across Bench Packs.", - icon: , - }, - { - id: "benchPacks", - label: "Bench Packs", - blurb: "Browse, install, update, and remove official Bench Packs.", - icon: , - }, - { - id: "verification", - label: "Verification", - blurb: "Managed verifiers and dependency modes.", - icon: , - }, + { + id: 'providers', + label: 'Providers', + blurb: 'Provider endpoints and credentials.', + icon: , + }, + { + id: 'models', + label: 'Models', + blurb: 'Shared model registry across Bench Packs.', + icon: , + }, + { + id: 'benchPacks', + label: 'Bench Packs', + blurb: 'Browse, install, update, and remove official Bench Packs.', + icon: , + }, + { + id: 'verification', + label: 'Verification', + blurb: 'Managed verifiers and dependency modes.', + icon: , + }, ]; const SAMPLING_FIELDS: Array<{ - key: keyof SamplingFormState; - label: string; - placeholder: string; - integer?: boolean; + key: keyof SamplingFormState; + label: string; + placeholder: string; + integer?: boolean; }> = [ - { key: "temperature", label: "Temperature", placeholder: "Leave blank" }, - { key: "top_p", label: "Top P", placeholder: "Leave blank" }, - { key: "top_k", label: "Top K", placeholder: "Leave blank", integer: true }, - { key: "min_p", label: "Min P", placeholder: "Leave blank" }, - { - key: "repetition_penalty", - label: "Repetition Penalty", - placeholder: "Leave blank", - }, - { - key: "max_tokens", - label: "Max Tokens", - placeholder: "Leave blank", - integer: true, - }, - { - key: "request_timeout_seconds", - label: "Request Timeout Seconds", - placeholder: 
"Leave blank", - integer: true, - }, + { key: 'temperature', label: 'Temperature', placeholder: 'Leave blank' }, + { key: 'top_p', label: 'Top P', placeholder: 'Leave blank' }, + { key: 'top_k', label: 'Top K', placeholder: 'Leave blank', integer: true }, + { key: 'min_p', label: 'Min P', placeholder: 'Leave blank' }, + { + key: 'repetition_penalty', + label: 'Repetition Penalty', + placeholder: 'Leave blank', + }, + { + key: 'max_tokens', + label: 'Max Tokens', + placeholder: 'Leave blank', + integer: true, + }, + { + key: 'request_timeout_seconds', + label: 'Request Timeout Seconds', + placeholder: 'Leave blank', + integer: true, + }, ]; function cloneConfig(config: BenchLocalConfig): BenchLocalConfig { - return structuredClone(config); + return structuredClone(config); } const FILESYSTEM_CONFIG_KEYS = [ - "run_storage_dir", - "benchpack_storage_dir", - "log_storage_dir", - "cache_dir", + 'run_storage_dir', + 'benchpack_storage_dir', + 'log_storage_dir', + 'cache_dir', ] as const satisfies Array; function reapplyPendingFilesystemDraft( - baseConfig: BenchLocalConfig, - currentDraft: BenchLocalConfig, - persistedConfig: BenchLocalConfig, + baseConfig: BenchLocalConfig, + currentDraft: BenchLocalConfig, + persistedConfig: BenchLocalConfig, ): BenchLocalConfig { - const nextConfig = cloneConfig(baseConfig); + const nextConfig = cloneConfig(baseConfig); - for (const key of FILESYSTEM_CONFIG_KEYS) { - if (currentDraft[key] !== persistedConfig[key]) { - nextConfig[key] = currentDraft[key]; - } - } + for (const key of FILESYSTEM_CONFIG_KEYS) { + if (currentDraft[key] !== persistedConfig[key]) { + nextConfig[key] = currentDraft[key]; + } + } - return nextConfig; + return nextConfig; } function providerKindLabel(kind: BenchLocalProviderKind): string { - return ( - PROVIDER_KIND_OPTIONS.find((option) => option.value === kind)?.label ?? kind - ); + return ( + PROVIDER_KIND_OPTIONS.find((option) => option.value === kind)?.label ?? 
kind + ); } function defaultProviderName(kind: BenchLocalProviderKind): string { - return providerKindLabel(kind); + return providerKindLabel(kind); } function defaultProviderApiKeyPlaceholder( - kind: BenchLocalProviderKind, + kind: BenchLocalProviderKind, ): string { - switch (kind) { - case "huggingface": - return "hf_..."; - default: - return "sk-or-v1-..."; - } + switch (kind) { + case 'huggingface': + return 'hf_...'; + default: + return 'sk-or-v1-...'; + } } function benchPackMutationLabel(mutation: BenchPackMutationState): string { - switch (mutation.action) { - case "install": - return mutation.phase === "complete" ? "Installed" : "Installing..."; - case "update": - return mutation.phase === "complete" ? "Updated" : "Updating..."; - case "uninstall": - return mutation.phase === "complete" ? "Removed" : "Removing..."; - default: - return mutation.message; - } + switch (mutation.action) { + case 'install': + return mutation.phase === 'complete' ? 'Installed' : 'Installing...'; + case 'update': + return mutation.phase === 'complete' ? 'Updated' : 'Updating...'; + case 'uninstall': + return mutation.phase === 'complete' ? 
'Removed' : 'Removing...'; + default: + return mutation.message; + } } function defaultProviderBaseUrl(kind: BenchLocalProviderKind): string { - switch (kind) { - case "openrouter": - return "https://openrouter.ai/api/v1"; - case "huggingface": - return "https://router.huggingface.co/v1"; - case "ollama": - return "http://127.0.0.1:11434/v1"; - case "llamacpp": - return "http://127.0.0.1:8080/v1"; - case "mlx": - return "http://127.0.0.1:8082/v1"; - case "lmstudio": - return "http://127.0.0.1:1234/v1"; - case "pico": - return "http://127.0.0.1:7426/v1"; - case "openai_compatible": - default: - return "https://api.example.com/v1"; - } + switch (kind) { + case 'openrouter': + return 'https://openrouter.ai/api/v1'; + case 'huggingface': + return 'https://router.huggingface.co/v1'; + case 'ollama': + return 'http://127.0.0.1:11434/v1'; + case 'llamacpp': + return 'http://127.0.0.1:8080/v1'; + case 'mlx': + return 'http://127.0.0.1:8082/v1'; + case 'lmstudio': + return 'http://127.0.0.1:1234/v1'; + case 'pico': + return 'http://127.0.0.1:7426/v1'; + case 'openai_compatible': + default: + return 'https://api.example.com/v1'; + } } function createEmptyProvider(): ProviderFormState { - return { - id: `openai_compatible-${crypto.randomUUID()}`, - kind: "openai_compatible", - name: "", - enabled: true, - base_url: "https://api.example.com/v1", - api_key: "", - }; + return { + id: `openai_compatible-${crypto.randomUUID()}`, + kind: 'openai_compatible', + name: '', + enabled: true, + base_url: 'https://api.example.com/v1', + api_key: '', + }; } -function createEmptyModel(providerId = "openrouter"): ModelFormState { - return { - provider: providerId, - model: "", - label: "", - group: "primary", - enabled: true, - }; +function createEmptyModel(providerId = 'openrouter'): ModelFormState { + return { + provider: providerId, + model: '', + label: '', + group: 'primary', + enabled: true, + }; } function providerSupportsModelDiscovery( - provider?: BenchLocalProviderConfig | null, + 
provider?: BenchLocalProviderConfig | null, ): boolean { - return ( - provider?.kind === "openrouter" || - provider?.kind === "huggingface" || - provider?.kind === "openai_compatible" - ); + return ( + provider?.kind === 'openrouter' || + provider?.kind === 'huggingface' || + provider?.kind === 'openai_compatible' + ); } function defaultModelLabel( - providerName: string, - modelId: string, - discoveredName?: string, + providerName: string, + modelId: string, + discoveredName?: string, ): string { - const trimmedDiscoveredName = discoveredName?.trim(); + const trimmedDiscoveredName = discoveredName?.trim(); - if (trimmedDiscoveredName) { - return trimmedDiscoveredName; - } + if (trimmedDiscoveredName) { + return trimmedDiscoveredName; + } - return `${modelId.trim()} via ${providerName}`.trim(); + return `${modelId.trim()} via ${providerName}`.trim(); } function createSamplingForm(input?: GenerationRequest): SamplingFormState { - return { - temperature: input?.temperature?.toString() ?? "", - top_p: input?.top_p?.toString() ?? "", - top_k: input?.top_k?.toString() ?? "", - min_p: input?.min_p?.toString() ?? "", - repetition_penalty: input?.repetition_penalty?.toString() ?? "", - max_tokens: input?.max_tokens?.toString() ?? "", - request_timeout_seconds: input?.request_timeout_seconds?.toString() ?? "", - }; + return { + temperature: input?.temperature?.toString() ?? '', + top_p: input?.top_p?.toString() ?? '', + top_k: input?.top_k?.toString() ?? '', + min_p: input?.min_p?.toString() ?? '', + repetition_penalty: input?.repetition_penalty?.toString() ?? '', + max_tokens: input?.max_tokens?.toString() ?? '', + request_timeout_seconds: input?.request_timeout_seconds?.toString() ?? 
'', + }; } function parseSamplingForm(form: SamplingFormState): { - value?: GenerationRequest; - error?: string; + value?: GenerationRequest; + error?: string; } { - const result: GenerationRequest = {}; + const result: GenerationRequest = {}; - for (const field of SAMPLING_FIELDS) { - const rawValue = form[field.key].trim(); + for (const field of SAMPLING_FIELDS) { + const rawValue = form[field.key].trim(); - if (!rawValue) { - continue; - } + if (!rawValue) { + continue; + } - const parsed = field.integer - ? Number.parseInt(rawValue, 10) - : Number(rawValue); + const parsed = field.integer + ? Number.parseInt(rawValue, 10) + : Number(rawValue); - if (!Number.isFinite(parsed)) { - return { error: `${field.label} must be a valid number.` }; - } + if (!Number.isFinite(parsed)) { + return { error: `${field.label} must be a valid number.` }; + } - if (field.integer && parsed <= 0) { - return { error: `${field.label} must be greater than zero.` }; - } + if (field.integer && parsed <= 0) { + return { error: `${field.label} must be greater than zero.` }; + } - result[field.key as keyof GenerationRequest] = parsed; - } + result[field.key as keyof GenerationRequest] = parsed; + } - return { value: result }; + return { value: result }; } function toProviderForm( - id: string, - provider: BenchLocalProviderConfig, + id: string, + provider: BenchLocalProviderConfig, ): ProviderFormState { - return { - id, - kind: provider.kind, - name: provider.name, - enabled: provider.enabled, - base_url: provider.base_url, - api_key: provider.api_key ?? "", - }; + return { + id, + kind: provider.kind, + name: provider.name, + enabled: provider.enabled, + base_url: provider.base_url, + api_key: provider.api_key ?? 
'', + }; } function toModelForm(model: BenchLocalModelConfig): ModelFormState { - return { - provider: model.provider, - model: model.model, - label: model.label, - group: model.group, - enabled: model.enabled, - }; + return { + provider: model.provider, + model: model.model, + label: model.label, + group: model.group, + enabled: model.enabled, + }; } function buildModelConfig( - form: ModelFormState, - providers: Record, + form: ModelFormState, + providers: Record, ): BenchLocalModelConfig { - const provider = providers[form.provider.trim()]; - const providerLabel = provider?.name?.trim() || form.provider.trim(); - - return { - id: `${form.provider}:${form.model}`.trim(), - provider: form.provider.trim(), - model: form.model.trim(), - label: form.label.trim() || `${form.model.trim()} via ${providerLabel}`, - group: form.group.trim() || "primary", - enabled: form.enabled, - }; + const provider = providers[form.provider.trim()]; + const providerLabel = provider?.name?.trim() || form.provider.trim(); + + return { + id: `${form.provider}:${form.model}`.trim(), + provider: form.provider.trim(), + model: form.model.trim(), + label: form.label.trim() || `${form.model.trim()} via ${providerLabel}`, + group: form.group.trim() || 'primary', + enabled: form.enabled, + }; } function createWorkspaceName(existingCount: number): string { - return existingCount === 0 - ? "My Workspace" - : `Workspace ${existingCount + 1}`; + return existingCount === 0 + ? 'My Workspace' + : `Workspace ${existingCount + 1}`; } function createTabTitle( - benchPackId: string, - inspections: BenchPackInspection[], + benchPackId: string, + inspections: BenchPackInspection[], ): string { - return ( - inspections.find((inspection) => inspection.id === benchPackId)?.manifest - ?.name ?? benchPackId - ); + return ( + inspections.find((inspection) => inspection.id === benchPackId)?.manifest + ?.name ?? 
benchPackId + ); } function normalizeTabModelSelections( - selections: BenchLocalWorkspaceTabModelSelection[], + selections: BenchLocalWorkspaceTabModelSelection[], ): BenchLocalWorkspaceTabModelSelection[] { - const seen = new Set(); - - return selections - .filter((selection) => { - const modelId = selection.modelId.trim(); - - if (!modelId || seen.has(modelId)) { - return false; - } - - seen.add(modelId); - return true; - }) - .map((selection) => ({ - modelId: selection.modelId.trim(), - alias: selection.alias?.trim() || undefined, - })); + const seen = new Set(); + + return selections + .filter((selection) => { + const modelId = selection.modelId.trim(); + + if (!modelId || seen.has(modelId)) { + return false; + } + + seen.add(modelId); + return true; + }) + .map((selection) => ({ + modelId: selection.modelId.trim(), + alias: selection.alias?.trim() || undefined, + })); } function normalizeEditableTabModelSelections( - selections: BenchLocalWorkspaceTabModelSelection[], + selections: BenchLocalWorkspaceTabModelSelection[], ): BenchLocalWorkspaceTabModelSelection[] { - const seen = new Set(); - - return selections - .filter((selection) => { - const modelId = selection.modelId.trim(); - - if (!modelId || seen.has(modelId)) { - return false; - } - - seen.add(modelId); - return true; - }) - .map((selection) => ({ - modelId: selection.modelId.trim(), - alias: selection.alias, - })); + const seen = new Set(); + + return selections + .filter((selection) => { + const modelId = selection.modelId.trim(); + + if (!modelId || seen.has(modelId)) { + return false; + } + + seen.add(modelId); + return true; + }) + .map((selection) => ({ + modelId: selection.modelId.trim(), + alias: selection.alias, + })); } function getTableScrollbarThumbWidth(metrics: { - clientWidth: number; - scrollWidth: number; - scrollLeft: number; + clientWidth: number; + scrollWidth: number; + scrollLeft: number; }): number { - if (metrics.scrollWidth <= 0 || metrics.clientWidth <= 0) { - return 0; - } 
+ if (metrics.scrollWidth <= 0 || metrics.clientWidth <= 0) { + return 0; + } - const ratio = metrics.clientWidth / metrics.scrollWidth; - return Math.max(56, Math.round(metrics.clientWidth * ratio)); + const ratio = metrics.clientWidth / metrics.scrollWidth; + return Math.max(56, Math.round(metrics.clientWidth * ratio)); } function SettingsTableShell({ - children, - className, + children, + className, }: { - children: ReactNode; - className?: string; + children: ReactNode; + className?: string; }) { - const viewportRef = useRef(null); - const scrollbarTrackRef = useRef(null); - const scrollbarDragRef = useRef<{ - startX: number; - startScrollLeft: number; - } | null>(null); - const [scrollMetrics, setScrollMetrics] = useState({ - clientWidth: 0, - scrollWidth: 0, - scrollLeft: 0, - }); - - const hasHorizontalOverflow = - scrollMetrics.scrollWidth > scrollMetrics.clientWidth + 1; - const scrollbarThumbWidth = hasHorizontalOverflow - ? getTableScrollbarThumbWidth(scrollMetrics) - : 0; - const scrollbarThumbOffset = - hasHorizontalOverflow && scrollbarTrackRef.current - ? (scrollMetrics.scrollLeft / - Math.max(1, scrollMetrics.scrollWidth - scrollMetrics.clientWidth)) * - Math.max(0, scrollbarTrackRef.current.clientWidth - scrollbarThumbWidth) - : 0; - const wrapClassName = [ - "settings-list-table-wrap", - className, - hasHorizontalOverflow ? "has-sticky-last-column-shadow" : "", - ] - .filter(Boolean) - .join(" "); - - useEffect(() => { - const viewport = viewportRef.current; - - if (!viewport) { - return; - } - - const updateMetrics = () => { - setScrollMetrics({ - clientWidth: viewport.clientWidth, - scrollWidth: viewport.scrollWidth, - scrollLeft: viewport.scrollLeft, - }); - }; - - const syncFromViewport = () => { - updateMetrics(); - }; - - updateMetrics(); - viewport.addEventListener("scroll", syncFromViewport); - window.addEventListener("resize", updateMetrics); - - const resizeObserver = - typeof ResizeObserver !== "undefined" - ? 
new ResizeObserver(() => { - updateMetrics(); - }) - : null; - - resizeObserver?.observe(viewport); - - if (viewport.firstElementChild instanceof HTMLElement) { - resizeObserver?.observe(viewport.firstElementChild); - } - - return () => { - viewport.removeEventListener("scroll", syncFromViewport); - window.removeEventListener("resize", updateMetrics); - resizeObserver?.disconnect(); - }; - }, [children]); - - useEffect(() => { - const handleMove = (event: MouseEvent) => { - const viewport = viewportRef.current; - const track = scrollbarTrackRef.current; - const drag = scrollbarDragRef.current; - - if (!viewport || !track || !drag) { - return; - } - - const maxScrollLeft = Math.max( - 0, - viewport.scrollWidth - viewport.clientWidth, - ); - const maxThumbOffset = Math.max( - 1, - track.clientWidth - getTableScrollbarThumbWidth(scrollMetrics), - ); - const deltaX = event.clientX - drag.startX; - const nextScrollLeft = Math.min( - maxScrollLeft, - Math.max( - 0, - drag.startScrollLeft + (deltaX / maxThumbOffset) * maxScrollLeft, - ), - ); - viewport.scrollLeft = nextScrollLeft; - }; - - const handleUp = () => { - scrollbarDragRef.current = null; - document.body.style.userSelect = ""; - }; - - window.addEventListener("mousemove", handleMove); - window.addEventListener("mouseup", handleUp); - - return () => { - window.removeEventListener("mousemove", handleMove); - window.removeEventListener("mouseup", handleUp); - }; - }, [scrollMetrics]); - - return ( -
-
- {children} -
- {hasHorizontalOverflow ? ( - - ); + const viewportRef = useRef(null); + const scrollbarTrackRef = useRef(null); + const scrollbarDragRef = useRef<{ + startX: number; + startScrollLeft: number; + } | null>(null); + const [scrollMetrics, setScrollMetrics] = useState({ + clientWidth: 0, + scrollWidth: 0, + scrollLeft: 0, + }); + + const hasHorizontalOverflow = + scrollMetrics.scrollWidth > scrollMetrics.clientWidth + 1; + const scrollbarThumbWidth = hasHorizontalOverflow + ? getTableScrollbarThumbWidth(scrollMetrics) + : 0; + const scrollbarThumbOffset = + hasHorizontalOverflow && scrollbarTrackRef.current + ? (scrollMetrics.scrollLeft / + Math.max(1, scrollMetrics.scrollWidth - scrollMetrics.clientWidth)) * + Math.max(0, scrollbarTrackRef.current.clientWidth - scrollbarThumbWidth) + : 0; + const wrapClassName = [ + 'settings-list-table-wrap', + className, + hasHorizontalOverflow ? 'has-sticky-last-column-shadow' : '', + ] + .filter(Boolean) + .join(' '); + + useEffect(() => { + const viewport = viewportRef.current; + + if (!viewport) { + return; + } + + const updateMetrics = () => { + setScrollMetrics({ + clientWidth: viewport.clientWidth, + scrollWidth: viewport.scrollWidth, + scrollLeft: viewport.scrollLeft, + }); + }; + + const syncFromViewport = () => { + updateMetrics(); + }; + + updateMetrics(); + viewport.addEventListener('scroll', syncFromViewport); + window.addEventListener('resize', updateMetrics); + + const resizeObserver = + typeof ResizeObserver !== 'undefined' + ? 
new ResizeObserver(() => { + updateMetrics(); + }) + : null; + + resizeObserver?.observe(viewport); + + if (viewport.firstElementChild instanceof HTMLElement) { + resizeObserver?.observe(viewport.firstElementChild); + } + + return () => { + viewport.removeEventListener('scroll', syncFromViewport); + window.removeEventListener('resize', updateMetrics); + resizeObserver?.disconnect(); + }; + }, [children]); + + useEffect(() => { + const handleMove = (event: MouseEvent) => { + const viewport = viewportRef.current; + const track = scrollbarTrackRef.current; + const drag = scrollbarDragRef.current; + + if (!viewport || !track || !drag) { + return; + } + + const maxScrollLeft = Math.max( + 0, + viewport.scrollWidth - viewport.clientWidth, + ); + const maxThumbOffset = Math.max( + 1, + track.clientWidth - getTableScrollbarThumbWidth(scrollMetrics), + ); + const deltaX = event.clientX - drag.startX; + const nextScrollLeft = Math.min( + maxScrollLeft, + Math.max( + 0, + drag.startScrollLeft + (deltaX / maxThumbOffset) * maxScrollLeft, + ), + ); + viewport.scrollLeft = nextScrollLeft; + }; + + const handleUp = () => { + scrollbarDragRef.current = null; + document.body.style.userSelect = ''; + }; + + window.addEventListener('mousemove', handleMove); + window.addEventListener('mouseup', handleUp); + + return () => { + window.removeEventListener('mousemove', handleMove); + window.removeEventListener('mouseup', handleUp); + }; + }, [scrollMetrics]); + + return ( +
+
+ {children} +
+ {hasHorizontalOverflow ? ( + + ); } function resolveTabModels( - tab: BenchLocalWorkspaceTab | null, - models: BenchLocalModelConfig[], + tab: BenchLocalWorkspaceTab | null, + models: BenchLocalModelConfig[], ): ResolvedTabModel[] { - const enabledModels = models.filter((model) => model.enabled); - const modelMap = new Map(enabledModels.map((model) => [model.id, model])); - - return normalizeTabModelSelections(tab?.modelSelections ?? []).reduce< - ResolvedTabModel[] - >((resolved, selection) => { - const model = modelMap.get(selection.modelId); - - if (!model) { - return resolved; - } - - resolved.push({ - ...model, - alias: selection.alias, - displayLabel: selection.alias || model.label, - }); - - return resolved; - }, []); + const enabledModels = models.filter((model) => model.enabled); + const modelMap = new Map(enabledModels.map((model) => [model.id, model])); + + return normalizeTabModelSelections(tab?.modelSelections ?? []).reduce< + ResolvedTabModel[] + >((resolved, selection) => { + const model = modelMap.get(selection.modelId); + + if (!model) { + return resolved; + } + + resolved.push({ + ...model, + alias: selection.alias, + displayLabel: selection.alias || model.label, + }); + + return resolved; + }, []); } function resolveHistoryModels( - runSummary: BenchPackRunSummary | null, - models: BenchLocalModelConfig[], + runSummary: BenchPackRunSummary | null, + models: BenchLocalModelConfig[], ): ResolvedTabModel[] { - if (!runSummary) { - return []; - } - - const modelMap = new Map(models.map((model) => [model.id, model])); - const runStartedEvent = runSummary.events.find( - (event): event is Extract => - event.type === "run_started", - ); - const orderedModelIds = [ - ...(runStartedEvent?.models.map((model) => model.id) ?? 
[]), - ...Object.keys(runSummary.resultsByModel), - ].filter((modelId, index, all) => modelId && all.indexOf(modelId) === index); - - return orderedModelIds.map((modelId) => { - const currentModel = modelMap.get(modelId); - const historicalLabel = runStartedEvent?.models.find( - (model) => model.id === modelId, - )?.label; - const label = currentModel?.label ?? historicalLabel ?? modelId; - - return { - id: modelId, - provider: currentModel?.provider ?? "history", - model: currentModel?.model ?? modelId, - label, - group: currentModel?.group ?? "history", - enabled: currentModel?.enabled ?? false, - displayLabel: label, - }; - }); + if (!runSummary) { + return []; + } + + const modelMap = new Map(models.map((model) => [model.id, model])); + const runStartedEvent = runSummary.events.find( + (event): event is Extract => + event.type === 'run_started', + ); + const orderedModelIds = [ + ...(runStartedEvent?.models.map((model) => model.id) ?? []), + ...Object.keys(runSummary.resultsByModel), + ].filter((modelId, index, all) => modelId && all.indexOf(modelId) === index); + + return orderedModelIds.map((modelId) => { + const currentModel = modelMap.get(modelId); + const historicalLabel = runStartedEvent?.models.find( + (model) => model.id === modelId, + )?.label; + const label = currentModel?.label ?? historicalLabel ?? modelId; + + return { + id: modelId, + provider: currentModel?.provider ?? 'history', + model: currentModel?.model ?? modelId, + label, + group: currentModel?.group ?? 'history', + enabled: currentModel?.enabled ?? 
false, + displayLabel: label, + }; + }); } function countStoredRunResults(summary: BenchPackRunSummary | null): number { - if (!summary) { - return 0; - } - - return Object.values(summary.resultsByModel).reduce( - (total, results) => total + results.length, - 0, - ); + if (!summary) { + return 0; + } + + return Object.values(summary.resultsByModel).reduce( + (total, results) => total + results.length, + 0, + ); } function isRunSummaryComplete(summary: BenchPackRunSummary | null): boolean { - if (!summary) { - return false; - } + if (!summary) { + return false; + } - return ( - countStoredRunResults(summary) >= summary.modelCount * summary.scenarioCount - ); + return ( + countStoredRunResults(summary) >= summary.modelCount * summary.scenarioCount + ); } function buildHistoryModelSelections( - runSummary: BenchPackRunSummary | null, - models: BenchLocalModelConfig[], + runSummary: BenchPackRunSummary | null, + models: BenchLocalModelConfig[], ): BenchLocalWorkspaceTabModelSelection[] { - return resolveHistoryModels(runSummary, models).map((model) => ({ - modelId: model.id, - alias: model.displayLabel !== model.label ? model.displayLabel : undefined, - })); + return resolveHistoryModels(runSummary, models).map((model) => ({ + modelId: model.id, + alias: model.displayLabel !== model.label ? 
model.displayLabel : undefined, + })); } type ReplayCell = { - modelId: string; - scenarioId: string; - result: ScenarioResult; + modelId: string; + scenarioId: string; + result: ScenarioResult; }; function buildReplayGroups( - summary: BenchPackRunSummary, - scenarios: ScenarioMeta[], - modelIds: string[], + summary: BenchPackRunSummary, + scenarios: ScenarioMeta[], + modelIds: string[], ): ReplayCell[][] { - const scenarioOrder = scenarios.map((scenario) => scenario.id); - const resultMap = new Map(); - - for (const [modelId, results] of Object.entries(summary.resultsByModel)) { - for (const result of results) { - resultMap.set(`${modelId}::${result.scenarioId}`, result); - } - } - - const singletonCellsByScenarioThenModel = scenarioOrder.flatMap( - (scenarioId) => - modelIds.flatMap((modelId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result - ? [[{ modelId, scenarioId, result } satisfies ReplayCell]] - : []; - }), - ); - - switch (summary.executionMode ?? "parallel_by_test_case") { - case "serial": - return singletonCellsByScenarioThenModel; - case "serial_by_model": - return modelIds.flatMap((modelId) => - scenarioOrder.flatMap((scenarioId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result - ? [[{ modelId, scenarioId, result } satisfies ReplayCell]] - : []; - }), - ); - case "parallel_by_test_case": - return scenarioOrder - .map((scenarioId) => - modelIds.flatMap((modelId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result - ? [{ modelId, scenarioId, result } satisfies ReplayCell] - : []; - }), - ) - .filter((group) => group.length > 0); - case "parallel_by_model": - return modelIds - .map((modelId) => - scenarioOrder.flatMap((scenarioId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result - ? 
[{ modelId, scenarioId, result } satisfies ReplayCell] - : []; - }), - ) - .filter((group) => group.length > 0); - case "full_parallel": - return [ - scenarioOrder.flatMap((scenarioId) => - modelIds.flatMap((modelId) => { - const result = resultMap.get(`${modelId}::${scenarioId}`); - return result - ? [{ modelId, scenarioId, result } satisfies ReplayCell] - : []; - }), - ), - ].filter((group) => group.length > 0); - default: - return singletonCellsByScenarioThenModel; - } + const scenarioOrder = scenarios.map((scenario) => scenario.id); + const resultMap = new Map(); + + for (const [modelId, results] of Object.entries(summary.resultsByModel)) { + for (const result of results) { + resultMap.set(`${modelId}::${result.scenarioId}`, result); + } + } + + const singletonCellsByScenarioThenModel = scenarioOrder.flatMap( + (scenarioId) => + modelIds.flatMap((modelId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? [[{ modelId, scenarioId, result } satisfies ReplayCell]] + : []; + }), + ); + + switch (summary.executionMode ?? 'parallel_by_test_case') { + case 'serial': + return singletonCellsByScenarioThenModel; + case 'serial_by_model': + return modelIds.flatMap((modelId) => + scenarioOrder.flatMap((scenarioId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? [[{ modelId, scenarioId, result } satisfies ReplayCell]] + : []; + }), + ); + case 'parallel_by_test_case': + return scenarioOrder + .map((scenarioId) => + modelIds.flatMap((modelId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? [{ modelId, scenarioId, result } satisfies ReplayCell] + : []; + }), + ) + .filter((group) => group.length > 0); + case 'parallel_by_model': + return modelIds + .map((modelId) => + scenarioOrder.flatMap((scenarioId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? 
[{ modelId, scenarioId, result } satisfies ReplayCell] + : []; + }), + ) + .filter((group) => group.length > 0); + case 'full_parallel': + return [ + scenarioOrder.flatMap((scenarioId) => + modelIds.flatMap((modelId) => { + const result = resultMap.get(`${modelId}::${scenarioId}`); + return result + ? [{ modelId, scenarioId, result } satisfies ReplayCell] + : []; + }), + ), + ].filter((group) => group.length > 0); + default: + return singletonCellsByScenarioThenModel; + } } function upsertTabModelAlias( - tab: BenchLocalWorkspaceTab, - models: BenchLocalModelConfig[], - modelId: string, - alias: string, + tab: BenchLocalWorkspaceTab, + models: BenchLocalModelConfig[], + modelId: string, + alias: string, ): BenchLocalWorkspaceTabModelSelection[] { - const normalized = normalizeTabModelSelections(tab.modelSelections); - const nextAlias = alias.trim() || undefined; - let found = false; - - const next = normalized.map((selection) => { - if (selection.modelId !== modelId) { - return selection; - } - - found = true; - return { - ...selection, - alias: nextAlias, - }; - }); - - if (!found) { - next.push({ - modelId, - alias: nextAlias, - }); - } - - return next; + const normalized = normalizeTabModelSelections(tab.modelSelections); + const nextAlias = alias.trim() || undefined; + let found = false; + + const next = normalized.map((selection) => { + if (selection.modelId !== modelId) { + return selection; + } + + found = true; + return { + ...selection, + alias: nextAlias, + }; + }); + + if (!found) { + next.push({ + modelId, + alias: nextAlias, + }); + } + + return next; } function pushScenarioResult( - current: Record, - modelId: string, - result: ScenarioResult, + current: Record, + modelId: string, + result: ScenarioResult, ): Record { - return { - ...current, - [modelId]: [ - ...(current[modelId] ?? []).filter( - (candidate) => candidate.scenarioId !== result.scenarioId, - ), - result, - ], - }; + return { + ...current, + [modelId]: [ + ...(current[modelId] ?? 
[]).filter( + (candidate) => candidate.scenarioId !== result.scenarioId, + ), + result, + ], + }; } function updateLiveRunState( - current: LiveRunState | undefined, - event: ProgressEvent, + current: LiveRunState | undefined, + event: ProgressEvent, ): LiveRunState { - const next: LiveRunState = current ?? { - events: [], - resultsByModel: {}, - activeCellKeys: [], - }; - - const eventKey = - "modelId" in event && "scenarioId" in event - ? `${event.modelId}::${event.scenarioId}` - : null; - - next.events = [...next.events, event]; - - if (event.type === "run_started") { - next.runId = event.runId; - } - - if ( - event.type === "model_progress" && - eventKey && - !next.activeCellKeys.includes(eventKey) - ) { - next.activeCellKeys = [...next.activeCellKeys, eventKey]; - } - - if (event.type === "scenario_result" && eventKey) { - next.resultsByModel = pushScenarioResult( - next.resultsByModel, - event.modelId, - event.result, - ); - next.activeCellKeys = next.activeCellKeys.filter((key) => key !== eventKey); - } - - if (event.type === "run_finished" || event.type === "run_error") { - next.activeCellKeys = []; - } - - return next; + const next: LiveRunState = current ?? { + events: [], + resultsByModel: {}, + activeCellKeys: [], + }; + + const eventKey = + 'modelId' in event && 'scenarioId' in event + ? 
`${event.modelId}::${event.scenarioId}` + : null; + + next.events = [...next.events, event]; + + if (event.type === 'run_started') { + next.runId = event.runId; + } + + if ( + event.type === 'model_progress' && + eventKey && + !next.activeCellKeys.includes(eventKey) + ) { + next.activeCellKeys = [...next.activeCellKeys, eventKey]; + } + + if (event.type === 'scenario_result' && eventKey) { + next.resultsByModel = pushScenarioResult( + next.resultsByModel, + event.modelId, + event.result, + ); + next.activeCellKeys = next.activeCellKeys.filter((key) => key !== eventKey); + } + + if (event.type === 'run_finished' || event.type === 'run_error') { + next.activeCellKeys = []; + } + + return next; } function detailModalKey( - detail: Pick, + detail: Pick, ): string { - return `${detail.tabId}::${detail.modelId}::${detail.scenarioId}`; + return `${detail.tabId}::${detail.modelId}::${detail.scenarioId}`; } function getCellKey(modelId: string, scenarioId: string): string { - return `${modelId}::${scenarioId}`; + return `${modelId}::${scenarioId}`; } const REGISTRY_UNAVAILABLE_MESSAGE = - "Official Bench Pack registry is unavailable right now. Installed Bench Packs remain usable."; + 'Official Bench Pack registry is unavailable right now. 
Installed Bench Packs remain usable.'; function formatDesktopErrorMessage(error: unknown): string { - if (!(error instanceof Error)) { - return ""; - } + if (!(error instanceof Error)) { + return ''; + } - return error.message - .replace(/^Error invoking remote method '[^']+':\s*/u, "") - .trim(); + return error.message + .replace(/^Error invoking remote method '[^']+':\s*/u, '') + .trim(); } function isRegistryConnectivityError(error: unknown): boolean { - const message = formatDesktopErrorMessage(error); - return /fetch failed/i.test(message); + const message = formatDesktopErrorMessage(error); + return /fetch failed/i.test(message); } function formatRegistryWarning(error: unknown): string { - const message = formatDesktopErrorMessage(error); + const message = formatDesktopErrorMessage(error); - if (!message) { - return REGISTRY_UNAVAILABLE_MESSAGE; - } + if (!message) { + return REGISTRY_UNAVAILABLE_MESSAGE; + } - if (!message || /fetch failed/i.test(message)) { - return REGISTRY_UNAVAILABLE_MESSAGE; - } + if (!message || /fetch failed/i.test(message)) { + return REGISTRY_UNAVAILABLE_MESSAGE; + } - return `${REGISTRY_UNAVAILABLE_MESSAGE} ${message}`; + return `${REGISTRY_UNAVAILABLE_MESSAGE} ${message}`; } function formatRegistryMutationError( - action: "install" | "update", - benchPackId: string, - error: unknown, + action: 'install' | 'update', + benchPackId: string, + error: unknown, ): string { - if (isRegistryConnectivityError(error)) { - return `Failed to ${action} ${benchPackId}. Official Bench Pack registry is unavailable right now.`; - } + if (isRegistryConnectivityError(error)) { + return `Failed to ${action} ${benchPackId}. 
Official Bench Pack registry is unavailable right now.`; + } - return ( - formatDesktopErrorMessage(error) || `Failed to ${action} ${benchPackId}.` - ); + return ( + formatDesktopErrorMessage(error) || `Failed to ${action} ${benchPackId}.` + ); } function getRequiredVerifierRunBlocker( - manifest: BenchPackManifest | undefined, - benchPackConfig: BenchLocalConfig["benchpacks"][string] | undefined, - verifierStatus: BenchPackVerifierStatus | undefined, + manifest: BenchPackManifest | undefined, + benchPackConfig: BenchLocalConfig['benchpacks'][string] | undefined, + verifierStatus: BenchPackVerifierStatus | undefined, ): BenchPackRunBlocker | null { - const requiredVerifierSpecs = ( - manifest?.verifiers ?? - manifest?.sidecars ?? - [] - ).filter((spec) => spec.required); - - if (requiredVerifierSpecs.length === 0) { - return null; - } - - if (verifierStatus?.docker.state === "not_installed") { - return { - title: "Docker Required", - message: - "This Bench Pack needs a local verifier runtime. Install Docker Desktop before starting the test run.", - actionLabel: "Open Verification", - }; - } - - if (verifierStatus?.docker.state === "not_running") { - return { - title: "Docker Not Running", - message: - "This Bench Pack needs a local verifier runtime. Start Docker Desktop, then try the run again.", - actionLabel: "Open Verification", - }; - } - - for (const spec of requiredVerifierSpecs) { - const runtimeConfig = - benchPackConfig?.verifiers?.[spec.id] ?? - benchPackConfig?.sidecars?.[spec.id]; - const runtimeStatus = verifierStatus?.verifiers.find( - (entry) => entry.id === spec.id, - ); - - if ( - (runtimeConfig?.mode ?? spec.defaultMode) === "docker" && - runtimeConfig?.auto_start === false && - runtimeStatus?.status !== "running" - ) { - return { - title: "Verifier Not Started", - message: - "Auto Start is disabled for this required verifier. 
Start it from Verification settings before running the Bench Pack.", - actionLabel: "Open Verification", - }; - } - - if (runtimeStatus?.status === "missing_dependency") { - return { - title: "Docker Required", - message: - runtimeStatus.details ?? - "This Bench Pack needs Local Docker before it can run.", - actionLabel: "Open Verification", - }; - } - - if (runtimeStatus?.status === "dependency_not_running") { - return { - title: "Docker Not Running", - message: - runtimeStatus.details ?? - "This Bench Pack needs Local Docker to be running before it can run.", - actionLabel: "Open Verification", - }; - } - } - - return null; + const requiredVerifierSpecs = ( + manifest?.verifiers ?? + manifest?.sidecars ?? + [] + ).filter((spec) => spec.required); + + if (requiredVerifierSpecs.length === 0) { + return null; + } + + if (verifierStatus?.docker.state === 'not_installed') { + return { + title: 'Docker Required', + message: + 'This Bench Pack needs a local verifier runtime. Install Docker Desktop before starting the test run.', + actionLabel: 'Open Verification', + }; + } + + if (verifierStatus?.docker.state === 'not_running') { + return { + title: 'Docker Not Running', + message: + 'This Bench Pack needs a local verifier runtime. Start Docker Desktop, then try the run again.', + actionLabel: 'Open Verification', + }; + } + + for (const spec of requiredVerifierSpecs) { + const runtimeConfig = + benchPackConfig?.verifiers?.[spec.id] ?? + benchPackConfig?.sidecars?.[spec.id]; + const runtimeStatus = verifierStatus?.verifiers.find( + (entry) => entry.id === spec.id, + ); + + if ( + (runtimeConfig?.mode ?? spec.defaultMode) === 'docker' && + runtimeConfig?.auto_start === false && + runtimeStatus?.status !== 'running' + ) { + return { + title: 'Verifier Not Started', + message: + 'Auto Start is disabled for this required verifier. 
Start it from Verification settings before running the Bench Pack.', + actionLabel: 'Open Verification', + }; + } + + if (runtimeStatus?.status === 'missing_dependency') { + return { + title: 'Docker Required', + message: + runtimeStatus.details ?? + 'This Bench Pack needs Local Docker before it can run.', + actionLabel: 'Open Verification', + }; + } + + if (runtimeStatus?.status === 'dependency_not_running') { + return { + title: 'Docker Not Running', + message: + runtimeStatus.details ?? + 'This Bench Pack needs Local Docker to be running before it can run.', + actionLabel: 'Open Verification', + }; + } + } + + return null; } function getVerifierStatusTone( - status: BenchPackVerifierStatus["verifiers"][number]["status"] | undefined, + status: BenchPackVerifierStatus['verifiers'][number]['status'] | undefined, ): string { - switch (status) { - case "running": - return "status-ready"; - case "missing_dependency": - return "status-not-installed"; - case "dependency_not_running": - case "failed": - return "status-danger"; - default: - return "status-idle"; - } + switch (status) { + case 'running': + return 'status-ready'; + case 'missing_dependency': + return 'status-not-installed'; + case 'dependency_not_running': + case 'failed': + return 'status-danger'; + default: + return 'status-idle'; + } } function formatVerifierRuntimeStatus( - status: BenchPackVerifierStatus["verifiers"][number]["status"] | undefined, + status: BenchPackVerifierStatus['verifiers'][number]['status'] | undefined, ): string { - switch (status) { - case "missing_dependency": - return "docker required"; - case "dependency_not_running": - return "docker not running"; - default: - return (status ?? "stopped").replaceAll("_", " "); - } + switch (status) { + case 'missing_dependency': + return 'docker required'; + case 'dependency_not_running': + return 'docker not running'; + default: + return (status ?? 
'stopped').replaceAll('_', ' '); + } } export function App() { - // Removed detached logs view in web version - - const isMacPlatform = - typeof navigator !== "undefined" && navigator.userAgent.includes("Mac"); - const [loadState, setLoadState] = useState(null); - const [draft, setDraft] = useState(null); - const [workspaceState, setWorkspaceState] = - useState(null); - const [benchPackInspections, setBenchPackInspections] = useState< - BenchPackInspection[] - >([]); - const [registryEntries, setRegistryEntries] = useState< - BenchPackRegistryEntry[] - >([]); - const [registryWarning, setRegistryWarning] = useState(null); - const [availableThemes, setAvailableThemes] = useState< - BenchLocalThemeDescriptor[] - >([]); - const [activeThemeDefinition, setActiveThemeDefinition] = - useState(null); - const [systemPrefersDark, setSystemPrefersDark] = useState( - typeof window !== "undefined" - ? window.matchMedia("(prefers-color-scheme: dark)").matches - : false, - ); - const [verifierStatuses, setVerifierStatuses] = useState< - Record - >({}); - const [tabMenuOpen, setTabMenuOpen] = useState(false); - const [themeMenuOpen, setThemeMenuOpen] = useState(false); - const [sidebarOpen, setSidebarOpen] = useState(() => { - if (typeof window === "undefined") { - return true; - } - - return window.localStorage.getItem(SIDEBAR_OPEN_STORAGE_KEY) !== "false"; - }); - const [settingsOpen, setSettingsOpen] = useState(false); - const [settingsTab, setSettingsTab] = useState("providers"); - const [aboutDialogOpen, setAboutDialogOpen] = useState(false); - const [appMetadata, setAppMetadata] = useState( - null, - ); - const [appUpdateState, setAppUpdateState] = - useState(null); - const [ - dismissedDownloadedUpdateVersion, - setDismissedDownloadedUpdateVersion, - ] = useState(null); - const [providerModal, setProviderModal] = useState( - null, - ); - const [modelModal, setModelModal] = useState(null); - const [modelBrowserModal, setModelBrowserModal] = - useState(null); - const 
[tabModelsModal, setTabModelsModal] = - useState(null); - const [samplingModal, setSamplingModal] = useState( - null, - ); - const [modelAliasModal, setModelAliasModal] = - useState(null); - const [workspaceModal, setWorkspaceModal] = - useState(null); - const [workspaceContextMenu, setWorkspaceContextMenu] = - useState(null); - const [historyModal, setHistoryModal] = useState( - null, - ); - const [confirmDialog, setConfirmDialog] = useState(null); - const [verifierPreparationModal, setVerifierPreparationModal] = - useState(null); - const [ - settingsVerifierPreparationModal, - setSettingsVerifierPreparationModal, - ] = useState(null); - const [stoppingVerifierStarts, setStoppingVerifierStarts] = useState< - Record - >({}); - const [draggedTabId, setDraggedTabId] = useState(null); - const [editingTab, setEditingTab] = useState<{ - tabId: string; - value: string; - width: number; - } | null>(null); - const [activeRuns, setActiveRuns] = useState>( - {}, - ); - const [stoppingRuns, setStoppingRuns] = useState>({}); - const [runSummaries, setRunSummaries] = useState< - Record - >({}); - const [runHistories, setRunHistories] = useState< - Record - >({}); - const [liveRuns, setLiveRuns] = useState>({}); - const [liveScenarioFocus, setLiveScenarioFocus] = useState< - Record - >({}); - const [loadedHistoryRuns, setLoadedHistoryRuns] = useState< - Record - >({}); - const [logsOpen, setLogsOpen] = useState(false); - const [logsAutoScroll, setLogsAutoScroll] = useState(true); - const [logsDetached, setLogsDetached] = useState(false); - const [logDrawerHeight, setLogDrawerHeight] = useState(240); - const [detailModal, setDetailModal] = useState(null); - const [isBusy, setIsBusy] = useState(true); - const [error, setError] = useState(null); - const [appNotice, setAppNotice] = useState(null); - const [settingsNotice, setSettingsNotice] = useState(null); - const [benchPackMutations, setBenchPackMutations] = useState< - Record - >({}); - const themeMenuRef = useRef(null); - const 
settingsOpenRef = useRef(false); - - const providerIds = useMemo( - () => Object.keys(draft?.providers ?? {}), - [draft], - ); - const themeOptions = useMemo( - () => ["system", ...availableThemes.map((theme) => theme.id)], - [availableThemes], - ); - const currentThemeLabel = useMemo( - () => - resolveThemeLabel( - draft?.ui.theme ?? "system", - availableThemes, - systemPrefersDark, - ), - [draft?.ui.theme, availableThemes, systemPrefersDark], - ); - const readyInspections = useMemo( - () => - benchPackInspections.filter( - (inspection) => inspection.status === "ready", - ), - [benchPackInspections], - ); - const activeWorkspace = useMemo( - () => - workspaceState?.activeWorkspaceId - ? (workspaceState.workspaces[workspaceState.activeWorkspaceId] ?? null) - : null, - [workspaceState], - ); - const workspaceTabs = useMemo( - () => - activeWorkspace?.tabIds - .map((tabId: any) => workspaceState?.tabs[tabId]) - .filter((tab): tab is BenchLocalWorkspaceTab => Boolean(tab)) ?? [], - [activeWorkspace, workspaceState], - ); - const activeTab = useMemo( - () => - activeWorkspace?.activeTabId - ? (workspaceState?.tabs[activeWorkspace.activeTabId] ?? null) - : (workspaceTabs[0] ?? null), - [activeWorkspace, workspaceState, workspaceTabs], - ); - const activeInspection = useMemo( - () => - benchPackInspections.find( - (inspection) => inspection.id === activeTab?.benchPackId, - ) ?? null, - [benchPackInspections, activeTab], - ); - const activeVerifierStatus = useMemo( - () => - activeInspection ? (verifierStatuses[activeInspection.id] ?? null) : null, - [activeInspection, verifierStatuses], - ); - const activeTabModels = useMemo( - () => (draft ? resolveTabModels(activeTab, draft.models) : []), - [draft, activeTab], - ); - const activeRunSummary = useMemo( - () => (activeTab ? (runSummaries[activeTab.id] ?? null) : null), - [runSummaries, activeTab], - ); - const activeLiveRun = useMemo( - () => (activeTab ? (liveRuns[activeTab.id] ?? 
null) : null), - [liveRuns, activeTab], - ); - const activeLiveScenarioFocus = useMemo( - () => (activeTab ? (liveScenarioFocus[activeTab.id] ?? null) : null), - [liveScenarioFocus, activeTab], - ); - const activeRunBlocker = useMemo( - () => - activeInspection && draft - ? getRequiredVerifierRunBlocker( - activeInspection.manifest, - draft.benchpacks[activeInspection.id], - activeVerifierStatus ?? undefined, - ) - : null, - [activeInspection, activeVerifierStatus, draft], - ); - const activeLoadedHistory = useMemo( - () => (activeTab ? (loadedHistoryRuns[activeTab.id] ?? null) : null), - [loadedHistoryRuns, activeTab], - ); - const activeDisplayModels = useMemo(() => { - if (!draft) { - return []; - } - - if (activeLoadedHistory) { - return resolveHistoryModels(activeRunSummary, draft.models); - } - - return activeTabModels; - }, [draft, activeLoadedHistory, activeRunSummary, activeTabModels]); - const downloadedUpdateVersion = - appUpdateState?.downloadedVersion ?? - appUpdateState?.availableVersion ?? - null; - const showDownloadedUpdateBanner = - appUpdateState?.status === "downloaded" && - downloadedUpdateVersion !== dismissedDownloadedUpdateVersion; - const activeLogEvents = - activeLiveRun?.events ?? activeRunSummary?.events ?? []; - const logContainerRef = useRef(null); - const tabStripShellRef = useRef(null); - const tabStripRef = useRef(null); - const tabChipRefs = useRef(new Map()); - const modelDiscoveryCacheRef = useRef< - Record - >({}); - const replayRunTokensRef = useRef(new Map()); - const appliedThemeKeysRef = useRef([]); - const [tabStripOverflow, setTabStripOverflow] = useState(false); - const [activeTabMask, setActiveTabMask] = useState<{ - left: number; - width: number; - } | null>(null); - - const hasUnsavedChanges = - loadState && draft - ? JSON.stringify(loadState.config) !== JSON.stringify(draft) - : false; - const effectiveThemeId = useMemo(() => { - const requested = draft?.ui.theme ?? 
"system"; - - if (requested === "system") { - return systemPrefersDark ? "dark" : "light"; - } - - return requested; - }, [draft?.ui.theme, systemPrefersDark]); - - const updateDraft = ( - updater: (current: BenchLocalConfig) => BenchLocalConfig, - ) => { - setDraft((current) => { - if (!current) { - return current; - } - - return updater(cloneConfig(current)); - }); - }; - - const persistWorkspaceState = async (nextState: BenchLocalWorkspaceState) => { - setWorkspaceState(nextState); - - try { - const saved = await bl.workspaces.save({ state: nextState }); - setWorkspaceState(saved.state); - } catch (workspaceError) { - setError( - workspaceError instanceof Error - ? workspaceError.message - : "Failed to save workspace state.", - ); - } - }; - - const updateWorkspaceState = ( - updater: (current: BenchLocalWorkspaceState) => BenchLocalWorkspaceState, - ) => { - setWorkspaceState((current) => { - if (!current) { - return current; - } - - const next = updater(structuredClone(current)); - void persistWorkspaceState(next); - return next; - }); - }; - - const loadBenchPackInspections = async () => { - try { - const inspections = await bl.benchPacks.list(); - setBenchPackInspections(inspections); - } catch (pluginError) { - setError( - pluginError instanceof Error - ? pluginError.message - : "Failed to inspect configured Bench Packs.", - ); - } - }; - - const loadRegistryEntries = async () => { - try { - const entries = await bl.benchPacks.registry(); - setRegistryEntries(entries); - setRegistryWarning(null); - } catch (registryError) { - setRegistryWarning(formatRegistryWarning(registryError)); - } - }; - - const loadVerifierStatuses = async () => { - try { - const statuses = await bl.verifiers.list(); - setVerifierStatuses( - Object.fromEntries( - statuses.map((status: any) => [status.benchPackId, status]), - ), - ); - } catch (verifierError) { - setError( - verifierError instanceof Error - ? 
verifierError.message - : "Failed to load verifier status.", - ); - } - }; - - const loadThemes = async () => { - try { - const themes = await bl.themes.list(); - setAvailableThemes(themes); - } catch (themeError) { - setError( - themeError instanceof Error - ? themeError.message - : "Failed to load available themes.", - ); - } - }; - - // Updates removed in web version - const checkForAppUpdates = async () => {}; - const installDownloadedAppUpdate = async () => {}; - - const loadHistoryForBenchPack = async (benchPackId: string) => { - try { - const history = await bl.benchPacks.history(benchPackId); - setRunHistories((current) => ({ - ...current, - [benchPackId]: history, - })); - } catch (historyError) { - setError( - historyError instanceof Error - ? historyError.message - : "Failed to load Bench Pack history.", - ); - } - }; - - useEffect(() => { - let cancelled = false; - - const load = async () => { - setIsBusy(true); - setError(null); - setRegistryWarning(null); - - try { - const [ - result, - workspaceResult, - inspections, - themes, - verifierStatusList, - activeRunsResult, - ] = await Promise.all([ - bl.config.load(), - bl.workspaces.load(), - bl.benchPacks.list(), - bl.themes.list(), - bl.verifiers.list(), - bl.benchPacks.activeRuns(), - ]); - - let registry: BenchPackRegistryEntry[] = []; - let nextRegistryWarning: string | null = null; - - try { - registry = await bl.benchPacks.registry(); - } catch (registryError) { - nextRegistryWarning = formatRegistryWarning(registryError); - } - - if (cancelled) { - return; - } - - const persistedRunEntries = await Promise.all( - Object.values(workspaceResult.state.tabs) - .filter((tab: any) => tab.benchPackId && tab.loadedRunId) - .map(async (tab: any) => { - try { - const summary = await bl.benchPacks.loadHistory( - tab.benchPackId as string, - tab.loadedRunId as string, - ); - return [tab.id, summary] as const; - } catch { - return null; - } - }), - ); - - setLoadState(result); - 
setDraft(cloneConfig(result.config)); - setWorkspaceState(workspaceResult.state); - setRunSummaries( - Object.fromEntries( - persistedRunEntries.filter( - (entry): entry is readonly [string, BenchPackRunSummary] => - entry !== null, - ), - ), - ); - setLoadedHistoryRuns( - Object.fromEntries( - persistedRunEntries - .filter( - (entry): entry is readonly [string, BenchPackRunSummary] => - entry !== null, - ) - .map(([tabId, summary]) => [ - tabId, - { - runId: summary.runId, - startedAt: summary.startedAt, - mode: "history", - }, - ]), - ), - ); - setBenchPackInspections(inspections); - setRegistryEntries(registry); - setRegistryWarning(nextRegistryWarning); - setAvailableThemes(themes); - setVerifierStatuses( - Object.fromEntries( - verifierStatusList.map((status: any) => [status.benchPackId, status]), - ), - ); - setActiveRuns( - Object.fromEntries( - activeRunsResult.map((run: any) => [ - run.tabId, - { benchPackId: run.benchPackId }, - ]), - ), - ); - setAppNotice( - result.created - ? "Created a fresh ~/.benchlocal/config.toml bootstrap." - : null, - ); - } catch (loadError) { - if (!cancelled) { - setError( - loadError instanceof Error - ? 
loadError.message - : "Failed to load BenchLocal config.", - ); - } - } finally { - if (!cancelled) { - setIsBusy(false); - } - } - }; - - void load(); - - return () => { - cancelled = true; - }; - }, []); - - useEffect(() => { - if (typeof window === "undefined") { - return; - } - - const media = window.matchMedia("(prefers-color-scheme: dark)"); - const handleChange = () => { - setSystemPrefersDark(media.matches); - }; - - handleChange(); - media.addEventListener("change", handleChange); - - return () => { - media.removeEventListener("change", handleChange); - }; - }, []); - - useEffect(() => { - let cancelled = false; - - void bl.updates - .state() - .then((state) => { - if (!cancelled) { - setAppUpdateState(state); - } - }) - .catch(() => undefined); - - const unsubscribe = bl.updates.onState((state) => { - setAppUpdateState(state); - - if (state.status !== "downloaded") { - setDismissedDownloadedUpdateVersion(null); - } - }); - - return () => { - cancelled = true; - unsubscribe(); - }; - }, []); - - useEffect(() => { - let cancelled = false; - - const loadTheme = async () => { - const theme = await bl.themes.load(effectiveThemeId); - - if (!cancelled) { - setActiveThemeDefinition(theme); - } - }; - - void loadTheme(); - - return () => { - cancelled = true; - }; - }, [effectiveThemeId]); - - useEffect(() => { - if (!activeThemeDefinition || typeof document === "undefined") { - return; - } - - const root = document.documentElement; - - for (const key of appliedThemeKeysRef.current) { - root.style.removeProperty(key); - } - - for (const [key, value] of Object.entries( - activeThemeDefinition.variables, - )) { - root.style.setProperty(key, value); - } - - appliedThemeKeysRef.current = Object.keys(activeThemeDefinition.variables); - root.style.setProperty("color-scheme", activeThemeDefinition.colorScheme); - root.dataset.theme = activeThemeDefinition.id; - }, [activeThemeDefinition]); - - useEffect(() => { - const sse = bl.sse(); - const handleRunEvent = (e: 
MessageEvent) => { - const { tabId, event } = JSON.parse(e.data) as { - tabId: string; - event: ProgressEvent; - }; - - if (event.type === "verifier_preparing") { - setVerifierPreparationModal({ - tabId, - progress: event, - }); - } else { - setVerifierPreparationModal((current) => - current?.tabId === tabId ? null : current, - ); - } - - if (event.type === "run_finished" || event.type === "run_error") { - setActiveRuns((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - setStoppingRuns((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - } - - setLiveRuns((current) => ({ - ...current, - [tabId]: updateLiveRunState(current[tabId], event), - })); - - if (event.type === "run_started") { - setLiveScenarioFocus((current) => ({ - ...current, - [tabId]: { - liveScenarioId: null, - autoFollow: true, - }, - })); - } else if ( - event.type === "scenario_started" || - event.type === "model_progress" || - event.type === "scenario_result" || - event.type === "scenario_finished" - ) { - setLiveScenarioFocus((current) => { - const existing = current[tabId]; - return { - ...current, - [tabId]: { - liveScenarioId: event.scenarioId, - autoFollow: existing?.autoFollow ?? true, - }, - }; - }); - } - }; - const handleMutationProgress = (e: MessageEvent) => { - const payload = JSON.parse(e.data) as BenchPackMutationProgress; - setBenchPackMutations((current) => ({ - ...current, - [payload.benchPackId]: payload, - })); - }; - const handleVerifierProgress = (e: MessageEvent) => { - const { benchPackId, event } = JSON.parse(e.data) as { - benchPackId: string; - event: ProgressEvent; - }; - setSettingsVerifierPreparationModal( - (current) => - current?.benchPackId === benchPackId || current === null - ? 
({ benchPackId, progress: event } as any) - : current, - ); - }; - sse.addEventListener("run-event", handleRunEvent); - sse.addEventListener("benchpack-mutation-progress", handleMutationProgress); - sse.addEventListener("verifier-progress", handleVerifierProgress); - return () => { - sse.removeEventListener("run-event", handleRunEvent); - sse.removeEventListener( - "benchpack-mutation-progress", - handleMutationProgress, - ); - sse.removeEventListener("verifier-progress", handleVerifierProgress); - sse.close(); - }; - }, []); - - useEffect(() => { - return bl.benchPacks.onMutationProgress((payload) => { - setBenchPackMutations((current) => ({ - ...current, - [payload.benchPackId]: payload, - })); - }); - }, []); - - useEffect(() => { - return bl.verifiers.onProgress(({ benchPackId, event }) => { - setSettingsVerifierPreparationModal((current) => - current?.benchPackId === benchPackId || current === null - ? { - benchPackId, - progress: event, - } - : current, - ); - }); - }, []); - - useEffect(() => { - if (!settingsOpen || settingsTab !== "verification") { - return; - } - - void loadVerifierStatuses(); - }, [settingsOpen, settingsTab]); - - useEffect(() => { - if (!settingsOpen || settingsTab !== "advanced") { - return; - } - - setSettingsTab("providers"); - }, [settingsOpen, settingsTab]); - - useEffect(() => { - if (!logsOpen || !logsAutoScroll || !logContainerRef.current) { - return; - } - - logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight; - }, [activeLogEvents, logsOpen, logsAutoScroll]); - - useEffect(() => { - if (!activeInspection?.id || activeInspection.status !== "ready") { - return; - } - - void loadHistoryForBenchPack(activeInspection.id); - }, [activeInspection?.id, activeInspection?.status]); - - useEffect(() => { - const dispose = bl.logs.onDetachedWindowClosed(() => { - setLogsDetached(false); - }); - - return dispose; - }, []); - - useEffect(() => { - void bl.logs.publishDetachedState({ - workspaceName: 
activeWorkspace?.name ?? "No Workspace", - tabTitle: activeTab?.title ?? "No Active Tab", - eventCount: activeLogEvents.length, - events: activeLogEvents, - }); - }, [activeWorkspace?.name, activeTab?.title, activeLogEvents]); - - useEffect(() => { - const handleMove = (event: MouseEvent) => { - const shell = document.querySelector(".desktop-shell"); - - if (!shell || !document.body.dataset.logResizeActive) { - return; - } - - const shellRect = shell.getBoundingClientRect(); - const nextHeight = Math.min( - 420, - Math.max(160, shellRect.bottom - event.clientY - 30), - ); - setLogDrawerHeight(nextHeight); - }; - - const handleUp = () => { - delete document.body.dataset.logResizeActive; - }; - - window.addEventListener("mousemove", handleMove); - window.addEventListener("mouseup", handleUp); - - return () => { - window.removeEventListener("mousemove", handleMove); - window.removeEventListener("mouseup", handleUp); - }; - }, []); - - useEffect(() => { - if (!workspaceContextMenu) { - return; - } - - const closeMenu = () => { - setWorkspaceContextMenu(null); - }; - - const handleKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape") { - closeMenu(); - } - }; - - window.addEventListener("mousedown", closeMenu); - window.addEventListener("scroll", closeMenu, true); - window.addEventListener("resize", closeMenu); - window.addEventListener("keydown", handleKeyDown); - - return () => { - window.removeEventListener("mousedown", closeMenu); - window.removeEventListener("scroll", closeMenu, true); - window.removeEventListener("resize", closeMenu); - window.removeEventListener("keydown", handleKeyDown); - }; - }, [workspaceContextMenu]); - - useEffect(() => { - if (!themeMenuOpen) { - return; - } - - const handlePointerDown = (event: MouseEvent) => { - const target = event.target as Node; - if (!themeMenuRef.current?.contains(target)) { - setThemeMenuOpen(false); - } - }; - - const handleEscape = (event: KeyboardEvent) => { - if (event.key === "Escape") { - 
setThemeMenuOpen(false); - } - }; - - window.addEventListener("mousedown", handlePointerDown); - window.addEventListener("keydown", handleEscape); - - return () => { - window.removeEventListener("mousedown", handlePointerDown); - window.removeEventListener("keydown", handleEscape); - }; - }, [themeMenuOpen]); - - useEffect(() => { - return bl.app.onOpenAbout(() => { - setAboutDialogOpen(true); - - if (!appMetadata) { - void bl.app - .metadata() - .then((metadata) => { - setAppMetadata(metadata); - }) - .catch(() => undefined); - } - }); - }, [appMetadata]); - - useEffect(() => { - return bl.app.onOpenSettings(() => { - setSettingsOpen(true); - }); - }, []); - - useEffect(() => { - settingsOpenRef.current = settingsOpen; - - if (!settingsOpen) { - setSettingsNotice(null); - } - }, [settingsOpen]); - - useEffect(() => { - if (typeof window === "undefined") { - return; - } - - window.localStorage.setItem(SIDEBAR_OPEN_STORAGE_KEY, String(sidebarOpen)); - }, [sidebarOpen]); - - useEffect(() => { - const updateOverflow = () => { - const element = tabStripRef.current; - - if (!element) { - setTabStripOverflow(false); - return; - } - - setTabStripOverflow(element.scrollWidth > element.clientWidth + 4); - }; - - updateOverflow(); - window.addEventListener("resize", updateOverflow); - - return () => { - window.removeEventListener("resize", updateOverflow); - }; - }, [workspaceTabs.length, activeWorkspace?.id, sidebarOpen]); - - useEffect(() => { - const shell = tabStripShellRef.current; - const strip = tabStripRef.current; - const activeTabId = activeTab?.id; - - if (!shell || !strip || !activeTabId) { - setActiveTabMask(null); - return; - } - - const updateMask = () => { - const activeElement = tabChipRefs.current.get(activeTabId); - - if (!activeElement) { - setActiveTabMask(null); - return; - } - - const shellRect = shell.getBoundingClientRect(); - const tabRect = activeElement.getBoundingClientRect(); - - setActiveTabMask({ - left: Math.round(tabRect.left - 
shellRect.left), - width: Math.round(tabRect.width), - }); - }; - - const frameId = window.requestAnimationFrame(updateMask); - window.addEventListener("resize", updateMask); - strip.addEventListener("scroll", updateMask, { passive: true }); - - return () => { - window.cancelAnimationFrame(frameId); - window.removeEventListener("resize", updateMask); - strip.removeEventListener("scroll", updateMask); - }; - }, [activeTab?.id, workspaceTabs, sidebarOpen, tabStripOverflow]); - - const persistConfig = async ( - nextConfig: BenchLocalConfig, - options?: { - notice?: string | null; - preserveFilesystemDraft?: boolean; - previousDraft?: BenchLocalConfig | null; - previousLoadConfig?: BenchLocalConfig | null; - }, - ): Promise => { - if (!nextConfig) { - return false; - } - - setIsBusy(true); - setError(null); - - try { - const result = await bl.config.save(nextConfig); - setLoadState(result); - setDraft( - options?.preserveFilesystemDraft && - options.previousDraft && - options.previousLoadConfig - ? reapplyPendingFilesystemDraft( - result.config, - options.previousDraft, - options.previousLoadConfig, - ) - : cloneConfig(result.config), - ); - await loadBenchPackInspections(); - await loadRegistryEntries(); - if (settingsOpenRef.current && options?.notice) { - setSettingsNotice(options.notice); - } - return true; - } catch (saveError) { - setError( - saveError instanceof Error - ? saveError.message - : "Failed to save BenchLocal config.", - ); - return false; - } finally { - setIsBusy(false); - } - }; - - const save = async (): Promise => { - if (!draft) { - return false; - } - - return persistConfig(draft, { notice: "Saved ~/.benchlocal/config.toml" }); - }; - - const refreshBenchPackState = async (result?: LoadState) => { - const nextLoadState = result ?? 
(await bl.config.load()); - const inspections = await bl.benchPacks.list(); - const verifierStatusList = await bl.verifiers.list(); - let registry = registryEntries; - - try { - registry = await bl.benchPacks.registry(); - setRegistryWarning(null); - } catch (registryError) { - setRegistryWarning(formatRegistryWarning(registryError)); - } - - setLoadState(nextLoadState); - setDraft(cloneConfig(nextLoadState.config)); - setBenchPackInspections(inspections); - setRegistryEntries(registry); - setVerifierStatuses( - Object.fromEntries( - verifierStatusList.map((status: any) => [status.benchPackId, status]), - ), - ); - }; - - const ensureBenchPackMutationReady = async (): Promise => { - if (!hasUnsavedChanges) { - return true; - } - - return save(); - }; - - const installBenchPack = async (benchPackId: string) => { - if (!(await ensureBenchPackMutationReady())) { - return; - } - - setIsBusy(true); - setError(null); - setBenchPackMutations((current) => ({ - ...current, - [benchPackId]: { - benchPackId, - action: "install", - phase: "resolving", - message: "Resolving Bench Pack from registry.", - }, - })); - - try { - const result = await bl.benchPacks.install(benchPackId); - await refreshBenchPackState(result); - if (settingsOpenRef.current) { - setSettingsNotice(`Installed ${benchPackId}.`); - } - } catch (installError) { - setError( - formatRegistryMutationError("install", benchPackId, installError), - ); - } finally { - setIsBusy(false); - setBenchPackMutations((current) => { - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - }; - - const installBenchPackFromUrl = async (url: string) => { - if (!(await ensureBenchPackMutationReady())) { - return; - } - - const normalizedUrl = url.trim(); - - if (!normalizedUrl) { - setError("Bench Pack URL is required."); - return; - } - - setIsBusy(true); - setError(null); - let installedBenchPackId: string | null = null; - setBenchPackMutations((current) => ({ - ...current, - 
[THIRD_PARTY_INSTALL_MUTATION_ID]: { - benchPackId: THIRD_PARTY_INSTALL_MUTATION_ID, - action: "install", - phase: "resolving", - message: "Resolving Bench Pack from URL.", - }, - })); - - try { - const result = await bl.benchPacks.installFromUrl(normalizedUrl); - await refreshBenchPackState(result); - installedBenchPackId = - Object.entries(result.config.benchpacks).find( - ([, benchPack]: any) => - benchPack.source === "archive" && benchPack.url === normalizedUrl, - )?.[0] ?? null; - if (settingsOpenRef.current) { - setSettingsNotice( - installedBenchPackId - ? `Installed ${installedBenchPackId}.` - : "Installed third-party Bench Pack.", - ); - } - return true; - } catch (installError) { - setError( - formatDesktopErrorMessage(installError) || - "Failed to install Bench Pack from URL.", - ); - return false; - } finally { - setIsBusy(false); - setBenchPackMutations((current) => { - const next = { ...current }; - delete next[THIRD_PARTY_INSTALL_MUTATION_ID]; - delete next["third-party"]; - if (installedBenchPackId) { - delete next[installedBenchPackId]; - } - return next; - }); - } - }; - - const updateBenchPack = async (benchPackId: string) => { - if (!(await ensureBenchPackMutationReady())) { - return; - } - - setIsBusy(true); - setError(null); - setBenchPackMutations((current) => ({ - ...current, - [benchPackId]: { - benchPackId, - action: "update", - phase: "resolving", - message: "Resolving Bench Pack update.", - }, - })); - - try { - const result = await bl.benchPacks.update(benchPackId); - await refreshBenchPackState(result); - if (settingsOpenRef.current) { - setSettingsNotice(`Updated ${benchPackId}.`); - } - } catch (updateError) { - setError(formatRegistryMutationError("update", benchPackId, updateError)); - } finally { - setIsBusy(false); - setBenchPackMutations((current) => { - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - }; - - const uninstallInstalledBenchPack = async (benchPackId: string) => { - if (!(await 
ensureBenchPackMutationReady())) { - return; - } - - if ( - Object.values(activeRuns).some((run) => run.benchPackId === benchPackId) - ) { - setError("Stop active Bench Pack runs before uninstalling this pack."); - return; - } - - setIsBusy(true); - setError(null); - setBenchPackMutations((current) => ({ - ...current, - [benchPackId]: { - benchPackId, - action: "uninstall", - phase: "removing", - message: "Removing Bench Pack.", - }, - })); - - try { - const result = await bl.benchPacks.uninstall(benchPackId); - await refreshBenchPackState(result); - if (settingsOpenRef.current) { - setSettingsNotice(`Uninstalled ${benchPackId}.`); - } - } catch (uninstallError) { - setError( - uninstallError instanceof Error - ? uninstallError.message - : `Failed to uninstall ${benchPackId}.`, - ); - } finally { - setIsBusy(false); - setBenchPackMutations((current) => { - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - }; - - const reset = () => { - if (!loadState) { - return; - } - - setDraft(cloneConfig(loadState.config)); - setProviderModal(null); - setModelModal(null); - if (settingsOpenRef.current) { - setSettingsNotice("Reverted unsaved changes."); - } - setError(null); - }; - - const saveThemeSelection = async (themeId: string) => { - if (!draft) { - return; - } - - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig - ? 
cloneConfig(previousLoadConfig) - : cloneConfig(draft); - nextConfig.ui.theme = themeId; - setDraft(nextConfig); - - const saved = await persistConfig(nextConfig, { - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig, - }); - if (!saved) { - setDraft(previousDraft); - } - }; - - const saveVerifierConfig = async ( - benchPackId: string, - verifierId: string, - updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig, - ) => { - if (!draft) { - return; - } - - const currentVerifier = - draft.benchpacks[benchPackId]?.verifiers?.[verifierId]; - if (!currentVerifier) { - return; - } - - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig - ? cloneConfig(previousLoadConfig) - : cloneConfig(draft); - nextConfig.benchpacks[benchPackId].verifiers![verifierId] = - updater(currentVerifier); - setDraft(nextConfig); - - const saved = await persistConfig(nextConfig, { - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig, - }); - if (!saved) { - setDraft(previousDraft); - } - }; - - const scrollTabStrip = (delta: number) => { - tabStripRef.current?.scrollBy({ - left: delta, - behavior: "smooth", - }); - }; - - const handleTabStripWheel = (event: React.WheelEvent) => { - const strip = tabStripRef.current; - - if (!strip || !tabStripOverflow) { - return; - } - - const horizontalDelta = - Math.abs(event.deltaX) > Math.abs(event.deltaY) - ? 
event.deltaX - : event.deltaY; - - if (Math.abs(horizontalDelta) < 1) { - return; - } - - event.preventDefault(); - strip.scrollBy({ - left: horizontalDelta, - behavior: "auto", - }); - }; - - const runTab = async (tab: BenchLocalWorkspaceTab) => { - setError(null); - setAppNotice(null); - - if (!tab.benchPackId || !draft) { - setError("Select a Bench Pack for this tab first."); - return; - } - - const benchPackId = tab.benchPackId; - const selectedModels = resolveTabModels(tab, draft.models); - const inspection = benchPackInspections.find( - (candidate) => candidate.id === benchPackId, - ); - - if (inspection?.manifest) { - try { - const verifierStatusList = await bl.verifiers.list(); - const nextVerifierStatuses = Object.fromEntries( - verifierStatusList.map((status: any) => [status.benchPackId, status]), - ); - setVerifierStatuses(nextVerifierStatuses); - - const runBlocker = getRequiredVerifierRunBlocker( - inspection.manifest, - draft.benchpacks[benchPackId], - nextVerifierStatuses[benchPackId], - ); - - if (runBlocker) { - setConfirmDialog({ - title: runBlocker.title, - subtitle: runBlocker.message, - confirmLabel: runBlocker.actionLabel, - onConfirm: () => { - setSettingsTab("verification"); - setSettingsOpen(true); - }, - }); - return; - } - } catch (verifierError) { - setError( - verifierError instanceof Error - ? 
verifierError.message - : "Failed to refresh verifier status.", - ); - return; - } - } - - if (selectedModels.length === 0) { - setError( - "Select at least one enabled model for this tab before running the Bench Pack.", - ); - return; - } - - if (hasUnsavedChanges) { - const saved = await save(); - - if (!saved) { - return; - } - } - - setActiveRuns((current) => ({ - ...current, - [tab.id]: { benchPackId, mode: "host" }, - })); - setStoppingRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => ({ - ...current, - [tab.id]: { - events: [], - resultsByModel: {}, - activeCellKeys: [], - }, - })); - setRunSummaries((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLoadedHistoryRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - - try { - const result = await bl.benchPacks.run({ - tabId: tab.id, - benchPackId, - modelIds: selectedModels.map((model) => model.id), - executionMode: tab.executionMode, - generation: tab.samplingOverrides, - }); - setRunSummaries((current) => ({ - ...current, - [tab.id]: result, - })); - updateWorkspaceState((current) => { - const nextTab = current.tabs[tab.id]; - - if (!nextTab) { - return current; - } - - nextTab.loadedRunId = result.runId; - nextTab.updatedAt = new Date().toISOString(); - return current; - }); - if (result.cancelled) { - setAppNotice(`Stopped ${result.benchPackName}.`); - } else { - setAppNotice( - `Completed ${result.benchPackName} across ${result.scenarioCount} scenarios and ${result.modelCount} model${result.modelCount === 1 ? "" : "s"}.`, - ); - } - await loadBenchPackInspections(); - await loadHistoryForBenchPack(benchPackId); - } catch (runError) { - setError( - runError instanceof Error - ? 
runError.message - : `Failed to run Bench Pack for ${benchPackId}.`, - ); - } finally { - setVerifierPreparationModal((current) => - current?.tabId === tab.id ? null : current, - ); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLoadedHistoryRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - } - }; - - const resumeTabRun = async ( - tab: BenchLocalWorkspaceTab, - runSummary: BenchPackRunSummary, - ) => { - setError(null); - setAppNotice(null); - - if (!tab.benchPackId || !draft) { - setError("Select a Bench Pack for this tab first."); - return; - } - - if (isRunSummaryComplete(runSummary)) { - setError("This saved run is already complete."); - return; - } - - const benchPackId = tab.benchPackId; - const previousLoadedHistory = loadedHistoryRuns[tab.id] ?? null; - const previousTabModelSelections = structuredClone(tab.modelSelections); - const previousExecutionMode = tab.executionMode; - - if (hasUnsavedChanges) { - const saved = await save(); - - if (!saved) { - return; - } - } - - const historicalSelections = buildHistoryModelSelections( - runSummary, - draft.models, - ); - updateWorkspaceState((current) => { - const nextTab = current.tabs[tab.id]; - - if (!nextTab) { - return current; - } - - nextTab.modelSelections = - normalizeTabModelSelections(historicalSelections); - nextTab.executionMode = runSummary.executionMode ?? 
nextTab.executionMode; - nextTab.updatedAt = new Date().toISOString(); - return current; - }); - - setLoadedHistoryRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setActiveRuns((current) => ({ - ...current, - [tab.id]: { benchPackId, mode: "host" }, - })); - setStoppingRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => ({ - ...current, - [tab.id]: { - runId: runSummary.runId, - events: [], - resultsByModel: {}, - activeCellKeys: [], - }, - })); - - try { - const result = await bl.benchPacks.resumeRun({ - tabId: tab.id, - benchPackId, - runId: runSummary.runId, - executionMode: runSummary.executionMode ?? tab.executionMode, - generation: tab.samplingOverrides, - }); - setRunSummaries((current) => ({ - ...current, - [tab.id]: result, - })); - updateWorkspaceState((current) => { - const nextTab = current.tabs[tab.id]; - - if (!nextTab) { - return current; - } - - nextTab.loadedRunId = result.runId; - nextTab.updatedAt = new Date().toISOString(); - return current; - }); - if (result.cancelled) { - setAppNotice(`Stopped ${result.benchPackName}.`); - } else { - setAppNotice( - isRunSummaryComplete(result) - ? `Completed ${result.benchPackName} across ${result.scenarioCount} scenarios and ${result.modelCount} model${result.modelCount === 1 ? 
"" : "s"}.` - : `Resumed ${result.benchPackName}, but the run is still incomplete.`, - ); - } - await loadBenchPackInspections(); - await loadHistoryForBenchPack(benchPackId); - } catch (runError) { - updateWorkspaceState((current) => { - const nextTab = current.tabs[tab.id]; - - if (!nextTab) { - return current; - } - - nextTab.modelSelections = structuredClone(previousTabModelSelections); - nextTab.executionMode = previousExecutionMode; - nextTab.updatedAt = new Date().toISOString(); - return current; - }); - if (previousLoadedHistory) { - setLoadedHistoryRuns((current) => ({ - ...current, - [tab.id]: previousLoadedHistory, - })); - } - setError( - runError instanceof Error - ? runError.message - : `Failed to resume Bench Pack for ${benchPackId}.`, - ); - } finally { - setVerifierPreparationModal((current) => - current?.tabId === tab.id ? null : current, - ); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - } - }; - - const replayTabRun = async ( - tab: BenchLocalWorkspaceTab, - runSummary: BenchPackRunSummary, - ) => { - if (!tab.benchPackId) { - setError("Select a Bench Pack for this tab first."); - return; - } - - if (!isRunSummaryComplete(runSummary)) { - setError("Replay is only available for completed test runs."); - return; - } - - const inspection = benchPackInspections.find( - (candidate) => candidate.id === tab.benchPackId, - ); - const scenarios = inspection?.scenarios ?? []; - const modelIds = resolveHistoryModels(runSummary, draft?.models ?? 
[]).map( - (model) => model.id, - ); - const replayGroups = buildReplayGroups(runSummary, scenarios, modelIds); - const token = Symbol(`replay:${tab.id}`); - replayRunTokensRef.current.set(tab.id, token); - - setError(null); - setAppNotice(null); - setActiveRuns((current) => ({ - ...current, - [tab.id]: { benchPackId: tab.benchPackId as string, mode: "replay" }, - })); - setStoppingRuns((current) => { - if (!current[tab.id]) { - return current; - } - - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setLiveRuns((current) => ({ - ...current, - [tab.id]: { - runId: runSummary.runId, - events: [], - resultsByModel: {}, - activeCellKeys: [], - }, - })); - setLiveScenarioFocus((current) => ({ - ...current, - [tab.id]: { - liveScenarioId: null, - autoFollow: supportsLiveScenarioColumnFocus( - runSummary.executionMode ?? tab.executionMode, - ), - }, - })); - - const wait = async (ms: number) => { - await new Promise((resolve) => setTimeout(resolve, ms)); - }; - - try { - for (const group of replayGroups) { - if (replayRunTokensRef.current.get(tab.id) !== token) { - return; - } - - const nextActiveCellKeys = group.map((cell) => - getCellKey(cell.modelId, cell.scenarioId), - ); - const leadScenarioId = group[0]?.scenarioId ?? null; - - setLiveRuns((current) => { - const existing = current[tab.id]; - return { - ...current, - [tab.id]: { - runId: runSummary.runId, - events: existing?.events ?? [], - resultsByModel: existing?.resultsByModel ?? {}, - activeCellKeys: nextActiveCellKeys, - }, - }; - }); - if ( - leadScenarioId && - supportsLiveScenarioColumnFocus( - runSummary.executionMode ?? 
tab.executionMode, - ) - ) { - setLiveScenarioFocus((current) => ({ - ...current, - [tab.id]: { - liveScenarioId: leadScenarioId, - autoFollow: true, - }, - })); - } - - await wait(1000); - - if (replayRunTokensRef.current.get(tab.id) !== token) { - return; - } - - setLiveRuns((current) => { - const existing = current[tab.id]; - const nextResultsByModel = { ...(existing?.resultsByModel ?? {}) }; - - for (const cell of group) { - nextResultsByModel[cell.modelId] = [ - ...(nextResultsByModel[cell.modelId] ?? []).filter( - (candidate) => candidate.scenarioId !== cell.scenarioId, - ), - cell.result, - ]; - } - - return { - ...current, - [tab.id]: { - runId: runSummary.runId, - events: existing?.events ?? [], - resultsByModel: nextResultsByModel, - activeCellKeys: [], - }, - }; - }); - } - - setAppNotice(`Replayed ${runSummary.benchPackName}.`); - } finally { - if (replayRunTokensRef.current.get(tab.id) === token) { - replayRunTokensRef.current.delete(tab.id); - } - - setActiveRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tab.id]; - return next; - }); - } - }; - - const stopTabRun = async (tabId: string) => { - const activeRun = activeRuns[tabId]; - - if (activeRun?.mode === "replay") { - replayRunTokensRef.current.delete(tabId); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setLiveRuns((current) => ({ - ...current, - [tabId]: { - ...(current[tabId] ?? 
{ - events: [], - resultsByModel: {}, - activeCellKeys: [], - }), - activeCellKeys: [], - }, - })); - setAppNotice("Stopped replay."); - return; - } - - setStoppingRuns((current) => ({ - ...current, - [tabId]: true, - })); - - try { - const result = await bl.benchPacks.stop(tabId); - - if (!result.stopped) { - setAppNotice("That Bench Pack run was no longer active."); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - return; - } - - setAppNotice("Stopping Bench Pack run..."); - } catch (stopError) { - setStoppingRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setError( - stopError instanceof Error - ? stopError.message - : "Failed to stop Bench Pack run.", - ); - } - }; - - const cancelSettingsVerifierStart = async (benchPackId: string) => { - setStoppingVerifierStarts((current) => ({ - ...current, - [benchPackId]: true, - })); - - try { - const result = await bl.verifiers.cancelStart(benchPackId); - - if (!result.cancelled) { - setSettingsVerifierPreparationModal((current) => - current?.benchPackId === benchPackId ? null : current, - ); - setStoppingVerifierStarts((current) => { - if (!current[benchPackId]) { - return current; - } - - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - } catch (cancelError) { - setStoppingVerifierStarts((current) => { - if (!current[benchPackId]) { - return current; - } - - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - setError( - cancelError instanceof Error - ? 
cancelError.message - : "Failed to cancel verifier start.", - ); - } - }; - - const createWorkspace = () => { - updateWorkspaceState((current) => { - const now = new Date().toISOString(); - const workspaceId = `workspace-${crypto.randomUUID()}`; - const tabId = `tab-${crypto.randomUUID()}`; - - current.workspaceOrder.push(workspaceId); - current.activeWorkspaceId = workspaceId; - current.workspaces[workspaceId] = { - id: workspaceId, - name: createWorkspaceName(current.workspaceOrder.length - 1), - tabIds: [tabId], - activeTabId: tabId, - createdAt: now, - updatedAt: now, - }; - current.tabs[tabId] = { - id: tabId, - title: "New Tab", - benchPackId: null, - loadedRunId: null, - focusedScenarioId: null, - modelSelections: [], - samplingOverrides: {}, - executionMode: "parallel_by_test_case", - createdAt: now, - updatedAt: now, - }; - - return current; - }); - }; - - const renameWorkspace = (workspaceId: string, name: string) => { - updateWorkspaceState((current) => { - const workspace = current.workspaces[workspaceId]; - - if (!workspace) { - return current; - } - - workspace.name = name.trim(); - workspace.updatedAt = new Date().toISOString(); - return current; - }); - }; - - const deleteWorkspace = (workspaceId: string) => { - const removedTabIds = new Set( - workspaceState?.workspaces[workspaceId]?.tabIds ?? 
[], - ); - - if (Array.from(removedTabIds).some((tabId) => activeRuns[tabId])) { - setError("Stop active Bench Pack runs before deleting this workspace."); - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[workspaceId]; - - if (!workspace) { - return current; - } - - for (const tabId of workspace.tabIds) { - delete current.tabs[tabId]; - } - - delete current.workspaces[workspaceId]; - current.workspaceOrder = current.workspaceOrder.filter( - (id) => id !== workspaceId, - ); - - if (current.workspaceOrder.length === 0) { - const now = new Date().toISOString(); - const nextWorkspaceId = `workspace-${crypto.randomUUID()}`; - const nextTabId = `tab-${crypto.randomUUID()}`; - - current.workspaceOrder = [nextWorkspaceId]; - current.activeWorkspaceId = nextWorkspaceId; - current.workspaces[nextWorkspaceId] = { - id: nextWorkspaceId, - name: "My Workspace", - tabIds: [nextTabId], - activeTabId: nextTabId, - createdAt: now, - updatedAt: now, - }; - current.tabs[nextTabId] = { - id: nextTabId, - title: "New Tab", - benchPackId: null, - loadedRunId: null, - focusedScenarioId: null, - modelSelections: [], - samplingOverrides: {}, - executionMode: "parallel_by_test_case", - createdAt: now, - updatedAt: now, - }; - } else if (current.activeWorkspaceId === workspaceId) { - current.activeWorkspaceId = current.workspaceOrder[0] ?? 
null; - } - - return current; - }); - - if (removedTabIds.size > 0) { - setRunSummaries((current) => - Object.fromEntries( - Object.entries(current).filter( - ([tabId]) => !removedTabIds.has(tabId), - ), - ), - ); - setLiveRuns((current) => - Object.fromEntries( - Object.entries(current).filter( - ([tabId]) => !removedTabIds.has(tabId), - ), - ), - ); - setActiveRuns((current) => - Object.fromEntries( - Object.entries(current).filter( - ([tabId]) => !removedTabIds.has(tabId), - ), - ), - ); - setStoppingRuns( - (current) => - Object.fromEntries( - Object.entries(current).filter( - ([tabId]) => !removedTabIds.has(tabId), - ), - ) as Record, - ); - } - }; - - const exportWorkspace = async (workspaceId: string) => { - if (!workspaceState) { - return; - } - - try { - const result = await bl.workspaces.export(workspaceId, workspaceState); - - if (result.exported) { - setAppNotice(`Exported workspace to ${result.filePath}.`); - } - } catch (workspaceError) { - setError( - workspaceError instanceof Error - ? 
workspaceError.message - : "Failed to export workspace.", - ); - } - }; - - const importWorkspace = async () => { - try { - const result = await bl.workspaces.import(null); - - if (!result.imported || !result.workspace || !result.tabs) { - return; - } - - const importedWorkspace = result.workspace; - const importedTabs = result.tabs; - const workspaceIdMap = new Map(); - const tabIdMap = new Map(); - const newWorkspaceId = `workspace-${crypto.randomUUID()}`; - workspaceIdMap.set(importedWorkspace.id, newWorkspaceId); - - updateWorkspaceState((current) => { - const now = new Date().toISOString(); - const nextTabIds = importedWorkspace.tabIds.map((tabId: any) => { - const nextTabId = `tab-${crypto.randomUUID()}`; - tabIdMap.set(tabId, nextTabId); - const importedTab = importedTabs[tabId]; - - if (importedTab) { - const importedTabRecord = importedTab as typeof importedTab & { - pluginId?: string | null; - }; - current.tabs[nextTabId] = { - ...importedTabRecord, - id: nextTabId, - benchPackId: - importedTabRecord.benchPackId ?? - importedTabRecord.pluginId ?? - null, - samplingOverrides: importedTab.samplingOverrides ?? {}, - createdAt: importedTab.createdAt ?? now, - updatedAt: now, - }; - } - - return nextTabId; - }); - - current.workspaceOrder.push(newWorkspaceId); - current.activeWorkspaceId = newWorkspaceId; - current.workspaces[newWorkspaceId] = { - ...importedWorkspace, - id: newWorkspaceId, - name: Object.values(current.workspaces).some( - (workspace) => workspace.name === importedWorkspace.name, - ) - ? `${importedWorkspace.name} Imported` - : importedWorkspace.name, - tabIds: nextTabIds, - activeTabId: importedWorkspace.activeTabId - ? (tabIdMap.get(importedWorkspace.activeTabId) ?? - nextTabIds[0] ?? - null) - : (nextTabIds[0] ?? null), - createdAt: importedWorkspace.createdAt ?? 
now, - updatedAt: now, - }; - - return current; - }); - - setAppNotice(`Imported workspace "${importedWorkspace.name}".`); - } catch (workspaceError) { - setError( - workspaceError instanceof Error - ? workspaceError.message - : "Failed to import workspace.", - ); - } - }; - - const activateWorkspace = (workspaceId: string) => { - setWorkspaceContextMenu(null); - updateWorkspaceState((current) => { - current.activeWorkspaceId = workspaceId; - return current; - }); - }; - - const createTab = (benchPackId: string) => { - if (!activeWorkspace) { - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[activeWorkspace.id]; - - if (!workspace) { - return current; - } - - const now = new Date().toISOString(); - const tabId = `tab-${crypto.randomUUID()}`; - current.tabs[tabId] = { - id: tabId, - title: createTabTitle(benchPackId, benchPackInspections), - benchPackId, - loadedRunId: null, - focusedScenarioId: null, - modelSelections: [], - samplingOverrides: {}, - executionMode: "parallel_by_test_case", - createdAt: now, - updatedAt: now, - }; - workspace.tabIds.push(tabId); - workspace.activeTabId = tabId; - workspace.updatedAt = now; - return current; - }); - setTabMenuOpen(false); - }; - - const assignBenchPackToTab = (tabId: string, benchPackId: string) => { - updateWorkspaceState((current) => { - const tab = current.tabs[tabId]; - - if (!tab) { - return current; - } - - tab.title = createTabTitle(benchPackId, benchPackInspections); - tab.benchPackId = benchPackId; - tab.loadedRunId = null; - tab.focusedScenarioId = null; - tab.samplingOverrides = {}; - tab.updatedAt = new Date().toISOString(); - - return current; - }); - setTabMenuOpen(false); - }; - - const activateTab = (tabId: string) => { - if (!activeWorkspace) { - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[activeWorkspace.id]; - - if (!workspace) { - return current; - } - - workspace.activeTabId = tabId; - workspace.updatedAt = 
new Date().toISOString(); - return current; - }); - }; - - const startEditingTab = (tabId: string, currentTitle: string) => { - const width = tabChipRefs.current.get(tabId)?.offsetWidth ?? 180; - setEditingTab({ - tabId, - value: currentTitle, - width, - }); - }; - - const commitEditingTab = () => { - if (!editingTab) { - return; - } - - const nextTitle = editingTab.value.trim() || "New Tab"; - - updateWorkspaceState((current) => { - const tab = current.tabs[editingTab.tabId]; - - if (!tab) { - return current; - } - - tab.title = nextTitle; - tab.updatedAt = new Date().toISOString(); - return current; - }); - - setEditingTab(null); - }; - - const cancelEditingTab = () => { - setEditingTab(null); - }; - - const reorderTab = (draggedId: string, targetId: string) => { - if (!activeWorkspace || draggedId === targetId) { - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[activeWorkspace.id]; - - if (!workspace) { - return current; - } - - const nextTabIds = [...workspace.tabIds]; - const fromIndex = nextTabIds.indexOf(draggedId); - const toIndex = nextTabIds.indexOf(targetId); - - if (fromIndex < 0 || toIndex < 0) { - return current; - } - - const [moved] = nextTabIds.splice(fromIndex, 1); - nextTabIds.splice(toIndex, 0, moved); - workspace.tabIds = nextTabIds; - workspace.updatedAt = new Date().toISOString(); - return current; - }); - }; - - const closeTab = (tabId: string) => { - if (!activeWorkspace) { - return; - } - - if (activeRuns[tabId]) { - setError("Stop the Bench Pack run before closing this tab."); - return; - } - - updateWorkspaceState((current) => { - const workspace = current.workspaces[activeWorkspace.id]; - - if (!workspace) { - return current; - } - - workspace.tabIds = workspace.tabIds.filter((id) => id !== tabId); - delete current.tabs[tabId]; - - workspace.activeTabId = - workspace.activeTabId === tabId - ? (workspace.tabIds[workspace.tabIds.length - 1] ?? 
null) - : workspace.activeTabId; - workspace.updatedAt = new Date().toISOString(); - - if (workspace.tabIds.length === 0) { - const replacementTabId = `tab-${crypto.randomUUID()}`; - current.tabs[replacementTabId] = { - id: replacementTabId, - title: "New Tab", - benchPackId: null, - loadedRunId: null, - focusedScenarioId: null, - modelSelections: [], - samplingOverrides: {}, - executionMode: "parallel_by_test_case", - createdAt: workspace.updatedAt, - updatedAt: workspace.updatedAt, - }; - workspace.tabIds = [replacementTabId]; - workspace.activeTabId = replacementTabId; - } - - return current; - }); - setRunSummaries((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setLiveRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - setActiveRuns((current) => { - const next = { ...current }; - delete next[tabId]; - return next; - }); - }; - - const restoreHistoryRun = async ( - benchPackId: string, - runId: string, - mode: "history" | "replay" = "history", - ) => { - if (!activeTab) { - return; - } - - try { - const summary = await bl.benchPacks.loadHistory(benchPackId, runId); - setRunSummaries((current) => ({ - ...current, - [activeTab.id]: summary, - })); - updateWorkspaceState((current) => { - const tab = current.tabs[activeTab.id]; - - if (!tab) { - return current; - } - - tab.loadedRunId = summary.runId; - tab.updatedAt = new Date().toISOString(); - return current; - }); - setLiveRuns((current) => { - const next = { ...current }; - delete next[activeTab.id]; - return next; - }); - setLoadedHistoryRuns((current) => ({ - ...current, - [activeTab.id]: { - runId, - startedAt: summary.startedAt, - mode, - }, - })); - if (summary.executionMode) { - updateWorkspaceState((current) => { - const tab = current.tabs[activeTab.id]; - - if (!tab) { - return current; - } - - tab.executionMode = summary.executionMode ?? 
tab.executionMode; - tab.updatedAt = new Date().toISOString(); - return current; - }); - } - } catch (historyError) { - setError( - historyError instanceof Error - ? historyError.message - : "Failed to load Bench Pack history.", - ); - } - }; - - const retryScenarioFromDetail = async (detail: DetailModalState) => { - if (!workspaceState) { - return; - } - - if (!detail.runId) { - setError("This scenario does not belong to a saved test run yet."); - return; - } - - const tab = workspaceState.tabs[detail.tabId]; - - if (!tab || tab.benchPackId !== detail.benchPackId) { - setError("The original tab for this test is no longer available."); - return; - } - - if (hasUnsavedChanges) { - const saved = await save(); - - if (!saved) { - return; - } - } - - const retryKey = detailModalKey(detail); - const retryCellKey = getCellKey(detail.modelId, detail.scenarioId); - setDetailModal((current) => - current && detailModalKey(current) === retryKey ? null : current, - ); - setLiveRuns((current) => { - const existing = current[detail.tabId]; - - if (existing) { - return { - ...current, - [detail.tabId]: { - ...existing, - runId: existing.runId ?? detail.runId ?? undefined, - activeCellKeys: existing.activeCellKeys.includes(retryCellKey) - ? existing.activeCellKeys - : [...existing.activeCellKeys, retryCellKey], - }, - }; - } - - return { - ...current, - [detail.tabId]: { - runId: detail.runId ?? 
undefined, - events: [], - resultsByModel: {}, - activeCellKeys: [retryCellKey], - }, - }; - }); - - try { - await bl.benchPacks.retryScenario({ - tabId: detail.tabId, - benchPackId: detail.benchPackId, - runId: detail.runId, - scenarioId: detail.scenarioId, - modelId: detail.modelId, - generation: tab.samplingOverrides, - }); - const refreshedSummary = await bl.benchPacks.loadHistory( - detail.benchPackId, - detail.runId, - ); - - if (!activeRuns[detail.tabId]) { - setRunSummaries((current) => ({ - ...current, - [detail.tabId]: refreshedSummary, - })); - } - await loadHistoryForBenchPack(detail.benchPackId); - setAppNotice(`Retested ${detail.scenarioId} for ${detail.modelId}.`); - } catch (retryError) { - setLiveRuns((current) => { - const existing = current[detail.tabId]; - - if (!existing || !existing.activeCellKeys.includes(retryCellKey)) { - return current; - } - - return { - ...current, - [detail.tabId]: { - ...existing, - activeCellKeys: existing.activeCellKeys.filter( - (key) => key !== retryCellKey, - ), - }, - }; - }); - setError( - retryError instanceof Error - ? 
retryError.message - : "Failed to retry the selected test.", - ); - } - }; - - const clearLoadedHistoryRun = (tabId: string) => { - updateWorkspaceState((current) => { - const tab = current.tabs[tabId]; - - if (!tab) { - return current; - } - - tab.loadedRunId = null; - tab.updatedAt = new Date().toISOString(); - return current; - }); - setLoadedHistoryRuns((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - setRunSummaries((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - setLiveRuns((current) => { - if (!current[tabId]) { - return current; - } - - const next = { ...current }; - delete next[tabId]; - return next; - }); - }; - - const clearLoadedHistoryForBenchPack = (benchPackId: string) => { - const affectedTabIds = workspaceState - ? Object.values(workspaceState.tabs) - .filter( - (tab) => - tab.benchPackId === benchPackId && - Boolean(loadedHistoryRuns[tab.id]), - ) - .map((tab) => tab.id) - : []; - - if (affectedTabIds.length === 0) { - return; - } - - updateWorkspaceState((current) => { - for (const tabId of affectedTabIds) { - const tab = current.tabs[tabId]; - - if (!tab) { - continue; - } - - tab.loadedRunId = null; - tab.updatedAt = new Date().toISOString(); - } - - return current; - }); - - setLoadedHistoryRuns((current) => { - const next = { ...current }; - for (const tabId of affectedTabIds) { - delete next[tabId]; - } - return next; - }); - - setRunSummaries((current) => { - const next = { ...current }; - for (const tabId of affectedTabIds) { - delete next[tabId]; - } - return next; - }); - - setLiveRuns((current) => { - const next = { ...current }; - for (const tabId of affectedTabIds) { - delete next[tabId]; - } - return next; - }); - }; - - const removeAllHistoryForBenchPack = async ( - benchPackId: string, - benchPackName: string, - ) => { - try { - await 
bl.benchPacks.clearHistory(benchPackId); - setRunHistories((current) => ({ - ...current, - [benchPackId]: [], - })); - clearLoadedHistoryForBenchPack(benchPackId); - setHistoryModal(null); - setAppNotice(`Removed all test histories for ${benchPackName}.`); - } catch (historyError) { - setError( - historyError instanceof Error - ? historyError.message - : "Failed to remove Bench Pack history.", - ); - } - }; - - const saveProviderModal = async () => { - if (!providerModal || !draft) { - return; - } - - const providerId = providerModal.form.id.trim(); - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig - ? cloneConfig(previousLoadConfig) - : cloneConfig(draft); - - nextConfig.providers[providerId] = { - kind: providerModal.form.kind, - name: - providerModal.form.name.trim() || - defaultProviderName(providerModal.form.kind), - enabled: providerModal.form.enabled, - base_url: providerModal.form.base_url.trim(), - api_key: providerModal.form.api_key.trim() || undefined, - }; - - const saved = await persistConfig(nextConfig, { - notice: - providerModal.mode === "create" - ? "Added provider." - : "Updated provider.", - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig, - }); - - if (!saved) { - return; - } - - setProviderModal(null); - }; - - const deleteProvider = async (providerId: string): Promise => { - if (!draft) { - return false; - } - - const removedModelIds = new Set( - (draft?.models ?? []) - .filter((model) => model.provider === providerId) - .map((model) => model.id), - ); - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig - ? 
cloneConfig(previousLoadConfig) - : cloneConfig(draft); - - delete nextConfig.providers[providerId]; - nextConfig.models = nextConfig.models.filter( - (model) => model.provider !== providerId, - ); - - const saved = await persistConfig(nextConfig, { - notice: `Deleted provider "${providerId}".`, - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig, - }); - - if (!saved) { - return false; - } - - if (removedModelIds.size > 0) { - updateWorkspaceState((current) => { - for (const tab of Object.values(current.tabs)) { - tab.modelSelections = tab.modelSelections.filter( - (selection) => !removedModelIds.has(selection.modelId), - ); - } - return current; - }); - } - - return true; - }; - - const confirmDeleteProvider = (providerId: string) => { - const provider = draft?.providers[providerId]; - const linkedModelCount = (draft?.models ?? []).filter( - (model) => model.provider === providerId, - ).length; - - setConfirmDialog({ - title: "Delete Provider", - subtitle: - linkedModelCount > 0 - ? `Delete ${provider?.name ?? "this provider"}? This will also delete ${linkedModelCount} linked ${linkedModelCount === 1 ? "model" : "models"} and remove them from any tab selections.` - : `Delete ${provider?.name ?? 
"this provider"}?`, - confirmLabel: "Delete Provider", - tone: "danger", - onConfirm: () => { - void deleteProvider(providerId).then((deleted) => { - if (deleted) { - setProviderModal(null); - } - }); - }, - }); - }; - - const openModelBrowser = async () => { - if (!modelModal || !draft) { - return; - } - - const provider = draft.providers[modelModal.form.provider]; - - if (!provider) { - setError("Select a provider first."); - return; - } - - if (!providerSupportsModelDiscovery(provider)) { - setError(`${provider.name} does not support model browsing yet.`); - return; - } - - const cacheKey = `${provider.kind}::${provider.base_url}`; - const cachedEntries = modelDiscoveryCacheRef.current[cacheKey]; - - setModelBrowserModal({ - providerId: modelModal.form.provider, - providerName: provider.name, - entries: cachedEntries ?? [], - query: "", - selectedModelId: - modelModal.form.model.trim() || cachedEntries?.[0]?.id || null, - loading: !cachedEntries, - error: null, - }); - - if (cachedEntries) { - return; - } - - try { - const entries = await bl.models.discover(provider); - modelDiscoveryCacheRef.current[cacheKey] = entries; - setModelBrowserModal((current) => - current && current.providerId === modelModal.form.provider - ? { - ...current, - entries, - selectedModelId: - current.selectedModelId ?? entries[0]?.id ?? null, - loading: false, - } - : current, - ); - } catch (discoverError) { - setModelBrowserModal((current) => - current && current.providerId === modelModal.form.provider - ? { - ...current, - loading: false, - error: - discoverError instanceof Error - ? discoverError.message - : `Failed to load models from ${provider.name}.`, - } - : current, - ); - } - }; - - const saveModelModal = async () => { - if (!modelModal || !draft) { - return; - } - - const modelConfig = buildModelConfig( - modelModal.form, - draft?.providers ?? 
{}, - ); - - if (!modelConfig.provider || !modelConfig.model) { - setError("Model provider and model identifier are required."); - return; - } - - if (!draft?.providers[modelConfig.provider]) { - setError(`Model provider "${modelConfig.provider}" does not exist yet.`); - return; - } - - const previousModelId = - modelModal.mode === "edit" - ? (draft?.models[modelModal.index]?.id ?? null) - : null; - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig - ? cloneConfig(previousLoadConfig) - : cloneConfig(draft); - - if (modelModal.mode === "create") { - nextConfig.models.push(modelConfig); - } else { - nextConfig.models[modelModal.index] = modelConfig; - } - - const saved = await persistConfig(nextConfig, { - notice: modelModal.mode === "create" ? "Added model." : "Updated model.", - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig, - }); - - if (!saved) { - return; - } - - if (previousModelId && previousModelId !== modelConfig.id) { - updateWorkspaceState((current) => { - for (const tab of Object.values(current.tabs)) { - tab.modelSelections = tab.modelSelections.map((selection) => - selection.modelId === previousModelId - ? { ...selection, modelId: modelConfig.id } - : selection, - ); - } - return current; - }); - } - - setModelModal(null); - }; - - const deleteModel = async (index: number): Promise => { - if (!draft) { - return false; - } - - const removedModelId = draft?.models[index]?.id ?? null; - const previousDraft = cloneConfig(draft); - const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; - const nextConfig = previousLoadConfig - ? 
cloneConfig(previousLoadConfig) - : cloneConfig(draft); - nextConfig.models.splice(index, 1); - - const saved = await persistConfig(nextConfig, { - notice: "Deleted model.", - preserveFilesystemDraft: true, - previousDraft, - previousLoadConfig, - }); - - if (!saved) { - return false; - } - - if (removedModelId) { - updateWorkspaceState((current) => { - for (const tab of Object.values(current.tabs)) { - tab.modelSelections = tab.modelSelections.filter( - (selection) => selection.modelId !== removedModelId, - ); - } - return current; - }); - } - - return true; - }; - - const confirmDeleteModel = (index: number) => { - const model = draft?.models[index]; - if (!model) { - return; - } - - const linkedTabCount = workspaceState - ? Object.values(workspaceState.tabs).filter((tab) => - tab.modelSelections.some( - (selection) => selection.modelId === model.id, - ), - ).length - : 0; - - setConfirmDialog({ - title: "Delete Model", - subtitle: - linkedTabCount > 0 - ? `Delete ${model.label}? This will also remove it from ${linkedTabCount} tab ${linkedTabCount === 1 ? "selection" : "selections"}.` - : `Delete ${model.label}?`, - confirmLabel: "Delete Model", - tone: "danger", - onConfirm: () => { - void deleteModel(index).then((deleted) => { - if (deleted) { - setModelModal(null); - } - }); - }, - }); - }; - - return ( -
-
-
-
-
- - {!isMacPlatform ? ( -
-

BenchLocal

-
- ) : null} -
- -
- {isMacPlatform ? ( -
-

BenchLocal

-
- ) : null} - - {!settingsOpen ? ( -
- { - if (activeTab && !activeTab.benchPackId) { - assignBenchPackToTab(activeTab.id, benchPackId); - return; - } - - createTab(benchPackId); - }} - disabled={!activeWorkspace} - /> - - {appUpdateState?.status === "downloaded" ? ( - - ) : null} -
- ) : draft ? ( -
-
- - {themeMenuOpen ? ( -
- {themeOptions.map((themeId) => ( - - ))} -
- ) : null} -
-
- ) : null} -
-
- - {settingsOpen && draft ? ( - { - setSettingsNotice(null); - setSettingsOpen(false); - }} - onDismissNotice={() => setSettingsNotice(null)} - onDismissError={() => setError(null)} - onSaveAdvanced={() => void save()} - onResetAdvanced={reset} - onCreateProvider={() => - setProviderModal({ - mode: "create", - form: createEmptyProvider(), - }) - } - onEditProvider={(providerId) => - setProviderModal({ - mode: "edit", - initialId: providerId, - form: toProviderForm(providerId, draft.providers[providerId]), - }) - } - onCreateModel={() => - setModelModal({ - mode: "create", - form: createEmptyModel(providerIds[0] ?? "openrouter"), - }) - } - onEditModel={(index) => - setModelModal({ - mode: "edit", - index, - form: toModelForm(draft.models[index]), - }) - } - onStartVerifier={async ( - benchPackId, - benchPackName, - verifierId, - ) => { - setError(null); - setStoppingVerifierStarts((current) => { - if (!current[benchPackId]) { - return current; - } - - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - setSettingsVerifierPreparationModal({ - benchPackId, - progress: { - type: "verifier_preparing", - benchPackId, - benchPackName, - verifierId, - phase: "checking_docker", - message: "Checking Local Docker availability.", - }, - }); - - try { - const status = await bl.verifiers.start(benchPackId); - setVerifierStatuses((current) => ({ - ...current, - [benchPackId]: status, - })); - } catch (verifierError) { - if (isAbortLikeError(verifierError)) { - if (settingsOpenRef.current) { - setSettingsNotice(`Cancelled preparing ${verifierId}.`); - } - } else { - setError( - verifierError instanceof Error - ? verifierError.message - : "Failed to start verifier.", - ); - } - } finally { - setSettingsVerifierPreparationModal((current) => - current?.benchPackId === benchPackId ? 
null : current, - ); - setStoppingVerifierStarts((current) => { - if (!current[benchPackId]) { - return current; - } - - const next = { ...current }; - delete next[benchPackId]; - return next; - }); - } - }} - onStopVerifier={async (benchPackId) => { - try { - const status = await bl.verifiers.stop(benchPackId); - setVerifierStatuses((current) => ({ - ...current, - [benchPackId]: status, - })); - } catch (verifierError) { - setError( - verifierError instanceof Error - ? verifierError.message - : "Failed to stop verifier.", - ); - } - }} - onDeleteVerifierImage={( - benchPackId, - benchPackName, - verifierId, - ) => { - setConfirmDialog({ - title: "Delete Verifier Image", - subtitle: `Delete the Local Docker image for verifier "${verifierId}" in ${benchPackName}? BenchLocal will pull or rebuild it again the next time this verifier starts.`, - confirmLabel: "Delete Image", - tone: "danger", - onConfirm: () => { - void (async () => { - setIsBusy(true); - setError(null); - - try { - const result = await bl.verifiers.deleteImage( - benchPackId, - verifierId, - ); - setVerifierStatuses((current) => ({ - ...current, - [benchPackId]: result.status, - })); - if (settingsOpenRef.current) { - setSettingsNotice( - result.removed - ? `Deleted Docker image ${result.image}.` - : `Docker image ${result.image} was already absent.`, - ); - } - } catch (verifierError) { - setError( - verifierError instanceof Error - ? 
verifierError.message - : "Failed to delete verifier image.", - ); - } finally { - setIsBusy(false); - } - })(); - }, - }); - }} - onRefreshRegistry={() => void loadRegistryEntries()} - onInstallBenchPack={(benchPackId) => - void installBenchPack(benchPackId) - } - onInstallBenchPackFromUrl={(url) => installBenchPackFromUrl(url)} - onUpdateBenchPack={(benchPackId) => - void updateBenchPack(benchPackId) - } - onUninstallBenchPack={(benchPackId) => - void uninstallInstalledBenchPack(benchPackId) - } - updateDraft={updateDraft} - onUpdateVerifier={(benchPackId, verifierId, updater) => { - void saveVerifierConfig(benchPackId, verifierId, updater); - }} - /> - ) : ( -
- - -
- {appNotice ? ( - -
- {appNotice} - -
-
- ) : null} - {showDownloadedUpdateBanner ? ( - -
- {describeAppUpdateState(appUpdateState)} - -
-
- ) : null} - {error ? {error} : null} - {isBusy && !draft ? ( - Loading BenchLocal config... - ) : null} - -
- {draft ? ( - activeWorkspace ? ( -
-
- {activeTabMask ? ( - - ) : null} -
- {workspaceTabs.map((tab) => { - const inspection = benchPackInspections.find( - (candidate) => candidate.id === tab.benchPackId, - ); - const isTabRunning = Boolean(activeRuns[tab.id]); - const hasTabRetryActivity = - (liveRuns[tab.id]?.activeCellKeys.length ?? 0) > - 0; - const showTabSpinner = - isTabRunning || hasTabRetryActivity; - const showWarning = - !isTabRunning && - inspection && - inspection.status !== "ready"; - const isEditingTab = editingTab?.tabId === tab.id; - - return ( - - ); - })} - -
-
- - -
-
-
- {activeInspection && activeTab ? ( - { - if ( - activeRuns[activeTab.id] && - supportsLiveScenarioColumnFocus( - activeTab.executionMode, - ) - ) { - setLiveScenarioFocus((current) => { - const existing = current[activeTab.id]; - const liveScenarioId = - existing?.liveScenarioId ?? null; - - return { - ...current, - [activeTab.id]: { - liveScenarioId, - autoFollow: - liveScenarioId === scenarioId, - }, - }; - }); - } - - updateWorkspaceState((current) => { - const tab = activeTab - ? current.tabs[activeTab.id] - : null; - if (!tab) { - return current; - } - tab.focusedScenarioId = scenarioId; - tab.updatedAt = new Date().toISOString(); - return current; - }); - }} - onEditModels={() => - setTabModelsModal({ - tabId: activeTab.id, - selections: structuredClone( - activeTab.modelSelections, - ), - }) - } - onEditSampling={() => - setSamplingModal({ - tabId: activeTab.id, - benchPackId: activeInspection.id, - benchPackName: - activeInspection.manifest?.name ?? - activeInspection.id, - defaults: { - ...DEFAULT_BENCHLOCAL_GENERATION, - ...(activeInspection.manifest - ?.samplingDefaults ?? {}), - }, - form: createSamplingForm( - activeTab.samplingOverrides, - ), - }) - } - executionMode={activeTab.executionMode} - isViewingHistory={Boolean(activeLoadedHistory)} - onOpenHistory={() => - setHistoryModal({ - benchPackId: activeInspection.id, - benchPackName: - activeInspection.manifest?.name ?? - activeInspection.id, - entries: - runHistories[activeInspection.id] ?? [], - }) - } - onEditModelAlias={(model) => - setModelAliasModal({ - tabId: activeTab.id, - modelId: model.id, - baseLabel: model.label, - alias: model.alias ?? "", - }) - } - onChangeExecutionMode={(executionMode) => - updateWorkspaceState((current) => { - const tab = activeTab - ? 
current.tabs[activeTab.id] - : null; - if (!tab) { - return current; - } - tab.executionMode = executionMode; - tab.updatedAt = new Date().toISOString(); - return current; - }) - } - isRunning={Boolean(activeRuns[activeTab.id])} - isStopping={Boolean(stoppingRuns[activeTab.id])} - onOpenVerification={() => { - setSettingsTab("verification"); - setSettingsOpen(true); - }} - onRefreshVerification={() => - void loadVerifierStatuses() - } - onClearHistory={() => - clearLoadedHistoryRun(activeTab.id) - } - onRun={() => - void (activeLoadedHistory?.mode === "replay" && - activeRunSummary - ? replayTabRun(activeTab, activeRunSummary) - : activeRunSummary && - !isRunSummaryComplete(activeRunSummary) - ? resumeTabRun(activeTab, activeRunSummary) - : runTab(activeTab)) - } - onStop={() => void stopTabRun(activeTab.id)} - onOpenDetail={setDetailModal} - /> - ) : ( - { - setSettingsTab("providers"); - setSettingsOpen(true); - }} - onOpenModels={() => { - setSettingsTab("models"); - setSettingsOpen(true); - }} - onOpenBenchPacks={() => { - setSettingsTab("benchPacks"); - setSettingsOpen(true); - }} - onSelectBenchPack={ - activeTab - ? () => setTabMenuOpen(true) - : undefined - } - /> - )} -
-
- ) : ( - { - setSettingsTab("providers"); - setSettingsOpen(true); - }} - onOpenModels={() => { - setSettingsTab("models"); - setSettingsOpen(true); - }} - onOpenBenchPacks={() => { - setSettingsTab("benchPacks"); - setSettingsOpen(true); - }} - /> - ) - ) : null} -
- {logsOpen && !logsDetached ? ( -
-
{ - document.body.dataset.logResizeActive = "true"; - }} - /> -
-
-

Run Logs

-
- {activeTab ? activeTab.title : "No Active Tab"} -
-
-
- - - {activeLogEvents.length} events - - -
-
- {activeLogEvents.length > 0 ? ( -
- {activeLogEvents.map((event, index) => ( -
- {event.type} - - {" "} - {JSON.stringify(event)} - -
- ))} -
- ) : ( -
- No run logs yet for the active tab. -
- )} -
- ) : null} -
-
- )} - {!settingsOpen ? ( -
-
- - {activeWorkspace?.name ?? "No Workspace"} - - - - {activeTab?.title ?? "No Tab"} - -
-
- - - - {activeLogEvents.length} events - -
-
- ) : null} -
-
- - {providerModal ? ( - setProviderModal(null)} - onSubmit={saveProviderModal} - submitLabel={ - providerModal.mode === "create" - ? "Create Provider" - : "Save Provider" - } - leadingActions={ - providerModal.mode === "edit" ? ( - - ) : undefined - } - > -
- option.value)} - getOptionLabel={(value) => - providerKindLabel(value as BenchLocalProviderKind) - } - onChange={(value) => - setProviderModal((current) => - current - ? { - ...current, - form: { - ...current.form, - id: - current.mode === "create" - ? `${value as BenchLocalProviderKind}-${crypto.randomUUID()}` - : current.form.id, - kind: value as BenchLocalProviderKind, - name: - current.form.name.trim() === "" || - current.form.name === - defaultProviderName(current.form.kind) - ? defaultProviderName( - value as BenchLocalProviderKind, - ) - : current.form.name, - base_url: - current.form.base_url === - defaultProviderBaseUrl(current.form.kind) - ? defaultProviderBaseUrl( - value as BenchLocalProviderKind, - ) - : current.form.base_url, - }, - } - : current, - ) - } - /> - - setProviderModal((current) => - current - ? { ...current, form: { ...current.form, name: value } } - : current, - ) - } - /> - - setProviderModal((current) => - current - ? { ...current, form: { ...current.form, api_key: value } } - : current, - ) - } - /> - - setProviderModal((current) => - current - ? { - ...current, - form: { ...current.form, enabled: checked }, - } - : current, - ) - } - /> -
- - setProviderModal((current) => - current - ? { ...current, form: { ...current.form, base_url: value } } - : current, - ) - } - /> -
- ) : null} - - {modelModal - ? (() => { - const selectedProvider = draft?.providers[modelModal.form.provider]; - const canBrowseModels = - providerSupportsModelDiscovery(selectedProvider); - - return ( - setModelModal(null)} - onSubmit={saveModelModal} - submitLabel={ - modelModal.mode === "create" ? "Create Model" : "Save Model" - } - leadingActions={ - modelModal.mode === "edit" ? ( - - ) : undefined - } - > -
- 0 ? providerIds : ["openrouter"] - } - getOptionLabel={(value) => { - const provider = draft?.providers[value]; - return provider ? provider.name : value; - }} - onChange={(value) => - setModelModal((current) => - current - ? { - ...current, - form: { ...current.form, provider: value }, - } - : current, - ) - } - /> - - setModelModal((current) => - current - ? { - ...current, - form: { ...current.form, group: value }, - } - : current, - ) - } - /> - - - setModelModal((current) => - current - ? { - ...current, - form: { ...current.form, label: value }, - } - : current, - ) - } - /> - undefined} - /> - - setModelModal((current) => - current - ? { - ...current, - form: { ...current.form, enabled: checked }, - } - : current, - ) - } - /> -
-
- ); - })() - : null} - - {modelBrowserModal ? ( - setModelBrowserModal(null)} - onQueryChange={(query) => - setModelBrowserModal((current) => - current ? { ...current, query } : current, - ) - } - onSelect={(modelId) => - setModelBrowserModal((current) => - current ? { ...current, selectedModelId: modelId } : current, - ) - } - onSubmit={() => { - if (!modelBrowserModal.selectedModelId) { - return; - } - - const selectedEntry = modelBrowserModal.entries.find( - (entry) => entry.id === modelBrowserModal.selectedModelId, - ); - - if (!selectedEntry) { - return; - } - - setModelModal((current) => { - if (!current) { - return current; - } - - const providerName = - draft?.providers[current.form.provider]?.name ?? - current.form.provider; - const currentDefaultLabel = current.form.model.trim() - ? defaultModelLabel(providerName, current.form.model, undefined) - : ""; - const nextLabel = defaultModelLabel( - providerName, - selectedEntry.id, - selectedEntry.name, - ); - const shouldAutofillLabel = - current.form.label.trim() === "" || - current.form.label.trim() === currentDefaultLabel; - - return { - ...current, - form: { - ...current.form, - model: selectedEntry.id, - label: shouldAutofillLabel ? nextLabel : current.form.label, - }, - }; - }); - setModelBrowserModal(null); - }} - /> - ) : null} - - {tabModelsModal && draft ? ( - setTabModelsModal(null)} - onChange={(selections) => - setTabModelsModal((current) => - current ? { ...current, selections } : current, - ) - } - onSubmit={() => { - const nextSelections = normalizeTabModelSelections( - tabModelsModal.selections, - ); - - updateWorkspaceState((current) => { - const tab = current.tabs[tabModelsModal.tabId]; - - if (!tab) { - return current; - } - - tab.modelSelections = nextSelections; - tab.updatedAt = new Date().toISOString(); - return current; - }); - - setTabModelsModal(null); - }} - /> - ) : null} - - {samplingModal ? 
( - setSamplingModal(null)} - onChange={(form) => - setSamplingModal((current) => - current ? { ...current, form } : current, - ) - } - onSubmit={() => { - const parsed = parseSamplingForm(samplingModal.form); - - if (parsed.error) { - setError(parsed.error); - return; - } - - updateWorkspaceState((current) => { - const tab = current.tabs[samplingModal.tabId]; - - if (!tab) { - return current; - } - - tab.samplingOverrides = parsed.value ?? {}; - tab.updatedAt = new Date().toISOString(); - return current; - }); - - setSamplingModal(null); - }} - /> - ) : null} - - {modelAliasModal && draft ? ( - setModelAliasModal(null)} - onSubmit={() => { - updateWorkspaceState((current) => { - const tab = current.tabs[modelAliasModal.tabId]; - - if (!tab) { - return current; - } - - tab.modelSelections = upsertTabModelAlias( - tab, - draft.models, - modelAliasModal.modelId, - modelAliasModal.alias, - ); - tab.updatedAt = new Date().toISOString(); - return current; - }); - - setModelAliasModal(null); - }} - submitLabel="Save Alias" - > - - setModelAliasModal((current) => - current ? { ...current, alias: value } : current, - ) - } - /> - - ) : null} - - {aboutDialogOpen ? ( - void checkForAppUpdates()} - onInstallUpdate={() => void installDownloadedAppUpdate()} - onClose={() => setAboutDialogOpen(false)} - /> - ) : null} - - {workspaceModal ? ( - setWorkspaceModal(null)} - onSubmit={() => { - if (!workspaceModal.name.trim()) { - setError("Workspace name is required."); - return; - } - - renameWorkspace(workspaceModal.workspaceId, workspaceModal.name); - setWorkspaceModal(null); - }} - submitLabel="Save Workspace" - > - - setWorkspaceModal((current) => - current ? { ...current, name: value } : current, - ) - } - /> - - ) : null} - - {historyModal ? 
( - setHistoryModal(null)} - onOpenRun={(runId, mode) => { - void restoreHistoryRun(historyModal.benchPackId, runId, mode); - setHistoryModal(null); - }} - onRemoveAll={() => - setConfirmDialog({ - title: `Remove all histories for ${historyModal.benchPackName}?`, - subtitle: - "This permanently deletes all saved test runs for this Bench Pack.", - confirmLabel: "Remove All Histories", - tone: "danger", - onConfirm: () => { - void removeAllHistoryForBenchPack( - historyModal.benchPackId, - historyModal.benchPackName, - ); - }, - }) - } - /> - ) : null} - - {confirmDialog ? ( - setConfirmDialog(null)} - onSubmit={() => { - confirmDialog.onConfirm(); - setConfirmDialog(null); - }} - submitLabel={confirmDialog.confirmLabel} - submitTone={confirmDialog.tone === "danger" ? "danger" : "primary"} - /> - ) : null} - - {settingsVerifierPreparationModal ? ( - - void cancelSettingsVerifierStart( - settingsVerifierPreparationModal.benchPackId, - ) - } - /> - ) : verifierPreparationModal ? ( - void stopTabRun(verifierPreparationModal.tabId)} - /> - ) : null} - - {workspaceContextMenu ? ( -
event.stopPropagation()} - > - - -
- ) : null} - - {detailModal ? ( - setDetailModal(null)} - onSubmit={() => setDetailModal(null)} - submitLabel="Close" - leadingActions={ - - } - > -
-
- Status - Validation Result -
- - {detailModal.status} - -
-
{detailModal.rawLog}
-
- ) : null} -
- ); + // Removed detached logs view in web version + + const isMacPlatform = + typeof navigator !== 'undefined' && navigator.userAgent.includes('Mac'); + const [loadState, setLoadState] = useState(null); + const [draft, setDraft] = useState(null); + const [workspaceState, setWorkspaceState] = + useState(null); + const [benchPackInspections, setBenchPackInspections] = useState< + BenchPackInspection[] + >([]); + const [registryEntries, setRegistryEntries] = useState< + BenchPackRegistryEntry[] + >([]); + const [registryWarning, setRegistryWarning] = useState(null); + const [availableThemes, setAvailableThemes] = useState< + BenchLocalThemeDescriptor[] + >([]); + const [activeThemeDefinition, setActiveThemeDefinition] = + useState(null); + const [systemPrefersDark, setSystemPrefersDark] = useState( + typeof window !== 'undefined' + ? window.matchMedia('(prefers-color-scheme: dark)').matches + : false, + ); + const [verifierStatuses, setVerifierStatuses] = useState< + Record + >({}); + const [tabMenuOpen, setTabMenuOpen] = useState(false); + const [themeMenuOpen, setThemeMenuOpen] = useState(false); + const [sidebarOpen, setSidebarOpen] = useState(() => { + if (typeof window === 'undefined') { + return true; + } + + return window.localStorage.getItem(SIDEBAR_OPEN_STORAGE_KEY) !== 'false'; + }); + const [settingsOpen, setSettingsOpen] = useState(false); + const [settingsTab, setSettingsTab] = useState('providers'); + const [aboutDialogOpen, setAboutDialogOpen] = useState(false); + const [appMetadata, setAppMetadata] = useState( + null, + ); + const [appUpdateState, setAppUpdateState] = + useState(null); + const [ + dismissedDownloadedUpdateVersion, + setDismissedDownloadedUpdateVersion, + ] = useState(null); + const [providerModal, setProviderModal] = useState( + null, + ); + const [modelModal, setModelModal] = useState(null); + const [modelBrowserModal, setModelBrowserModal] = + useState(null); + const [tabModelsModal, setTabModelsModal] = + useState(null); + const 
[samplingModal, setSamplingModal] = useState( + null, + ); + const [modelAliasModal, setModelAliasModal] = + useState(null); + const [workspaceModal, setWorkspaceModal] = + useState(null); + const [workspaceContextMenu, setWorkspaceContextMenu] = + useState(null); + const [historyModal, setHistoryModal] = useState( + null, + ); + const [confirmDialog, setConfirmDialog] = useState(null); + const [verifierPreparationModal, setVerifierPreparationModal] = + useState(null); + const [ + settingsVerifierPreparationModal, + setSettingsVerifierPreparationModal, + ] = useState(null); + const [stoppingVerifierStarts, setStoppingVerifierStarts] = useState< + Record + >({}); + const [draggedTabId, setDraggedTabId] = useState(null); + const [editingTab, setEditingTab] = useState<{ + tabId: string; + value: string; + width: number; + } | null>(null); + const [activeRuns, setActiveRuns] = useState>( + {}, + ); + const [stoppingRuns, setStoppingRuns] = useState>({}); + const [runSummaries, setRunSummaries] = useState< + Record + >({}); + const [runHistories, setRunHistories] = useState< + Record + >({}); + const [liveRuns, setLiveRuns] = useState>({}); + const [liveScenarioFocus, setLiveScenarioFocus] = useState< + Record + >({}); + const [loadedHistoryRuns, setLoadedHistoryRuns] = useState< + Record + >({}); + const [logsOpen, setLogsOpen] = useState(false); + const [logsAutoScroll, setLogsAutoScroll] = useState(true); + const [logsDetached, setLogsDetached] = useState(false); + const [logDrawerHeight, setLogDrawerHeight] = useState(240); + const [detailModal, setDetailModal] = useState(null); + const [isBusy, setIsBusy] = useState(true); + const [error, setError] = useState(null); + const [appNotice, setAppNotice] = useState(null); + const [settingsNotice, setSettingsNotice] = useState(null); + const [benchPackMutations, setBenchPackMutations] = useState< + Record + >({}); + const themeMenuRef = useRef(null); + const settingsOpenRef = useRef(false); + + const providerIds = 
useMemo( + () => Object.keys(draft?.providers ?? {}), + [draft], + ); + const themeOptions = useMemo( + () => ['system', ...availableThemes.map((theme) => theme.id)], + [availableThemes], + ); + const currentThemeLabel = useMemo( + () => + resolveThemeLabel( + draft?.ui.theme ?? 'system', + availableThemes, + systemPrefersDark, + ), + [draft?.ui.theme, availableThemes, systemPrefersDark], + ); + const readyInspections = useMemo( + () => + benchPackInspections.filter( + (inspection) => inspection.status === 'ready', + ), + [benchPackInspections], + ); + const activeWorkspace = useMemo( + () => + workspaceState?.activeWorkspaceId + ? (workspaceState.workspaces[workspaceState.activeWorkspaceId] ?? null) + : null, + [workspaceState], + ); + const workspaceTabs = useMemo( + () => + activeWorkspace?.tabIds + .map((tabId: any) => workspaceState?.tabs[tabId]) + .filter((tab): tab is BenchLocalWorkspaceTab => Boolean(tab)) ?? [], + [activeWorkspace, workspaceState], + ); + const activeTab = useMemo( + () => + activeWorkspace?.activeTabId + ? (workspaceState?.tabs[activeWorkspace.activeTabId] ?? null) + : (workspaceTabs[0] ?? null), + [activeWorkspace, workspaceState, workspaceTabs], + ); + const activeInspection = useMemo( + () => + benchPackInspections.find( + (inspection) => inspection.id === activeTab?.benchPackId, + ) ?? null, + [benchPackInspections, activeTab], + ); + const activeVerifierStatus = useMemo( + () => + activeInspection ? (verifierStatuses[activeInspection.id] ?? null) : null, + [activeInspection, verifierStatuses], + ); + const activeTabModels = useMemo( + () => (draft ? resolveTabModels(activeTab, draft.models) : []), + [draft, activeTab], + ); + const activeRunSummary = useMemo( + () => (activeTab ? (runSummaries[activeTab.id] ?? null) : null), + [runSummaries, activeTab], + ); + const activeLiveRun = useMemo( + () => (activeTab ? (liveRuns[activeTab.id] ?? 
null) : null), + [liveRuns, activeTab], + ); + const activeLiveScenarioFocus = useMemo( + () => (activeTab ? (liveScenarioFocus[activeTab.id] ?? null) : null), + [liveScenarioFocus, activeTab], + ); + const activeRunBlocker = useMemo( + () => + activeInspection && draft + ? getRequiredVerifierRunBlocker( + activeInspection.manifest, + draft.benchpacks[activeInspection.id], + activeVerifierStatus ?? undefined, + ) + : null, + [activeInspection, activeVerifierStatus, draft], + ); + const activeLoadedHistory = useMemo( + () => (activeTab ? (loadedHistoryRuns[activeTab.id] ?? null) : null), + [loadedHistoryRuns, activeTab], + ); + const activeDisplayModels = useMemo(() => { + if (!draft) { + return []; + } + + if (activeLoadedHistory) { + return resolveHistoryModels(activeRunSummary, draft.models); + } + + return activeTabModels; + }, [draft, activeLoadedHistory, activeRunSummary, activeTabModels]); + const downloadedUpdateVersion = + appUpdateState?.downloadedVersion ?? + appUpdateState?.availableVersion ?? + null; + const showDownloadedUpdateBanner = + appUpdateState?.status === 'downloaded' && + downloadedUpdateVersion !== dismissedDownloadedUpdateVersion; + const activeLogEvents = + activeLiveRun?.events ?? activeRunSummary?.events ?? []; + const logContainerRef = useRef(null); + const tabStripShellRef = useRef(null); + const tabStripRef = useRef(null); + const tabChipRefs = useRef(new Map()); + const modelDiscoveryCacheRef = useRef< + Record + >({}); + const replayRunTokensRef = useRef(new Map()); + const appliedThemeKeysRef = useRef([]); + const [tabStripOverflow, setTabStripOverflow] = useState(false); + const [activeTabMask, setActiveTabMask] = useState<{ + left: number; + width: number; + } | null>(null); + + const hasUnsavedChanges = + loadState && draft + ? JSON.stringify(loadState.config) !== JSON.stringify(draft) + : false; + const effectiveThemeId = useMemo(() => { + const requested = draft?.ui.theme ?? 
'system'; + + if (requested === 'system') { + return systemPrefersDark ? 'dark' : 'light'; + } + + return requested; + }, [draft?.ui.theme, systemPrefersDark]); + + const updateDraft = ( + updater: (current: BenchLocalConfig) => BenchLocalConfig, + ) => { + setDraft((current) => { + if (!current) { + return current; + } + + return updater(cloneConfig(current)); + }); + }; + + const persistWorkspaceState = async (nextState: BenchLocalWorkspaceState) => { + setWorkspaceState(nextState); + + try { + const saved = await bl.workspaces.save(nextState); + setWorkspaceState(saved.state); + } catch (workspaceError) { + setError( + workspaceError instanceof Error + ? workspaceError.message + : 'Failed to save workspace state.', + ); + } + }; + + const updateWorkspaceState = ( + updater: (current: BenchLocalWorkspaceState) => BenchLocalWorkspaceState, + ) => { + setWorkspaceState((current) => { + if (!current) { + return current; + } + + const next = updater(structuredClone(current)); + void persistWorkspaceState(next); + return next; + }); + }; + + const loadBenchPackInspections = async () => { + try { + const inspections = await bl.benchPacks.list(); + setBenchPackInspections(inspections); + } catch (pluginError) { + setError( + pluginError instanceof Error + ? pluginError.message + : 'Failed to inspect configured Bench Packs.', + ); + } + }; + + const loadRegistryEntries = async () => { + try { + const entries = await bl.benchPacks.registry(); + setRegistryEntries(entries); + setRegistryWarning(null); + } catch (registryError) { + setRegistryWarning(formatRegistryWarning(registryError)); + } + }; + + const loadVerifierStatuses = async () => { + try { + const statuses = await bl.verifiers.list(); + setVerifierStatuses( + Object.fromEntries( + statuses.map((status: any) => [status.benchPackId, status]), + ), + ); + } catch (verifierError) { + setError( + verifierError instanceof Error + ? 
verifierError.message + : 'Failed to load verifier status.', + ); + } + }; + + const loadThemes = async () => { + try { + const themes = await bl.themes.list(); + setAvailableThemes(themes); + } catch (themeError) { + setError( + themeError instanceof Error + ? themeError.message + : 'Failed to load available themes.', + ); + } + }; + + // Updates removed in web version + const checkForAppUpdates = async () => {}; + const installDownloadedAppUpdate = async () => {}; + + const loadHistoryForBenchPack = async (benchPackId: string) => { + try { + const history = await bl.benchPacks.history(benchPackId); + setRunHistories((current) => ({ + ...current, + [benchPackId]: history, + })); + } catch (historyError) { + setError( + historyError instanceof Error + ? historyError.message + : 'Failed to load Bench Pack history.', + ); + } + }; + + useEffect(() => { + let cancelled = false; + + const load = async () => { + setIsBusy(true); + setError(null); + setRegistryWarning(null); + + try { + const [ + result, + workspaceResult, + inspections, + themes, + verifierStatusList, + activeRunsResult, + ] = await Promise.all([ + bl.config.load(), + bl.workspaces.load(), + bl.benchPacks.list(), + bl.themes.list(), + bl.verifiers.list(), + bl.benchPacks.activeRuns(), + ]); + + let registry: BenchPackRegistryEntry[] = []; + let nextRegistryWarning: string | null = null; + + try { + registry = await bl.benchPacks.registry(); + } catch (registryError) { + nextRegistryWarning = formatRegistryWarning(registryError); + } + + if (cancelled) { + return; + } + + const persistedRunEntries = await Promise.all( + Object.values(workspaceResult.state.tabs) + .filter((tab: any) => tab.benchPackId && tab.loadedRunId) + .map(async (tab: any) => { + try { + const summary = await bl.benchPacks.loadHistory( + tab.benchPackId as string, + tab.loadedRunId as string, + ); + return [tab.id, summary] as const; + } catch { + return null; + } + }), + ); + + setLoadState(result); + 
setDraft(cloneConfig(result.config)); + setWorkspaceState(workspaceResult.state); + setRunSummaries( + Object.fromEntries( + persistedRunEntries.filter( + (entry): entry is readonly [string, BenchPackRunSummary] => + entry !== null, + ), + ), + ); + setLoadedHistoryRuns( + Object.fromEntries( + persistedRunEntries + .filter( + (entry): entry is readonly [string, BenchPackRunSummary] => + entry !== null, + ) + .map(([tabId, summary]) => [ + tabId, + { + runId: summary.runId, + startedAt: summary.startedAt, + mode: 'history', + }, + ]), + ), + ); + setBenchPackInspections(inspections); + setRegistryEntries(registry); + setRegistryWarning(nextRegistryWarning); + setAvailableThemes(themes); + setVerifierStatuses( + Object.fromEntries( + verifierStatusList.map((status: any) => [ + status.benchPackId, + status, + ]), + ), + ); + setActiveRuns( + Object.fromEntries( + activeRunsResult.map((run: any) => [ + run.tabId, + { benchPackId: run.benchPackId }, + ]), + ), + ); + setAppNotice( + result.created + ? 'Created a fresh ~/.benchlocal/config.toml bootstrap.' + : null, + ); + } catch (loadError) { + if (!cancelled) { + setError( + loadError instanceof Error + ? 
loadError.message + : 'Failed to load BenchLocal config.', + ); + } + } finally { + if (!cancelled) { + setIsBusy(false); + } + } + }; + + void load(); + + return () => { + cancelled = true; + }; + }, []); + + useEffect(() => { + if (typeof window === 'undefined') { + return; + } + + const media = window.matchMedia('(prefers-color-scheme: dark)'); + const handleChange = () => { + setSystemPrefersDark(media.matches); + }; + + handleChange(); + media.addEventListener('change', handleChange); + + return () => { + media.removeEventListener('change', handleChange); + }; + }, []); + + useEffect(() => { + let cancelled = false; + + void bl.updates + .state() + .then((state) => { + if (!cancelled) { + setAppUpdateState(state); + } + }) + .catch(() => undefined); + + const unsubscribe = bl.updates.onState((state) => { + setAppUpdateState(state); + + if (state.status !== 'downloaded') { + setDismissedDownloadedUpdateVersion(null); + } + }); + + return () => { + cancelled = true; + unsubscribe(); + }; + }, []); + + useEffect(() => { + let cancelled = false; + + const loadTheme = async () => { + const theme = await bl.themes.load(effectiveThemeId); + + if (!cancelled) { + setActiveThemeDefinition(theme); + } + }; + + void loadTheme(); + + return () => { + cancelled = true; + }; + }, [effectiveThemeId]); + + useEffect(() => { + if (!activeThemeDefinition || typeof document === 'undefined') { + return; + } + + const root = document.documentElement; + + for (const key of appliedThemeKeysRef.current) { + root.style.removeProperty(key); + } + + for (const [key, value] of Object.entries( + activeThemeDefinition.variables, + )) { + root.style.setProperty(key, value); + } + + appliedThemeKeysRef.current = Object.keys(activeThemeDefinition.variables); + root.style.setProperty('color-scheme', activeThemeDefinition.colorScheme); + root.dataset.theme = activeThemeDefinition.id; + }, [activeThemeDefinition]); + + useEffect(() => { + const sse = bl.sse(); + const handleRunEvent = (e: 
MessageEvent) => { + const { tabId, event } = JSON.parse(e.data) as { + tabId: string; + event: ProgressEvent; + }; + + if (event.type === 'verifier_preparing') { + setVerifierPreparationModal({ + tabId, + progress: event, + }); + } else { + setVerifierPreparationModal((current) => + current?.tabId === tabId ? null : current, + ); + } + + if (event.type === 'run_finished' || event.type === 'run_error') { + setActiveRuns((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + setStoppingRuns((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + } + + setLiveRuns((current) => ({ + ...current, + [tabId]: updateLiveRunState(current[tabId], event), + })); + + if (event.type === 'run_started') { + setLiveScenarioFocus((current) => ({ + ...current, + [tabId]: { + liveScenarioId: null, + autoFollow: true, + }, + })); + } else if ( + event.type === 'scenario_started' || + event.type === 'model_progress' || + event.type === 'scenario_result' || + event.type === 'scenario_finished' + ) { + setLiveScenarioFocus((current) => { + const existing = current[tabId]; + return { + ...current, + [tabId]: { + liveScenarioId: event.scenarioId, + autoFollow: existing?.autoFollow ?? true, + }, + }; + }); + } + }; + const handleMutationProgress = (e: MessageEvent) => { + const payload = JSON.parse(e.data) as BenchPackMutationProgress; + setBenchPackMutations((current) => ({ + ...current, + [payload.benchPackId]: payload, + })); + }; + const handleVerifierProgress = (e: MessageEvent) => { + const { benchPackId, event } = JSON.parse(e.data) as { + benchPackId: string; + event: ProgressEvent; + }; + setSettingsVerifierPreparationModal((current) => + current?.benchPackId === benchPackId || current === null + ? 
({ benchPackId, progress: event } as any) + : current, + ); + }; + sse.addEventListener('run-event', handleRunEvent); + sse.addEventListener('benchpack-mutation-progress', handleMutationProgress); + sse.addEventListener('verifier-progress', handleVerifierProgress); + return () => { + sse.removeEventListener('run-event', handleRunEvent); + sse.removeEventListener( + 'benchpack-mutation-progress', + handleMutationProgress, + ); + sse.removeEventListener('verifier-progress', handleVerifierProgress); + sse.close(); + }; + }, []); + + useEffect(() => { + return bl.benchPacks.onMutationProgress((payload) => { + setBenchPackMutations((current) => ({ + ...current, + [payload.benchPackId]: payload, + })); + }); + }, []); + + useEffect(() => { + return bl.verifiers.onProgress(({ benchPackId, event }) => { + setSettingsVerifierPreparationModal((current) => + current?.benchPackId === benchPackId || current === null + ? { + benchPackId, + progress: event, + } + : current, + ); + }); + }, []); + + useEffect(() => { + if (!settingsOpen || settingsTab !== 'verification') { + return; + } + + void loadVerifierStatuses(); + }, [settingsOpen, settingsTab]); + + useEffect(() => { + if (!settingsOpen || settingsTab !== 'advanced') { + return; + } + + setSettingsTab('providers'); + }, [settingsOpen, settingsTab]); + + useEffect(() => { + if (!logsOpen || !logsAutoScroll || !logContainerRef.current) { + return; + } + + logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight; + }, [activeLogEvents, logsOpen, logsAutoScroll]); + + useEffect(() => { + if (!activeInspection?.id || activeInspection.status !== 'ready') { + return; + } + + void loadHistoryForBenchPack(activeInspection.id); + }, [activeInspection?.id, activeInspection?.status]); + + useEffect(() => { + const dispose = bl.logs.onDetachedWindowClosed(() => { + setLogsDetached(false); + }); + + return dispose; + }, []); + + useEffect(() => { + void bl.logs.publishDetachedState({ + workspaceName: 
activeWorkspace?.name ?? 'No Workspace', + tabTitle: activeTab?.title ?? 'No Active Tab', + eventCount: activeLogEvents.length, + events: activeLogEvents, + }); + }, [activeWorkspace?.name, activeTab?.title, activeLogEvents]); + + useEffect(() => { + const handleMove = (event: MouseEvent) => { + const shell = document.querySelector('.desktop-shell'); + + if (!shell || !document.body.dataset.logResizeActive) { + return; + } + + const shellRect = shell.getBoundingClientRect(); + const nextHeight = Math.min( + 420, + Math.max(160, shellRect.bottom - event.clientY - 30), + ); + setLogDrawerHeight(nextHeight); + }; + + const handleUp = () => { + delete document.body.dataset.logResizeActive; + }; + + window.addEventListener('mousemove', handleMove); + window.addEventListener('mouseup', handleUp); + + return () => { + window.removeEventListener('mousemove', handleMove); + window.removeEventListener('mouseup', handleUp); + }; + }, []); + + useEffect(() => { + if (!workspaceContextMenu) { + return; + } + + const closeMenu = () => { + setWorkspaceContextMenu(null); + }; + + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === 'Escape') { + closeMenu(); + } + }; + + window.addEventListener('mousedown', closeMenu); + window.addEventListener('scroll', closeMenu, true); + window.addEventListener('resize', closeMenu); + window.addEventListener('keydown', handleKeyDown); + + return () => { + window.removeEventListener('mousedown', closeMenu); + window.removeEventListener('scroll', closeMenu, true); + window.removeEventListener('resize', closeMenu); + window.removeEventListener('keydown', handleKeyDown); + }; + }, [workspaceContextMenu]); + + useEffect(() => { + if (!themeMenuOpen) { + return; + } + + const handlePointerDown = (event: MouseEvent) => { + const target = event.target as Node; + if (!themeMenuRef.current?.contains(target)) { + setThemeMenuOpen(false); + } + }; + + const handleEscape = (event: KeyboardEvent) => { + if (event.key === 'Escape') { + 
setThemeMenuOpen(false); + } + }; + + window.addEventListener('mousedown', handlePointerDown); + window.addEventListener('keydown', handleEscape); + + return () => { + window.removeEventListener('mousedown', handlePointerDown); + window.removeEventListener('keydown', handleEscape); + }; + }, [themeMenuOpen]); + + useEffect(() => { + return bl.app.onOpenAbout(() => { + setAboutDialogOpen(true); + + if (!appMetadata) { + void bl.app + .metadata() + .then((metadata) => { + setAppMetadata(metadata); + }) + .catch(() => undefined); + } + }); + }, [appMetadata]); + + useEffect(() => { + return bl.app.onOpenSettings(() => { + setSettingsOpen(true); + }); + }, []); + + useEffect(() => { + settingsOpenRef.current = settingsOpen; + + if (!settingsOpen) { + setSettingsNotice(null); + } + }, [settingsOpen]); + + useEffect(() => { + if (typeof window === 'undefined') { + return; + } + + window.localStorage.setItem(SIDEBAR_OPEN_STORAGE_KEY, String(sidebarOpen)); + }, [sidebarOpen]); + + useEffect(() => { + const updateOverflow = () => { + const element = tabStripRef.current; + + if (!element) { + setTabStripOverflow(false); + return; + } + + setTabStripOverflow(element.scrollWidth > element.clientWidth + 4); + }; + + updateOverflow(); + window.addEventListener('resize', updateOverflow); + + return () => { + window.removeEventListener('resize', updateOverflow); + }; + }, [workspaceTabs.length, activeWorkspace?.id, sidebarOpen]); + + useEffect(() => { + const shell = tabStripShellRef.current; + const strip = tabStripRef.current; + const activeTabId = activeTab?.id; + + if (!shell || !strip || !activeTabId) { + setActiveTabMask(null); + return; + } + + const updateMask = () => { + const activeElement = tabChipRefs.current.get(activeTabId); + + if (!activeElement) { + setActiveTabMask(null); + return; + } + + const shellRect = shell.getBoundingClientRect(); + const tabRect = activeElement.getBoundingClientRect(); + + setActiveTabMask({ + left: Math.round(tabRect.left - 
shellRect.left), + width: Math.round(tabRect.width), + }); + }; + + const frameId = window.requestAnimationFrame(updateMask); + window.addEventListener('resize', updateMask); + strip.addEventListener('scroll', updateMask, { passive: true }); + + return () => { + window.cancelAnimationFrame(frameId); + window.removeEventListener('resize', updateMask); + strip.removeEventListener('scroll', updateMask); + }; + }, [activeTab?.id, workspaceTabs, sidebarOpen, tabStripOverflow]); + + const persistConfig = async ( + nextConfig: BenchLocalConfig, + options?: { + notice?: string | null; + preserveFilesystemDraft?: boolean; + previousDraft?: BenchLocalConfig | null; + previousLoadConfig?: BenchLocalConfig | null; + }, + ): Promise => { + if (!nextConfig) { + return false; + } + + setIsBusy(true); + setError(null); + + try { + const result = await bl.config.save(nextConfig); + setLoadState(result); + setDraft( + options?.preserveFilesystemDraft && + options.previousDraft && + options.previousLoadConfig + ? reapplyPendingFilesystemDraft( + result.config, + options.previousDraft, + options.previousLoadConfig, + ) + : cloneConfig(result.config), + ); + await loadBenchPackInspections(); + await loadRegistryEntries(); + if (settingsOpenRef.current && options?.notice) { + setSettingsNotice(options.notice); + } + return true; + } catch (saveError) { + setError( + saveError instanceof Error + ? saveError.message + : 'Failed to save BenchLocal config.', + ); + return false; + } finally { + setIsBusy(false); + } + }; + + const save = async (): Promise => { + if (!draft) { + return false; + } + + return persistConfig(draft, { notice: 'Saved ~/.benchlocal/config.toml' }); + }; + + const refreshBenchPackState = async (result?: LoadState) => { + const nextLoadState = result ?? 
(await bl.config.load()); + const inspections = await bl.benchPacks.list(); + const verifierStatusList = await bl.verifiers.list(); + let registry = registryEntries; + + try { + registry = await bl.benchPacks.registry(); + setRegistryWarning(null); + } catch (registryError) { + setRegistryWarning(formatRegistryWarning(registryError)); + } + + setLoadState(nextLoadState); + setDraft(cloneConfig(nextLoadState.config)); + setBenchPackInspections(inspections); + setRegistryEntries(registry); + setVerifierStatuses( + Object.fromEntries( + verifierStatusList.map((status: any) => [status.benchPackId, status]), + ), + ); + }; + + const ensureBenchPackMutationReady = async (): Promise => { + if (!hasUnsavedChanges) { + return true; + } + + return save(); + }; + + const installBenchPack = async (benchPackId: string) => { + if (!(await ensureBenchPackMutationReady())) { + return; + } + + setIsBusy(true); + setError(null); + setBenchPackMutations((current) => ({ + ...current, + [benchPackId]: { + benchPackId, + action: 'install', + phase: 'resolving', + message: 'Resolving Bench Pack from registry.', + }, + })); + + try { + const result = await bl.benchPacks.install(benchPackId); + await refreshBenchPackState(result); + if (settingsOpenRef.current) { + setSettingsNotice(`Installed ${benchPackId}.`); + } + } catch (installError) { + setError( + formatRegistryMutationError('install', benchPackId, installError), + ); + } finally { + setIsBusy(false); + setBenchPackMutations((current) => { + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + }; + + const installBenchPackFromUrl = async (url: string) => { + if (!(await ensureBenchPackMutationReady())) { + return; + } + + const normalizedUrl = url.trim(); + + if (!normalizedUrl) { + setError('Bench Pack URL is required.'); + return; + } + + setIsBusy(true); + setError(null); + let installedBenchPackId: string | null = null; + setBenchPackMutations((current) => ({ + ...current, + 
[THIRD_PARTY_INSTALL_MUTATION_ID]: { + benchPackId: THIRD_PARTY_INSTALL_MUTATION_ID, + action: 'install', + phase: 'resolving', + message: 'Resolving Bench Pack from URL.', + }, + })); + + try { + const result = await bl.benchPacks.installFromUrl(normalizedUrl); + await refreshBenchPackState(result); + installedBenchPackId = + Object.entries(result.config.benchpacks).find( + ([, benchPack]: any) => + benchPack.source === 'archive' && benchPack.url === normalizedUrl, + )?.[0] ?? null; + if (settingsOpenRef.current) { + setSettingsNotice( + installedBenchPackId + ? `Installed ${installedBenchPackId}.` + : 'Installed third-party Bench Pack.', + ); + } + return true; + } catch (installError) { + setError( + formatDesktopErrorMessage(installError) || + 'Failed to install Bench Pack from URL.', + ); + return false; + } finally { + setIsBusy(false); + setBenchPackMutations((current) => { + const next = { ...current }; + delete next[THIRD_PARTY_INSTALL_MUTATION_ID]; + delete next['third-party']; + if (installedBenchPackId) { + delete next[installedBenchPackId]; + } + return next; + }); + } + }; + + const updateBenchPack = async (benchPackId: string) => { + if (!(await ensureBenchPackMutationReady())) { + return; + } + + setIsBusy(true); + setError(null); + setBenchPackMutations((current) => ({ + ...current, + [benchPackId]: { + benchPackId, + action: 'update', + phase: 'resolving', + message: 'Resolving Bench Pack update.', + }, + })); + + try { + const result = await bl.benchPacks.update(benchPackId); + await refreshBenchPackState(result); + if (settingsOpenRef.current) { + setSettingsNotice(`Updated ${benchPackId}.`); + } + } catch (updateError) { + setError(formatRegistryMutationError('update', benchPackId, updateError)); + } finally { + setIsBusy(false); + setBenchPackMutations((current) => { + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + }; + + const uninstallInstalledBenchPack = async (benchPackId: string) => { + if (!(await 
ensureBenchPackMutationReady())) { + return; + } + + if ( + Object.values(activeRuns).some((run) => run.benchPackId === benchPackId) + ) { + setError('Stop active Bench Pack runs before uninstalling this pack.'); + return; + } + + setIsBusy(true); + setError(null); + setBenchPackMutations((current) => ({ + ...current, + [benchPackId]: { + benchPackId, + action: 'uninstall', + phase: 'removing', + message: 'Removing Bench Pack.', + }, + })); + + try { + const result = await bl.benchPacks.uninstall(benchPackId); + await refreshBenchPackState(result); + if (settingsOpenRef.current) { + setSettingsNotice(`Uninstalled ${benchPackId}.`); + } + } catch (uninstallError) { + setError( + uninstallError instanceof Error + ? uninstallError.message + : `Failed to uninstall ${benchPackId}.`, + ); + } finally { + setIsBusy(false); + setBenchPackMutations((current) => { + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + }; + + const reset = () => { + if (!loadState) { + return; + } + + setDraft(cloneConfig(loadState.config)); + setProviderModal(null); + setModelModal(null); + if (settingsOpenRef.current) { + setSettingsNotice('Reverted unsaved changes.'); + } + setError(null); + }; + + const saveThemeSelection = async (themeId: string) => { + if (!draft) { + return; + } + + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? 
cloneConfig(previousLoadConfig) + : cloneConfig(draft); + nextConfig.ui.theme = themeId; + setDraft(nextConfig); + + const saved = await persistConfig(nextConfig, { + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + if (!saved) { + setDraft(previousDraft); + } + }; + + const saveVerifierConfig = async ( + benchPackId: string, + verifierId: string, + updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig, + ) => { + if (!draft) { + return; + } + + const currentVerifier = + draft.benchpacks[benchPackId]?.verifiers?.[verifierId]; + if (!currentVerifier) { + return; + } + + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? cloneConfig(previousLoadConfig) + : cloneConfig(draft); + nextConfig.benchpacks[benchPackId].verifiers![verifierId] = + updater(currentVerifier); + setDraft(nextConfig); + + const saved = await persistConfig(nextConfig, { + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + if (!saved) { + setDraft(previousDraft); + } + }; + + const scrollTabStrip = (delta: number) => { + tabStripRef.current?.scrollBy({ + left: delta, + behavior: 'smooth', + }); + }; + + const handleTabStripWheel = (event: React.WheelEvent) => { + const strip = tabStripRef.current; + + if (!strip || !tabStripOverflow) { + return; + } + + const horizontalDelta = + Math.abs(event.deltaX) > Math.abs(event.deltaY) + ? 
event.deltaX + : event.deltaY; + + if (Math.abs(horizontalDelta) < 1) { + return; + } + + event.preventDefault(); + strip.scrollBy({ + left: horizontalDelta, + behavior: 'auto', + }); + }; + + const runTab = async (tab: BenchLocalWorkspaceTab) => { + setError(null); + setAppNotice(null); + + if (!tab.benchPackId || !draft) { + setError('Select a Bench Pack for this tab first.'); + return; + } + + const benchPackId = tab.benchPackId; + const selectedModels = resolveTabModels(tab, draft.models); + const inspection = benchPackInspections.find( + (candidate) => candidate.id === benchPackId, + ); + + if (inspection?.manifest) { + try { + const verifierStatusList = await bl.verifiers.list(); + const nextVerifierStatuses = Object.fromEntries( + verifierStatusList.map((status: any) => [status.benchPackId, status]), + ); + setVerifierStatuses(nextVerifierStatuses); + + const runBlocker = getRequiredVerifierRunBlocker( + inspection.manifest, + draft.benchpacks[benchPackId], + nextVerifierStatuses[benchPackId], + ); + + if (runBlocker) { + setConfirmDialog({ + title: runBlocker.title, + subtitle: runBlocker.message, + confirmLabel: runBlocker.actionLabel, + onConfirm: () => { + setSettingsTab('verification'); + setSettingsOpen(true); + }, + }); + return; + } + } catch (verifierError) { + setError( + verifierError instanceof Error + ? 
verifierError.message + : 'Failed to refresh verifier status.', + ); + return; + } + } + + if (selectedModels.length === 0) { + setError( + 'Select at least one enabled model for this tab before running the Bench Pack.', + ); + return; + } + + if (hasUnsavedChanges) { + const saved = await save(); + + if (!saved) { + return; + } + } + + setActiveRuns((current) => ({ + ...current, + [tab.id]: { benchPackId, mode: 'host' }, + })); + setStoppingRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => ({ + ...current, + [tab.id]: { + events: [], + resultsByModel: {}, + activeCellKeys: [], + }, + })); + setRunSummaries((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLoadedHistoryRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + + try { + const result = await bl.benchPacks.run({ + tabId: tab.id, + benchPackId, + modelIds: selectedModels.map((model) => model.id), + executionMode: tab.executionMode, + generation: tab.samplingOverrides, + }); + setRunSummaries((current) => ({ + ...current, + [tab.id]: result, + })); + updateWorkspaceState((current) => { + const nextTab = current.tabs[tab.id]; + + if (!nextTab) { + return current; + } + + nextTab.loadedRunId = result.runId; + nextTab.updatedAt = new Date().toISOString(); + return current; + }); + if (result.cancelled) { + setAppNotice(`Stopped ${result.benchPackName}.`); + } else { + setAppNotice( + `Completed ${result.benchPackName} across ${result.scenarioCount} scenarios and ${result.modelCount} model${result.modelCount === 1 ? '' : 's'}.`, + ); + } + await loadBenchPackInspections(); + await loadHistoryForBenchPack(benchPackId); + } catch (runError) { + setError( + runError instanceof Error + ? 
runError.message + : `Failed to run Bench Pack for ${benchPackId}.`, + ); + } finally { + setVerifierPreparationModal((current) => + current?.tabId === tab.id ? null : current, + ); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLoadedHistoryRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + } + }; + + const resumeTabRun = async ( + tab: BenchLocalWorkspaceTab, + runSummary: BenchPackRunSummary, + ) => { + setError(null); + setAppNotice(null); + + if (!tab.benchPackId || !draft) { + setError('Select a Bench Pack for this tab first.'); + return; + } + + if (isRunSummaryComplete(runSummary)) { + setError('This saved run is already complete.'); + return; + } + + const benchPackId = tab.benchPackId; + const previousLoadedHistory = loadedHistoryRuns[tab.id] ?? null; + const previousTabModelSelections = structuredClone(tab.modelSelections); + const previousExecutionMode = tab.executionMode; + + if (hasUnsavedChanges) { + const saved = await save(); + + if (!saved) { + return; + } + } + + const historicalSelections = buildHistoryModelSelections( + runSummary, + draft.models, + ); + updateWorkspaceState((current) => { + const nextTab = current.tabs[tab.id]; + + if (!nextTab) { + return current; + } + + nextTab.modelSelections = + normalizeTabModelSelections(historicalSelections); + nextTab.executionMode = runSummary.executionMode ?? 
nextTab.executionMode; + nextTab.updatedAt = new Date().toISOString(); + return current; + }); + + setLoadedHistoryRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setActiveRuns((current) => ({ + ...current, + [tab.id]: { benchPackId, mode: 'host' }, + })); + setStoppingRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => ({ + ...current, + [tab.id]: { + runId: runSummary.runId, + events: [], + resultsByModel: {}, + activeCellKeys: [], + }, + })); + + try { + const result = await bl.benchPacks.resumeRun({ + tabId: tab.id, + benchPackId, + runId: runSummary.runId, + executionMode: runSummary.executionMode ?? tab.executionMode, + generation: tab.samplingOverrides, + }); + setRunSummaries((current) => ({ + ...current, + [tab.id]: result, + })); + updateWorkspaceState((current) => { + const nextTab = current.tabs[tab.id]; + + if (!nextTab) { + return current; + } + + nextTab.loadedRunId = result.runId; + nextTab.updatedAt = new Date().toISOString(); + return current; + }); + if (result.cancelled) { + setAppNotice(`Stopped ${result.benchPackName}.`); + } else { + setAppNotice( + isRunSummaryComplete(result) + ? `Completed ${result.benchPackName} across ${result.scenarioCount} scenarios and ${result.modelCount} model${result.modelCount === 1 ? 
'' : 's'}.` + : `Resumed ${result.benchPackName}, but the run is still incomplete.`, + ); + } + await loadBenchPackInspections(); + await loadHistoryForBenchPack(benchPackId); + } catch (runError) { + updateWorkspaceState((current) => { + const nextTab = current.tabs[tab.id]; + + if (!nextTab) { + return current; + } + + nextTab.modelSelections = structuredClone(previousTabModelSelections); + nextTab.executionMode = previousExecutionMode; + nextTab.updatedAt = new Date().toISOString(); + return current; + }); + if (previousLoadedHistory) { + setLoadedHistoryRuns((current) => ({ + ...current, + [tab.id]: previousLoadedHistory, + })); + } + setError( + runError instanceof Error + ? runError.message + : `Failed to resume Bench Pack for ${benchPackId}.`, + ); + } finally { + setVerifierPreparationModal((current) => + current?.tabId === tab.id ? null : current, + ); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + } + }; + + const replayTabRun = async ( + tab: BenchLocalWorkspaceTab, + runSummary: BenchPackRunSummary, + ) => { + if (!tab.benchPackId) { + setError('Select a Bench Pack for this tab first.'); + return; + } + + if (!isRunSummaryComplete(runSummary)) { + setError('Replay is only available for completed test runs.'); + return; + } + + const inspection = benchPackInspections.find( + (candidate) => candidate.id === tab.benchPackId, + ); + const scenarios = inspection?.scenarios ?? []; + const modelIds = resolveHistoryModels(runSummary, draft?.models ?? 
[]).map( + (model) => model.id, + ); + const replayGroups = buildReplayGroups(runSummary, scenarios, modelIds); + const token = Symbol(`replay:${tab.id}`); + replayRunTokensRef.current.set(tab.id, token); + + setError(null); + setAppNotice(null); + setActiveRuns((current) => ({ + ...current, + [tab.id]: { benchPackId: tab.benchPackId as string, mode: 'replay' }, + })); + setStoppingRuns((current) => { + if (!current[tab.id]) { + return current; + } + + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setLiveRuns((current) => ({ + ...current, + [tab.id]: { + runId: runSummary.runId, + events: [], + resultsByModel: {}, + activeCellKeys: [], + }, + })); + setLiveScenarioFocus((current) => ({ + ...current, + [tab.id]: { + liveScenarioId: null, + autoFollow: supportsLiveScenarioColumnFocus( + runSummary.executionMode ?? tab.executionMode, + ), + }, + })); + + const wait = async (ms: number) => { + await new Promise((resolve) => setTimeout(resolve, ms)); + }; + + try { + for (const group of replayGroups) { + if (replayRunTokensRef.current.get(tab.id) !== token) { + return; + } + + const nextActiveCellKeys = group.map((cell) => + getCellKey(cell.modelId, cell.scenarioId), + ); + const leadScenarioId = group[0]?.scenarioId ?? null; + + setLiveRuns((current) => { + const existing = current[tab.id]; + return { + ...current, + [tab.id]: { + runId: runSummary.runId, + events: existing?.events ?? [], + resultsByModel: existing?.resultsByModel ?? {}, + activeCellKeys: nextActiveCellKeys, + }, + }; + }); + if ( + leadScenarioId && + supportsLiveScenarioColumnFocus( + runSummary.executionMode ?? 
tab.executionMode, + ) + ) { + setLiveScenarioFocus((current) => ({ + ...current, + [tab.id]: { + liveScenarioId: leadScenarioId, + autoFollow: true, + }, + })); + } + + await wait(1000); + + if (replayRunTokensRef.current.get(tab.id) !== token) { + return; + } + + setLiveRuns((current) => { + const existing = current[tab.id]; + const nextResultsByModel = { ...(existing?.resultsByModel ?? {}) }; + + for (const cell of group) { + nextResultsByModel[cell.modelId] = [ + ...(nextResultsByModel[cell.modelId] ?? []).filter( + (candidate) => candidate.scenarioId !== cell.scenarioId, + ), + cell.result, + ]; + } + + return { + ...current, + [tab.id]: { + runId: runSummary.runId, + events: existing?.events ?? [], + resultsByModel: nextResultsByModel, + activeCellKeys: [], + }, + }; + }); + } + + setAppNotice(`Replayed ${runSummary.benchPackName}.`); + } finally { + if (replayRunTokensRef.current.get(tab.id) === token) { + replayRunTokensRef.current.delete(tab.id); + } + + setActiveRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tab.id]; + return next; + }); + } + }; + + const stopTabRun = async (tabId: string) => { + const activeRun = activeRuns[tabId]; + + if (activeRun?.mode === 'replay') { + replayRunTokensRef.current.delete(tabId); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setLiveRuns((current) => ({ + ...current, + [tabId]: { + ...(current[tabId] ?? 
{ + events: [], + resultsByModel: {}, + activeCellKeys: [], + }), + activeCellKeys: [], + }, + })); + setAppNotice('Stopped replay.'); + return; + } + + setStoppingRuns((current) => ({ + ...current, + [tabId]: true, + })); + + try { + const result = await bl.benchPacks.stop(tabId); + + if (!result.stopped) { + setAppNotice('That Bench Pack run was no longer active.'); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + return; + } + + setAppNotice('Stopping Bench Pack run...'); + } catch (stopError) { + setStoppingRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setError( + stopError instanceof Error + ? stopError.message + : 'Failed to stop Bench Pack run.', + ); + } + }; + + const cancelSettingsVerifierStart = async (benchPackId: string) => { + setStoppingVerifierStarts((current) => ({ + ...current, + [benchPackId]: true, + })); + + try { + const result = await bl.verifiers.cancelStart(benchPackId); + + if (!result.cancelled) { + setSettingsVerifierPreparationModal((current) => + current?.benchPackId === benchPackId ? null : current, + ); + setStoppingVerifierStarts((current) => { + if (!current[benchPackId]) { + return current; + } + + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + } catch (cancelError) { + setStoppingVerifierStarts((current) => { + if (!current[benchPackId]) { + return current; + } + + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + setError( + cancelError instanceof Error + ? 
cancelError.message + : 'Failed to cancel verifier start.', + ); + } + }; + + const createWorkspace = () => { + updateWorkspaceState((current) => { + const now = new Date().toISOString(); + const workspaceId = `workspace-${crypto.randomUUID()}`; + const tabId = `tab-${crypto.randomUUID()}`; + + current.workspaceOrder.push(workspaceId); + current.activeWorkspaceId = workspaceId; + current.workspaces[workspaceId] = { + id: workspaceId, + name: createWorkspaceName(current.workspaceOrder.length - 1), + tabIds: [tabId], + activeTabId: tabId, + createdAt: now, + updatedAt: now, + }; + current.tabs[tabId] = { + id: tabId, + title: 'New Tab', + benchPackId: null, + loadedRunId: null, + focusedScenarioId: null, + modelSelections: [], + samplingOverrides: {}, + executionMode: 'parallel_by_test_case', + createdAt: now, + updatedAt: now, + }; + + return current; + }); + }; + + const renameWorkspace = (workspaceId: string, name: string) => { + updateWorkspaceState((current) => { + const workspace = current.workspaces[workspaceId]; + + if (!workspace) { + return current; + } + + workspace.name = name.trim(); + workspace.updatedAt = new Date().toISOString(); + return current; + }); + }; + + const deleteWorkspace = (workspaceId: string) => { + const removedTabIds = new Set( + workspaceState?.workspaces[workspaceId]?.tabIds ?? 
[], + ); + + if (Array.from(removedTabIds).some((tabId) => activeRuns[tabId])) { + setError('Stop active Bench Pack runs before deleting this workspace.'); + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[workspaceId]; + + if (!workspace) { + return current; + } + + for (const tabId of workspace.tabIds) { + delete current.tabs[tabId]; + } + + delete current.workspaces[workspaceId]; + current.workspaceOrder = current.workspaceOrder.filter( + (id) => id !== workspaceId, + ); + + if (current.workspaceOrder.length === 0) { + const now = new Date().toISOString(); + const nextWorkspaceId = `workspace-${crypto.randomUUID()}`; + const nextTabId = `tab-${crypto.randomUUID()}`; + + current.workspaceOrder = [nextWorkspaceId]; + current.activeWorkspaceId = nextWorkspaceId; + current.workspaces[nextWorkspaceId] = { + id: nextWorkspaceId, + name: 'My Workspace', + tabIds: [nextTabId], + activeTabId: nextTabId, + createdAt: now, + updatedAt: now, + }; + current.tabs[nextTabId] = { + id: nextTabId, + title: 'New Tab', + benchPackId: null, + loadedRunId: null, + focusedScenarioId: null, + modelSelections: [], + samplingOverrides: {}, + executionMode: 'parallel_by_test_case', + createdAt: now, + updatedAt: now, + }; + } else if (current.activeWorkspaceId === workspaceId) { + current.activeWorkspaceId = current.workspaceOrder[0] ?? 
null; + } + + return current; + }); + + if (removedTabIds.size > 0) { + setRunSummaries((current) => + Object.fromEntries( + Object.entries(current).filter( + ([tabId]) => !removedTabIds.has(tabId), + ), + ), + ); + setLiveRuns((current) => + Object.fromEntries( + Object.entries(current).filter( + ([tabId]) => !removedTabIds.has(tabId), + ), + ), + ); + setActiveRuns((current) => + Object.fromEntries( + Object.entries(current).filter( + ([tabId]) => !removedTabIds.has(tabId), + ), + ), + ); + setStoppingRuns( + (current) => + Object.fromEntries( + Object.entries(current).filter( + ([tabId]) => !removedTabIds.has(tabId), + ), + ) as Record, + ); + } + }; + + const exportWorkspace = async (workspaceId: string) => { + if (!workspaceState) { + return; + } + + try { + const result = await bl.workspaces.export(workspaceId, workspaceState); + + if (result.exported) { + setAppNotice(`Exported workspace to ${result.filePath}.`); + } + } catch (workspaceError) { + setError( + workspaceError instanceof Error + ? 
workspaceError.message + : 'Failed to export workspace.', + ); + } + }; + + const importWorkspace = async () => { + try { + const result = await bl.workspaces.import(null); + + if (!result.imported || !result.workspace || !result.tabs) { + return; + } + + const importedWorkspace = result.workspace; + const importedTabs = result.tabs; + const workspaceIdMap = new Map(); + const tabIdMap = new Map(); + const newWorkspaceId = `workspace-${crypto.randomUUID()}`; + workspaceIdMap.set(importedWorkspace.id, newWorkspaceId); + + updateWorkspaceState((current) => { + const now = new Date().toISOString(); + const nextTabIds = importedWorkspace.tabIds.map((tabId: any) => { + const nextTabId = `tab-${crypto.randomUUID()}`; + tabIdMap.set(tabId, nextTabId); + const importedTab = importedTabs[tabId]; + + if (importedTab) { + const importedTabRecord = importedTab as typeof importedTab & { + pluginId?: string | null; + }; + current.tabs[nextTabId] = { + ...importedTabRecord, + id: nextTabId, + benchPackId: + importedTabRecord.benchPackId ?? + importedTabRecord.pluginId ?? + null, + samplingOverrides: importedTab.samplingOverrides ?? {}, + createdAt: importedTab.createdAt ?? now, + updatedAt: now, + }; + } + + return nextTabId; + }); + + current.workspaceOrder.push(newWorkspaceId); + current.activeWorkspaceId = newWorkspaceId; + current.workspaces[newWorkspaceId] = { + ...importedWorkspace, + id: newWorkspaceId, + name: Object.values(current.workspaces).some( + (workspace) => workspace.name === importedWorkspace.name, + ) + ? `${importedWorkspace.name} Imported` + : importedWorkspace.name, + tabIds: nextTabIds, + activeTabId: importedWorkspace.activeTabId + ? (tabIdMap.get(importedWorkspace.activeTabId) ?? + nextTabIds[0] ?? + null) + : (nextTabIds[0] ?? null), + createdAt: importedWorkspace.createdAt ?? 
now, + updatedAt: now, + }; + + return current; + }); + + setAppNotice(`Imported workspace "${importedWorkspace.name}".`); + } catch (workspaceError) { + setError( + workspaceError instanceof Error + ? workspaceError.message + : 'Failed to import workspace.', + ); + } + }; + + const activateWorkspace = (workspaceId: string) => { + setWorkspaceContextMenu(null); + updateWorkspaceState((current) => { + current.activeWorkspaceId = workspaceId; + return current; + }); + }; + + const createTab = (benchPackId: string) => { + if (!activeWorkspace) { + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[activeWorkspace.id]; + + if (!workspace) { + return current; + } + + const now = new Date().toISOString(); + const tabId = `tab-${crypto.randomUUID()}`; + current.tabs[tabId] = { + id: tabId, + title: createTabTitle(benchPackId, benchPackInspections), + benchPackId, + loadedRunId: null, + focusedScenarioId: null, + modelSelections: [], + samplingOverrides: {}, + executionMode: 'parallel_by_test_case', + createdAt: now, + updatedAt: now, + }; + workspace.tabIds.push(tabId); + workspace.activeTabId = tabId; + workspace.updatedAt = now; + return current; + }); + setTabMenuOpen(false); + }; + + const assignBenchPackToTab = (tabId: string, benchPackId: string) => { + updateWorkspaceState((current) => { + const tab = current.tabs[tabId]; + + if (!tab) { + return current; + } + + tab.title = createTabTitle(benchPackId, benchPackInspections); + tab.benchPackId = benchPackId; + tab.loadedRunId = null; + tab.focusedScenarioId = null; + tab.samplingOverrides = {}; + tab.updatedAt = new Date().toISOString(); + + return current; + }); + setTabMenuOpen(false); + }; + + const activateTab = (tabId: string) => { + if (!activeWorkspace) { + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[activeWorkspace.id]; + + if (!workspace) { + return current; + } + + workspace.activeTabId = tabId; + workspace.updatedAt = 
new Date().toISOString(); + return current; + }); + }; + + const startEditingTab = (tabId: string, currentTitle: string) => { + const width = tabChipRefs.current.get(tabId)?.offsetWidth ?? 180; + setEditingTab({ + tabId, + value: currentTitle, + width, + }); + }; + + const commitEditingTab = () => { + if (!editingTab) { + return; + } + + const nextTitle = editingTab.value.trim() || 'New Tab'; + + updateWorkspaceState((current) => { + const tab = current.tabs[editingTab.tabId]; + + if (!tab) { + return current; + } + + tab.title = nextTitle; + tab.updatedAt = new Date().toISOString(); + return current; + }); + + setEditingTab(null); + }; + + const cancelEditingTab = () => { + setEditingTab(null); + }; + + const reorderTab = (draggedId: string, targetId: string) => { + if (!activeWorkspace || draggedId === targetId) { + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[activeWorkspace.id]; + + if (!workspace) { + return current; + } + + const nextTabIds = [...workspace.tabIds]; + const fromIndex = nextTabIds.indexOf(draggedId); + const toIndex = nextTabIds.indexOf(targetId); + + if (fromIndex < 0 || toIndex < 0) { + return current; + } + + const [moved] = nextTabIds.splice(fromIndex, 1); + nextTabIds.splice(toIndex, 0, moved); + workspace.tabIds = nextTabIds; + workspace.updatedAt = new Date().toISOString(); + return current; + }); + }; + + const closeTab = (tabId: string) => { + if (!activeWorkspace) { + return; + } + + if (activeRuns[tabId]) { + setError('Stop the Bench Pack run before closing this tab.'); + return; + } + + updateWorkspaceState((current) => { + const workspace = current.workspaces[activeWorkspace.id]; + + if (!workspace) { + return current; + } + + workspace.tabIds = workspace.tabIds.filter((id) => id !== tabId); + delete current.tabs[tabId]; + + workspace.activeTabId = + workspace.activeTabId === tabId + ? (workspace.tabIds[workspace.tabIds.length - 1] ?? 
null) + : workspace.activeTabId; + workspace.updatedAt = new Date().toISOString(); + + if (workspace.tabIds.length === 0) { + const replacementTabId = `tab-${crypto.randomUUID()}`; + current.tabs[replacementTabId] = { + id: replacementTabId, + title: 'New Tab', + benchPackId: null, + loadedRunId: null, + focusedScenarioId: null, + modelSelections: [], + samplingOverrides: {}, + executionMode: 'parallel_by_test_case', + createdAt: workspace.updatedAt, + updatedAt: workspace.updatedAt, + }; + workspace.tabIds = [replacementTabId]; + workspace.activeTabId = replacementTabId; + } + + return current; + }); + setRunSummaries((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setLiveRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + setActiveRuns((current) => { + const next = { ...current }; + delete next[tabId]; + return next; + }); + }; + + const restoreHistoryRun = async ( + benchPackId: string, + runId: string, + mode: 'history' | 'replay' = 'history', + ) => { + if (!activeTab) { + return; + } + + try { + const summary = await bl.benchPacks.loadHistory(benchPackId, runId); + setRunSummaries((current) => ({ + ...current, + [activeTab.id]: summary, + })); + updateWorkspaceState((current) => { + const tab = current.tabs[activeTab.id]; + + if (!tab) { + return current; + } + + tab.loadedRunId = summary.runId; + tab.updatedAt = new Date().toISOString(); + return current; + }); + setLiveRuns((current) => { + const next = { ...current }; + delete next[activeTab.id]; + return next; + }); + setLoadedHistoryRuns((current) => ({ + ...current, + [activeTab.id]: { + runId, + startedAt: summary.startedAt, + mode, + }, + })); + if (summary.executionMode) { + updateWorkspaceState((current) => { + const tab = current.tabs[activeTab.id]; + + if (!tab) { + return current; + } + + tab.executionMode = summary.executionMode ?? 
tab.executionMode; + tab.updatedAt = new Date().toISOString(); + return current; + }); + } + } catch (historyError) { + setError( + historyError instanceof Error + ? historyError.message + : 'Failed to load Bench Pack history.', + ); + } + }; + + const retryScenarioFromDetail = async (detail: DetailModalState) => { + if (!workspaceState) { + return; + } + + if (!detail.runId) { + setError('This scenario does not belong to a saved test run yet.'); + return; + } + + const tab = workspaceState.tabs[detail.tabId]; + + if (!tab || tab.benchPackId !== detail.benchPackId) { + setError('The original tab for this test is no longer available.'); + return; + } + + if (hasUnsavedChanges) { + const saved = await save(); + + if (!saved) { + return; + } + } + + const retryKey = detailModalKey(detail); + const retryCellKey = getCellKey(detail.modelId, detail.scenarioId); + setDetailModal((current) => + current && detailModalKey(current) === retryKey ? null : current, + ); + setLiveRuns((current) => { + const existing = current[detail.tabId]; + + if (existing) { + return { + ...current, + [detail.tabId]: { + ...existing, + runId: existing.runId ?? detail.runId ?? undefined, + activeCellKeys: existing.activeCellKeys.includes(retryCellKey) + ? existing.activeCellKeys + : [...existing.activeCellKeys, retryCellKey], + }, + }; + } + + return { + ...current, + [detail.tabId]: { + runId: detail.runId ?? 
undefined, + events: [], + resultsByModel: {}, + activeCellKeys: [retryCellKey], + }, + }; + }); + + try { + await bl.benchPacks.retryScenario({ + tabId: detail.tabId, + benchPackId: detail.benchPackId, + runId: detail.runId, + scenarioId: detail.scenarioId, + modelId: detail.modelId, + generation: tab.samplingOverrides, + }); + const refreshedSummary = await bl.benchPacks.loadHistory( + detail.benchPackId, + detail.runId, + ); + + if (!activeRuns[detail.tabId]) { + setRunSummaries((current) => ({ + ...current, + [detail.tabId]: refreshedSummary, + })); + } + await loadHistoryForBenchPack(detail.benchPackId); + setAppNotice(`Retested ${detail.scenarioId} for ${detail.modelId}.`); + } catch (retryError) { + setLiveRuns((current) => { + const existing = current[detail.tabId]; + + if (!existing || !existing.activeCellKeys.includes(retryCellKey)) { + return current; + } + + return { + ...current, + [detail.tabId]: { + ...existing, + activeCellKeys: existing.activeCellKeys.filter( + (key) => key !== retryCellKey, + ), + }, + }; + }); + setError( + retryError instanceof Error + ? 
retryError.message + : 'Failed to retry the selected test.', + ); + } + }; + + const clearLoadedHistoryRun = (tabId: string) => { + updateWorkspaceState((current) => { + const tab = current.tabs[tabId]; + + if (!tab) { + return current; + } + + tab.loadedRunId = null; + tab.updatedAt = new Date().toISOString(); + return current; + }); + setLoadedHistoryRuns((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + setRunSummaries((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + setLiveRuns((current) => { + if (!current[tabId]) { + return current; + } + + const next = { ...current }; + delete next[tabId]; + return next; + }); + }; + + const clearLoadedHistoryForBenchPack = (benchPackId: string) => { + const affectedTabIds = workspaceState + ? Object.values(workspaceState.tabs) + .filter( + (tab) => + tab.benchPackId === benchPackId && + Boolean(loadedHistoryRuns[tab.id]), + ) + .map((tab) => tab.id) + : []; + + if (affectedTabIds.length === 0) { + return; + } + + updateWorkspaceState((current) => { + for (const tabId of affectedTabIds) { + const tab = current.tabs[tabId]; + + if (!tab) { + continue; + } + + tab.loadedRunId = null; + tab.updatedAt = new Date().toISOString(); + } + + return current; + }); + + setLoadedHistoryRuns((current) => { + const next = { ...current }; + for (const tabId of affectedTabIds) { + delete next[tabId]; + } + return next; + }); + + setRunSummaries((current) => { + const next = { ...current }; + for (const tabId of affectedTabIds) { + delete next[tabId]; + } + return next; + }); + + setLiveRuns((current) => { + const next = { ...current }; + for (const tabId of affectedTabIds) { + delete next[tabId]; + } + return next; + }); + }; + + const removeAllHistoryForBenchPack = async ( + benchPackId: string, + benchPackName: string, + ) => { + try { + await 
bl.benchPacks.clearHistory(benchPackId); + setRunHistories((current) => ({ + ...current, + [benchPackId]: [], + })); + clearLoadedHistoryForBenchPack(benchPackId); + setHistoryModal(null); + setAppNotice(`Removed all test histories for ${benchPackName}.`); + } catch (historyError) { + setError( + historyError instanceof Error + ? historyError.message + : 'Failed to remove Bench Pack history.', + ); + } + }; + + const saveProviderModal = async () => { + if (!providerModal || !draft) { + return; + } + + const providerId = providerModal.form.id.trim(); + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? cloneConfig(previousLoadConfig) + : cloneConfig(draft); + + nextConfig.providers[providerId] = { + kind: providerModal.form.kind, + name: + providerModal.form.name.trim() || + defaultProviderName(providerModal.form.kind), + enabled: providerModal.form.enabled, + base_url: providerModal.form.base_url.trim(), + api_key: providerModal.form.api_key.trim() || undefined, + }; + + const saved = await persistConfig(nextConfig, { + notice: + providerModal.mode === 'create' + ? 'Added provider.' + : 'Updated provider.', + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + + if (!saved) { + return; + } + + setProviderModal(null); + }; + + const deleteProvider = async (providerId: string): Promise => { + if (!draft) { + return false; + } + + const removedModelIds = new Set( + (draft?.models ?? []) + .filter((model) => model.provider === providerId) + .map((model) => model.id), + ); + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? 
cloneConfig(previousLoadConfig) + : cloneConfig(draft); + + delete nextConfig.providers[providerId]; + nextConfig.models = nextConfig.models.filter( + (model) => model.provider !== providerId, + ); + + const saved = await persistConfig(nextConfig, { + notice: `Deleted provider "${providerId}".`, + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + + if (!saved) { + return false; + } + + if (removedModelIds.size > 0) { + updateWorkspaceState((current) => { + for (const tab of Object.values(current.tabs)) { + tab.modelSelections = tab.modelSelections.filter( + (selection) => !removedModelIds.has(selection.modelId), + ); + } + return current; + }); + } + + return true; + }; + + const confirmDeleteProvider = (providerId: string) => { + const provider = draft?.providers[providerId]; + const linkedModelCount = (draft?.models ?? []).filter( + (model) => model.provider === providerId, + ).length; + + setConfirmDialog({ + title: 'Delete Provider', + subtitle: + linkedModelCount > 0 + ? `Delete ${provider?.name ?? 'this provider'}? This will also delete ${linkedModelCount} linked ${linkedModelCount === 1 ? 'model' : 'models'} and remove them from any tab selections.` + : `Delete ${provider?.name ?? 
'this provider'}?`, + confirmLabel: 'Delete Provider', + tone: 'danger', + onConfirm: () => { + void deleteProvider(providerId).then((deleted) => { + if (deleted) { + setProviderModal(null); + } + }); + }, + }); + }; + + const openModelBrowser = async () => { + if (!modelModal || !draft) { + return; + } + + const provider = draft.providers[modelModal.form.provider]; + + if (!provider) { + setError('Select a provider first.'); + return; + } + + if (!providerSupportsModelDiscovery(provider)) { + setError(`${provider.name} does not support model browsing yet.`); + return; + } + + const cacheKey = `${provider.kind}::${provider.base_url}`; + const cachedEntries = modelDiscoveryCacheRef.current[cacheKey]; + + setModelBrowserModal({ + providerId: modelModal.form.provider, + providerName: provider.name, + entries: cachedEntries ?? [], + query: '', + selectedModelId: + modelModal.form.model.trim() || cachedEntries?.[0]?.id || null, + loading: !cachedEntries, + error: null, + }); + + if (cachedEntries) { + return; + } + + try { + const entries = await bl.models.discover(provider); + modelDiscoveryCacheRef.current[cacheKey] = entries; + setModelBrowserModal((current) => + current && current.providerId === modelModal.form.provider + ? { + ...current, + entries, + selectedModelId: + current.selectedModelId ?? entries[0]?.id ?? null, + loading: false, + } + : current, + ); + } catch (discoverError) { + setModelBrowserModal((current) => + current && current.providerId === modelModal.form.provider + ? { + ...current, + loading: false, + error: + discoverError instanceof Error + ? discoverError.message + : `Failed to load models from ${provider.name}.`, + } + : current, + ); + } + }; + + const saveModelModal = async () => { + if (!modelModal || !draft) { + return; + } + + const modelConfig = buildModelConfig( + modelModal.form, + draft?.providers ?? 
{}, + ); + + if (!modelConfig.provider || !modelConfig.model) { + setError('Model provider and model identifier are required.'); + return; + } + + if (!draft?.providers[modelConfig.provider]) { + setError(`Model provider "${modelConfig.provider}" does not exist yet.`); + return; + } + + const previousModelId = + modelModal.mode === 'edit' + ? (draft?.models[modelModal.index]?.id ?? null) + : null; + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? cloneConfig(previousLoadConfig) + : cloneConfig(draft); + + if (modelModal.mode === 'create') { + nextConfig.models.push(modelConfig); + } else { + nextConfig.models[modelModal.index] = modelConfig; + } + + const saved = await persistConfig(nextConfig, { + notice: modelModal.mode === 'create' ? 'Added model.' : 'Updated model.', + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + + if (!saved) { + return; + } + + if (previousModelId && previousModelId !== modelConfig.id) { + updateWorkspaceState((current) => { + for (const tab of Object.values(current.tabs)) { + tab.modelSelections = tab.modelSelections.map((selection) => + selection.modelId === previousModelId + ? { ...selection, modelId: modelConfig.id } + : selection, + ); + } + return current; + }); + } + + setModelModal(null); + }; + + const deleteModel = async (index: number): Promise => { + if (!draft) { + return false; + } + + const removedModelId = draft?.models[index]?.id ?? null; + const previousDraft = cloneConfig(draft); + const previousLoadConfig = loadState ? cloneConfig(loadState.config) : null; + const nextConfig = previousLoadConfig + ? 
cloneConfig(previousLoadConfig) + : cloneConfig(draft); + nextConfig.models.splice(index, 1); + + const saved = await persistConfig(nextConfig, { + notice: 'Deleted model.', + preserveFilesystemDraft: true, + previousDraft, + previousLoadConfig, + }); + + if (!saved) { + return false; + } + + if (removedModelId) { + updateWorkspaceState((current) => { + for (const tab of Object.values(current.tabs)) { + tab.modelSelections = tab.modelSelections.filter( + (selection) => selection.modelId !== removedModelId, + ); + } + return current; + }); + } + + return true; + }; + + const confirmDeleteModel = (index: number) => { + const model = draft?.models[index]; + if (!model) { + return; + } + + const linkedTabCount = workspaceState + ? Object.values(workspaceState.tabs).filter((tab) => + tab.modelSelections.some( + (selection) => selection.modelId === model.id, + ), + ).length + : 0; + + setConfirmDialog({ + title: 'Delete Model', + subtitle: + linkedTabCount > 0 + ? `Delete ${model.label}? This will also remove it from ${linkedTabCount} tab ${linkedTabCount === 1 ? 'selection' : 'selections'}.` + : `Delete ${model.label}?`, + confirmLabel: 'Delete Model', + tone: 'danger', + onConfirm: () => { + void deleteModel(index).then((deleted) => { + if (deleted) { + setModelModal(null); + } + }); + }, + }); + }; + + return ( +
+
+
+
+
+ + {!isMacPlatform ? ( +
+

BenchLocal

+
+ ) : null} +
+ +
+ {isMacPlatform ? ( +
+

BenchLocal

+
+ ) : null} + + {!settingsOpen ? ( +
+ { + if (activeTab && !activeTab.benchPackId) { + assignBenchPackToTab(activeTab.id, benchPackId); + return; + } + + createTab(benchPackId); + }} + disabled={!activeWorkspace} + /> + + {appUpdateState?.status === 'downloaded' ? ( + + ) : null} +
+ ) : draft ? ( +
+
+ + {themeMenuOpen ? ( +
+ {themeOptions.map((themeId) => ( + + ))} +
+ ) : null} +
+
+ ) : null} +
+
+ + {settingsOpen && draft ? ( + { + setSettingsNotice(null); + setSettingsOpen(false); + }} + onDismissNotice={() => setSettingsNotice(null)} + onDismissError={() => setError(null)} + onSaveAdvanced={() => void save()} + onResetAdvanced={reset} + onCreateProvider={() => + setProviderModal({ + mode: 'create', + form: createEmptyProvider(), + }) + } + onEditProvider={(providerId) => + setProviderModal({ + mode: 'edit', + initialId: providerId, + form: toProviderForm(providerId, draft.providers[providerId]), + }) + } + onCreateModel={() => + setModelModal({ + mode: 'create', + form: createEmptyModel(providerIds[0] ?? 'openrouter'), + }) + } + onEditModel={(index) => + setModelModal({ + mode: 'edit', + index, + form: toModelForm(draft.models[index]), + }) + } + onStartVerifier={async ( + benchPackId, + benchPackName, + verifierId, + ) => { + setError(null); + setStoppingVerifierStarts((current) => { + if (!current[benchPackId]) { + return current; + } + + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + setSettingsVerifierPreparationModal({ + benchPackId, + progress: { + type: 'verifier_preparing', + benchPackId, + benchPackName, + verifierId, + phase: 'checking_docker', + message: 'Checking Local Docker availability.', + }, + }); + + try { + const status = await bl.verifiers.start(benchPackId); + setVerifierStatuses((current) => ({ + ...current, + [benchPackId]: status, + })); + } catch (verifierError) { + if (isAbortLikeError(verifierError)) { + if (settingsOpenRef.current) { + setSettingsNotice(`Cancelled preparing ${verifierId}.`); + } + } else { + setError( + verifierError instanceof Error + ? verifierError.message + : 'Failed to start verifier.', + ); + } + } finally { + setSettingsVerifierPreparationModal((current) => + current?.benchPackId === benchPackId ? 
null : current, + ); + setStoppingVerifierStarts((current) => { + if (!current[benchPackId]) { + return current; + } + + const next = { ...current }; + delete next[benchPackId]; + return next; + }); + } + }} + onStopVerifier={async (benchPackId) => { + try { + const status = await bl.verifiers.stop(benchPackId); + setVerifierStatuses((current) => ({ + ...current, + [benchPackId]: status, + })); + } catch (verifierError) { + setError( + verifierError instanceof Error + ? verifierError.message + : 'Failed to stop verifier.', + ); + } + }} + onDeleteVerifierImage={( + benchPackId, + benchPackName, + verifierId, + ) => { + setConfirmDialog({ + title: 'Delete Verifier Image', + subtitle: `Delete the Local Docker image for verifier "${verifierId}" in ${benchPackName}? BenchLocal will pull or rebuild it again the next time this verifier starts.`, + confirmLabel: 'Delete Image', + tone: 'danger', + onConfirm: () => { + void (async () => { + setIsBusy(true); + setError(null); + + try { + const result = await bl.verifiers.deleteImage( + benchPackId, + verifierId, + ); + setVerifierStatuses((current) => ({ + ...current, + [benchPackId]: result.status, + })); + if (settingsOpenRef.current) { + setSettingsNotice( + result.removed + ? `Deleted Docker image ${result.image}.` + : `Docker image ${result.image} was already absent.`, + ); + } + } catch (verifierError) { + setError( + verifierError instanceof Error + ? 
verifierError.message + : 'Failed to delete verifier image.', + ); + } finally { + setIsBusy(false); + } + })(); + }, + }); + }} + onRefreshRegistry={() => void loadRegistryEntries()} + onInstallBenchPack={(benchPackId) => + void installBenchPack(benchPackId) + } + onInstallBenchPackFromUrl={(url) => installBenchPackFromUrl(url)} + onUpdateBenchPack={(benchPackId) => + void updateBenchPack(benchPackId) + } + onUninstallBenchPack={(benchPackId) => + void uninstallInstalledBenchPack(benchPackId) + } + updateDraft={updateDraft} + onUpdateVerifier={(benchPackId, verifierId, updater) => { + void saveVerifierConfig(benchPackId, verifierId, updater); + }} + /> + ) : ( +
+ + +
+ {appNotice ? ( + +
+ {appNotice} + +
+
+ ) : null} + {showDownloadedUpdateBanner ? ( + +
+ {describeAppUpdateState(appUpdateState)} + +
+
+ ) : null} + {error ? {error} : null} + {isBusy && !draft ? ( + Loading BenchLocal config... + ) : null} + +
+ {draft ? ( + activeWorkspace ? ( +
+
+ {activeTabMask ? ( + + ) : null} +
+ {workspaceTabs.map((tab) => { + const inspection = benchPackInspections.find( + (candidate) => candidate.id === tab.benchPackId, + ); + const isTabRunning = Boolean(activeRuns[tab.id]); + const hasTabRetryActivity = + (liveRuns[tab.id]?.activeCellKeys.length ?? 0) > + 0; + const showTabSpinner = + isTabRunning || hasTabRetryActivity; + const showWarning = + !isTabRunning && + inspection && + inspection.status !== 'ready'; + const isEditingTab = editingTab?.tabId === tab.id; + + return ( + + ); + })} + +
+
+ + +
+
+
+ {activeInspection && activeTab ? ( + { + if ( + activeRuns[activeTab.id] && + supportsLiveScenarioColumnFocus( + activeTab.executionMode, + ) + ) { + setLiveScenarioFocus((current) => { + const existing = current[activeTab.id]; + const liveScenarioId = + existing?.liveScenarioId ?? null; + + return { + ...current, + [activeTab.id]: { + liveScenarioId, + autoFollow: + liveScenarioId === scenarioId, + }, + }; + }); + } + + updateWorkspaceState((current) => { + const tab = activeTab + ? current.tabs[activeTab.id] + : null; + if (!tab) { + return current; + } + tab.focusedScenarioId = scenarioId; + tab.updatedAt = new Date().toISOString(); + return current; + }); + }} + onEditModels={() => + setTabModelsModal({ + tabId: activeTab.id, + selections: structuredClone( + activeTab.modelSelections, + ), + }) + } + onEditSampling={() => + setSamplingModal({ + tabId: activeTab.id, + benchPackId: activeInspection.id, + benchPackName: + activeInspection.manifest?.name ?? + activeInspection.id, + defaults: { + ...DEFAULT_BENCHLOCAL_GENERATION, + ...(activeInspection.manifest + ?.samplingDefaults ?? {}), + }, + form: createSamplingForm( + activeTab.samplingOverrides, + ), + }) + } + executionMode={activeTab.executionMode} + isViewingHistory={Boolean(activeLoadedHistory)} + onOpenHistory={() => + setHistoryModal({ + benchPackId: activeInspection.id, + benchPackName: + activeInspection.manifest?.name ?? + activeInspection.id, + entries: + runHistories[activeInspection.id] ?? [], + }) + } + onEditModelAlias={(model) => + setModelAliasModal({ + tabId: activeTab.id, + modelId: model.id, + baseLabel: model.label, + alias: model.alias ?? '', + }) + } + onChangeExecutionMode={(executionMode) => + updateWorkspaceState((current) => { + const tab = activeTab + ? 
current.tabs[activeTab.id] + : null; + if (!tab) { + return current; + } + tab.executionMode = executionMode; + tab.updatedAt = new Date().toISOString(); + return current; + }) + } + isRunning={Boolean(activeRuns[activeTab.id])} + isStopping={Boolean(stoppingRuns[activeTab.id])} + onOpenVerification={() => { + setSettingsTab('verification'); + setSettingsOpen(true); + }} + onRefreshVerification={() => + void loadVerifierStatuses() + } + onClearHistory={() => + clearLoadedHistoryRun(activeTab.id) + } + onRun={() => + void (activeLoadedHistory?.mode === 'replay' && + activeRunSummary + ? replayTabRun(activeTab, activeRunSummary) + : activeRunSummary && + !isRunSummaryComplete(activeRunSummary) + ? resumeTabRun(activeTab, activeRunSummary) + : runTab(activeTab)) + } + onStop={() => void stopTabRun(activeTab.id)} + onOpenDetail={setDetailModal} + /> + ) : ( + { + setSettingsTab('providers'); + setSettingsOpen(true); + }} + onOpenModels={() => { + setSettingsTab('models'); + setSettingsOpen(true); + }} + onOpenBenchPacks={() => { + setSettingsTab('benchPacks'); + setSettingsOpen(true); + }} + onSelectBenchPack={ + activeTab + ? () => setTabMenuOpen(true) + : undefined + } + /> + )} +
+
+ ) : ( + { + setSettingsTab('providers'); + setSettingsOpen(true); + }} + onOpenModels={() => { + setSettingsTab('models'); + setSettingsOpen(true); + }} + onOpenBenchPacks={() => { + setSettingsTab('benchPacks'); + setSettingsOpen(true); + }} + /> + ) + ) : null} +
+ {logsOpen && !logsDetached ? ( +
+
{ + document.body.dataset.logResizeActive = 'true'; + }} + /> +
+
+

Run Logs

+
+ {activeTab ? activeTab.title : 'No Active Tab'} +
+
+
+ + + {activeLogEvents.length} events + + +
+
+ {activeLogEvents.length > 0 ? ( +
+ {activeLogEvents.map((event, index) => ( +
+ {event.type} + + {' '} + {JSON.stringify(event)} + +
+ ))} +
+ ) : ( +
+ No run logs yet for the active tab. +
+ )} +
+ ) : null} +
+
+ )} + {!settingsOpen ? ( +
+
+ + {activeWorkspace?.name ?? 'No Workspace'} + + + + {activeTab?.title ?? 'No Tab'} + +
+
+ + + + {activeLogEvents.length} events + +
+
+ ) : null} +
+
+ + {providerModal ? ( + setProviderModal(null)} + onSubmit={saveProviderModal} + submitLabel={ + providerModal.mode === 'create' + ? 'Create Provider' + : 'Save Provider' + } + leadingActions={ + providerModal.mode === 'edit' ? ( + + ) : undefined + } + > +
+ option.value)} + getOptionLabel={(value) => + providerKindLabel(value as BenchLocalProviderKind) + } + onChange={(value) => + setProviderModal((current) => + current + ? { + ...current, + form: { + ...current.form, + id: + current.mode === 'create' + ? `${value as BenchLocalProviderKind}-${crypto.randomUUID()}` + : current.form.id, + kind: value as BenchLocalProviderKind, + name: + current.form.name.trim() === '' || + current.form.name === + defaultProviderName(current.form.kind) + ? defaultProviderName( + value as BenchLocalProviderKind, + ) + : current.form.name, + base_url: + current.form.base_url === + defaultProviderBaseUrl(current.form.kind) + ? defaultProviderBaseUrl( + value as BenchLocalProviderKind, + ) + : current.form.base_url, + }, + } + : current, + ) + } + /> + + setProviderModal((current) => + current + ? { ...current, form: { ...current.form, name: value } } + : current, + ) + } + /> + + setProviderModal((current) => + current + ? { ...current, form: { ...current.form, api_key: value } } + : current, + ) + } + /> + + setProviderModal((current) => + current + ? { + ...current, + form: { ...current.form, enabled: checked }, + } + : current, + ) + } + /> +
+ + setProviderModal((current) => + current + ? { ...current, form: { ...current.form, base_url: value } } + : current, + ) + } + /> +
+ ) : null} + + {modelModal + ? (() => { + const selectedProvider = draft?.providers[modelModal.form.provider]; + const canBrowseModels = + providerSupportsModelDiscovery(selectedProvider); + + return ( + setModelModal(null)} + onSubmit={saveModelModal} + submitLabel={ + modelModal.mode === 'create' ? 'Create Model' : 'Save Model' + } + leadingActions={ + modelModal.mode === 'edit' ? ( + + ) : undefined + } + > +
+ 0 ? providerIds : ['openrouter'] + } + getOptionLabel={(value) => { + const provider = draft?.providers[value]; + return provider ? provider.name : value; + }} + onChange={(value) => + setModelModal((current) => + current + ? { + ...current, + form: { ...current.form, provider: value }, + } + : current, + ) + } + /> + + setModelModal((current) => + current + ? { + ...current, + form: { ...current.form, group: value }, + } + : current, + ) + } + /> + + + setModelModal((current) => + current + ? { + ...current, + form: { ...current.form, label: value }, + } + : current, + ) + } + /> + undefined} + /> + + setModelModal((current) => + current + ? { + ...current, + form: { ...current.form, enabled: checked }, + } + : current, + ) + } + /> +
+
+ ); + })() + : null} + + {modelBrowserModal ? ( + setModelBrowserModal(null)} + onQueryChange={(query) => + setModelBrowserModal((current) => + current ? { ...current, query } : current, + ) + } + onSelect={(modelId) => + setModelBrowserModal((current) => + current ? { ...current, selectedModelId: modelId } : current, + ) + } + onSubmit={() => { + if (!modelBrowserModal.selectedModelId) { + return; + } + + const selectedEntry = modelBrowserModal.entries.find( + (entry) => entry.id === modelBrowserModal.selectedModelId, + ); + + if (!selectedEntry) { + return; + } + + setModelModal((current) => { + if (!current) { + return current; + } + + const providerName = + draft?.providers[current.form.provider]?.name ?? + current.form.provider; + const currentDefaultLabel = current.form.model.trim() + ? defaultModelLabel(providerName, current.form.model, undefined) + : ''; + const nextLabel = defaultModelLabel( + providerName, + selectedEntry.id, + selectedEntry.name, + ); + const shouldAutofillLabel = + current.form.label.trim() === '' || + current.form.label.trim() === currentDefaultLabel; + + return { + ...current, + form: { + ...current.form, + model: selectedEntry.id, + label: shouldAutofillLabel ? nextLabel : current.form.label, + }, + }; + }); + setModelBrowserModal(null); + }} + /> + ) : null} + + {tabModelsModal && draft ? ( + setTabModelsModal(null)} + onChange={(selections) => + setTabModelsModal((current) => + current ? { ...current, selections } : current, + ) + } + onSubmit={() => { + const nextSelections = normalizeTabModelSelections( + tabModelsModal.selections, + ); + + updateWorkspaceState((current) => { + const tab = current.tabs[tabModelsModal.tabId]; + + if (!tab) { + return current; + } + + tab.modelSelections = nextSelections; + tab.updatedAt = new Date().toISOString(); + return current; + }); + + setTabModelsModal(null); + }} + /> + ) : null} + + {samplingModal ? 
( + setSamplingModal(null)} + onChange={(form) => + setSamplingModal((current) => + current ? { ...current, form } : current, + ) + } + onSubmit={() => { + const parsed = parseSamplingForm(samplingModal.form); + + if (parsed.error) { + setError(parsed.error); + return; + } + + updateWorkspaceState((current) => { + const tab = current.tabs[samplingModal.tabId]; + + if (!tab) { + return current; + } + + tab.samplingOverrides = parsed.value ?? {}; + tab.updatedAt = new Date().toISOString(); + return current; + }); + + setSamplingModal(null); + }} + /> + ) : null} + + {modelAliasModal && draft ? ( + setModelAliasModal(null)} + onSubmit={() => { + updateWorkspaceState((current) => { + const tab = current.tabs[modelAliasModal.tabId]; + + if (!tab) { + return current; + } + + tab.modelSelections = upsertTabModelAlias( + tab, + draft.models, + modelAliasModal.modelId, + modelAliasModal.alias, + ); + tab.updatedAt = new Date().toISOString(); + return current; + }); + + setModelAliasModal(null); + }} + submitLabel="Save Alias" + > + + setModelAliasModal((current) => + current ? { ...current, alias: value } : current, + ) + } + /> + + ) : null} + + {aboutDialogOpen ? ( + void checkForAppUpdates()} + onInstallUpdate={() => void installDownloadedAppUpdate()} + onClose={() => setAboutDialogOpen(false)} + /> + ) : null} + + {workspaceModal ? ( + setWorkspaceModal(null)} + onSubmit={() => { + if (!workspaceModal.name.trim()) { + setError('Workspace name is required.'); + return; + } + + renameWorkspace(workspaceModal.workspaceId, workspaceModal.name); + setWorkspaceModal(null); + }} + submitLabel="Save Workspace" + > + + setWorkspaceModal((current) => + current ? { ...current, name: value } : current, + ) + } + /> + + ) : null} + + {historyModal ? 
( + setHistoryModal(null)} + onOpenRun={(runId, mode) => { + void restoreHistoryRun(historyModal.benchPackId, runId, mode); + setHistoryModal(null); + }} + onRemoveAll={() => + setConfirmDialog({ + title: `Remove all histories for ${historyModal.benchPackName}?`, + subtitle: + 'This permanently deletes all saved test runs for this Bench Pack.', + confirmLabel: 'Remove All Histories', + tone: 'danger', + onConfirm: () => { + void removeAllHistoryForBenchPack( + historyModal.benchPackId, + historyModal.benchPackName, + ); + }, + }) + } + /> + ) : null} + + {confirmDialog ? ( + setConfirmDialog(null)} + onSubmit={() => { + confirmDialog.onConfirm(); + setConfirmDialog(null); + }} + submitLabel={confirmDialog.confirmLabel} + submitTone={confirmDialog.tone === 'danger' ? 'danger' : 'primary'} + /> + ) : null} + + {settingsVerifierPreparationModal ? ( + + void cancelSettingsVerifierStart( + settingsVerifierPreparationModal.benchPackId, + ) + } + /> + ) : verifierPreparationModal ? ( + void stopTabRun(verifierPreparationModal.tabId)} + /> + ) : null} + + {workspaceContextMenu ? ( +
event.stopPropagation()} + > + + +
+ ) : null} + + {detailModal ? ( + setDetailModal(null)} + onSubmit={() => setDetailModal(null)} + submitLabel="Close" + leadingActions={ + + } + > +
+
+ Status + Validation Result +
+ + {detailModal.status} + +
+
{detailModal.rawLog}
+
+ ) : null} +
+ ); } function BenchPackPickerDialog({ - inspections, - open, - setOpen, - onSelectBenchPack, - title = "New Tab", - subtitle = "Pick a Bench Pack to open in this workspace.", - actionLabel = "Open Bench Pack", + inspections, + open, + setOpen, + onSelectBenchPack, + title = 'New Tab', + subtitle = 'Pick a Bench Pack to open in this workspace.', + actionLabel = 'Open Bench Pack', }: { - inspections: BenchPackInspection[]; - open: boolean; - setOpen: (open: boolean) => void; - onSelectBenchPack: (benchPackId: string) => void; - title?: string; - subtitle?: string; - actionLabel?: string; + inspections: BenchPackInspection[]; + open: boolean; + setOpen: (open: boolean) => void; + onSelectBenchPack: (benchPackId: string) => void; + title?: string; + subtitle?: string; + actionLabel?: string; }) { - const [query, setQuery] = useState(""); - const filteredInspections = inspections.filter((inspection) => { - const haystack = [ - inspection.manifest?.name, - inspection.id, - inspection.manifest?.description, - inspection.manifest?.author, - ] - .filter(Boolean) - .join(" ") - .toLowerCase(); - - return haystack.includes(query.trim().toLowerCase()); - }); - const [selectedId, setSelectedId] = useState(null); - const selectedInspection = - filteredInspections.find((inspection) => inspection.id === selectedId) ?? - filteredInspections[0] ?? - null; - - useEffect(() => { - if (!open) { - return; - } - - setSelectedId((current) => { - if ( - current && - filteredInspections.some((inspection) => inspection.id === current) - ) { - return current; - } - - return filteredInspections[0]?.id ?? null; - }); - }, [open, filteredInspections]); - - if (!open) { - return null; - } - - return ( -
-
-
-
-

{title}

-

- {subtitle} -

-
- -
- -
-
- - -
- {filteredInspections.map((inspection) => ( - - ))} - {filteredInspections.length === 0 ? ( -
- No Bench Packs match your search. -
- ) : null} -
-
- -
- {selectedInspection ? ( - <> -
-

Bench Pack

-

- {selectedInspection.manifest?.name ?? selectedInspection.id} -

-

- {selectedInspection.manifest?.description ?? - "No description provided."} -

-
- -
-
- Author - - {selectedInspection.manifest?.author ?? "Unknown"} - -
-
- Tests - - {selectedInspection.scenarioCount ?? 0} - -
-
- Version - - {selectedInspection.manifest?.version ?? "n/a"} - -
-
- -
- - {selectedInspection.status.replaceAll("_", " ")} - - - {selectedInspection.manifest?.capabilities.tools - ? "Supports tools" - : "No tools"} - - - {selectedInspection.manifest?.capabilities.verification - ? "Requires verifier" - : "No extra dependencies"} - -
- -
- -
- - ) : ( -
-

No Installed Bench Packs

-

- Install a Bench Pack from Settings -

-

- BenchLocal now starts with zero installed Bench Packs. Open - Settings, go to Bench Packs, and install one from the official - registry. -

-
- )} -
-
-
-
- ); + const [query, setQuery] = useState(''); + const filteredInspections = inspections.filter((inspection) => { + const haystack = [ + inspection.manifest?.name, + inspection.id, + inspection.manifest?.description, + inspection.manifest?.author, + ] + .filter(Boolean) + .join(' ') + .toLowerCase(); + + return haystack.includes(query.trim().toLowerCase()); + }); + const [selectedId, setSelectedId] = useState(null); + const selectedInspection = + filteredInspections.find((inspection) => inspection.id === selectedId) ?? + filteredInspections[0] ?? + null; + + useEffect(() => { + if (!open) { + return; + } + + setSelectedId((current) => { + if ( + current && + filteredInspections.some((inspection) => inspection.id === current) + ) { + return current; + } + + return filteredInspections[0]?.id ?? null; + }); + }, [open, filteredInspections]); + + if (!open) { + return null; + } + + return ( +
+
+
+
+

{title}

+

+ {subtitle} +

+
+ +
+ +
+
+ + +
+ {filteredInspections.map((inspection) => ( + + ))} + {filteredInspections.length === 0 ? ( +
+ No Bench Packs match your search. +
+ ) : null} +
+
+ +
+ {selectedInspection ? ( + <> +
+

Bench Pack

+

+ {selectedInspection.manifest?.name ?? selectedInspection.id} +

+

+ {selectedInspection.manifest?.description ?? + 'No description provided.'} +

+
+ +
+
+ Author + + {selectedInspection.manifest?.author ?? 'Unknown'} + +
+
+ Tests + + {selectedInspection.scenarioCount ?? 0} + +
+
+ Version + + {selectedInspection.manifest?.version ?? 'n/a'} + +
+
+ +
+ + {selectedInspection.status.replaceAll('_', ' ')} + + + {selectedInspection.manifest?.capabilities.tools + ? 'Supports tools' + : 'No tools'} + + + {selectedInspection.manifest?.capabilities.verification + ? 'Requires verifier' + : 'No extra dependencies'} + +
+ +
+ +
+ + ) : ( +
+

No Installed Bench Packs

+

+ Install a Bench Pack from Settings +

+

+ BenchLocal now starts with zero installed Bench Packs. Open + Settings, go to Bench Packs, and install one from the official + registry. +

+
+ )} +
+
+
+
+ ); } function BenchPackPickerTrigger({ - inspections, - open, - setOpen, - onCreateTab, - disabled, + inspections, + open, + setOpen, + onCreateTab, + disabled, }: { - inspections: BenchPackInspection[]; - open: boolean; - setOpen: (open: boolean) => void; - onCreateTab: (benchPackId: string) => void; - disabled?: boolean; + inspections: BenchPackInspection[]; + open: boolean; + setOpen: (open: boolean) => void; + onCreateTab: (benchPackId: string) => void; + disabled?: boolean; }) { - return ( - <> - - - - - ); + return ( + <> + + + + + ); } function BenchmarkSection({ - tabId, - inspection, - verifierStatus, - runBlocker, - selectedModels, - runSummary, - historyEntries, - liveRun, - loadedHistory, - focusedScenarioId, - onFocusScenario, - onEditModels, - onEditSampling, - onEditModelAlias, - executionMode, - isViewingHistory, - onChangeExecutionMode, - onOpenHistory, - isRunning, - isStopping, - onOpenVerification, - onRefreshVerification, - onClearHistory, - onRun, - onStop, - onOpenDetail, + tabId, + inspection, + verifierStatus, + runBlocker, + selectedModels, + runSummary, + historyEntries, + liveRun, + loadedHistory, + focusedScenarioId, + onFocusScenario, + onEditModels, + onEditSampling, + onEditModelAlias, + executionMode, + isViewingHistory, + onChangeExecutionMode, + onOpenHistory, + isRunning, + isStopping, + onOpenVerification, + onRefreshVerification, + onClearHistory, + onRun, + onStop, + onOpenDetail, }: { - tabId: string; - inspection: BenchPackInspection; - verifierStatus: BenchPackVerifierStatus | null; - runBlocker: BenchPackRunBlocker | null; - selectedModels: ResolvedTabModel[]; - runSummary: BenchPackRunSummary | null; - historyEntries: BenchPackRunHistoryEntry[]; - liveRun: LiveRunState | null; - loadedHistory: LoadedHistoryEntry | null; - focusedScenarioId: string | null; - onFocusScenario: (scenarioId: string) => void; - onEditModels: () => void; - onEditSampling: () => void; - onEditModelAlias: (model: ResolvedTabModel) => void; - 
executionMode: BenchLocalExecutionMode; - isViewingHistory: boolean; - onChangeExecutionMode: (executionMode: BenchLocalExecutionMode) => void; - onOpenHistory: () => void; - isRunning: boolean; - isStopping: boolean; - onOpenVerification: () => void; - onRefreshVerification: () => void; - onClearHistory: () => void; - onRun: () => void; - onStop: () => void; - onOpenDetail: (detail: DetailModalState) => void; + tabId: string; + inspection: BenchPackInspection; + verifierStatus: BenchPackVerifierStatus | null; + runBlocker: BenchPackRunBlocker | null; + selectedModels: ResolvedTabModel[]; + runSummary: BenchPackRunSummary | null; + historyEntries: BenchPackRunHistoryEntry[]; + liveRun: LiveRunState | null; + loadedHistory: LoadedHistoryEntry | null; + focusedScenarioId: string | null; + onFocusScenario: (scenarioId: string) => void; + onEditModels: () => void; + onEditSampling: () => void; + onEditModelAlias: (model: ResolvedTabModel) => void; + executionMode: BenchLocalExecutionMode; + isViewingHistory: boolean; + onChangeExecutionMode: (executionMode: BenchLocalExecutionMode) => void; + onOpenHistory: () => void; + isRunning: boolean; + isStopping: boolean; + onOpenVerification: () => void; + onRefreshVerification: () => void; + onClearHistory: () => void; + onRun: () => void; + onStop: () => void; + onOpenDetail: (detail: DetailModalState) => void; }) { - const [runModeOpen, setRunModeOpen] = useState(false); - const runModeRef = useRef(null); - const tableScrollViewportRef = useRef(null); - const tableScrollbarTrackRef = useRef(null); - const tableScrollbarDragRef = useRef<{ - startX: number; - startScrollLeft: number; - } | null>(null); - const [tableScrollMetrics, setTableScrollMetrics] = useState({ - clientWidth: 0, - scrollWidth: 0, - scrollLeft: 0, - }); - const scenarios = inspection.scenarios ?? []; - const currentScenario = - scenarios.find((scenario) => scenario.id === focusedScenarioId) ?? - scenarios[0] ?? 
- null; - const highlightedScenarioId = supportsLiveScenarioColumnFocus(executionMode) - ? (currentScenario?.id ?? null) - : focusedScenarioId; - const hasRetryActivity = (liveRun?.activeCellKeys.length ?? 0) > 0; - const isReplayMode = loadedHistory?.mode === "replay"; - const isResumableRun = - Boolean(runSummary) && !isRunSummaryComplete(runSummary) && !isRunning; - const replayRevealedCellCount = Object.values( - liveRun?.resultsByModel ?? {}, - ).reduce((total, results) => total + results.length, 0); - const replayTotalCellCount = Object.values( - runSummary?.resultsByModel ?? {}, - ).reduce((total, results) => total + results.length, 0); - const currentExecutionModeLabel = - EXECUTION_MODE_OPTIONS.find((option) => option.value === executionMode) - ?.label ?? "Run Mode"; - const canReplayRun = - isReplayMode && Boolean(runSummary) && isRunSummaryComplete(runSummary); - const runButtonLabel = isRunning - ? "Stop" - : canReplayRun - ? "Replay" - : isResumableRun - ? "Resume Test" - : "Run"; - const hasLiveActivity = isRunning || hasRetryActivity; - const hasCompletedReplay = - isReplayMode && - !hasLiveActivity && - replayTotalCellCount > 0 && - replayRevealedCellCount >= replayTotalCellCount; - const canStartFreshRun = - inspection.status === "ready" && selectedModels.length > 0; - const canResumeRun = Boolean(runSummary) && isResumableRun; - const isRunButtonDisabled = isRunning - ? false - : hasRetryActivity || - isStopping || - !( - canReplayRun || - canResumeRun || - (!isViewingHistory && canStartFreshRun) - ); - const hasHorizontalOverflow = - tableScrollMetrics.scrollWidth > tableScrollMetrics.clientWidth + 1; - const stickyColumnShadow = tableScrollMetrics.scrollLeft > 2; - const scrollbarThumbWidth = hasHorizontalOverflow - ? getTableScrollbarThumbWidth(tableScrollMetrics) - : 0; - const scrollbarThumbOffset = - hasHorizontalOverflow && tableScrollbarTrackRef.current - ? 
(tableScrollMetrics.scrollLeft / - Math.max( - 1, - tableScrollMetrics.scrollWidth - tableScrollMetrics.clientWidth, - )) * - Math.max( - 0, - tableScrollbarTrackRef.current.clientWidth - scrollbarThumbWidth, - ) - : 0; - - useEffect(() => { - if (!runModeOpen) { - return; - } - - const handlePointerDown = (event: MouseEvent) => { - const target = event.target as Node; - const insideRunMode = runModeRef.current?.contains(target); - - if (!insideRunMode) { - setRunModeOpen(false); - } - }; - - const handleEscape = (event: KeyboardEvent) => { - if (event.key === "Escape") { - setRunModeOpen(false); - } - }; - - window.addEventListener("mousedown", handlePointerDown); - window.addEventListener("keydown", handleEscape); - - return () => { - window.removeEventListener("mousedown", handlePointerDown); - window.removeEventListener("keydown", handleEscape); - }; - }, [runModeOpen]); - - useEffect(() => { - const viewport = tableScrollViewportRef.current; - if (!viewport) { - return; - } - - const updateMetrics = () => { - setTableScrollMetrics({ - clientWidth: viewport.clientWidth, - scrollWidth: viewport.scrollWidth, - scrollLeft: viewport.scrollLeft, - }); - }; - - const syncFromViewport = () => { - updateMetrics(); - }; - - updateMetrics(); - viewport.addEventListener("scroll", syncFromViewport); - window.addEventListener("resize", updateMetrics); - - return () => { - viewport.removeEventListener("scroll", syncFromViewport); - window.removeEventListener("resize", updateMetrics); - }; - }, [selectedModels.length, scenarios.length, runSummary, liveRun]); - - useEffect(() => { - const handleMove = (event: MouseEvent) => { - const viewport = tableScrollViewportRef.current; - const track = tableScrollbarTrackRef.current; - const drag = tableScrollbarDragRef.current; - - if (!viewport || !track || !drag) { - return; - } - - const maxScrollLeft = Math.max( - 0, - viewport.scrollWidth - viewport.clientWidth, - ); - const maxThumbOffset = Math.max( - 1, - track.clientWidth - 
getTableScrollbarThumbWidth(tableScrollMetrics), - ); - const deltaX = event.clientX - drag.startX; - const nextScrollLeft = Math.min( - maxScrollLeft, - Math.max( - 0, - drag.startScrollLeft + (deltaX / maxThumbOffset) * maxScrollLeft, - ), - ); - viewport.scrollLeft = nextScrollLeft; - }; - - const handleUp = () => { - tableScrollbarDragRef.current = null; - document.body.style.userSelect = ""; - }; - - window.addEventListener("mousemove", handleMove); - window.addEventListener("mouseup", handleUp); - - return () => { - window.removeEventListener("mousemove", handleMove); - window.removeEventListener("mouseup", handleUp); - }; - }, [tableScrollMetrics]); - - if (inspection.status !== "ready") { - return ( -
-
-
-

Bench Pack Session

-
-
- {inspection.manifest?.name ?? inspection.id} -
-
- - {inspection.scenarioCount ?? 0} scenarios - - - {selectedModels.length} models - - Idle -
-
-
-
- - - {inspection.status.replaceAll("_", " ")} - -
-
- -
-
-
- -
-

Bench Pack Unavailable

-

- {inspection.manifest?.name ?? inspection.id} cannot run yet -

-

- {inspection.error ?? - "This Bench Pack is not installed or is missing its BenchLocal runtime entry."} -

-
- - {inspection.status.replaceAll("_", " ")} - - - {selectedModels.length} selected models - -
-
-
-
- ); - } - - function renderResultCell(modelId: string, scenarioId: string) { - const liveResult = liveRun?.resultsByModel[modelId]?.find( - (candidate) => candidate.scenarioId === scenarioId, - ); - const persistedResult = isReplayMode - ? undefined - : runSummary?.resultsByModel[modelId]?.find( - (candidate) => candidate.scenarioId === scenarioId, - ); - const result = liveResult ?? persistedResult; - const isActive = - liveRun?.activeCellKeys.includes(`${modelId}::${scenarioId}`) ?? false; - - if (isActive) { - return ( -
- -
- ); - } - - if (!result) { - return ( -
- {isActive ? ( - - ) : ( - - - )} -
- ); - } - - const tone = - result.status === "pass" - ? "result-pass" - : result.status === "partial" - ? "result-partial" - : "result-fail"; - - return ( - - ); - } - - return ( -
- {loadedHistory && loadedHistory.mode !== "replay" ? ( -
-
- - Loaded test history from{" "} - {new Date(loadedHistory.startedAt).toLocaleString()}. - - -
-
- ) : null} -
-
-

Bench Pack Session

-
-
- {inspection.manifest?.name ?? inspection.id} -
-
- - {inspection.scenarioCount ?? 0} scenarios - - - {selectedModels.length} models - - - {hasLiveActivity ? "Live" : runSummary ? "Done" : "Idle"} - -
-
-
-
- - -
-
- - {runBlocker ? ( -
-
- entry.required)?.status)}`} - > - Verifier blocked - -
-
- {runBlocker.title} -
-
{runBlocker.message}
-
-
-
- - -
-
- ) : null} - -
-
-
- -
-

Scenario Detail

-

- {currentScenario - ? `${currentScenario.id} · ${currentScenario.title}` - : "No scenario selected"} -

-
-
- -
-
- -
- {(currentScenario?.detailCards?.length - ? currentScenario.detailCards - : [ - { - title: "What this tests", - content: - currentScenario?.description ?? - "Click a scenario column in the Bench Pack table below to inspect that scenario.", - }, - { - title: "Prompt Contract", - content: - currentScenario?.description ?? - "The active scenario follows the selected table column. Richer prompt or methodology detail will appear here as Bench Pack metadata expands.", - }, - { - title: "Run Notes", - content: runSummary - ? "Click a scenario column to switch context. Click any result cell to inspect the trace and summary for that model and scenario." - : "Run this Bench Pack, then use the scenario columns in the table below to switch the preview context.", - }, - ] - ).map((card) => ( - - ))} -
-
- -
-
- -
Test Results
-
-
-
- - {runModeOpen ? ( -
- {EXECUTION_MODE_OPTIONS.map((option) => ( - - ))} -
- ) : null} -
- - -
-
- -
- {selectedModels.length === 0 ? ( -
-
- -
-
-

- No models selected -

-

- Add one or more models to start running this Bench Pack. -

-
-
- - -
-
- ) : ( - <> -
- - - - - {scenarios.map((scenario) => ( - - ))} - - - - {selectedModels.map((model) => ( - - - {scenarios.map((scenario) => ( - - ))} - - ))} - -
- Model - -
- -
-
- {isViewingHistory ? ( -
- {model.displayLabel} -
- ) : ( - - )} -
- {renderResultCell(model.id, scenario.id)} -
-
- {hasHorizontalOverflow ? ( -
- - {runSummary && - !hasLiveActivity && - (!isReplayMode || hasCompletedReplay) ? ( -
- {Object.entries(runSummary.scores).map(([modelId, score]) => ( -
-
-

- {selectedModels.find((model) => model.id === modelId) - ?.displayLabel ?? modelId} -

-

- {modelId} -

-
-
- {score.totalScore} -
- {score.categories.map((category) => ( - - {category.id}: {category.score} - - ))} -
-
-
- ))} -
- ) : null} -
-
-
- ); + const [runModeOpen, setRunModeOpen] = useState(false); + const runModeRef = useRef(null); + const tableScrollViewportRef = useRef(null); + const tableScrollbarTrackRef = useRef(null); + const tableScrollbarDragRef = useRef<{ + startX: number; + startScrollLeft: number; + } | null>(null); + const [tableScrollMetrics, setTableScrollMetrics] = useState({ + clientWidth: 0, + scrollWidth: 0, + scrollLeft: 0, + }); + const scenarios = inspection.scenarios ?? []; + const currentScenario = + scenarios.find((scenario) => scenario.id === focusedScenarioId) ?? + scenarios[0] ?? + null; + const highlightedScenarioId = supportsLiveScenarioColumnFocus(executionMode) + ? (currentScenario?.id ?? null) + : focusedScenarioId; + const hasRetryActivity = (liveRun?.activeCellKeys.length ?? 0) > 0; + const isReplayMode = loadedHistory?.mode === 'replay'; + const isResumableRun = + Boolean(runSummary) && !isRunSummaryComplete(runSummary) && !isRunning; + const replayRevealedCellCount = Object.values( + liveRun?.resultsByModel ?? {}, + ).reduce((total, results) => total + results.length, 0); + const replayTotalCellCount = Object.values( + runSummary?.resultsByModel ?? {}, + ).reduce((total, results) => total + results.length, 0); + const currentExecutionModeLabel = + EXECUTION_MODE_OPTIONS.find((option) => option.value === executionMode) + ?.label ?? 'Run Mode'; + const canReplayRun = + isReplayMode && Boolean(runSummary) && isRunSummaryComplete(runSummary); + const runButtonLabel = isRunning + ? 'Stop' + : canReplayRun + ? 'Replay' + : isResumableRun + ? 'Resume Test' + : 'Run'; + const hasLiveActivity = isRunning || hasRetryActivity; + const hasCompletedReplay = + isReplayMode && + !hasLiveActivity && + replayTotalCellCount > 0 && + replayRevealedCellCount >= replayTotalCellCount; + const canStartFreshRun = + inspection.status === 'ready' && selectedModels.length > 0; + const canResumeRun = Boolean(runSummary) && isResumableRun; + const isRunButtonDisabled = isRunning + ? 
false + : hasRetryActivity || + isStopping || + !( + canReplayRun || + canResumeRun || + (!isViewingHistory && canStartFreshRun) + ); + const hasHorizontalOverflow = + tableScrollMetrics.scrollWidth > tableScrollMetrics.clientWidth + 1; + const stickyColumnShadow = tableScrollMetrics.scrollLeft > 2; + const scrollbarThumbWidth = hasHorizontalOverflow + ? getTableScrollbarThumbWidth(tableScrollMetrics) + : 0; + const scrollbarThumbOffset = + hasHorizontalOverflow && tableScrollbarTrackRef.current + ? (tableScrollMetrics.scrollLeft / + Math.max( + 1, + tableScrollMetrics.scrollWidth - tableScrollMetrics.clientWidth, + )) * + Math.max( + 0, + tableScrollbarTrackRef.current.clientWidth - scrollbarThumbWidth, + ) + : 0; + + useEffect(() => { + if (!runModeOpen) { + return; + } + + const handlePointerDown = (event: MouseEvent) => { + const target = event.target as Node; + const insideRunMode = runModeRef.current?.contains(target); + + if (!insideRunMode) { + setRunModeOpen(false); + } + }; + + const handleEscape = (event: KeyboardEvent) => { + if (event.key === 'Escape') { + setRunModeOpen(false); + } + }; + + window.addEventListener('mousedown', handlePointerDown); + window.addEventListener('keydown', handleEscape); + + return () => { + window.removeEventListener('mousedown', handlePointerDown); + window.removeEventListener('keydown', handleEscape); + }; + }, [runModeOpen]); + + useEffect(() => { + const viewport = tableScrollViewportRef.current; + if (!viewport) { + return; + } + + const updateMetrics = () => { + setTableScrollMetrics({ + clientWidth: viewport.clientWidth, + scrollWidth: viewport.scrollWidth, + scrollLeft: viewport.scrollLeft, + }); + }; + + const syncFromViewport = () => { + updateMetrics(); + }; + + updateMetrics(); + viewport.addEventListener('scroll', syncFromViewport); + window.addEventListener('resize', updateMetrics); + + return () => { + viewport.removeEventListener('scroll', syncFromViewport); + window.removeEventListener('resize', 
updateMetrics); + }; + }, [selectedModels.length, scenarios.length, runSummary, liveRun]); + + useEffect(() => { + const handleMove = (event: MouseEvent) => { + const viewport = tableScrollViewportRef.current; + const track = tableScrollbarTrackRef.current; + const drag = tableScrollbarDragRef.current; + + if (!viewport || !track || !drag) { + return; + } + + const maxScrollLeft = Math.max( + 0, + viewport.scrollWidth - viewport.clientWidth, + ); + const maxThumbOffset = Math.max( + 1, + track.clientWidth - getTableScrollbarThumbWidth(tableScrollMetrics), + ); + const deltaX = event.clientX - drag.startX; + const nextScrollLeft = Math.min( + maxScrollLeft, + Math.max( + 0, + drag.startScrollLeft + (deltaX / maxThumbOffset) * maxScrollLeft, + ), + ); + viewport.scrollLeft = nextScrollLeft; + }; + + const handleUp = () => { + tableScrollbarDragRef.current = null; + document.body.style.userSelect = ''; + }; + + window.addEventListener('mousemove', handleMove); + window.addEventListener('mouseup', handleUp); + + return () => { + window.removeEventListener('mousemove', handleMove); + window.removeEventListener('mouseup', handleUp); + }; + }, [tableScrollMetrics]); + + if (inspection.status !== 'ready') { + return ( +
+
+
+

Bench Pack Session

+
+
+ {inspection.manifest?.name ?? inspection.id} +
+
+ + {inspection.scenarioCount ?? 0} scenarios + + + {selectedModels.length} models + + Idle +
+
+
+
+ + + {inspection.status.replaceAll('_', ' ')} + +
+
+ +
+
+
+ +
+

Bench Pack Unavailable

+

+ {inspection.manifest?.name ?? inspection.id} cannot run yet +

+

+ {inspection.error ?? + 'This Bench Pack is not installed or is missing its BenchLocal runtime entry.'} +

+
+ + {inspection.status.replaceAll('_', ' ')} + + + {selectedModels.length} selected models + +
+
+
+
+ ); + } + + function renderResultCell(modelId: string, scenarioId: string) { + const liveResult = liveRun?.resultsByModel[modelId]?.find( + (candidate) => candidate.scenarioId === scenarioId, + ); + const persistedResult = isReplayMode + ? undefined + : runSummary?.resultsByModel[modelId]?.find( + (candidate) => candidate.scenarioId === scenarioId, + ); + const result = liveResult ?? persistedResult; + const isActive = + liveRun?.activeCellKeys.includes(`${modelId}::${scenarioId}`) ?? false; + + if (isActive) { + return ( +
+ +
+ ); + } + + if (!result) { + return ( +
+ {isActive ? ( + + ) : ( + - + )} +
+ ); + } + + const tone = + result.status === 'pass' + ? 'result-pass' + : result.status === 'partial' + ? 'result-partial' + : 'result-fail'; + + return ( + + ); + } + + return ( +
+ {loadedHistory && loadedHistory.mode !== 'replay' ? ( +
+
+ + Loaded test history from{' '} + {new Date(loadedHistory.startedAt).toLocaleString()}. + + +
+
+ ) : null} +
+
+

Bench Pack Session

+
+
+ {inspection.manifest?.name ?? inspection.id} +
+
+ + {inspection.scenarioCount ?? 0} scenarios + + + {selectedModels.length} models + + + {hasLiveActivity ? 'Live' : runSummary ? 'Done' : 'Idle'} + +
+
+
+
+ + +
+
+ + {runBlocker ? ( +
+
+ entry.required)?.status)}`} + > + Verifier blocked + +
+
+ {runBlocker.title} +
+
{runBlocker.message}
+
+
+
+ + +
+
+ ) : null} + +
+
+
+ +
+

Scenario Detail

+

+ {currentScenario + ? `${currentScenario.id} · ${currentScenario.title}` + : 'No scenario selected'} +

+
+
+ +
+
+ +
+ {(currentScenario?.detailCards?.length + ? currentScenario.detailCards + : [ + { + title: 'What this tests', + content: + currentScenario?.description ?? + 'Click a scenario column in the Bench Pack table below to inspect that scenario.', + }, + { + title: 'Prompt Contract', + content: + currentScenario?.description ?? + 'The active scenario follows the selected table column. Richer prompt or methodology detail will appear here as Bench Pack metadata expands.', + }, + { + title: 'Run Notes', + content: runSummary + ? 'Click a scenario column to switch context. Click any result cell to inspect the trace and summary for that model and scenario.' + : 'Run this Bench Pack, then use the scenario columns in the table below to switch the preview context.', + }, + ] + ).map((card) => ( + + ))} +
+
+ +
+
+ +
Test Results
+
+
+
+ + {runModeOpen ? ( +
+ {EXECUTION_MODE_OPTIONS.map((option) => ( + + ))} +
+ ) : null} +
+ + +
+
+ +
+ {selectedModels.length === 0 ? ( +
+
+ +
+
+

+ No models selected +

+

+ Add one or more models to start running this Bench Pack. +

+
+
+ + +
+
+ ) : ( + <> +
+ + + + + {scenarios.map((scenario) => ( + + ))} + + + + {selectedModels.map((model) => ( + + + {scenarios.map((scenario) => ( + + ))} + + ))} + +
+ Model + +
+ +
+
+ {isViewingHistory ? ( +
+ {model.displayLabel} +
+ ) : ( + + )} +
+ {renderResultCell(model.id, scenario.id)} +
+
+ {hasHorizontalOverflow ? ( +
+ + {runSummary && + !hasLiveActivity && + (!isReplayMode || hasCompletedReplay) ? ( +
+ {Object.entries(runSummary.scores).map(([modelId, score]) => ( +
+
+

+ {selectedModels.find((model) => model.id === modelId) + ?.displayLabel ?? modelId} +

+

+ {modelId} +

+
+
+ {score.totalScore} +
+ {score.categories.map((category) => ( + + {category.id}: {category.score} + + ))} +
+
+
+ ))} +
+ ) : null} +
+
+
+ ); } function TabModelsModal({ - providers, - models, - selections, - onClose, - onChange, - onSubmit, + providers, + models, + selections, + onClose, + onChange, + onSubmit, }: { - providers: Record; - models: BenchLocalModelConfig[]; - selections: BenchLocalWorkspaceTabModelSelection[]; - onClose: () => void; - onChange: (selections: BenchLocalWorkspaceTabModelSelection[]) => void; - onSubmit: () => void; + providers: Record; + models: BenchLocalModelConfig[]; + selections: BenchLocalWorkspaceTabModelSelection[]; + onClose: () => void; + onChange: (selections: BenchLocalWorkspaceTabModelSelection[]) => void; + onSubmit: () => void; }) { - const [providerFilter, setProviderFilter] = useState("all"); - const [groupFilter, setGroupFilter] = useState("all"); - const [searchQuery, setSearchQuery] = useState(""); - const enabledModels = models.filter((model) => model.enabled); - const editableSelections = normalizeEditableTabModelSelections(selections); - const selectionMap = new Map( - editableSelections.map((selection) => [selection.modelId, selection]), - ); - const availableIds = new Set(enabledModels.map((model) => model.id)); - const orderedSelectedIds = editableSelections - .map((selection) => selection.modelId) - .filter((modelId) => availableIds.has(modelId)); - const selectedIdSet = new Set(orderedSelectedIds); - const providerOptions = [ - { value: "all", label: "All Providers" }, - ...Array.from(new Set(enabledModels.map((model) => model.provider))) - .sort((left, right) => - (providers[left]?.name ?? left).localeCompare( - providers[right]?.name ?? right, - ), - ) - .map((providerId) => ({ - value: providerId, - label: providers[providerId]?.name ?? 
providerId, - })), - ]; - const groupOptions = [ - { value: "all", label: "All Groups" }, - ...Array.from( - new Set( - enabledModels.map((model) => model.group.trim() || "__ungrouped__"), - ), - ) - .sort((left, right) => left.localeCompare(right)) - .map((group) => ({ - value: group, - label: group === "__ungrouped__" ? "Ungrouped" : group, - })), - ]; - const filteredAvailableModels = enabledModels.filter((model) => { - const normalizedGroup = model.group.trim() || "__ungrouped__"; - const normalizedQuery = searchQuery.trim().toLowerCase(); - const haystack = [ - model.label, - model.id, - model.group, - providers[model.provider]?.name ?? model.provider, - ] - .filter(Boolean) - .join(" ") - .toLowerCase(); - - return ( - (providerFilter === "all" || model.provider === providerFilter) && - (groupFilter === "all" || normalizedGroup === groupFilter) && - (!normalizedQuery || haystack.includes(normalizedQuery)) - ); - }); - const selectedModels = orderedSelectedIds - .map((modelId) => enabledModels.find((model) => model.id === modelId)) - .filter((model): model is BenchLocalModelConfig => Boolean(model)); - - const toggleModel = (modelId: string, enabled: boolean) => { - if (enabled) { - const existing = selectionMap.get(modelId); - onChange([...editableSelections, { modelId, alias: existing?.alias }]); - return; - } - - onChange( - editableSelections.filter((selection) => selection.modelId !== modelId), - ); - }; - - const updateAlias = (modelId: string, alias: string) => { - const next = editableSelections.map((selection) => - selection.modelId === modelId - ? 
{ ...selection, alias: alias || undefined } - : selection, - ); - onChange(next); - }; - - const moveSelection = (draggedId: string, targetId: string) => { - if (draggedId === targetId) { - return; - } - - const next = [...editableSelections]; - const fromIndex = next.findIndex( - (selection) => selection.modelId === draggedId, - ); - const toIndex = next.findIndex( - (selection) => selection.modelId === targetId, - ); - - if (fromIndex < 0 || toIndex < 0) { - return; - } - - const [moved] = next.splice(fromIndex, 1); - next.splice(toIndex, 0, moved); - onChange(next); - }; - - useEffect(() => { - if ( - providerFilter !== "all" && - !providerOptions.some((option) => option.value === providerFilter) - ) { - setProviderFilter("all"); - } - }, [providerFilter, providerOptions]); - - useEffect(() => { - if ( - groupFilter !== "all" && - !groupOptions.some((option) => option.value === groupFilter) - ) { - setGroupFilter("all"); - } - }, [groupFilter, groupOptions]); - - return ( - -
-
-
-

Available Models

- - {filteredAvailableModels.length} - -
-
- - - -
-
- {filteredAvailableModels.length === 0 ? ( -
-

- No models match the current filters. -

-
- ) : ( - filteredAvailableModels.map((model) => { - const isSelected = selectedIdSet.has(model.id); - - return ( -
- - -
- - {model.group.trim() || "Ungrouped"} - -
-
- ); - }) - )} -
-
- -
-
-

Selected Models

- - {selectedModels.length} - -
-
- {selectedModels.length === 0 ? ( -
-

- Select models from the left to add them to this tab. -

-
- ) : ( - selectedModels.map((model) => { - const selection = selectionMap.get(model.id); - - return ( -
{ - event.dataTransfer.setData("text/plain", model.id); - event.dataTransfer.effectAllowed = "move"; - }} - onDragOver={(event) => { - event.preventDefault(); - event.dataTransfer.dropEffect = "move"; - }} - onDrop={(event) => { - event.preventDefault(); - moveSelection( - event.dataTransfer.getData("text/plain"), - model.id, - ); - }} - > - - -
- - updateAlias(model.id, event.target.value) - } - className="config-input tab-model-alias-input" - /> -
- -
-
-
- ); - }) - )} -
-
-
-
- ); + const [providerFilter, setProviderFilter] = useState('all'); + const [groupFilter, setGroupFilter] = useState('all'); + const [searchQuery, setSearchQuery] = useState(''); + const enabledModels = models.filter((model) => model.enabled); + const editableSelections = normalizeEditableTabModelSelections(selections); + const selectionMap = new Map( + editableSelections.map((selection) => [selection.modelId, selection]), + ); + const availableIds = new Set(enabledModels.map((model) => model.id)); + const orderedSelectedIds = editableSelections + .map((selection) => selection.modelId) + .filter((modelId) => availableIds.has(modelId)); + const selectedIdSet = new Set(orderedSelectedIds); + const providerOptions = [ + { value: 'all', label: 'All Providers' }, + ...Array.from(new Set(enabledModels.map((model) => model.provider))) + .sort((left, right) => + (providers[left]?.name ?? left).localeCompare( + providers[right]?.name ?? right, + ), + ) + .map((providerId) => ({ + value: providerId, + label: providers[providerId]?.name ?? providerId, + })), + ]; + const groupOptions = [ + { value: 'all', label: 'All Groups' }, + ...Array.from( + new Set( + enabledModels.map((model) => model.group.trim() || '__ungrouped__'), + ), + ) + .sort((left, right) => left.localeCompare(right)) + .map((group) => ({ + value: group, + label: group === '__ungrouped__' ? 'Ungrouped' : group, + })), + ]; + const filteredAvailableModels = enabledModels.filter((model) => { + const normalizedGroup = model.group.trim() || '__ungrouped__'; + const normalizedQuery = searchQuery.trim().toLowerCase(); + const haystack = [ + model.label, + model.id, + model.group, + providers[model.provider]?.name ?? 
model.provider, + ] + .filter(Boolean) + .join(' ') + .toLowerCase(); + + return ( + (providerFilter === 'all' || model.provider === providerFilter) && + (groupFilter === 'all' || normalizedGroup === groupFilter) && + (!normalizedQuery || haystack.includes(normalizedQuery)) + ); + }); + const selectedModels = orderedSelectedIds + .map((modelId) => enabledModels.find((model) => model.id === modelId)) + .filter((model): model is BenchLocalModelConfig => Boolean(model)); + + const toggleModel = (modelId: string, enabled: boolean) => { + if (enabled) { + const existing = selectionMap.get(modelId); + onChange([...editableSelections, { modelId, alias: existing?.alias }]); + return; + } + + onChange( + editableSelections.filter((selection) => selection.modelId !== modelId), + ); + }; + + const updateAlias = (modelId: string, alias: string) => { + const next = editableSelections.map((selection) => + selection.modelId === modelId + ? { ...selection, alias: alias || undefined } + : selection, + ); + onChange(next); + }; + + const moveSelection = (draggedId: string, targetId: string) => { + if (draggedId === targetId) { + return; + } + + const next = [...editableSelections]; + const fromIndex = next.findIndex( + (selection) => selection.modelId === draggedId, + ); + const toIndex = next.findIndex( + (selection) => selection.modelId === targetId, + ); + + if (fromIndex < 0 || toIndex < 0) { + return; + } + + const [moved] = next.splice(fromIndex, 1); + next.splice(toIndex, 0, moved); + onChange(next); + }; + + useEffect(() => { + if ( + providerFilter !== 'all' && + !providerOptions.some((option) => option.value === providerFilter) + ) { + setProviderFilter('all'); + } + }, [providerFilter, providerOptions]); + + useEffect(() => { + if ( + groupFilter !== 'all' && + !groupOptions.some((option) => option.value === groupFilter) + ) { + setGroupFilter('all'); + } + }, [groupFilter, groupOptions]); + + return ( + +
+
+
+

Available Models

+ + {filteredAvailableModels.length} + +
+
+ + + +
+
+ {filteredAvailableModels.length === 0 ? ( +
+

+ No models match the current filters. +

+
+ ) : ( + filteredAvailableModels.map((model) => { + const isSelected = selectedIdSet.has(model.id); + + return ( +
+ + +
+ + {model.group.trim() || 'Ungrouped'} + +
+
+ ); + }) + )} +
+
+ +
+
+

Selected Models

+ + {selectedModels.length} + +
+
+ {selectedModels.length === 0 ? ( +
+

+ Select models from the left to add them to this tab. +

+
+ ) : ( + selectedModels.map((model) => { + const selection = selectionMap.get(model.id); + + return ( +
{ + event.dataTransfer.setData('text/plain', model.id); + event.dataTransfer.effectAllowed = 'move'; + }} + onDragOver={(event) => { + event.preventDefault(); + event.dataTransfer.dropEffect = 'move'; + }} + onDrop={(event) => { + event.preventDefault(); + moveSelection( + event.dataTransfer.getData('text/plain'), + model.id, + ); + }} + > + + +
+ + updateAlias(model.id, event.target.value) + } + className="config-input tab-model-alias-input" + /> +
+ +
+
+
+ ); + }) + )} +
+
+
+
+ ); } function ModelBrowserModal({ - state, - onClose, - onQueryChange, - onSelect, - onSubmit, + state, + onClose, + onQueryChange, + onSelect, + onSubmit, }: { - state: ModelBrowserModalState; - onClose: () => void; - onQueryChange: (query: string) => void; - onSelect: (modelId: string) => void; - onSubmit: () => void; + state: ModelBrowserModalState; + onClose: () => void; + onQueryChange: (query: string) => void; + onSelect: (modelId: string) => void; + onSubmit: () => void; }) { - const normalizedQuery = state.query.trim().toLowerCase(); - const filteredEntries = state.entries.filter((entry) => { - const haystack = [ - entry.id, - entry.name, - entry.ownedBy, - entry.modality, - entry.pricing, - ] - .filter(Boolean) - .join(" ") - .toLowerCase(); - - return !normalizedQuery || haystack.includes(normalizedQuery); - }); - - return ( - - - -
- {state.loading ? ( -
- -

- Loading models from {state.providerName}... -

-
- ) : state.error ? ( -
-

{state.error}

-
- ) : filteredEntries.length === 0 ? ( -
-

No models match the current search.

-
- ) : ( - filteredEntries.map((entry) => ( - - )) - )} -
-
- ); + const normalizedQuery = state.query.trim().toLowerCase(); + const filteredEntries = state.entries.filter((entry) => { + const haystack = [ + entry.id, + entry.name, + entry.ownedBy, + entry.modality, + entry.pricing, + ] + .filter(Boolean) + .join(' ') + .toLowerCase(); + + return !normalizedQuery || haystack.includes(normalizedQuery); + }); + + return ( + + + +
+ {state.loading ? ( +
+ +

+ Loading models from {state.providerName}... +

+
+ ) : state.error ? ( +
+

{state.error}

+
+ ) : filteredEntries.length === 0 ? ( +
+

No models match the current search.

+
+ ) : ( + filteredEntries.map((entry) => ( + + )) + )} +
+
+ ); } function SamplingModal({ - benchPackName, - defaults, - form, - onChange, - onClose, - onSubmit, + benchPackName, + defaults, + form, + onChange, + onClose, + onSubmit, }: { - benchPackName: string; - defaults: GenerationRequest; - form: SamplingFormState; - onChange: (form: SamplingFormState) => void; - onClose: () => void; - onSubmit: () => void; + benchPackName: string; + defaults: GenerationRequest; + form: SamplingFormState; + onChange: (form: SamplingFormState) => void; + onClose: () => void; + onSubmit: () => void; }) { - const hasEffectiveDefaults = Object.values(defaults).some( - (value) => value !== undefined, - ); - - return ( - onChange(createSamplingForm())} - className="ghost-button" - > - - Reset Overrides - - } - > - {hasEffectiveDefaults ? ( -
-

- Effective defaults:{" "} - {SAMPLING_FIELDS.map((field) => { - const value = defaults[field.key as keyof GenerationRequest]; - return value === undefined ? null : ( - - {field.label}: {value} - - ); - }) - .filter(Boolean) - .reduce((items, item, index) => { - if (index > 0) { - items.push( · ); - } - items.push(item); - return items; - }, [])} -

-
- ) : ( -
-

- This Bench Pack does not define recommended defaults yet. Blank - fields mean BenchLocal will use its platform defaults and omit any - values that are still unset. -

-
- )} -
- {SAMPLING_FIELDS.map((field) => ( - - onChange({ - ...form, - [field.key]: value, - }) - } - /> - ))} -
-
- ); + const hasEffectiveDefaults = Object.values(defaults).some( + (value) => value !== undefined, + ); + + return ( + onChange(createSamplingForm())} + className="ghost-button" + > + + Reset Overrides + + } + > + {hasEffectiveDefaults ? ( +
+

+ Effective defaults:{' '} + {SAMPLING_FIELDS.map((field) => { + const value = defaults[field.key as keyof GenerationRequest]; + return value === undefined ? null : ( + + {field.label}: {value} + + ); + }) + .filter(Boolean) + .reduce((items, item, index) => { + if (index > 0) { + items.push( · ); + } + items.push(item); + return items; + }, [])} +

+
+ ) : ( +
+

+ This Bench Pack does not define recommended defaults yet. Blank + fields mean BenchLocal will use its platform defaults and omit any + values that are still unset. +

+
+ )} +
+ {SAMPLING_FIELDS.map((field) => ( + + onChange({ + ...form, + [field.key]: value, + }) + } + /> + ))} +
+
+ ); } function EmptyWorkspace({ - providerCount, - modelCount, - installedBenchPackCount, - onOpenProviders, - onOpenModels, - onOpenBenchPacks, - onSelectBenchPack, + providerCount, + modelCount, + installedBenchPackCount, + onOpenProviders, + onOpenModels, + onOpenBenchPacks, + onSelectBenchPack, }: { - providerCount: number; - modelCount: number; - installedBenchPackCount: number; - onOpenProviders: () => void; - onOpenModels: () => void; - onOpenBenchPacks: () => void; - onSelectBenchPack?: () => void; + providerCount: number; + modelCount: number; + installedBenchPackCount: number; + onOpenProviders: () => void; + onOpenModels: () => void; + onOpenBenchPacks: () => void; + onSelectBenchPack?: () => void; }) { - const hasProviders = providerCount > 0; - const hasModels = modelCount > 0; - const hasInstalledBenchPacks = installedBenchPackCount > 0; - const checklist = [ - { - key: "providers", - complete: hasProviders, - title: "Set up providers", - detail: hasProviders - ? `${providerCount} configured` - : "Add at least one provider endpoint.", - actionLabel: "Providers", - onAction: onOpenProviders, - }, - { - key: "models", - complete: hasModels, - title: "Add models", - detail: hasModels - ? `${modelCount} configured` - : "Create shared models that point to your providers.", - actionLabel: "Models", - onAction: onOpenModels, - }, - { - key: "benchpacks", - complete: hasInstalledBenchPacks, - title: "Install Bench Packs", - detail: hasInstalledBenchPacks - ? `${installedBenchPackCount} installed` - : "Install at least one Bench Pack from the official registry.", - actionLabel: "Bench Packs", - onAction: onOpenBenchPacks, - }, - ]; - - return ( -
-
-
- -
-

No Active Bench Pack

-

- Select a Bench Pack to open its workspace -

-

- Complete the setup checklist below. BenchLocal keeps providers and - models shared across the app, while each Bench Pack owns its own - scenarios, sampling defaults, and scoring. -

- -
- {checklist.map((item) => ( -
- -
-
{item.title}
-
{item.detail}
-
- {item.complete ? ( - Done - ) : ( - - )} -
- ))} -
- - {hasInstalledBenchPacks && onSelectBenchPack ? ( - - ) : null} -
-
- ); + const hasProviders = providerCount > 0; + const hasModels = modelCount > 0; + const hasInstalledBenchPacks = installedBenchPackCount > 0; + const checklist = [ + { + key: 'providers', + complete: hasProviders, + title: 'Set up providers', + detail: hasProviders + ? `${providerCount} configured` + : 'Add at least one provider endpoint.', + actionLabel: 'Providers', + onAction: onOpenProviders, + }, + { + key: 'models', + complete: hasModels, + title: 'Add models', + detail: hasModels + ? `${modelCount} configured` + : 'Create shared models that point to your providers.', + actionLabel: 'Models', + onAction: onOpenModels, + }, + { + key: 'benchpacks', + complete: hasInstalledBenchPacks, + title: 'Install Bench Packs', + detail: hasInstalledBenchPacks + ? `${installedBenchPackCount} installed` + : 'Install at least one Bench Pack from the official registry.', + actionLabel: 'Bench Packs', + onAction: onOpenBenchPacks, + }, + ]; + + return ( +
+
+
+ +
+

No Active Bench Pack

+

+ Select a Bench Pack to open its workspace +

+

+ Complete the setup checklist below. BenchLocal keeps providers and + models shared across the app, while each Bench Pack owns its own + scenarios, sampling defaults, and scoring. +

+ +
+ {checklist.map((item) => ( +
+ +
+
{item.title}
+
{item.detail}
+
+ {item.complete ? ( + Done + ) : ( + + )} +
+ ))} +
+ + {hasInstalledBenchPacks && onSelectBenchPack ? ( + + ) : null} +
+
+ ); } function DetachedLogsWindow() { - const [state, setState] = useState({ - workspaceName: "No Workspace", - tabTitle: "No Active Tab", - eventCount: 0, - events: [], - }); - const [autoScroll, setAutoScroll] = useState(true); - const [systemPrefersDark, setSystemPrefersDark] = useState( - typeof window !== "undefined" - ? window.matchMedia("(prefers-color-scheme: dark)").matches - : false, - ); - const [themeDefinition, setThemeDefinition] = - useState(null); - const logContainerRef = useRef(null); - const appliedThemeKeysRef = useRef([]); - - useEffect(() => { - // onDetachedState removed in web version - return () => {}; - }, []); - - useEffect(() => { - if (typeof window === "undefined") { - return; - } - - const media = window.matchMedia("(prefers-color-scheme: dark)"); - const handleChange = () => { - setSystemPrefersDark(media.matches); - }; - - handleChange(); - media.addEventListener("change", handleChange); - - return () => { - media.removeEventListener("change", handleChange); - }; - }, []); - - useEffect(() => { - let cancelled = false; - - const loadTheme = async () => { - const configResult = await bl.config.load(); - const requestedThemeId = - configResult.config.ui.theme === "system" - ? systemPrefersDark - ? 
"dark" - : "light" - : configResult.config.ui.theme; - const nextTheme = await bl.themes.load(requestedThemeId); - - if (!cancelled) { - setThemeDefinition(nextTheme); - } - }; - - void loadTheme(); - - return () => { - cancelled = true; - }; - }, [systemPrefersDark]); - - useEffect(() => { - if (!themeDefinition || typeof document === "undefined") { - return; - } - - const root = document.documentElement; - - for (const key of appliedThemeKeysRef.current) { - root.style.removeProperty(key); - } - - for (const [key, value] of Object.entries(themeDefinition.variables)) { - root.style.setProperty(key, value); - } - - appliedThemeKeysRef.current = Object.keys(themeDefinition.variables); - root.style.setProperty("color-scheme", themeDefinition.colorScheme); - root.dataset.theme = themeDefinition.id; - }, [themeDefinition]); - - useEffect(() => { - if (!autoScroll || !logContainerRef.current) { - return; - } - - logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight; - }, [state, autoScroll]); - - useEffect(() => { - document.title = `Run Logs - ${state.workspaceName} - ${state.tabTitle}`; - }, [state.workspaceName, state.tabTitle]); - - return ( -
-
-
-

- {state.workspaceName} · {state.tabTitle} -

-
-
- - - {state.eventCount} events - - -
-
- - {state.events.length > 0 ? ( -
- {state.events.map((event, index) => ( -
- {event.type} - {JSON.stringify(event)} -
- ))} -
- ) : ( -
- No run logs are being streamed yet. -
- )} -
- ); + const [state, setState] = useState({ + workspaceName: 'No Workspace', + tabTitle: 'No Active Tab', + eventCount: 0, + events: [], + }); + const [autoScroll, setAutoScroll] = useState(true); + const [systemPrefersDark, setSystemPrefersDark] = useState( + typeof window !== 'undefined' + ? window.matchMedia('(prefers-color-scheme: dark)').matches + : false, + ); + const [themeDefinition, setThemeDefinition] = + useState(null); + const logContainerRef = useRef(null); + const appliedThemeKeysRef = useRef([]); + + useEffect(() => { + // onDetachedState removed in web version + return () => {}; + }, []); + + useEffect(() => { + if (typeof window === 'undefined') { + return; + } + + const media = window.matchMedia('(prefers-color-scheme: dark)'); + const handleChange = () => { + setSystemPrefersDark(media.matches); + }; + + handleChange(); + media.addEventListener('change', handleChange); + + return () => { + media.removeEventListener('change', handleChange); + }; + }, []); + + useEffect(() => { + let cancelled = false; + + const loadTheme = async () => { + const configResult = await bl.config.load(); + const requestedThemeId = + configResult.config.ui.theme === 'system' + ? systemPrefersDark + ? 
'dark' + : 'light' + : configResult.config.ui.theme; + const nextTheme = await bl.themes.load(requestedThemeId); + + if (!cancelled) { + setThemeDefinition(nextTheme); + } + }; + + void loadTheme(); + + return () => { + cancelled = true; + }; + }, [systemPrefersDark]); + + useEffect(() => { + if (!themeDefinition || typeof document === 'undefined') { + return; + } + + const root = document.documentElement; + + for (const key of appliedThemeKeysRef.current) { + root.style.removeProperty(key); + } + + for (const [key, value] of Object.entries(themeDefinition.variables)) { + root.style.setProperty(key, value); + } + + appliedThemeKeysRef.current = Object.keys(themeDefinition.variables); + root.style.setProperty('color-scheme', themeDefinition.colorScheme); + root.dataset.theme = themeDefinition.id; + }, [themeDefinition]); + + useEffect(() => { + if (!autoScroll || !logContainerRef.current) { + return; + } + + logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight; + }, [state, autoScroll]); + + useEffect(() => { + document.title = `Run Logs - ${state.workspaceName} - ${state.tabTitle}`; + }, [state.workspaceName, state.tabTitle]); + + return ( +
+
+
+

+ {state.workspaceName} · {state.tabTitle} +

+
+
+ + + {state.eventCount} events + + +
+
+ + {state.events.length > 0 ? ( +
+ {state.events.map((event, index) => ( +
+ {event.type} + {JSON.stringify(event)} +
+ ))} +
+ ) : ( +
+ No run logs are being streamed yet. +
+ )} +
+ ); } function SettingsScene({ - settingsTab, - setSettingsTab, - settingsNotice, - error, - draft, - loadState, - hasUnsavedChanges, - isBusy, - providerIds, - benchPackInspections, - registryEntries, - registryWarning, - benchPackMutations, - verifierStatuses, - onBack, - onDismissNotice, - onDismissError, - onSaveAdvanced, - onResetAdvanced, - onCreateProvider, - onEditProvider, - onCreateModel, - onEditModel, - onStartVerifier, - onStopVerifier, - onDeleteVerifierImage, - onRefreshRegistry, - onInstallBenchPack, - onInstallBenchPackFromUrl, - onUpdateBenchPack, - onUninstallBenchPack, - updateDraft, - onUpdateVerifier, + settingsTab, + setSettingsTab, + settingsNotice, + error, + draft, + loadState, + hasUnsavedChanges, + isBusy, + providerIds, + benchPackInspections, + registryEntries, + registryWarning, + benchPackMutations, + verifierStatuses, + onBack, + onDismissNotice, + onDismissError, + onSaveAdvanced, + onResetAdvanced, + onCreateProvider, + onEditProvider, + onCreateModel, + onEditModel, + onStartVerifier, + onStopVerifier, + onDeleteVerifierImage, + onRefreshRegistry, + onInstallBenchPack, + onInstallBenchPackFromUrl, + onUpdateBenchPack, + onUninstallBenchPack, + updateDraft, + onUpdateVerifier, }: { - settingsTab: SettingsTab; - setSettingsTab: (tab: SettingsTab) => void; - settingsNotice: string | null; - error: string | null; - draft: BenchLocalConfig; - loadState: LoadState | null; - hasUnsavedChanges: boolean; - isBusy: boolean; - providerIds: string[]; - benchPackInspections: BenchPackInspection[]; - registryEntries: BenchPackRegistryEntry[]; - registryWarning: string | null; - benchPackMutations: Record; - verifierStatuses: Record; - onBack: () => void; - onDismissNotice: () => void; - onDismissError: () => void; - onSaveAdvanced: () => void; - onResetAdvanced: () => void; - onCreateProvider: () => void; - onEditProvider: (providerId: string) => void; - onCreateModel: () => void; - onEditModel: (index: number) => void; - onStartVerifier: ( - 
benchPackId: string, - benchPackName: string, - verifierId: string, - ) => Promise; - onStopVerifier: (benchPackId: string) => Promise; - onDeleteVerifierImage: ( - benchPackId: string, - benchPackName: string, - verifierId: string, - ) => void; - onRefreshRegistry: () => void; - onInstallBenchPack: (benchPackId: string) => void; - onInstallBenchPackFromUrl: (url: string) => Promise; - onUpdateBenchPack: (benchPackId: string) => void; - onUninstallBenchPack: (benchPackId: string) => void; - updateDraft: ( - updater: (current: BenchLocalConfig) => BenchLocalConfig, - ) => void; - onUpdateVerifier: ( - benchPackId: string, - verifierId: string, - updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig, - ) => void; + settingsTab: SettingsTab; + setSettingsTab: (tab: SettingsTab) => void; + settingsNotice: string | null; + error: string | null; + draft: BenchLocalConfig; + loadState: LoadState | null; + hasUnsavedChanges: boolean; + isBusy: boolean; + providerIds: string[]; + benchPackInspections: BenchPackInspection[]; + registryEntries: BenchPackRegistryEntry[]; + registryWarning: string | null; + benchPackMutations: Record; + verifierStatuses: Record; + onBack: () => void; + onDismissNotice: () => void; + onDismissError: () => void; + onSaveAdvanced: () => void; + onResetAdvanced: () => void; + onCreateProvider: () => void; + onEditProvider: (providerId: string) => void; + onCreateModel: () => void; + onEditModel: (index: number) => void; + onStartVerifier: ( + benchPackId: string, + benchPackName: string, + verifierId: string, + ) => Promise; + onStopVerifier: (benchPackId: string) => Promise; + onDeleteVerifierImage: ( + benchPackId: string, + benchPackName: string, + verifierId: string, + ) => void; + onRefreshRegistry: () => void; + onInstallBenchPack: (benchPackId: string) => void; + onInstallBenchPackFromUrl: (url: string) => Promise; + onUpdateBenchPack: (benchPackId: string) => void; + onUninstallBenchPack: (benchPackId: string) => void; + 
updateDraft: ( + updater: (current: BenchLocalConfig) => BenchLocalConfig, + ) => void; + onUpdateVerifier: ( + benchPackId: string, + verifierId: string, + updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig, + ) => void; }) { - return ( -
- - -
- {settingsNotice ? ( - -
- {settingsNotice} - -
-
- ) : null} - {error ? ( - -
- {error} - -
-
- ) : null} -
- {settingsTab === "providers" ? ( - - ) : null} - - {settingsTab === "models" ? ( - - ) : null} - - {settingsTab === "benchPacks" ? ( - - ) : null} - - {settingsTab === "verification" ? ( - { - await onStartVerifier(benchPackId, benchPackName, verifierId); - }} - onStop={async (benchPackId) => { - await onStopVerifier(benchPackId); - }} - onDeleteImage={(benchPackId, benchPackName, verifierId) => { - onDeleteVerifierImage(benchPackId, benchPackName, verifierId); - }} - /> - ) : null} - - {settingsTab === "advanced" ? ( -
- } - > - undefined} - /> - - updateDraft((current) => { - current.run_storage_dir = value; - return current; - }) - } - /> - - updateDraft((current) => { - current.benchpack_storage_dir = value; - return current; - }) - } - /> - - updateDraft((current) => { - current.log_storage_dir = value; - return current; - }) - } - /> - - updateDraft((current) => { - current.cache_dir = value; - return current; - }) - } - /> -
-

- These paths are saved to{" "} - ~/.benchlocal/config.toml. -

-
-
- - -
-
-
- ) : null} -
-
-
- ); + return ( +
+ + +
+ {settingsNotice ? ( + +
+ {settingsNotice} + +
+
+ ) : null} + {error ? ( + +
+ {error} + +
+
+ ) : null} +
+ {settingsTab === 'providers' ? ( + + ) : null} + + {settingsTab === 'models' ? ( + + ) : null} + + {settingsTab === 'benchPacks' ? ( + + ) : null} + + {settingsTab === 'verification' ? ( + { + await onStartVerifier(benchPackId, benchPackName, verifierId); + }} + onStop={async (benchPackId) => { + await onStopVerifier(benchPackId); + }} + onDeleteImage={(benchPackId, benchPackName, verifierId) => { + onDeleteVerifierImage(benchPackId, benchPackName, verifierId); + }} + /> + ) : null} + + {settingsTab === 'advanced' ? ( +
+ } + > + undefined} + /> + + updateDraft((current) => { + current.run_storage_dir = value; + return current; + }) + } + /> + + updateDraft((current) => { + current.benchpack_storage_dir = value; + return current; + }) + } + /> + + updateDraft((current) => { + current.log_storage_dir = value; + return current; + }) + } + /> + + updateDraft((current) => { + current.cache_dir = value; + return current; + }) + } + /> +
+

+ These paths are saved to{' '} + ~/.benchlocal/config.toml. +

+
+
+ + +
+
+
+ ) : null} +
+
+
+ ); } function ProvidersView({ - providers, - models, - onCreate, - onEdit, + providers, + models, + onCreate, + onEdit, }: { - providers: Record; - models: BenchLocalModelConfig[]; - onCreate: () => void; - onEdit: (providerId: string) => void; + providers: Record; + models: BenchLocalModelConfig[]; + onCreate: () => void; + onEdit: (providerId: string) => void; }) { - const providerIds = Object.keys(providers); - - return ( - } - actions={ - - } - > - - - - - - - - - - - - - - {providerIds.map((providerId) => { - const provider = providers[providerId]; - const linkedModels = models.filter( - (model) => model.provider === providerId, - ).length; - - return ( - - - - - - - - - ); - })} - -
ProviderTypeStatusBase URLModelsActions
-
{provider.name}
-
-
- {providerKindLabel(provider.kind)} -
-
- - {provider.enabled ? "active" : "inactive"} - - {provider.base_url}{linkedModels} -
- -
-
-
-
- ); + const providerIds = Object.keys(providers); + + return ( + } + actions={ + + } + > + + + + + + + + + + + + + + {providerIds.map((providerId) => { + const provider = providers[providerId]; + const linkedModels = models.filter( + (model) => model.provider === providerId, + ).length; + + return ( + + + + + + + + + ); + })} + +
ProviderTypeStatusBase URLModelsActions
+
{provider.name}
+
+
+ {providerKindLabel(provider.kind)} +
+
+ + {provider.enabled ? 'active' : 'inactive'} + + {provider.base_url}{linkedModels} +
+ +
+
+
+
+ ); } function ModelsView({ - models, - providers, - providerIds, - onCreate, - onEdit, + models, + providers, + providerIds, + onCreate, + onEdit, }: { - models: BenchLocalModelConfig[]; - providers: Record; - providerIds: string[]; - onCreate: () => void; - onEdit: (index: number) => void; + models: BenchLocalModelConfig[]; + providers: Record; + providerIds: string[]; + onCreate: () => void; + onEdit: (index: number) => void; }) { - const [providerFilter, setProviderFilter] = useState("all"); - const [groupFilter, setGroupFilter] = useState("all"); - const [searchQuery, setSearchQuery] = useState(""); - const providerOptions = [ - { value: "all", label: "All Providers" }, - ...Array.from(new Set(models.map((model) => model.provider))) - .sort((left, right) => - (providers[left]?.name ?? left).localeCompare( - providers[right]?.name ?? right, - ), - ) - .map((providerId) => ({ - value: providerId, - label: providers[providerId]?.name ?? providerId, - })), - ]; - const groupOptions = [ - { value: "all", label: "All Groups" }, - ...Array.from( - new Set(models.map((model) => model.group.trim() || "__ungrouped__")), - ) - .sort((left, right) => left.localeCompare(right)) - .map((group) => ({ - value: group, - label: group === "__ungrouped__" ? "Ungrouped" : group, - })), - ]; - const filteredModels = models - .map((model, index) => ({ model, index })) - .filter(({ model }) => { - const normalizedGroup = model.group.trim() || "__ungrouped__"; - const normalizedQuery = searchQuery.trim().toLowerCase(); - const providerName = providers[model.provider]?.name ?? 
model.provider; - const haystack = [ - model.label, - model.id, - model.model, - model.group, - providerName, - model.provider, - ] - .filter(Boolean) - .join(" ") - .toLowerCase(); - - return ( - (providerFilter === "all" || model.provider === providerFilter) && - (groupFilter === "all" || normalizedGroup === groupFilter) && - (!normalizedQuery || haystack.includes(normalizedQuery)) - ); - }); - - useEffect(() => { - if ( - providerFilter !== "all" && - !providerOptions.some((option) => option.value === providerFilter) - ) { - setProviderFilter("all"); - } - }, [providerFilter, providerOptions]); - - useEffect(() => { - if ( - groupFilter !== "all" && - !groupOptions.some((option) => option.value === groupFilter) - ) { - setGroupFilter("all"); - } - }, [groupFilter, groupOptions]); - - return ( - } - actions={ - - } - > -
- - - -
- - - - - - - - - - - - - - {filteredModels.length === 0 ? ( - - - - ) : ( - filteredModels.map(({ model, index }) => ( - - - - - - - - - )) - )} - -
LabelStatusProviderModelGroupActions
-
- No models match the current filters. -
-
-
{model.label}
-
- {model.id} -
-
- - {model.enabled ? "active" : "inactive"} - - - {providers[model.provider]?.name ?? - model.provider.split("-")[0] ?? - model.provider} - {model.model}{model.group} -
- -
-
-
-
- ); + const [providerFilter, setProviderFilter] = useState('all'); + const [groupFilter, setGroupFilter] = useState('all'); + const [searchQuery, setSearchQuery] = useState(''); + const providerOptions = [ + { value: 'all', label: 'All Providers' }, + ...Array.from(new Set(models.map((model) => model.provider))) + .sort((left, right) => + (providers[left]?.name ?? left).localeCompare( + providers[right]?.name ?? right, + ), + ) + .map((providerId) => ({ + value: providerId, + label: providers[providerId]?.name ?? providerId, + })), + ]; + const groupOptions = [ + { value: 'all', label: 'All Groups' }, + ...Array.from( + new Set(models.map((model) => model.group.trim() || '__ungrouped__')), + ) + .sort((left, right) => left.localeCompare(right)) + .map((group) => ({ + value: group, + label: group === '__ungrouped__' ? 'Ungrouped' : group, + })), + ]; + const filteredModels = models + .map((model, index) => ({ model, index })) + .filter(({ model }) => { + const normalizedGroup = model.group.trim() || '__ungrouped__'; + const normalizedQuery = searchQuery.trim().toLowerCase(); + const providerName = providers[model.provider]?.name ?? model.provider; + const haystack = [ + model.label, + model.id, + model.model, + model.group, + providerName, + model.provider, + ] + .filter(Boolean) + .join(' ') + .toLowerCase(); + + return ( + (providerFilter === 'all' || model.provider === providerFilter) && + (groupFilter === 'all' || normalizedGroup === groupFilter) && + (!normalizedQuery || haystack.includes(normalizedQuery)) + ); + }); + + useEffect(() => { + if ( + providerFilter !== 'all' && + !providerOptions.some((option) => option.value === providerFilter) + ) { + setProviderFilter('all'); + } + }, [providerFilter, providerOptions]); + + useEffect(() => { + if ( + groupFilter !== 'all' && + !groupOptions.some((option) => option.value === groupFilter) + ) { + setGroupFilter('all'); + } + }, [groupFilter, groupOptions]); + + return ( + } + actions={ + + } + > +
+ + + +
+ + + + + + + + + + + + + + {filteredModels.length === 0 ? ( + + + + ) : ( + filteredModels.map(({ model, index }) => ( + + + + + + + + + )) + )} + +
LabelStatusProviderModelGroupActions
+
+ No models match the current filters. +
+
+
{model.label}
+
+ {model.id} +
+
+ + {model.enabled ? 'active' : 'inactive'} + + + {providers[model.provider]?.name ?? + model.provider.split('-')[0] ?? + model.provider} + {model.model}{model.group} +
+ +
+
+
+
+ ); } function BenchPackRegistryView({ - draft, - inspections, - registryEntries, - registryWarning, - benchPackMutations, - onRefresh, - onInstall, - onInstallFromUrl, - onUpdate, - onUninstall, + draft, + inspections, + registryEntries, + registryWarning, + benchPackMutations, + onRefresh, + onInstall, + onInstallFromUrl, + onUpdate, + onUninstall, }: { - draft: BenchLocalConfig; - inspections: BenchPackInspection[]; - registryEntries: BenchPackRegistryEntry[]; - registryWarning: string | null; - benchPackMutations: Record; - onRefresh: () => void; - onInstall: (benchPackId: string) => void; - onInstallFromUrl: (url: string) => Promise; - onUpdate: (benchPackId: string) => void; - onUninstall: (benchPackId: string) => void; + draft: BenchLocalConfig; + inspections: BenchPackInspection[]; + registryEntries: BenchPackRegistryEntry[]; + registryWarning: string | null; + benchPackMutations: Record; + onRefresh: () => void; + onInstall: (benchPackId: string) => void; + onInstallFromUrl: (url: string) => Promise; + onUpdate: (benchPackId: string) => void; + onUninstall: (benchPackId: string) => void; }) { - const [manualUrl, setManualUrl] = useState(""); - const inspectionsById = Object.fromEntries( - inspections.map((inspection) => [inspection.id, inspection]), - ); - const hasActiveMutation = Object.keys(benchPackMutations).length > 0; - const officialRows = registryEntries.map((entry) => { - const installed = draft.benchpacks[entry.id]; - const inspection = inspectionsById[entry.id]; - const mutation = benchPackMutations[entry.id]; - const updateAvailable = - Boolean(installed) && - (installed?.version !== entry.version || - (entry.source.type === "github" - ? installed?.ref !== entry.source.tag - : false)); - - return { - id: entry.id, - name: entry.name, - description: entry.description ?? "No description provided.", - version: entry.version, - installedVersion: installed?.version, - installed: Boolean(installed), - status: installed - ? (inspection?.status ?? 
"not_installed") - : "not_installed", - mutation, - updateAvailable, - isRegistryEntry: true, - } as const; - }); - const thirdPartyRows = Object.entries(draft.benchpacks) - .filter(([, benchPack]: any) => benchPack.source !== "registry") - .map(([benchPackId, benchPack]) => { - const inspection = inspectionsById[benchPackId]; - const mutation = benchPackMutations[benchPackId]; - - return { - id: benchPackId, - name: inspection?.manifest?.name ?? benchPackId, - description: - inspection?.manifest?.description ?? - "Installed from a third-party source maintained outside BenchLocal.", - version: - benchPack.version ?? inspection?.manifest?.version ?? "unknown", - status: inspection?.status ?? "not_installed", - sourceLabel: - benchPack.source === "archive" - ? (benchPack.url ?? "Archive URL") - : benchPack.source === "github" - ? (benchPack.repo ?? "GitHub") - : benchPack.source === "local" - ? (benchPack.path ?? "Local path") - : benchPack.source, - mutation, - } as const; - }); - - return ( -
- } - actions={ - - } - > - {registryWarning ? ( - {registryWarning} - ) : null} - - - - - - - - - - - - - {officialRows.length === 0 ? ( - - - - ) : ( - officialRows.map((row) => { - const isMutating = Boolean(row.mutation); - const disableRowAction = hasActiveMutation && !isMutating; - - return ( - - - - - - - - ); - }) - )} - -
NameDescriptionVersionStatusActions
-
- {registryWarning - ? "The official registry is currently unavailable." - : "No Bench Packs are available in the official registry."} -
-
-
- {row.name} -
-
{row.description} -
-
- {row.installed && - row.updateAvailable && - row.installedVersion ? ( - <> - v{row.installedVersion} - - v{row.version} - - ) : ( - v{row.version} - )} -
- {row.installed && - row.isRegistryEntry && - row.updateAvailable ? ( - - ) : null} -
-
- - {row.mutation - ? benchPackMutationLabel(row.mutation) - : row.installed - ? row.status.replaceAll("_", " ") - : "available"} - - -
- {row.installed ? ( - - ) : ( - - )} -
-
-
-
- - } - > -
-

- Third-party Bench Packs are maintained by their authors, not by - BenchLocal. Only install packages from sources you trust. -

-
-
- - -
- - - - - - - - - - - - - - - {thirdPartyRows.length === 0 ? ( - - - - ) : ( - thirdPartyRows.map((row) => { - const isMutating = Boolean(row.mutation); - const disableRowAction = hasActiveMutation && !isMutating; - - return ( - - - - - - - - - ); - }) - )} - -
NameDescriptionVersionSourceStatusActions
-
- No third-party Bench Packs are installed. -
-
-
- {row.name} -
-
{row.description}v{row.version}{row.sourceLabel} - - {row.mutation - ? benchPackMutationLabel(row.mutation) - : row.status.replaceAll("_", " ")} - - -
- -
-
-
-
-
- ); + const [manualUrl, setManualUrl] = useState(''); + const inspectionsById = Object.fromEntries( + inspections.map((inspection) => [inspection.id, inspection]), + ); + const hasActiveMutation = Object.keys(benchPackMutations).length > 0; + const officialRows = registryEntries.map((entry) => { + const installed = draft.benchpacks[entry.id]; + const inspection = inspectionsById[entry.id]; + const mutation = benchPackMutations[entry.id]; + const updateAvailable = + Boolean(installed) && + (installed?.version !== entry.version || + (entry.source.type === 'github' + ? installed?.ref !== entry.source.tag + : false)); + + return { + id: entry.id, + name: entry.name, + description: entry.description ?? 'No description provided.', + version: entry.version, + installedVersion: installed?.version, + installed: Boolean(installed), + status: installed + ? (inspection?.status ?? 'not_installed') + : 'not_installed', + mutation, + updateAvailable, + isRegistryEntry: true, + } as const; + }); + const thirdPartyRows = Object.entries(draft.benchpacks) + .filter(([, benchPack]: any) => benchPack.source !== 'registry') + .map(([benchPackId, benchPack]) => { + const inspection = inspectionsById[benchPackId]; + const mutation = benchPackMutations[benchPackId]; + + return { + id: benchPackId, + name: inspection?.manifest?.name ?? benchPackId, + description: + inspection?.manifest?.description ?? + 'Installed from a third-party source maintained outside BenchLocal.', + version: + benchPack.version ?? inspection?.manifest?.version ?? 'unknown', + status: inspection?.status ?? 'not_installed', + sourceLabel: + benchPack.source === 'archive' + ? (benchPack.url ?? 'Archive URL') + : benchPack.source === 'github' + ? (benchPack.repo ?? 'GitHub') + : benchPack.source === 'local' + ? (benchPack.path ?? 'Local path') + : benchPack.source, + mutation, + } as const; + }); + + return ( +
+ } + actions={ + + } + > + {registryWarning ? ( + {registryWarning} + ) : null} + + + + + + + + + + + + + {officialRows.length === 0 ? ( + + + + ) : ( + officialRows.map((row) => { + const isMutating = Boolean(row.mutation); + const disableRowAction = hasActiveMutation && !isMutating; + + return ( + + + + + + + + ); + }) + )} + +
NameDescriptionVersionStatusActions
+
+ {registryWarning + ? 'The official registry is currently unavailable.' + : 'No Bench Packs are available in the official registry.'} +
+
+
+ {row.name} +
+
{row.description} +
+
+ {row.installed && + row.updateAvailable && + row.installedVersion ? ( + <> + v{row.installedVersion} + + v{row.version} + + ) : ( + v{row.version} + )} +
+ {row.installed && + row.isRegistryEntry && + row.updateAvailable ? ( + + ) : null} +
+
+ + {row.mutation + ? benchPackMutationLabel(row.mutation) + : row.installed + ? row.status.replaceAll('_', ' ') + : 'available'} + + +
+ {row.installed ? ( + + ) : ( + + )} +
+
+
+
+ + } + > +
+

+ Third-party Bench Packs are maintained by their authors, not by + BenchLocal. Only install packages from sources you trust. +

+
+
+ + +
+ + + + + + + + + + + + + + + {thirdPartyRows.length === 0 ? ( + + + + ) : ( + thirdPartyRows.map((row) => { + const isMutating = Boolean(row.mutation); + const disableRowAction = hasActiveMutation && !isMutating; + + return ( + + + + + + + + + ); + }) + )} + +
NameDescriptionVersionSourceStatusActions
+
+ No third-party Bench Packs are installed. +
+
+
+ {row.name} +
+
{row.description}v{row.version}{row.sourceLabel} + + {row.mutation + ? benchPackMutationLabel(row.mutation) + : row.status.replaceAll('_', ' ')} + + +
+ +
+
+
+
+
+ ); } -function verifierModeLabel(mode: BenchLocalVerifierConfig["mode"]): string { - switch (mode) { - case "cloud": - return "BenchLocal Cloud"; - case "custom_url": - return "Custom URL"; - case "docker": - default: - return "Local Docker"; - } +function verifierModeLabel(mode: BenchLocalVerifierConfig['mode']): string { + switch (mode) { + case 'cloud': + return 'BenchLocal Cloud'; + case 'custom_url': + return 'Custom URL'; + case 'docker': + default: + return 'Local Docker'; + } } function VerificationView({ - draft, - statuses, - onUpdate, - onStart, - onStop, - onDeleteImage, + draft, + statuses, + onUpdate, + onStart, + onStop, + onDeleteImage, }: { - draft: BenchLocalConfig; - statuses: Record; - onUpdate: ( - benchPackId: string, - verifierId: string, - updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig, - ) => void; - onStart: ( - benchPackId: string, - benchPackName: string, - verifierId: string, - ) => Promise; - onStop: (benchPackId: string) => Promise; - onDeleteImage: ( - benchPackId: string, - benchPackName: string, - verifierId: string, - ) => void; + draft: BenchLocalConfig; + statuses: Record; + onUpdate: ( + benchPackId: string, + verifierId: string, + updater: (verifier: BenchLocalVerifierConfig) => BenchLocalVerifierConfig, + ) => void; + onStart: ( + benchPackId: string, + benchPackName: string, + verifierId: string, + ) => Promise; + onStop: (benchPackId: string) => Promise; + onDeleteImage: ( + benchPackId: string, + benchPackName: string, + verifierId: string, + ) => void; }) { - const verificationEntries = Object.entries(draft.benchpacks).filter( - ([benchPackId]) => { - const status = statuses[benchPackId]; - return Boolean(status && status.verifiers.length > 0); - }, - ); - - const rows = verificationEntries.flatMap(([benchPackId, benchPack]) => { - const status = statuses[benchPackId]; - const inspectionName = status?.benchPackName ?? benchPackId; - - return Object.entries(benchPack.verifiers ?? 
{}).map( - ([verifierId, verifier]) => { - const runtime = status?.verifiers.find( - (entry) => entry.id === verifierId, - ); - return { - benchPackId, - benchPackName: inspectionName, - verifierId, - verifier, - runtime, - docker: status?.docker, - }; - }, - ); - }); - - return ( - } - > - - - - - - - - - - - - - - {rows.length === 0 ? ( - - - - ) : ( - rows.map( - ({ - benchPackId, - benchPackName, - verifierId, - verifier, - runtime, - docker, - }) => ( - - - - - - - - - ), - ) - )} - -
Bench PackModeStatusEndpointAuto StartActions
-
- No installed Bench Packs currently require a verifier. -
-
-
- {benchPackName} -
-
- - onUpdate(benchPackId, verifierId, (current) => ({ - ...current, - mode: value as BenchLocalVerifierConfig["mode"], - })) - } - /> - - - {formatVerifierRuntimeStatus(runtime?.status)} - - -
- {runtime?.url ?? "Managed by BenchLocal"} -
-
- Docker:{" "} - {docker?.state === "ready" - ? (docker.details ?? "ready") - : docker?.state === "not_running" - ? (docker.details ?? "not running") - : (docker?.details ?? "not installed")} -
-
-
- - onUpdate(benchPackId, verifierId, (current) => ({ - ...current, - auto_start: event.target.checked, - })) - } - /> -
-
-
- {runtime?.status === "running" ? ( - - ) : ( - - )} - {runtime?.dockerImagePresent ? ( - - ) : null} -
-
-
-
- ); + const verificationEntries = Object.entries(draft.benchpacks).filter( + ([benchPackId]) => { + const status = statuses[benchPackId]; + return Boolean(status && status.verifiers.length > 0); + }, + ); + + const rows = verificationEntries.flatMap(([benchPackId, benchPack]) => { + const status = statuses[benchPackId]; + const inspectionName = status?.benchPackName ?? benchPackId; + + return Object.entries(benchPack.verifiers ?? {}).map( + ([verifierId, verifier]) => { + const runtime = status?.verifiers.find( + (entry) => entry.id === verifierId, + ); + return { + benchPackId, + benchPackName: inspectionName, + verifierId, + verifier, + runtime, + docker: status?.docker, + }; + }, + ); + }); + + return ( + } + > + + + + + + + + + + + + + + {rows.length === 0 ? ( + + + + ) : ( + rows.map( + ({ + benchPackId, + benchPackName, + verifierId, + verifier, + runtime, + docker, + }) => ( + + + + + + + + + ), + ) + )} + +
Bench PackModeStatusEndpointAuto StartActions
+
+ No installed Bench Packs currently require a verifier. +
+
+
+ {benchPackName} +
+
+ + onUpdate(benchPackId, verifierId, (current) => ({ + ...current, + mode: value as BenchLocalVerifierConfig['mode'], + })) + } + /> + + + {formatVerifierRuntimeStatus(runtime?.status)} + + +
+ {runtime?.url ?? 'Managed by BenchLocal'} +
+
+ Docker:{' '} + {docker?.state === 'ready' + ? (docker.details ?? 'ready') + : docker?.state === 'not_running' + ? (docker.details ?? 'not running') + : (docker?.details ?? 'not installed')} +
+
+
+ + onUpdate(benchPackId, verifierId, (current) => ({ + ...current, + auto_start: event.target.checked, + })) + } + /> +
+
+
+ {runtime?.status === 'running' ? ( + + ) : ( + + )} + {runtime?.dockerImagePresent ? ( + + ) : null} +
+
+
+
+ ); } function Panel({ - title, - subtitle, - tone, - icon, - actions, - children, + title, + subtitle, + tone, + icon, + actions, + children, }: { - title: string; - subtitle: string; - tone: "sky" | "orange" | "slate"; - icon?: ReactNode; - actions?: ReactNode; - children: ReactNode; + title: string; + subtitle: string; + tone: 'sky' | 'orange' | 'slate'; + icon?: ReactNode; + actions?: ReactNode; + children: ReactNode; }) { - return ( -
-
-
-
{icon}
-
-

{title}

-

{subtitle}

-
-
- {actions ?
{actions}
: null} -
-
{children}
-
- ); + return ( +
+
+
+
{icon}
+
+

{title}

+

{subtitle}

+
+
+ {actions ?
{actions}
: null} +
+
{children}
+
+ ); } function DetailCard({ title, content }: { title: string; content: string }) { - const toneClass = - title === "What this tests" - ? "is-blue" - : title === "Prompt Contract" - ? "is-amber" - : "is-slate"; - - const lines = content.split("\n"); - - return ( -
-
-

{title}

-
-

- {lines.map((line, lineIndex) => ( - - {line.split(/(`[^`]+`)/g).map((part, partIndex) => { - if ( - part.startsWith("`") && - part.endsWith("`") && - part.length >= 2 - ) { - return ( - - {part.slice(1, -1)} - - ); - } - - return ( - {part} - ); - })} - {lineIndex < lines.length - 1 ?
: null} -
- ))} -

-
- ); + const toneClass = + title === 'What this tests' + ? 'is-blue' + : title === 'Prompt Contract' + ? 'is-amber' + : 'is-slate'; + + const lines = content.split('\n'); + + return ( +
+
+

{title}

+
+

+ {lines.map((line, lineIndex) => ( + + {line.split(/(`[^`]+`)/g).map((part, partIndex) => { + if ( + part.startsWith('`') && + part.endsWith('`') && + part.length >= 2 + ) { + return ( + + {part.slice(1, -1)} + + ); + } + + return ( + {part} + ); + })} + {lineIndex < lines.length - 1 ?
: null} +
+ ))} +

+
+ ); } function HistoryModal({ - benchPackName, - entries, - onClose, - onOpenRun, - onRemoveAll, + benchPackName, + entries, + onClose, + onOpenRun, + onRemoveAll, }: { - benchPackName: string; - entries: BenchPackRunHistoryEntry[]; - onClose: () => void; - onOpenRun: (runId: string, mode: "history" | "replay") => void; - onRemoveAll: () => void; + benchPackName: string; + entries: BenchPackRunHistoryEntry[]; + onClose: () => void; + onOpenRun: (runId: string, mode: 'history' | 'replay') => void; + onRemoveAll: () => void; }) { - return ( -
-
-
-
-

Test Histories

-

- {benchPackName} -

-
- -
- -
- - - - - - - - - - - - - - {entries.map((entry) => { - const executionModeLabel = - EXECUTION_MODE_OPTIONS.find( - (option) => option.value === entry.executionMode, - )?.label ?? "Unknown"; - - return ( - - - - - - - - - ); - })} - -
Date TimeModeModelsCasesStatusAction
-
- {new Date(entry.startedAt).toLocaleString()} -
-
- - {executionModeLabel} - - - - {entry.modelCount} - - - - {entry.scenarioCount} - - - - {entry.error - ? "error" - : entry.cancelled - ? "stopped" - : "completed"} - - - -
-
-
- -
- -
-
-
- ); + return ( +
+
+
+
+

Test Histories

+

+ {benchPackName} +

+
+ +
+ +
+ + + + + + + + + + + + + + {entries.map((entry) => { + const executionModeLabel = + EXECUTION_MODE_OPTIONS.find( + (option) => option.value === entry.executionMode, + )?.label ?? 'Unknown'; + + return ( + + + + + + + + + ); + })} + +
Date TimeModeModelsCasesStatusAction
+
+ {new Date(entry.startedAt).toLocaleString()} +
+
+ + {executionModeLabel} + + + + {entry.modelCount} + + + + {entry.scenarioCount} + + + + {entry.error + ? 'error' + : entry.cancelled + ? 'stopped' + : 'completed'} + + + +
+
+
+ +
+ +
+
+
+ ); } function VerifierPreparationModal({ - benchPackName, - verifierId, - message, - isCancelling, - onCancel, + benchPackName, + verifierId, + message, + isCancelling, + onCancel, }: { - benchPackName: string; - verifierId: string; - message: string; - isCancelling?: boolean; - onCancel?: () => void; + benchPackName: string; + verifierId: string; + message: string; + isCancelling?: boolean; + onCancel?: () => void; }) { - return ( -
-
-
-
- -
-
-

Preparing Verifier

-

{benchPackName}

-

- BenchLocal is preparing{" "} - {verifierId} before - the run can start. -

-
-
- -

- {message} -

- - {onCancel ? ( -
- -
- ) : null} -
-
- ); + return ( +
+
+
+
+ +
+
+

Preparing Verifier

+

{benchPackName}

+

+ BenchLocal is preparing{' '} + {verifierId} before + the run can start. +

+
+
+ +

+ {message} +

+ + {onCancel ? ( +
+ +
+ ) : null} +
+
+ ); } function Banner({ - tone, - children, + tone, + children, }: { - tone: "success" | "danger" | "neutral" | "warning"; - children: ReactNode; + tone: 'success' | 'danger' | 'neutral' | 'warning'; + children: ReactNode; }) { - const toneClass = - tone === "success" - ? "banner-success" - : tone === "danger" - ? "banner-danger" - : tone === "warning" - ? "banner-warning" - : "banner-neutral"; - return
{children}
; + const toneClass = + tone === 'success' + ? 'banner-success' + : tone === 'danger' + ? 'banner-danger' + : tone === 'warning' + ? 'banner-warning' + : 'banner-neutral'; + return
{children}
; } function AboutDialog({ - metadata, - updateState, - onCheckForUpdates, - onInstallUpdate, - onClose, + metadata, + updateState, + onCheckForUpdates, + onInstallUpdate, + onClose, }: { - metadata: BenchLocalAppMetadata | null; - updateState: BenchLocalUpdateState | null; - onCheckForUpdates: () => void; - onInstallUpdate: () => void; - onClose: () => void; + metadata: BenchLocalAppMetadata | null; + updateState: BenchLocalUpdateState | null; + onCheckForUpdates: () => void; + onInstallUpdate: () => void; + onClose: () => void; }) { - const dialogRef = useRef(null); - const productName = metadata?.productName ?? "BenchLocal"; - const version = metadata?.version?.trim(); - const updateMessage = describeAppUpdateState(updateState); - const checkedAtLabel = formatAppUpdateCheckedAt(updateState?.checkedAt); - const updateFeedLabel = updateState?.feedLabel?.trim() || "GitHub Releases"; - const updateFeedUrl = updateState?.feedUrl?.trim(); - const progressPercent = - typeof updateState?.progressPercent === "number" - ? Math.max(0, Math.min(100, updateState.progressPercent)) - : null; - const canCheckForUpdates = - updateState?.status !== "checking" && - updateState?.status !== "downloading" && - updateState?.status !== "available" && - updateState?.status !== "unsupported"; - const updateActionLabel = - updateState?.status === "downloaded" - ? "Restart to Update" - : updateState?.status === "checking" - ? "Checking..." - : updateState?.status === "downloading" || - updateState?.status === "available" - ? progressPercent !== null - ? `Downloading ${Math.round(progressPercent)}%` - : "Downloading..." 
- : "Check for Updates"; - - useEffect(() => { - const frameId = window.requestAnimationFrame(() => { - dialogRef.current?.focus(); - }); - - return () => { - window.cancelAnimationFrame(frameId); - }; - }, []); - - useEffect(() => { - const handleKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape" || event.key === "Enter") { - event.preventDefault(); - onClose(); - } - }; - - window.addEventListener("keydown", handleKeyDown); - - return () => { - window.removeEventListener("keydown", handleKeyDown); - }; - }, [onClose]); - - return ( -
-
- -
- -

{productName}

- {version ? ( -

Version {version}

- ) : null} - {metadata?.copyright ? ( -

{metadata.copyright}

- ) : null} -
-
- Self Update - {updateState?.availableVersion ? ( - - v{updateState.availableVersion} - - ) : null} -
-

{updateMessage}

-

- Feed:{" "} - {updateFeedUrl - ? `${updateFeedLabel} (${updateFeedUrl})` - : updateFeedLabel} -

- {progressPercent !== null ? ( -
-
- -
- - {Math.round(progressPercent)}% - -
- ) : null} - {checkedAtLabel ? ( -

- Last checked: {checkedAtLabel} -

- ) : null} - {updateState?.releaseNotes ? ( -
-								{updateState.releaseNotes}
-							
- ) : null} -
- -
-
-
-
-
- ); + const dialogRef = useRef(null); + const productName = metadata?.productName ?? 'BenchLocal'; + const version = metadata?.version?.trim(); + const updateMessage = describeAppUpdateState(updateState); + const checkedAtLabel = formatAppUpdateCheckedAt(updateState?.checkedAt); + const updateFeedLabel = updateState?.feedLabel?.trim() || 'GitHub Releases'; + const updateFeedUrl = updateState?.feedUrl?.trim(); + const progressPercent = + typeof updateState?.progressPercent === 'number' + ? Math.max(0, Math.min(100, updateState.progressPercent)) + : null; + const canCheckForUpdates = + updateState?.status !== 'checking' && + updateState?.status !== 'downloading' && + updateState?.status !== 'available' && + updateState?.status !== 'unsupported'; + const updateActionLabel = + updateState?.status === 'downloaded' + ? 'Restart to Update' + : updateState?.status === 'checking' + ? 'Checking...' + : updateState?.status === 'downloading' || + updateState?.status === 'available' + ? progressPercent !== null + ? `Downloading ${Math.round(progressPercent)}%` + : 'Downloading...' + : 'Check for Updates'; + + useEffect(() => { + const frameId = window.requestAnimationFrame(() => { + dialogRef.current?.focus(); + }); + + return () => { + window.cancelAnimationFrame(frameId); + }; + }, []); + + useEffect(() => { + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === 'Escape' || event.key === 'Enter') { + event.preventDefault(); + onClose(); + } + }; + + window.addEventListener('keydown', handleKeyDown); + + return () => { + window.removeEventListener('keydown', handleKeyDown); + }; + }, [onClose]); + + return ( +
+
+ +
+ +

{productName}

+ {version ? ( +

Version {version}

+ ) : null} + {metadata?.copyright ? ( +

{metadata.copyright}

+ ) : null} +
+
+ Self Update + {updateState?.availableVersion ? ( + + v{updateState.availableVersion} + + ) : null} +
+

{updateMessage}

+

+ Feed:{' '} + {updateFeedUrl + ? `${updateFeedLabel} (${updateFeedUrl})` + : updateFeedLabel} +

+ {progressPercent !== null ? ( +
+
+ +
+ + {Math.round(progressPercent)}% + +
+ ) : null} + {checkedAtLabel ? ( +

+ Last checked: {checkedAtLabel} +

+ ) : null} + {updateState?.releaseNotes ? ( +
+                {updateState.releaseNotes}
+              
+ ) : null} +
+ +
+
+
+
+
+ ); } function Modal({ - title, - subtitle, - onClose, - onSubmit, - submitLabel, - submitTone = "primary", - size = "default", - leadingActions, - children, + title, + subtitle, + onClose, + onSubmit, + submitLabel, + submitTone = 'primary', + size = 'default', + leadingActions, + children, }: { - title: string; - subtitle?: string; - onClose: () => void; - onSubmit: () => void; - submitLabel: string; - submitTone?: "primary" | "danger"; - size?: "default" | "wide"; - leadingActions?: ReactNode; - children?: ReactNode; + title: string; + subtitle?: string; + onClose: () => void; + onSubmit: () => void; + submitLabel: string; + submitTone?: 'primary' | 'danger'; + size?: 'default' | 'wide'; + leadingActions?: ReactNode; + children?: ReactNode; }) { - const hasBody = Boolean(children); - const hasSubtitle = Boolean(subtitle?.trim()); - const dialogRef = useRef(null); - const submitButtonRef = useRef(null); - - useEffect(() => { - const frameId = window.requestAnimationFrame(() => { - const activeElement = document.activeElement; - const dialog = dialogRef.current; - - if (!dialog) { - return; - } - - if ( - activeElement instanceof HTMLElement && - dialog.contains(activeElement) - ) { - return; - } - - submitButtonRef.current?.focus(); - }); - - return () => { - window.cancelAnimationFrame(frameId); - }; - }, []); - - useEffect(() => { - const handleKeyDown = (event: KeyboardEvent) => { - if (event.key === "Escape") { - event.preventDefault(); - onClose(); - return; - } - - if ( - event.key !== "Enter" || - event.metaKey || - event.ctrlKey || - event.altKey || - event.shiftKey || - event.isComposing - ) { - return; - } - - const target = event.target; - - if ( - target instanceof HTMLElement && - (target.tagName === "TEXTAREA" || target.isContentEditable) - ) { - return; - } - - event.preventDefault(); - onSubmit(); - }; - - window.addEventListener("keydown", handleKeyDown); - - return () => { - window.removeEventListener("keydown", handleKeyDown); - }; - }, 
[onClose, onSubmit]); - - return ( -
-
-
-
-

{title}

- {hasSubtitle ? ( -

- {subtitle} -

- ) : null} -
- -
- - {hasBody ?
{children}
: null} - -
-
{leadingActions}
- -
-
-
- ); + const hasBody = Boolean(children); + const hasSubtitle = Boolean(subtitle?.trim()); + const dialogRef = useRef(null); + const submitButtonRef = useRef(null); + + useEffect(() => { + const frameId = window.requestAnimationFrame(() => { + const activeElement = document.activeElement; + const dialog = dialogRef.current; + + if (!dialog) { + return; + } + + if ( + activeElement instanceof HTMLElement && + dialog.contains(activeElement) + ) { + return; + } + + submitButtonRef.current?.focus(); + }); + + return () => { + window.cancelAnimationFrame(frameId); + }; + }, []); + + useEffect(() => { + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === 'Escape') { + event.preventDefault(); + onClose(); + return; + } + + if ( + event.key !== 'Enter' || + event.metaKey || + event.ctrlKey || + event.altKey || + event.shiftKey || + event.isComposing + ) { + return; + } + + const target = event.target; + + if ( + target instanceof HTMLElement && + (target.tagName === 'TEXTAREA' || target.isContentEditable) + ) { + return; + } + + event.preventDefault(); + onSubmit(); + }; + + window.addEventListener('keydown', handleKeyDown); + + return () => { + window.removeEventListener('keydown', handleKeyDown); + }; + }, [onClose, onSubmit]); + + return ( +
+
+
+
+

{title}

+ {hasSubtitle ? ( +

+ {subtitle} +

+ ) : null} +
+ +
+ + {hasBody ?
{children}
: null} + +
+
{leadingActions}
+ +
+
+
+ ); } function Field({ - label, - value, - onChange, - placeholder, - type = "text", - readOnly = false, - className = "", + label, + value, + onChange, + placeholder, + type = 'text', + readOnly = false, + className = '', }: { - label?: string; - value: string; - onChange: (value: string) => void; - placeholder?: string; - type?: string; - readOnly?: boolean; - className?: string; + label?: string; + value: string; + onChange: (value: string) => void; + placeholder?: string; + type?: string; + readOnly?: boolean; + className?: string; }) { - return ( - - ); + return ( + + ); } function ToggleRow({ - label, - checked, - onChange, + label, + checked, + onChange, }: { - label: string; - checked: boolean; - onChange: (checked: boolean) => void; + label: string; + checked: boolean; + onChange: (checked: boolean) => void; }) { - return ( - - ); + return ( + + ); } function FieldToggle({ - label, - checked, - onChange, + label, + checked, + onChange, }: { - label: string; - checked: boolean; - onChange: (checked: boolean) => void; + label: string; + checked: boolean; + onChange: (checked: boolean) => void; }) { - return ( - - ); + return ( + + ); } function InlineSelectField({ - label, - value, - options, - getOptionLabel, - onChange, + label, + value, + options, + getOptionLabel, + onChange, }: { - label: string; - value: string; - options: Array< - string | { value: string; label?: string; disabled?: boolean } - >; - getOptionLabel?: (value: string) => string; - onChange: (value: string) => void; + label: string; + value: string; + options: Array< + string | { value: string; label?: string; disabled?: boolean } + >; + getOptionLabel?: (value: string) => string; + onChange: (value: string) => void; }) { - return ( - - ); + return ( + + ); } -function statusClasses(status: BenchPackInspection["status"]): string { - switch (status) { - case "ready": - return "status-ready"; - case "not_installed": - return "status-not-installed"; - case "incompatible": - return 
"status-load-error"; - case "manifest_missing": - case "entry_missing": - return "status-entry-missing"; - case "invalid_manifest": - case "load_error": - return "status-load-error"; - } +function statusClasses(status: BenchPackInspection['status']): string { + switch (status) { + case 'ready': + return 'status-ready'; + case 'not_installed': + return 'status-not-installed'; + case 'incompatible': + return 'status-load-error'; + case 'manifest_missing': + case 'entry_missing': + return 'status-entry-missing'; + case 'invalid_manifest': + case 'load_error': + return 'status-load-error'; + } } diff --git a/app/src/server/app-metadata.ts b/app/src/server/app-metadata.ts index f82129e..a6b4481 100644 --- a/app/src/server/app-metadata.ts +++ b/app/src/server/app-metadata.ts @@ -1,83 +1,71 @@ -import { promises as fs } from "node:fs"; -import path from "node:path"; -import { fileURLToPath } from "node:url"; +import { promises as fs } from 'node:fs'; +import { + resolveAppPackageJsonPath, + resolveLicensePath, +} from './path-resolution'; export type BenchLocalAppMetadata = { - productName: string; - description: string; - version: string; - author: string; - license?: string; - copyright?: string; + productName: string; + description: string; + version: string; + author: string; + license?: string; + copyright?: string; }; type AppPackageJson = { - productName?: string; - description?: string; - version?: string; - author?: string; - license?: string; + name?: string; + productName?: string; + description?: string; + version?: string; + author?: string; + license?: string; }; -function getPackageJsonPath(): string { - return path.resolve( - path.dirname(fileURLToPath(import.meta.url)), - "../../package.json", - ); -} - -function getLicensePath(): string { - const licensePath = path.resolve( - path.dirname(fileURLToPath(import.meta.url)), - "../../../LICENSE", - ); - return licensePath; -} - function parseCopyrightLine(licenseText: string): string | undefined { - const 
line = licenseText - .split(/\r?\n/) - .map((entry) => entry.trim()) - .find((entry) => /^copyright\s*\(c\)\s+/i.test(entry)); + const line = licenseText + .split(/\r?\n/) + .map((entry) => entry.trim()) + .find((entry) => /^copyright\s*\(c\)\s+/i.test(entry)); - if (!line) { - return undefined; - } + if (!line) { + return undefined; + } - return line.replace(/^copyright\s*\(c\)\s+/i, "Copyright © "); + return line.replace(/^copyright\s*\(c\)\s+/i, 'Copyright © '); } function parseLicenseName(licenseText: string): string | undefined { - const firstLine = licenseText - .split(/\r?\n/) - .map((entry) => entry.trim()) - .find((entry) => entry.length > 0); + const firstLine = licenseText + .split(/\r?\n/) + .map((entry) => entry.trim()) + .find((entry) => entry.length > 0); - return firstLine || undefined; + return firstLine || undefined; } export async function loadAppMetadata(): Promise { - const packageJsonRaw = await fs.readFile(getPackageJsonPath(), "utf8"); - const packageJson = JSON.parse(packageJsonRaw) as AppPackageJson; + const packageJsonRaw = await fs.readFile(resolveAppPackageJsonPath(), 'utf8'); + const packageJson = JSON.parse(packageJsonRaw) as AppPackageJson; - let license = packageJson.license; - let copyright: string | undefined; + let license = packageJson.license; + let copyright: string | undefined; - try { - const licenseRaw = await fs.readFile(getLicensePath(), "utf8"); - license = license ?? parseLicenseName(licenseRaw); - copyright = parseCopyrightLine(licenseRaw); - } catch { - license = packageJson.license; - copyright = undefined; - } + try { + const licenseRaw = await fs.readFile(resolveLicensePath(), 'utf8'); + license = license ?? parseLicenseName(licenseRaw); + copyright = parseCopyrightLine(licenseRaw); + } catch { + license = packageJson.license; + copyright = undefined; + } - return { - productName: packageJson.productName ?? "BenchLocal", - description: packageJson.description ?? "", - version: packageJson.version ?? 
"0.0.0", - author: packageJson.author ?? "", - license, - copyright, - }; + return { + productName: packageJson.productName ?? 'BenchLocal', + description: packageJson.description ?? '', + version: packageJson.version ?? '0.0.0', + author: packageJson.author ?? '', + license, + copyright, + }; } diff --git a/app/src/server/index.ts b/app/src/server/index.ts index 994546c..8c67a89 100644 --- a/app/src/server/index.ts +++ b/app/src/server/index.ts @@ -1,47 +1,60 @@ -import path from "node:path"; -import { fileURLToPath } from "node:url"; -import fastifyStatic from "@fastify/static"; -import Fastify from "fastify"; -import { registerApiRoutes } from "./api-routes"; -import { activeRunManager } from "./run-manager"; -import { registerSseRoute } from "./sse-route"; - -const __dirname = path.dirname(fileURLToPath(import.meta.url)); +import fastifyStatic from '@fastify/static'; +import Fastify from 'fastify'; +import { registerApiRoutes } from './api-routes'; +import { pathExists, resolveRendererOutDir } from './path-resolution'; +import { activeRunManager } from './run-manager'; +import { registerSseRoute } from './sse-route'; async function main() { - const server = Fastify({ logger: { level: "info" } }); - - registerApiRoutes(server); - registerSseRoute(server); - - // Serve the React SPA build output - const rendererOut = path.join(__dirname, "..", "renderer-out"); - server.register(fastifyStatic, { root: rendererOut, prefix: "/" }); - - // SPA fallback - server.setNotFoundHandler((req, reply) => { - if (req.url.startsWith("/api/")) { - return reply.code(404).send({ error: "Not found" }); - } - return reply.type("text/html").sendFile("index.html"); - }); - - const port = Number(process.env.BENCHLOCAL_PORT) || 4300; - const host = process.env.BENCHLOCAL_HOST || "0.0.0.0"; - - await server.listen({ port, host }); - console.log(`BenchLocal running at http://${host}:${port}`); + const server = Fastify({ logger: { level: 'info' } }); + + registerApiRoutes(server); + 
registerSseRoute(server); + + const rendererOut = resolveRendererOutDir(); + const rendererOutExists = pathExists(rendererOut); + + if (rendererOutExists) { + server.register(fastifyStatic, { root: rendererOut, prefix: '/' }); + + server.setNotFoundHandler((req, reply) => { + if (req.url.startsWith('/api/')) { + return reply.code(404).send({ error: 'Not found' }); + } + return reply.type('text/html').sendFile('index.html'); + }); + } else { + server.log.warn( + `Renderer build output not found at ${rendererOut}. Static SPA serving is disabled.`, + ); + + server.setNotFoundHandler((req, reply) => { + if (req.url.startsWith('/api/')) { + return reply.code(404).send({ error: 'Not found' }); + } + return reply.code(503).send({ + error: + 'Web renderer build not found. Run npm run web:build or npm run web:dev.', + }); + }); + } + + const port = Number(process.env.BENCHLOCAL_PORT) || 4300; + const host = process.env.BENCHLOCAL_HOST || '0.0.0.0'; + + await server.listen({ port, host }); + console.log(`BenchLocal running at http://${host}:${port}`); } // Graceful shutdown -process.on("SIGINT", async () => { - console.log("Shutting down..."); - await activeRunManager.shutdown(); - process.exit(0); +process.on('SIGINT', async () => { + console.log('Shutting down...'); + await activeRunManager.shutdown(); + process.exit(0); }); -process.on("SIGTERM", async () => { - await activeRunManager.shutdown(); - process.exit(0); +process.on('SIGTERM', async () => { + await activeRunManager.shutdown(); + process.exit(0); }); main(); diff --git a/app/src/server/path-resolution.ts b/app/src/server/path-resolution.ts new file mode 100644 index 0000000..4e6b4ef --- /dev/null +++ b/app/src/server/path-resolution.ts @@ -0,0 +1,180 @@ +import { existsSync, readFileSync, statSync } from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +type PackageJsonShape = { + name?: string; + productName?: string; +}; + +const APP_PACKAGE_NAME = 'benchlocal-app'; +const 
APP_PRODUCT_NAME = 'BenchLocal'; + +function getModuleDir(): string { + return path.dirname(fileURLToPath(import.meta.url)); +} + +function getResourcesPath(): string | undefined { + return (process as NodeJS.Process & { resourcesPath?: string }).resourcesPath; +} + +function uniqueCandidates(candidates: Array): string[] { + return [ + ...new Set( + candidates.filter((candidate): candidate is string => Boolean(candidate)), + ), + ]; +} + +function isExistingFile(targetPath: string): boolean { + try { + return statSync(targetPath).isFile(); + } catch { + return false; + } +} + +function isExistingDir(targetPath: string): boolean { + try { + return statSync(targetPath).isDirectory(); + } catch { + return false; + } +} + +function readPackageJson(targetPath: string): PackageJsonShape | null { + if (!isExistingFile(targetPath)) { + return null; + } + + try { + return JSON.parse(readFileSync(targetPath, 'utf8')) as PackageJsonShape; + } catch { + return null; + } +} + +function isBenchLocalAppPackageJson(targetPath: string): boolean { + const packageJson = readPackageJson(targetPath); + + if (!packageJson) { + return false; + } + + return ( + packageJson.name === APP_PACKAGE_NAME || + packageJson.productName === APP_PRODUCT_NAME + ); +} + +function firstExisting( + candidates: string[], + predicate: (candidate: string) => boolean, +): string | undefined { + return candidates.find((candidate) => predicate(candidate)); +} + +export function resolveAppPackageJsonPath(): string { + const cwd = process.cwd(); + const moduleDir = getModuleDir(); + const candidates = uniqueCandidates([ + process.env.BENCHLOCAL_APP_PACKAGE_JSON, + path.resolve(cwd, 'app', 'package.json'), + path.resolve(cwd, '..', 'app', 'package.json'), + path.resolve(cwd, 'package.json'), + path.resolve(moduleDir, '..', '..', 'package.json'), + path.resolve(moduleDir, '..', '..', 'app', 'package.json'), + path.resolve(moduleDir, '..', '..', '..', 'app', 'package.json'), + ]); + + const appMatch = 
firstExisting(candidates, isBenchLocalAppPackageJson); + if (appMatch) { + return appMatch; + } + + return firstExisting(candidates, isExistingFile) ?? candidates[0]; +} + +export function resolveAppRoot(): string { + return path.dirname(resolveAppPackageJsonPath()); +} + +export function resolveWorkspaceRoot(): string { + const envWorkspaceRoot = process.env.BENCHLOCAL_WORKSPACE_ROOT; + if (envWorkspaceRoot && isExistingDir(envWorkspaceRoot)) { + return envWorkspaceRoot; + } + + const appRoot = resolveAppRoot(); + const cwd = process.cwd(); + const moduleDir = getModuleDir(); + const candidates = uniqueCandidates([ + path.dirname(appRoot), + path.resolve(cwd), + path.resolve(cwd, '..'), + path.resolve(moduleDir, '..', '..'), + path.resolve(moduleDir, '..', '..', '..'), + ]); + + const workspaceMatch = firstExisting( + candidates, + (candidate) => + isExistingDir(path.join(candidate, 'themes')) || + isBenchLocalAppPackageJson(path.join(candidate, 'app', 'package.json')), + ); + + return ( + workspaceMatch ?? + firstExisting(candidates, isExistingDir) ?? + path.dirname(appRoot) + ); +} + +export function resolveRendererOutDir(): string { + const workspaceRoot = resolveWorkspaceRoot(); + const cwd = process.cwd(); + const resourcesPath = getResourcesPath(); + const candidates = uniqueCandidates([ + process.env.BENCHLOCAL_RENDERER_OUT_DIR, + resourcesPath ? path.join(resourcesPath, 'renderer-out') : undefined, + path.join(workspaceRoot, 'dist', 'renderer-out'), + path.resolve(cwd, 'dist', 'renderer-out'), + path.resolve(cwd, '..', 'dist', 'renderer-out'), + ]); + + return firstExisting(candidates, isExistingDir) ?? candidates[0]; +} + +export function resolveBuiltInThemesDir(): string { + const workspaceRoot = resolveWorkspaceRoot(); + const appRoot = resolveAppRoot(); + const cwd = process.cwd(); + const resourcesPath = getResourcesPath(); + const candidates = uniqueCandidates([ + process.env.BENCHLOCAL_THEMES_DIR, + resourcesPath ? 
path.join(resourcesPath, 'themes') : undefined, + path.join(workspaceRoot, 'themes'), + path.join(appRoot, 'themes'), + path.resolve(cwd, 'themes'), + ]); + + return firstExisting(candidates, isExistingDir) ?? candidates[0]; +} + +export function resolveLicensePath(): string { + const workspaceRoot = resolveWorkspaceRoot(); + const appRoot = resolveAppRoot(); + const resourcesPath = getResourcesPath(); + const candidates = uniqueCandidates([ + process.env.BENCHLOCAL_LICENSE_PATH, + resourcesPath ? path.join(resourcesPath, 'LICENSE') : undefined, + path.join(workspaceRoot, 'LICENSE'), + path.resolve(appRoot, '..', 'LICENSE'), + ]); + + return firstExisting(candidates, isExistingFile) ?? candidates[0]; +} + +export function pathExists(targetPath: string): boolean { + return existsSync(targetPath); +} diff --git a/app/src/server/sse-route.ts b/app/src/server/sse-route.ts index b987e2a..ef1e559 100644 --- a/app/src/server/sse-route.ts +++ b/app/src/server/sse-route.ts @@ -1,39 +1,40 @@ -import type { FastifyInstance, FastifyReply, FastifyRequest } from "fastify"; -import { sseBus } from "./sse-bus"; +import type { FastifyInstance, FastifyReply, FastifyRequest } from 'fastify'; +import { sseBus } from './sse-bus'; export function registerSseRoute(server: FastifyInstance) { - server.get( - "/api/events/sse", - { handlerTimeout: 0 }, - async (req: FastifyRequest, reply: FastifyReply) => { - reply.header("Content-Type", "text/event-stream"); - reply.header("Cache-Control", "no-cache"); - reply.header("Connection", "keep-alive"); - reply.header("X-Accel-Buffering", "no"); - reply.raw.write(": connected\n\n"); + server.get( + '/api/events/sse', + async (req: FastifyRequest, reply: FastifyReply) => { + reply.hijack(); + reply.raw.statusCode = 200; + reply.raw.setHeader('Content-Type', 'text/event-stream; charset=utf-8'); + reply.raw.setHeader('Cache-Control', 'no-cache, no-transform'); + reply.raw.setHeader('Connection', 'keep-alive'); + 
reply.raw.setHeader('X-Accel-Buffering', 'no'); + reply.raw.setTimeout(0); + reply.raw.flushHeaders?.(); + reply.raw.write(': connected\n\n'); - const channels = [ - "run-event", - "benchpack-mutation-progress", - "verifier-progress", - ]; + const channels = [ + 'run-event', + 'benchpack-mutation-progress', + 'verifier-progress', + ]; - const unsubscribers = channels.map((ch) => - sseBus.on(ch, (data) => { - reply.raw.write(`event: ${ch}\ndata: ${JSON.stringify(data)}\n\n`); - }), - ); + const unsubscribers = channels.map((ch) => + sseBus.on(ch, (data) => { + reply.raw.write(`event: ${ch}\ndata: ${JSON.stringify(data)}\n\n`); + }), + ); - const keepAlive = setInterval(() => { - reply.raw.write(": heartbeat\n\n"); - }, 15000); + const keepAlive = setInterval(() => { + reply.raw.write(': heartbeat\n\n'); + }, 15000); - req.raw.on("close", () => { - unsubscribers.forEach((u) => u()); - clearInterval(keepAlive); - }); - - return new Promise(() => {}); - }, - ); + req.raw.on('close', () => { + unsubscribers.forEach((u) => u()); + clearInterval(keepAlive); + }); + }, + ); } diff --git a/app/src/server/themes.ts b/app/src/server/themes.ts index 3432d51..f532522 100644 --- a/app/src/server/themes.ts +++ b/app/src/server/themes.ts @@ -1,116 +1,88 @@ -import { accessSync, promises as fs } from "node:fs"; -import path from "node:path"; -import { fileURLToPath } from "node:url"; +import { promises as fs } from 'node:fs'; +import path from 'node:path'; import type { - BenchLocalThemeDefinition, - BenchLocalThemeDescriptor, -} from "@benchlocal/core"; + BenchLocalThemeDefinition, + BenchLocalThemeDescriptor, +} from '@benchlocal/core'; import { - getThemeStorageDir, - loadThemeDefinitionFromFile, -} from "@benchlocal/core"; - -function getBenchLocalWorkspaceRoot(): string { - return path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../.."); -} + getThemeStorageDir, + loadThemeDefinitionFromFile, +} from '@benchlocal/core'; +import { resolveBuiltInThemesDir } from 
'./path-resolution'; function getBuiltInThemesDir(): string { - // Check for a themes directory next to the project root - const workspaceRoot = getBenchLocalWorkspaceRoot(); - const workspaceThemes = path.join(workspaceRoot, "themes"); - - // For the server, we also check a bundled themes directory - const bundledThemes = path.join( - path.dirname(fileURLToPath(import.meta.url)), - "../../themes", - ); - - // Prefer workspace themes if they exist - try { - accessSync(workspaceThemes); - return workspaceThemes; - } catch { - // Fall back to bundled - } - - try { - accessSync(bundledThemes); - return bundledThemes; - } catch { - // Last resort: workspace - return workspaceThemes; - } + return resolveBuiltInThemesDir(); } async function listThemeFiles(targetDir: string): Promise { - try { - const entries = await fs.readdir(targetDir, { withFileTypes: true }); - return entries - .filter((entry) => entry.isFile() && entry.name.endsWith(".json")) - .map((entry) => path.join(targetDir, entry.name)); - } catch { - return []; - } + try { + const entries = await fs.readdir(targetDir, { withFileTypes: true }); + return entries + .filter((entry) => entry.isFile() && entry.name.endsWith('.json')) + .map((entry) => path.join(targetDir, entry.name)); + } catch { + return []; + } } export async function listAvailableThemes(): Promise< - BenchLocalThemeDescriptor[] + BenchLocalThemeDescriptor[] > { - const builtInFiles = await listThemeFiles(getBuiltInThemesDir()); - const userDir = getThemeStorageDir(); - await fs.mkdir(userDir, { recursive: true }); - const userFiles = await listThemeFiles(userDir); + const builtInFiles = await listThemeFiles(getBuiltInThemesDir()); + const userDir = getThemeStorageDir(); + await fs.mkdir(userDir, { recursive: true }); + const userFiles = await listThemeFiles(userDir); - const themes: BenchLocalThemeDescriptor[] = []; + const themes: BenchLocalThemeDescriptor[] = []; - for (const filePath of builtInFiles) { - try { - const theme = await 
loadThemeDefinitionFromFile(filePath); - themes.push({ - id: theme.id, - name: theme.name, - colorScheme: theme.colorScheme, - source: "builtin", - path: filePath, - }); - } catch { - // Skip invalid theme files. - } - } + for (const filePath of builtInFiles) { + try { + const theme = await loadThemeDefinitionFromFile(filePath); + themes.push({ + id: theme.id, + name: theme.name, + colorScheme: theme.colorScheme, + source: 'builtin', + path: filePath, + }); + } catch { + // Skip invalid theme files. + } + } - for (const filePath of userFiles) { - try { - const theme = await loadThemeDefinitionFromFile(filePath); - themes.push({ - id: theme.id, - name: theme.name, - colorScheme: theme.colorScheme, - source: "user", - path: filePath, - }); - } catch { - // Skip invalid theme files. - } - } + for (const filePath of userFiles) { + try { + const theme = await loadThemeDefinitionFromFile(filePath); + themes.push({ + id: theme.id, + name: theme.name, + colorScheme: theme.colorScheme, + source: 'user', + path: filePath, + }); + } catch { + // Skip invalid theme files. + } + } - return themes.sort((left, right) => { - if (left.source !== right.source) { - return left.source === "builtin" ? -1 : 1; - } + return themes.sort((left, right) => { + if (left.source !== right.source) { + return left.source === 'builtin' ? 
-1 : 1; + } - return left.name.localeCompare(right.name); - }); + return left.name.localeCompare(right.name); + }); } export async function loadAvailableTheme( - themeId: string, + themeId: string, ): Promise { - const themes = await listAvailableThemes(); - const match = themes.find((theme) => theme.id === themeId); + const themes = await listAvailableThemes(); + const match = themes.find((theme) => theme.id === themeId); - if (!match?.path) { - return null; - } + if (!match?.path) { + return null; + } - return loadThemeDefinitionFromFile(match.path); + return loadThemeDefinitionFromFile(match.path); } diff --git a/app/vite.config.web.ts b/app/vite.config.web.ts index d990a8f..15dc3f3 100644 --- a/app/vite.config.web.ts +++ b/app/vite.config.web.ts @@ -1,31 +1,37 @@ -import path from "node:path"; -import react from "@vitejs/plugin-react"; -import { defineConfig } from "vite"; +import path from 'node:path'; +import react from '@vitejs/plugin-react'; +import { defineConfig } from 'vite'; + +const apiPort = Number(process.env.BENCHLOCAL_PORT) || 4300; +const webDevPort = Number(process.env.BENCHLOCAL_WEB_PORT) || 4301; +const webRoot = path.resolve(__dirname, 'src/renderer'); +const webOutDir = path.resolve(__dirname, '../dist/renderer-out'); export default defineConfig({ - plugins: [react()], - resolve: { - alias: { - "@": path.resolve(__dirname, "src"), - "@core": path.resolve(__dirname, "../packages/benchlocal-core/src"), - "@benchpack-host": path.resolve( - __dirname, - "../packages/benchpack-host/src", - ), - }, - }, - build: { - outDir: "out/renderer-out", - emptyOutDir: true, - }, - server: { - port: 4300, - host: "0.0.0.0", - proxy: { - "/api": { - target: "http://localhost:4300", - changeOrigin: true, - }, - }, - }, + root: webRoot, + plugins: [react()], + resolve: { + alias: { + '@': path.resolve(__dirname, 'src'), + '@core': path.resolve(__dirname, '../packages/benchlocal-core/src'), + '@benchpack-host': path.resolve( + __dirname, + 
'../packages/benchpack-host/src', + ), + }, + }, + build: { + outDir: webOutDir, + emptyOutDir: true, + }, + server: { + port: webDevPort, + host: '0.0.0.0', + proxy: { + '/api': { + target: `http://localhost:${apiPort}`, + changeOrigin: true, + }, + }, + }, });