diff --git a/middleware/packages/harness-orchestrator/src/buildOrchestrator.ts b/middleware/packages/harness-orchestrator/src/buildOrchestrator.ts index 4c202c2e..37a26c1c 100644 --- a/middleware/packages/harness-orchestrator/src/buildOrchestrator.ts +++ b/middleware/packages/harness-orchestrator/src/buildOrchestrator.ts @@ -147,6 +147,16 @@ export interface BuiltOrchestrator { readonly orchestrator: Orchestrator; /** The `chatAgent` bundle — verifier-wrapped agent + raw orchestrator. */ readonly bundle: ChatAgentBundle; + /** + * The resolved orchestrator model after the per-Agent overlay was applied — + * i.e. the actual id the turn loop will send to the provider for this Agent. + * Mirrors `config.model`; carried out so callers (registry log, /admin UI) + * can read it without re-computing the overlay. Issue #296 acceptance #4. + */ + readonly effectiveModel: string; + /** Same as `config.modelRouting` — surfaced so callers can describe the + * per-turn routing the Agent is on without inspecting the orchestrator. */ + readonly effectiveModelRouting?: ModelRoutingConfig; } /** @@ -331,6 +341,8 @@ export function buildOrchestratorForAgent( sessionLogger, chatSessionStore, }, + effectiveModel: config.model, + ...(config.modelRouting ? { effectiveModelRouting: config.modelRouting } : {}), }; } @@ -356,5 +368,7 @@ export function buildOrchestratorForAgent( return { orchestrator, bundle: { agent, raw: orchestrator, sessionLogger, chatSessionStore }, + effectiveModel: config.model, + ...(config.modelRouting ? { effectiveModelRouting: config.modelRouting } : {}), }; } diff --git a/middleware/packages/harness-orchestrator/src/index.ts b/middleware/packages/harness-orchestrator/src/index.ts index ffcf62c0..796a9441 100644 --- a/middleware/packages/harness-orchestrator/src/index.ts +++ b/middleware/packages/harness-orchestrator/src/index.ts @@ -69,6 +69,8 @@ export type { ScopedMemoryStoreOptions } from './registry/scopedMemoryStore.js'; export { ConfigStore, ConfigValidationError, + validateModelRef, + validateModelRoutingShape, } from './registry/configStore.js'; export type { AgentInput, @@ -123,7 +125,10 @@ export type { SubAgentGraph, SubAgentToolDeps, } from './registry/subAgentTools.js'; -export { resolveAgentModelRouting } from './registry/agentRuntime.js'; +export { + DEFAULT_ORCHESTRATOR_MODEL, + resolveAgentModelRouting, +} from './registry/agentRuntime.js'; export type { ResolvedAgentRuntime } from './registry/agentRuntime.js'; // Per-Agent Orchestrator factory (US3) — re-exported so US4-style external diff --git a/middleware/packages/harness-orchestrator/src/plugin.ts b/middleware/packages/harness-orchestrator/src/plugin.ts index d1635d4e..161f7a7f 100644 --- a/middleware/packages/harness-orchestrator/src/plugin.ts +++ b/middleware/packages/harness-orchestrator/src/plugin.ts @@ -54,6 +54,7 @@ import type { TurnHookRunner } from './turnHooks.js'; import type { ChatSessionStore } from './chatSessionStore.js'; import type { NativeToolRegistry } from './nativeToolRegistry.js'; import type { Orchestrator } from './orchestrator.js'; +import { DEFAULT_ORCHESTRATOR_MODEL } from './registry/agentRuntime.js'; import { ConfigStore } from './registry/configStore.js'; import { OrchestratorRegistry, @@ -139,11 +140,11 @@ const GRAPH_POOL_SERVICE = 'graphPool'; const PLUGIN_CAPABILITIES_SERVICE = 'pluginCapabilities'; // Fallback when the operator has not set `orchestrator_model` in the install -// config. Must be a currently-served Anthropic model id — a stale/typo'd id -// makes every turn fail with `404 not_found_error` (the orchestrator main -// loop has no other model source). Kept in sync with the kernel default -// `ORCHESTRATOR_MODEL` in middleware/src/config.ts. -const DEFAULT_MODEL = 'claude-opus-4-8'; +// config. Shared with the registry's per-instance resolution (tier 3) so the +// install-config default and the per-Agent overlay fall back to the same id. +// Kept in sync with the kernel default `ORCHESTRATOR_MODEL` in +// middleware/src/config.ts. +const DEFAULT_MODEL = DEFAULT_ORCHESTRATOR_MODEL; // 8192, not 4096: a verbose preamble + a large structured tool call (e.g. a // multi-sheet create_xlsx with formulas) truncates at 4096 → `max_tokens` // mid-tool-call, so the file is never built. Also enforced as a floor below so diff --git a/middleware/packages/harness-orchestrator/src/registry/agentGraphStore.ts b/middleware/packages/harness-orchestrator/src/registry/agentGraphStore.ts index 0abb23af..64632ccb 100644 --- a/middleware/packages/harness-orchestrator/src/registry/agentGraphStore.ts +++ b/middleware/packages/harness-orchestrator/src/registry/agentGraphStore.ts @@ -1,6 +1,6 @@ import type { Pool } from 'pg'; -import { ConfigValidationError } from './configStore.js'; +import { ConfigValidationError, validateModelRef } from './configStore.js'; /** * Agent Builder graph store (P0). @@ -316,6 +316,12 @@ export class AgentGraphStore { } async createSubAgent(input: SubAgentInput): Promise { + // Issue #296 follow-up — guard sub-agent model writes against unknown + // ids the same way `setModelRouting` guards the orchestrator model. + // Empty / null skips validation (= inherit parent agent / platform). + if (input.model != null && input.model.trim() !== '') { + validateModelRef('subAgent.model', input.model.trim()); + } try { const { rows } = await this.pool.query( `INSERT INTO agent_subagents @@ -347,6 +353,12 @@ export class AgentGraphStore { } async updateSubAgent(id: string, patch: SubAgentPatch): Promise { + // Issue #296 follow-up — see `createSubAgent` rationale. `null` is the + // wire form for "clear" (no validation); empty-string is treated the + // same so the UI dropdown's "(default)" choice round-trips cleanly. + if (patch.model != null && patch.model.trim() !== '') { + validateModelRef('subAgent.model', patch.model.trim()); + } const { rows } = await this.pool.query( `UPDATE agent_subagents SET name = COALESCE($2, name), diff --git a/middleware/packages/harness-orchestrator/src/registry/agentRuntime.ts b/middleware/packages/harness-orchestrator/src/registry/agentRuntime.ts index bfdbfc37..56167744 100644 --- a/middleware/packages/harness-orchestrator/src/registry/agentRuntime.ts +++ b/middleware/packages/harness-orchestrator/src/registry/agentRuntime.ts @@ -1,3 +1,5 @@ +import { resolveModelRef } from '@omadia/llm-provider'; + import type { ModelRoutingConfig as RuntimeModelRouting } from '../modelRouter.js'; /** @@ -17,6 +19,60 @@ import type { ModelRoutingConfig as RuntimeModelRouting } from '../modelRouter.j const DEFAULT_CLASSIFIER_MODEL = 'claude-haiku-4-5'; +/** + * Hard fallback orchestrator model — the last tier of the per-instance model + * resolution (issue #296 AC#2): + * + * 1. the Agent's own `model_routing.main` (operator's per-instance choice) + * 2. the global seeded platform default (`orchestrator_model` install config, + * itself seeded from the `ORCHESTRATOR_MODEL` env in middleware/src/config.ts) + * 3. this constant — so an empty / misconfigured platform default never yields + * an empty model id (which would 404 on every turn). + * + * Must be a currently-served model id, kept in sync with `ORCHESTRATOR_MODEL` + * (middleware/src/config.ts) and the plugin's install-config fallback. + */ +export const DEFAULT_ORCHESTRATOR_MODEL = 'claude-opus-4-8'; + +/** + * Resolve a model ref to the active provider's concrete bare `modelId` + * (issue #296). + * + * Both the orchestrator main loop AND in-process sub-agents send `model` RAW to + * a single concrete provider adapter — there is no ref→modelId resolution in + * the send path. The Admin picker stores a provider-qualified id + * (`anthropic:claude-opus-4-8`) or a legacy alias (`opus`); sending either raw + * 404s every turn. Returns: + * - registry-known, same provider → the bare vendor `modelId` + * - registry-known, DIFFERENT provider than `activeProvider` → `undefined` + * (cross-provider is out of scope and would 404 on the wrong adapter — the + * caller falls back to its own default) + * - registry-UNKNOWN (not in the curated set) → the raw trimmed ref. The + * registry is a curated subset, not the universe of valid API ids — an id + * the registry does not list may still be served (e.g. an undated default + * or an operator-typed id). Passing it through preserves pre-resolution + * behaviour, matching the `resolveModelRef(x)?.modelId ?? x` contract used + * elsewhere (e.g. `builderPreviewPrompt`). + * - empty / whitespace → `undefined` (no model specified → caller falls back) + * + * The CLI provider owns its own alias scheme (`sonnet`/`opus`) and must be + * handled by the caller BEFORE this — pass its refs through untouched. + */ +export function resolveModelIdForProvider( + ref: string | null | undefined, + activeProvider: string | undefined, +): string | undefined { + const trimmed = ref?.trim(); + if (!trimmed) return undefined; + const info = resolveModelRef( + trimmed, + activeProvider ? { defaultProvider: activeProvider } : {}, + ); + if (info === undefined) return trimmed; + if (activeProvider && info.provider !== activeProvider) return undefined; + return info.modelId; +} + export interface ResolvedAgentRuntime { /** Primary model override (the agent's `main`), if set. */ readonly model?: string; diff --git a/middleware/packages/harness-orchestrator/src/registry/applyDiff.ts b/middleware/packages/harness-orchestrator/src/registry/applyDiff.ts index 1ecb341f..7639f610 100644 --- a/middleware/packages/harness-orchestrator/src/registry/applyDiff.ts +++ b/middleware/packages/harness-orchestrator/src/registry/applyDiff.ts @@ -10,7 +10,11 @@ import type { SubAgentRow, ToolGrantRow, } from './agentGraphStore.js'; -import { resolveAgentModelRouting } from './agentRuntime.js'; +import { + DEFAULT_ORCHESTRATOR_MODEL, + resolveAgentModelRouting, + resolveModelIdForProvider, +} from './agentRuntime.js'; import type { AgentPluginRow, AgentRow, @@ -160,18 +164,56 @@ export function buildForAgent( // platform default: `main` overrides the model, `triage` mode adds per-turn // Haiku→Sonnet/Opus routing. Falls back to the platform runtime when unset. const routing = resolveAgentModelRouting(agent.modelRouting); + + // The orchestrator hands `model` to a SINGLE concrete provider adapter, which + // sends it RAW to the wire API (no ref→modelId resolution in the send path). + // The Admin picker stores a provider-qualified id / alias, so resolve the + // per-Agent overlay to the active provider's concrete `modelId` HERE — see + // `resolveModelIdForProvider` (issue #296). The CLI provider owns its own + // alias scheme so its refs pass through untouched; the platform default + // (`runtime.model`, operator-set env) is left as-is — it works raw today and + // resolving it would change established behaviour. + const activeProvider = deps.provider?.id; + const resolveOverlay = (ref: string | undefined): string | undefined => + activeProvider === 'claude-cli' + ? ref?.trim() || undefined + : resolveModelIdForProvider(ref, activeProvider); + + // Per-instance model resolution (issue #296 AC#2), three tiers: + // 1. the Agent's `model_routing.main` (operator's per-Agent choice) + // 2. the global seeded platform default `runtime.model` + // (= `orchestrator_model` install config = `ORCHESTRATOR_MODEL` env) + // 3. `DEFAULT_ORCHESTRATOR_MODEL` — guards against an empty / whitespace + // platform default so the turn loop never gets an empty model id. + const model = + resolveOverlay(routing.model) || + runtime.model?.trim() || + DEFAULT_ORCHESTRATOR_MODEL; + + // Resolve the per-turn routing sub-models the same way. Any sub-model that + // does not resolve to the active provider falls back to the resolved `model` + // so every id the turn loop sends is a valid same-provider `modelId`. + const overlayRouting = routing.modelRouting + ? { + classifierModel: + resolveOverlay(routing.modelRouting.classifierModel) ?? model, + simpleModel: resolveOverlay(routing.modelRouting.simpleModel) ?? model, + complexModel: resolveOverlay(routing.modelRouting.complexModel) ?? model, + } + : undefined; + return buildOrchestratorForAgent( { agentId: agent.slug, - model: routing.model ?? runtime.model, + model, maxTokens: runtime.maxTokens, maxToolIterations: runtime.maxToolIterations, // Per-turn model routing: prefer the agent's own persisted routing // (Agent Builder P5); otherwise fall back to the platform default // `runtime.modelRouting` so registry-managed orchestrators still emit // `turn_routing` and the UI renders the Haiku-triage badge (origin/main). - ...((routing.modelRouting ?? runtime.modelRouting) - ? { modelRouting: routing.modelRouting ?? runtime.modelRouting } + ...((overlayRouting ?? runtime.modelRouting) + ? { modelRouting: overlayRouting ?? runtime.modelRouting } : {}), ...(runtime.loopRepeatSoft !== undefined ? { loopRepeatSoft: runtime.loopRepeatSoft } diff --git a/middleware/packages/harness-orchestrator/src/registry/configStore.ts b/middleware/packages/harness-orchestrator/src/registry/configStore.ts index cb8063b7..8fec1950 100644 --- a/middleware/packages/harness-orchestrator/src/registry/configStore.ts +++ b/middleware/packages/harness-orchestrator/src/registry/configStore.ts @@ -1,5 +1,7 @@ import type { Pool } from 'pg'; +import { resolveModelRef } from '@omadia/llm-provider'; + import { AgentGraphStore, type ScheduleRow, @@ -110,6 +112,67 @@ export class ConfigValidationError extends Error { const SLUG_RE = /^[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?$/; +/** + * Validate persisted `model_routing` JSON before write. Shape: + * { mode: 'single'|'triage', main: , triage?: , simple?: } + * Each `` must resolve via `resolveModelRef` (provider-qualified id, + * legacy alias, bare vendor id, or `class:*`). An empty `main` is illegal — + * use `null` to clear. Optional fields may be absent or empty-string (latter + * treated as absent so a UI dropdown's "(default)" choice round-trips cleanly). + */ +export function validateModelRoutingShape( + routing: Record, +): void { + const mode = routing['mode']; + if (mode !== 'single' && mode !== 'triage') { + throw new ConfigValidationError( + `modelRouting.mode must be 'single' or 'triage' (got ${JSON.stringify(mode)})`, + ); + } + const main = routing['main']; + if (typeof main !== 'string' || main.trim() === '') { + throw new ConfigValidationError( + `modelRouting.main is required (clear routing by passing null instead)`, + ); + } + validateModelRef(`modelRouting.main`, main.trim()); + if (mode === 'triage') { + for (const key of ['triage', 'simple'] as const) { + const raw = routing[key]; + if (raw === undefined || raw === null) continue; + if (typeof raw !== 'string') { + throw new ConfigValidationError( + `modelRouting.${key} must be a string (got ${typeof raw})`, + ); + } + const trimmed = raw.trim(); + if (trimmed === '') continue; + validateModelRef(`modelRouting.${key}`, trimmed); + } + } +} + +/** + * Throw `ConfigValidationError` when `ref` is non-empty and does not resolve + * to any model registered with `@omadia/llm-provider`. Used by every persisted + * model-id surface (orchestrator routing, sub-agent overrides) so the operator + * cannot pin runtime to an id the live provider set does not serve — an + * unknown id would 404 at every turn. Empty / whitespace `ref` is rejected — + * callers should skip the validator when clearing the field instead. + */ +export function validateModelRef(field: string, ref: string): void { + if (typeof ref !== 'string' || ref.trim() === '') { + throw new ConfigValidationError( + `${field} must be a non-empty model ref (clear with null instead)`, + ); + } + if (resolveModelRef(ref.trim()) === undefined) { + throw new ConfigValidationError( + `${field} '${ref}' is not registered with any installed LLM provider`, + ); + } +} + interface AgentDbRow { id: string; slug: string; @@ -283,11 +346,18 @@ export class ConfigStore { } /** Agent Builder — set (or clear, with null) the per-agent model routing. - * Direct write (not COALESCE) so the operator can disable routing. */ + * Direct write (not COALESCE) so the operator can disable routing. + * + * Validates `main`/`triage`/`simple` against `@omadia/llm-provider` so an + * operator (or a stale REST client) cannot pin an agent to a model id that + * no installed provider serves — that would crash every turn at runtime + * with `404 not_found_error`. `null` clears routing back to the platform + * default and skips validation. */ async setModelRouting( id: string, routing: Record | null, ): Promise { + if (routing) validateModelRoutingShape(routing); const { rows } = await this.pool.query( `UPDATE agents SET model_routing = $2::jsonb, updated_at = now() WHERE id = $1 RETURNING *`, diff --git a/middleware/packages/harness-orchestrator/src/registry/index.ts b/middleware/packages/harness-orchestrator/src/registry/index.ts index c55e5cda..25b4658c 100644 --- a/middleware/packages/harness-orchestrator/src/registry/index.ts +++ b/middleware/packages/harness-orchestrator/src/registry/index.ts @@ -333,6 +333,10 @@ export class OrchestratorRegistry { agentId: action.agent.id, plugins: plugins.filter((p) => p.enabled).map((p) => p.pluginId), memoryScope, + effectiveModel: built.effectiveModel, + ...(built.effectiveModelRouting + ? { effectiveModelRouting: built.effectiveModelRouting } + : {}), }); return; } @@ -376,6 +380,10 @@ export class OrchestratorRegistry { agentId: action.agent.id, reason: action.reason, replaced: !!before, + effectiveModel: built.effectiveModel, + ...(built.effectiveModelRouting + ? { effectiveModelRouting: built.effectiveModelRouting } + : {}), }); return; } diff --git a/middleware/packages/harness-orchestrator/src/registry/subAgentTools.ts b/middleware/packages/harness-orchestrator/src/registry/subAgentTools.ts index 68d8a134..28770016 100644 --- a/middleware/packages/harness-orchestrator/src/registry/subAgentTools.ts +++ b/middleware/packages/harness-orchestrator/src/registry/subAgentTools.ts @@ -3,6 +3,7 @@ import type { LocalSubAgentTool } from '@omadia/plugin-api'; import { createCliSubAgent } from '../cliSubAgent.js'; import { LocalSubAgent } from '../localSubAgent.js'; +import { resolveModelIdForProvider } from './agentRuntime.js'; import type { McpManager} from '../mcp/mcpClient.js'; import { @@ -105,7 +106,7 @@ export function buildSubAgentDomainTools( : new LocalSubAgent({ name: sub.name, provider: deps.provider, - model: sub.model ?? deps.defaultModel, + model: resolveSubAgentModel(sub.model, deps), maxTokens: sub.maxTokens ?? deps.defaultMaxTokens, maxIterations: sub.maxIterations ?? deps.defaultMaxIterations, systemPrompt, @@ -124,6 +125,25 @@ export function buildSubAgentDomainTools( return tools; } +/** + * Resolve a sub-agent's persisted `model` ref to the active provider's concrete + * bare `modelId` (issue #296 MAJOR 3). `LocalSubAgent` sends `model` RAW to the + * provider adapter, exactly like the orchestrator main loop — so a picker-stored + * provider-qualified id (`anthropic:claude-opus-4-8`) or alias (`opus`) would + * 404 every delegated turn. Shares the resolver with the orchestrator path + * (`resolveModelIdForProvider`); an empty / unknown / cross-provider ref + * degrades to the parent's default model instead of 404ing. The CLI sub-agent + * path keeps its own alias scheme (`cliModelAlias`) and is not routed here. + */ +export function resolveSubAgentModel( + ref: string | null | undefined, + deps: Pick, +): string { + return ( + resolveModelIdForProvider(ref, deps.provider?.id) ?? deps.defaultModel + ); +} + function resolveSubAgentTools( grants: readonly ToolGrantRow[], deps: SubAgentToolDeps, diff --git a/middleware/src/routes/agentBuilder.ts b/middleware/src/routes/agentBuilder.ts index a8e137d2..c45dae29 100644 --- a/middleware/src/routes/agentBuilder.ts +++ b/middleware/src/routes/agentBuilder.ts @@ -87,7 +87,7 @@ export function createAgentBuilderRouter( l.graph.listSchedulesForAgent(agent.id), ]); res.json( - assembleGraph(agent, bindings, subAgents, skills, grants, servers, schedules), + assembleGraph(agent, bindings, subAgents, skills, grants, servers, schedules, l.registry), ); } catch (err) { fail(res, err); @@ -193,7 +193,7 @@ export function createAgentBuilderRouter( const routing = (req.body ?? {}).modelRouting ?? null; const updated = await l.config.setModelRouting(agent.id, routing); await reload(l); - res.json(agentNode(updated)); + res.json(agentNode(updated, l.registry)); } catch (err) { fail(res, err); } @@ -475,6 +475,7 @@ function assembleGraph( grants: readonly ToolGrantRow[], servers: readonly McpServerRow[], schedules: readonly ScheduleRow[], + registry: OrchestratorRegistry | undefined, ) { const mySubs = subAgents.filter((s) => s.parentAgentId === agent.id); const subIds = new Set(mySubs.map((s) => s.id)); @@ -527,7 +528,7 @@ function assembleGraph( } return { - agent: agentNode(agent), + agent: agentNode(agent, registry), channels: bindings.map((b) => ({ channelType: b.channelType, channelKey: b.channelKey, @@ -544,7 +545,18 @@ function assembleGraph( // ── node mappers ───────────────────────────────────────────────────────────── -function agentNode(a: AgentRow) { +/** + * Map an `AgentRow` to the canvas `agent` node payload. Exported for unit + * tests so the `effectiveModel` surface stays covered without spinning up + * the express app. + */ +export function agentNode(a: AgentRow, registry: OrchestratorRegistry | undefined) { + // Issue #296 acceptance #4 — surface the orchestrator model the registry + // actually resolved for this Agent (per-Agent overlay applied to the + // platform default). Absent when the registry has not yet built the Agent + // (in-memory bootstrap / Agent disabled); UI then shows just the persisted + // `modelRouting.main` as a hint. + const built = registry?.get(a.slug)?.built; return { id: a.id, slug: a.slug, @@ -553,6 +565,7 @@ function agentNode(a: AgentRow) { privacyProfile: a.privacyProfile, status: a.status, modelRouting: (a.modelRouting as Record | null) ?? null, + effectiveModel: built?.effectiveModel ?? null, position: a.canvasPosition ?? null, }; } diff --git a/middleware/src/routes/builder.ts b/middleware/src/routes/builder.ts index dbb475f0..a5cb5bac 100644 --- a/middleware/src/routes/builder.ts +++ b/middleware/src/routes/builder.ts @@ -295,6 +295,7 @@ export function createBuilderRouter(deps: BuilderRouterDeps): Router { .filter((m) => connected === null || connected.has(m.provider)) .map((m) => ({ id: m.id, + model_id: m.modelId, label: m.label, provider: m.provider, model_class: m.modelClass, diff --git a/middleware/test/agentBuilderAgentNode.test.ts b/middleware/test/agentBuilderAgentNode.test.ts new file mode 100644 index 00000000..c409745e --- /dev/null +++ b/middleware/test/agentBuilderAgentNode.test.ts @@ -0,0 +1,87 @@ +/** + * Route surface — `agentNode()` shape on `GET /agents/:slug/graph` (and + * the `setModelRouting` response). Issue #296 acceptance #4: the registry's + * resolved orchestrator model must be reachable from the Admin UI without + * a kernel-side restart. + * + * `agentNode` is the route helper that builds the `agent` node payload from + * an `AgentRow` and the live registry. Tested directly so we don't need to + * spin up express or a real Postgres pool. + */ + +import assert from 'node:assert/strict'; +import { test } from 'node:test'; + +import type { + AgentRow, + BuiltOrchestrator, + OrchestratorRegistry, +} from '@omadia/orchestrator'; + +import { agentNode } from '../src/routes/agentBuilder.js'; + +const AGENT_ID = '00000000-0000-0000-0000-000000000001'; + +function agentRow(overrides: Partial = {}): AgentRow { + return { + id: AGENT_ID, + slug: 'public', + name: 'Public', + description: null, + privacyProfile: 'default', + status: 'enabled', + createdAt: new Date(0), + updatedAt: new Date(0), + ...overrides, + }; +} + +/** Minimal `OrchestratorRegistry` stub — `agentNode` only calls `.get(slug)`. */ +function fakeRegistry( + slug: string, + built: Partial, +): OrchestratorRegistry { + return { + get: (s: string) => (s === slug ? { built } : undefined), + } as unknown as OrchestratorRegistry; +} + +test('effectiveModel is null when no registry is available (in-memory boot / disabled agent)', () => { + const node = agentNode(agentRow(), undefined); + assert.equal(node.effectiveModel, null); + assert.equal(node.slug, 'public'); + assert.equal(node.modelRouting, null); +}); + +test('effectiveModel is null when the registry has no built entry for the agent', () => { + // E.g. the agent is disabled, or hot-reload has not finished — the registry + // is published but `.get(slug)` returns undefined. The UI then shows just + // the persisted `modelRouting.main` hint, not a stale "Active: X" badge. + const reg = fakeRegistry('some-other-slug', { effectiveModel: 'haiku' }); + const node = agentNode(agentRow(), reg); + assert.equal(node.effectiveModel, null); +}); + +test('effectiveModel mirrors the registry-resolved model id (per-agent overlay applied)', () => { + const reg = fakeRegistry('public', { effectiveModel: 'claude-opus-4-8' }); + const node = agentNode(agentRow(), reg); + assert.equal(node.effectiveModel, 'claude-opus-4-8'); +}); + +test('persisted modelRouting passes through verbatim alongside effectiveModel', () => { + // setModelRouting -> agentNode echoes the persisted JSON so the Inspector + // hydrates form state from the response without a second GET. + const reg = fakeRegistry('public', { effectiveModel: 'claude-haiku-4-5' }); + const node = agentNode( + agentRow({ + modelRouting: { mode: 'triage', main: 'claude-opus-4-8', triage: 'claude-haiku-4-5' }, + }), + reg, + ); + assert.deepEqual(node.modelRouting, { + mode: 'triage', + main: 'claude-opus-4-8', + triage: 'claude-haiku-4-5', + }); + assert.equal(node.effectiveModel, 'claude-haiku-4-5'); +}); diff --git a/middleware/test/agentGraphStoreSubAgentModelValidation.test.ts b/middleware/test/agentGraphStoreSubAgentModelValidation.test.ts new file mode 100644 index 00000000..c08d0e16 --- /dev/null +++ b/middleware/test/agentGraphStoreSubAgentModelValidation.test.ts @@ -0,0 +1,153 @@ +/** + * Validation guard on sub-agent `model` writes (issue #296 follow-up). + * + * `createSubAgent` / `updateSubAgent` accept a free-form `model: string | null`. + * `null` (or empty) means "inherit parent agent" and skips validation; a + * non-empty value must resolve via `@omadia/llm-provider` for the same reason + * orchestrator routing does — an unknown id would 404 at every turn the + * sub-agent runs. + * + * Pool is stubbed: the validator throws BEFORE any SQL is sent, so unknown ids + * never reach `INSERT`/`UPDATE`. The stub also asserts the inverse — accepted + * writes DO reach SQL. + */ + +import assert from 'node:assert/strict'; +import { beforeEach, test } from 'node:test'; + +import type { Pool } from 'pg'; + +import { + clearExternalModels, + registerExternalModels, + type ModelInfo, +} from '@omadia/llm-provider'; + +import { AgentGraphStore } from '../packages/harness-orchestrator/src/registry/agentGraphStore.js'; +import { ConfigValidationError } from '../packages/harness-orchestrator/src/registry/configStore.js'; + +const OPUS: ModelInfo = { + id: 'anthropic:claude-opus-4-8', + provider: 'anthropic', + modelId: 'claude-opus-4-8', + label: 'Claude Opus 4.8', + class: 'frontier', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['opus'], +}; + +beforeEach(() => { + clearExternalModels(); + registerExternalModels([OPUS]); +}); + +interface QueryCall { + sql: string; + params: unknown[]; +} + +/** Capture every `query()` call; return a deterministic `SubAgentDbRow`-shaped + * result so `mapSubAgent` succeeds and the call site doesn't crash on `rows[0]`. */ +function fakePool(): { pool: Pool; calls: QueryCall[] } { + const calls: QueryCall[] = []; + const pool = { + query: async (sql: string, params: unknown[]) => { + calls.push({ sql, params }); + return { + rows: [ + { + id: '00000000-0000-0000-0000-0000000000aa', + parent_agent_id: '00000000-0000-0000-0000-000000000001', + name: 'researcher', + skill_id: null, + model: (params[3] ?? params[2] ?? null) as string | null, + max_tokens: null, + max_iterations: null, + system_prompt_override: null, + status: 'enabled', + position: null, + created_at: new Date(0), + updated_at: new Date(0), + }, + ], + }; + }, + } as unknown as Pool; + return { pool, calls }; +} + +test('createSubAgent rejects an unknown model id BEFORE touching the DB', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await assert.rejects( + () => + store.createSubAgent({ + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'researcher', + model: 'gpt-99-not-registered', + }), + (err) => + err instanceof ConfigValidationError && + /subAgent\.model 'gpt-99-not-registered'/.test(err.message), + ); + assert.equal(calls.length, 0, 'no SQL was sent for the rejected write'); +}); + +test('createSubAgent accepts a registered model and persists the row', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + const row = await store.createSubAgent({ + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'researcher', + model: 'claude-opus-4-8', + }); + assert.equal(row.model, 'claude-opus-4-8'); + assert.equal(calls.length, 1); + assert.match(calls[0]!.sql, /INSERT INTO agent_subagents/); +}); + +test('createSubAgent treats null / empty model as "inherit" — no validation, write fires', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + // null = clear / inherit parent; must NOT trigger the registered-model check. + await store.createSubAgent({ + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'r1', + model: null, + }); + // empty string round-trips from the UI dropdown's "(default)" choice — same + // semantic as null per agentGraphStore guard. + await store.createSubAgent({ + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'r2', + model: '', + }); + assert.equal(calls.length, 2, 'both writes hit SQL'); +}); + +test('updateSubAgent rejects an unknown model id BEFORE touching the DB', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await assert.rejects( + () => + store.updateSubAgent('00000000-0000-0000-0000-0000000000aa', { + model: 'definitely-not-real', + }), + (err) => + err instanceof ConfigValidationError && + /subAgent\.model 'definitely-not-real'/.test(err.message), + ); + assert.equal(calls.length, 0); +}); + +test('updateSubAgent without a `model` patch skips validation entirely', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await store.updateSubAgent('00000000-0000-0000-0000-0000000000aa', { + name: 'renamed', + }); + assert.equal(calls.length, 1); + assert.match(calls[0]!.sql, /UPDATE agent_subagents/); +}); diff --git a/middleware/test/configStoreModelRoutingValidation.test.ts b/middleware/test/configStoreModelRoutingValidation.test.ts new file mode 100644 index 00000000..0c8de3dc --- /dev/null +++ b/middleware/test/configStoreModelRoutingValidation.test.ts @@ -0,0 +1,190 @@ +/** + * Validation guard on `model_routing` writes (issue #296). + * + * `setModelRouting` accepts a `{ mode, main, triage?, simple? }` JSON. Every + * model id must resolve via `@omadia/llm-provider`; an unknown id would crash + * every turn at runtime with `404 not_found_error`. The shape validator is a + * pure function so we can test it without a Postgres pool. + */ + +import assert from 'node:assert/strict'; +import { beforeEach, test } from 'node:test'; + +import { + clearExternalModels, + registerExternalModels, + type ModelInfo, +} from '@omadia/llm-provider'; + +import { + ConfigValidationError, + validateModelRef, + validateModelRoutingShape, +} from '../packages/harness-orchestrator/src/registry/configStore.js'; + +const OPUS: ModelInfo = { + id: 'anthropic:claude-opus-4-8', + provider: 'anthropic', + modelId: 'claude-opus-4-8', + label: 'Claude Opus 4.8', + class: 'frontier', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['opus'], +}; + +const HAIKU: ModelInfo = { + id: 'anthropic:claude-haiku-4-5', + provider: 'anthropic', + modelId: 'claude-haiku-4-5', + label: 'Claude Haiku 4.5', + class: 'fast', + maxTokens: 8192, + contextWindow: 200000, + vision: true, + aliases: ['haiku'], +}; + +const SONNET: ModelInfo = { + id: 'anthropic:claude-sonnet-4-6', + provider: 'anthropic', + modelId: 'claude-sonnet-4-6', + label: 'Claude Sonnet 4.6', + class: 'balanced', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['sonnet'], +}; + +beforeEach(() => { + clearExternalModels(); + registerExternalModels([OPUS, HAIKU, SONNET]); +}); + +test('accepts single mode with a registered model', () => { + validateModelRoutingShape({ mode: 'single', main: 'claude-opus-4-8' }); + validateModelRoutingShape({ mode: 'single', main: 'anthropic:claude-opus-4-8' }); + validateModelRoutingShape({ mode: 'single', main: 'opus' }); // alias +}); + +test('accepts triage mode with registered classifier + simple', () => { + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + triage: 'claude-haiku-4-5', + simple: 'claude-sonnet-4-6', + }); +}); + +test('triage mode tolerates omitted/empty triage + simple (run-time defaults)', () => { + validateModelRoutingShape({ mode: 'triage', main: 'claude-opus-4-8' }); + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + triage: '', + simple: '', + }); +}); + +test('rejects unknown main model', () => { + assert.throws( + () => validateModelRoutingShape({ mode: 'single', main: 'made-up-model-9' }), + (err) => + err instanceof ConfigValidationError && + /modelRouting\.main 'made-up-model-9'/.test(err.message), + ); +}); + +test('rejects unknown triage / simple in triage mode', () => { + assert.throws( + () => + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + triage: 'no-such-classifier', + }), + (err) => + err instanceof ConfigValidationError && + /modelRouting\.triage 'no-such-classifier'/.test(err.message), + ); + assert.throws( + () => + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + simple: 'no-such-simple', + }), + (err) => + err instanceof ConfigValidationError && + /modelRouting\.simple 'no-such-simple'/.test(err.message), + ); +}); + +test('rejects missing or empty main', () => { + assert.throws( + () => validateModelRoutingShape({ mode: 'single' } as Record), + (err) => err instanceof ConfigValidationError && /main is required/.test(err.message), + ); + assert.throws( + () => validateModelRoutingShape({ mode: 'single', main: ' ' }), + (err) => err instanceof ConfigValidationError && /main is required/.test(err.message), + ); +}); + +test('rejects unknown mode', () => { + assert.throws( + () => + validateModelRoutingShape({ + mode: 'route-everything', + main: 'claude-opus-4-8', + }), + (err) => err instanceof ConfigValidationError && /mode must be/.test(err.message), + ); +}); + +test('rejects non-string triage / simple', () => { + assert.throws( + () => + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + triage: 42, + }), + (err) => + err instanceof ConfigValidationError && /triage must be a string/.test(err.message), + ); +}); + +// ── validateModelRef (sub-agent guard) ────────────────────────────────── + +test('validateModelRef accepts a registered id (raw + alias + provider-qualified)', () => { + validateModelRef('subAgent.model', 'claude-opus-4-8'); + validateModelRef('subAgent.model', 'opus'); + validateModelRef('subAgent.model', 'anthropic:claude-opus-4-8'); +}); + +test('validateModelRef rejects an unknown id and tags the field', () => { + assert.throws( + () => validateModelRef('subAgent.model', 'no-such-model'), + (err) => + err instanceof ConfigValidationError && + /subAgent\.model 'no-such-model'/.test(err.message), + ); +}); + +test('validateModelRef rejects empty / whitespace (callers should skip instead)', () => { + assert.throws( + () => validateModelRef('subAgent.model', ''), + (err) => + err instanceof ConfigValidationError && + /must be a non-empty model ref/.test(err.message), + ); + assert.throws( + () => validateModelRef('subAgent.model', ' '), + (err) => + err instanceof ConfigValidationError && + /must be a non-empty model ref/.test(err.message), + ); +}); diff --git a/middleware/test/orchestratorRegistry.test.ts b/middleware/test/orchestratorRegistry.test.ts index aebd374d..aefa1524 100644 --- a/middleware/test/orchestratorRegistry.test.ts +++ b/middleware/test/orchestratorRegistry.test.ts @@ -16,10 +16,15 @@ * NOT prevent the other Agents from coming up. */ -import { test } from 'node:test'; +import { beforeEach, test } from 'node:test'; import assert from 'node:assert/strict'; import Anthropic from '@anthropic-ai/sdk'; +import { + clearExternalModels, + registerExternalModels, + type ModelInfo, +} from '@omadia/llm-provider'; import { InMemoryNudgeRegistry } from '@omadia/plugin-api'; import type { EntityRefBus, @@ -40,6 +45,7 @@ import { validateSnapshot, type PluginCapabilityLookup, } from '../packages/harness-orchestrator/src/registry/index.js'; +import { DEFAULT_ORCHESTRATOR_MODEL } from '../packages/harness-orchestrator/src/registry/agentRuntime.js'; function fakeNativeToolRegistry(): NativeToolRegistry { const names = new Set(); @@ -65,6 +71,14 @@ function deps(): OrchestratorDeps { }; } +/** `deps()` with a concrete provider id so the build path can pin model-ref + * resolution to a single provider (and drop cross-provider picks). The + * orchestrator only touches the provider at turn time, so a stub id suffices + * for build-time resolution assertions. */ +function depsWithProvider(id: string): OrchestratorDeps { + return { ...deps(), provider: { id } as OrchestratorDeps['provider'] }; +} + function fakeStore(snapshot: ConfigSnapshot): ConfigStore { return { loadSnapshot: () => Promise.resolve(snapshot), @@ -84,6 +98,48 @@ function agentRow(slug: string, id: string, status: AgentRow['status'] = 'enable }; } +// The build path resolves a per-Agent model ref to the active provider's +// concrete `modelId` via `@omadia/llm-provider`. Register the ids the tests +// pin so resolution lands on them instead of dropping to the platform default. +const OPUS: ModelInfo = { + id: 'anthropic:claude-opus-4-8', + provider: 'anthropic', + modelId: 'claude-opus-4-8', + label: 'Claude Opus 4.8', + class: 'frontier', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['opus'], +}; +const HAIKU: ModelInfo = { + id: 'anthropic:claude-haiku-4-5', + provider: 'anthropic', + modelId: 'claude-haiku-4-5', + label: 'Claude Haiku 4.5', + class: 'fast', + maxTokens: 8192, + contextWindow: 200000, + vision: true, + aliases: ['haiku'], +}; +const GPT: ModelInfo = { + id: 'openai:gpt-5.5', + provider: 'openai', + modelId: 'gpt-5.5', + label: 'GPT-5.5', + class: 'frontier', + maxTokens: 16384, + contextWindow: 400000, + vision: true, + aliases: [], +}; + +beforeEach(() => { + clearExternalModels(); + registerExternalModels([OPUS, HAIKU, GPT]); +}); + const twoAgentSnapshot: ConfigSnapshot = { agents: [ agentRow('public', '00000000-0000-0000-0000-000000000001'), @@ -386,3 +442,284 @@ test('SC-007 / T018: a build-time failure for Agent B does NOT prevent Agent A', // second build trip was caught and logged, not propagated. assert.ok(registry.size() >= 1, 'at least one agent survived isolation'); }); + +test('issue #296 acceptance #4: `agent added` log includes effectiveModel + effectiveModelRouting', async () => { + // Per-agent overlay: agent persists `model_routing.main = haiku` so the + // registry must build it on haiku instead of the platform default `m-default`. + // The boot log line is the operator-visible surface for that resolution. + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('public', '00000000-0000-0000-0000-000000000001'), + modelRouting: { mode: 'single', main: 'claude-haiku-4-5' }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const logged: Array<{ msg: string; fields?: Record }> = []; + const registry = new OrchestratorRegistry(fakeStore(snapshot), deps(), { + defaultRuntimeConfig: { model: 'm-default', maxTokens: 100, maxToolIterations: 4 }, + log: (msg, fields) => logged.push({ msg, ...(fields ? { fields } : {}) }), + }); + await registry.start(); + + const added = logged.find((l) => l.msg === 'registry: agent added'); + assert.ok(added, '`agent added` log was emitted'); + assert.equal(added.fields?.['slug'], 'public'); + assert.equal( + added.fields?.['effectiveModel'], + 'claude-haiku-4-5', + 'effectiveModel reflects per-agent overlay, not platform default', + ); + + // Triage overlay also surfaces effectiveModelRouting so an operator can + // confirm per-turn routing is wired without reading the orchestrator state. + const triageSnap: ConfigSnapshot = { + ...snapshot, + agents: [ + { + ...agentRow('triaged', '00000000-0000-0000-0000-000000000099'), + modelRouting: { mode: 'triage', main: 'claude-opus-4-8' }, + }, + ], + }; + logged.length = 0; + const r2 = new OrchestratorRegistry(fakeStore(triageSnap), deps(), { + defaultRuntimeConfig: { model: 'm-default', maxTokens: 100, maxToolIterations: 4 }, + log: (msg, fields) => logged.push({ msg, ...(fields ? { fields } : {}) }), + }); + await r2.start(); + const addedTriage = logged.find((l) => l.msg === 'registry: agent added'); + assert.equal(addedTriage?.fields?.['effectiveModel'], 'claude-opus-4-8'); + assert.ok( + addedTriage?.fields?.['effectiveModelRouting'], + 'effectiveModelRouting present in log when agent is on triage mode', + ); +}); + +test('issue #296 acceptance #4: `agent rebuilt` log includes the new effectiveModel after a routing change', async () => { + // Hot-apply path: a routing edit triggers a rebuild (see + // `runtimeChangeReasons:model_routing`). The rebuild log must carry the + // freshly-resolved effectiveModel so the operator can confirm the change + // landed without restarting. + const before: ConfigSnapshot = { + agents: [ + { + ...agentRow('public', '00000000-0000-0000-0000-000000000001'), + modelRouting: { mode: 'single', main: 'claude-haiku-4-5' }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const after: ConfigSnapshot = { + ...before, + agents: [ + { + ...agentRow('public', '00000000-0000-0000-0000-000000000001'), + modelRouting: { mode: 'single', main: 'claude-opus-4-8' }, + }, + ], + }; + let current: ConfigSnapshot = before; + const store: ConfigStore = { + loadSnapshot: () => Promise.resolve(current), + } as unknown as ConfigStore; + const logged: Array<{ msg: string; fields?: Record }> = []; + const registry = new OrchestratorRegistry(store, deps(), { + defaultRuntimeConfig: { model: 'm-default', maxTokens: 100, maxToolIterations: 4 }, + log: (msg, fields) => logged.push({ msg, ...(fields ? { fields } : {}) }), + }); + await registry.start(); + current = after; + await registry.reload(); + + const rebuilt = logged.find((l) => l.msg === 'registry: agent rebuilt'); + assert.ok(rebuilt, '`agent rebuilt` log was emitted on reload'); + assert.equal(rebuilt.fields?.['effectiveModel'], 'claude-opus-4-8'); + assert.match( + String(rebuilt.fields?.['reason']), + /model_routing/, + 'rebuild reason cites the routing change', + ); +}); + +test('issue #296 AC#2: per-instance model resolution is 3-tier (per-Agent → platform default → DEFAULT_ORCHESTRATOR_MODEL)', async () => { + // Tier 1 (per-Agent overlay) wins over the platform default; an Agent with no + // overlay falls to tier 2 (platform default); a blank platform default falls + // to tier 3 (DEFAULT_ORCHESTRATOR_MODEL) so the turn loop never gets an empty + // model id (which would 404 on every turn). + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('overridden', '00000000-0000-0000-0000-000000000001'), + modelRouting: { mode: 'single', main: 'claude-haiku-4-5' }, + }, + // No modelRouting → inherits the platform default (tier 2). + agentRow('inherits', '00000000-0000-0000-0000-000000000002'), + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + + // Tier 1 + tier 2 with a real platform default. + const r1 = new OrchestratorRegistry(fakeStore(snapshot), deps(), { + defaultRuntimeConfig: { model: 'platform-default', maxTokens: 100, maxToolIterations: 4 }, + }); + await r1.start(); + assert.equal( + r1.get('overridden')?.built.effectiveModel, + 'claude-haiku-4-5', + 'tier 1: per-Agent overlay wins', + ); + assert.equal( + r1.get('inherits')?.built.effectiveModel, + 'platform-default', + 'tier 2: no overlay falls to the platform default', + ); + + // Tier 3: a blank platform default + no overlay falls to the hard fallback. + const r2 = new OrchestratorRegistry(fakeStore(snapshot), deps(), { + defaultRuntimeConfig: { model: ' ', maxTokens: 100, maxToolIterations: 4 }, + }); + await r2.start(); + assert.equal( + r2.get('inherits')?.built.effectiveModel, + DEFAULT_ORCHESTRATOR_MODEL, + 'tier 3: blank platform default falls to DEFAULT_ORCHESTRATOR_MODEL', + ); +}); + +test('issue #296 BLOCKER: per-Agent model ref resolves to the active provider bare modelId (never sent raw)', async () => { + // The Admin picker writes a provider-qualified id; aliases are also valid. + // The orchestrator sends `model` RAW to a single concrete adapter, so the + // build path must resolve the overlay to the bare vendor `modelId` — sending + // `anthropic:claude-opus-4-8` or `opus` raw 404s every turn. + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('qualified', '00000000-0000-0000-0000-0000000000a1'), + modelRouting: { mode: 'single', main: 'anthropic:claude-opus-4-8' }, + }, + { + ...agentRow('alias', '00000000-0000-0000-0000-0000000000a2'), + modelRouting: { mode: 'single', main: 'opus' }, + }, + { + // triage sub-models are sent raw too — they must resolve as well. + ...agentRow('triaged', '00000000-0000-0000-0000-0000000000a3'), + modelRouting: { + mode: 'triage', + main: 'anthropic:claude-opus-4-8', + triage: 'haiku', + simple: 'anthropic:claude-haiku-4-5', + }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const registry = new OrchestratorRegistry( + fakeStore(snapshot), + depsWithProvider('anthropic'), + { defaultRuntimeConfig: { model: 'claude-opus-4-8', maxTokens: 100, maxToolIterations: 4 } }, + ); + await registry.start(); + + assert.equal( + registry.get('qualified')?.built.effectiveModel, + 'claude-opus-4-8', + 'provider-qualified id resolves to the bare modelId', + ); + assert.equal( + registry.get('alias')?.built.effectiveModel, + 'claude-opus-4-8', + 'legacy alias resolves to the bare modelId', + ); + const triaged = registry.get('triaged')?.built; + assert.equal(triaged?.effectiveModel, 'claude-opus-4-8'); + assert.deepEqual( + triaged?.effectiveModelRouting, + { + classifierModel: 'claude-haiku-4-5', + simpleModel: 'claude-haiku-4-5', + complexModel: 'claude-opus-4-8', + }, + 'every per-turn routing sub-model is resolved to a bare modelId', + ); +}); + +test('issue #296 FIX2: a cross-provider per-Agent pick is dropped, not sent to the wrong adapter', async () => { + // The orchestrator runs on ONE provider (`anthropic` here). A pick that + // resolves to a DIFFERENT provider (`openai:gpt-5.5`) would be sent to the + // anthropic adapter → 404. The build path drops it and falls back to the + // platform default so the Agent keeps running. + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('crossprovider', '00000000-0000-0000-0000-0000000000b1'), + modelRouting: { mode: 'single', main: 'openai:gpt-5.5' }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const registry = new OrchestratorRegistry( + fakeStore(snapshot), + depsWithProvider('anthropic'), + { defaultRuntimeConfig: { model: 'claude-opus-4-8', maxTokens: 100, maxToolIterations: 4 } }, + ); + await registry.start(); + assert.equal( + registry.get('crossprovider')?.built.effectiveModel, + 'claude-opus-4-8', + 'cross-provider pick is dropped → falls back to the platform default (no 404)', + ); +}); + +test('issue #296 regression: a registry-unknown per-turn classifier is passed through raw, NOT collapsed to the main model', () => { + // The default classifier (`DEFAULT_CLASSIFIER_MODEL`) and any operator-typed + // id may be absent from the curated registry yet still served by the API. + // Resolution must pass it through, not drop it — dropping would silently run + // the cheap-classifier turn on the (expensive) main model. + const undatedClassifier = 'classifier-not-in-registry'; + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('triaged', '00000000-0000-0000-0000-0000000000c1'), + modelRouting: { + mode: 'triage', + main: 'anthropic:claude-opus-4-8', + triage: undatedClassifier, + }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const registry = new OrchestratorRegistry( + fakeStore(snapshot), + depsWithProvider('anthropic'), + { defaultRuntimeConfig: { model: 'claude-opus-4-8', maxTokens: 100, maxToolIterations: 4 } }, + ); + return registry.start().then(() => { + const routing = registry.get('triaged')?.built.effectiveModelRouting; + assert.equal( + routing?.classifierModel, + undatedClassifier, + 'unknown classifier passes through raw', + ); + assert.notEqual( + routing?.classifierModel, + 'claude-opus-4-8', + 'unknown classifier is NOT collapsed to the main model', + ); + }); +}); diff --git a/middleware/test/subAgentModelResolution.test.ts b/middleware/test/subAgentModelResolution.test.ts new file mode 100644 index 00000000..ecaf2928 --- /dev/null +++ b/middleware/test/subAgentModelResolution.test.ts @@ -0,0 +1,105 @@ +/** + * Sub-agent model-ref resolution (issue #296 MAJOR 3). + * + * `LocalSubAgent` sends its `model` RAW to the provider adapter — exactly like + * the orchestrator main loop. So a picker-stored provider-qualified id or alias + * must be resolved to the active provider's bare `modelId` before it reaches the + * sub-agent, or every delegated turn 404s. `resolveSubAgentModel` is the pure + * resolver the builder uses; tested directly (the resolved id is otherwise + * private on `LocalSubAgent`). + */ + +import assert from 'node:assert/strict'; +import { beforeEach, test } from 'node:test'; + +import { + clearExternalModels, + registerExternalModels, + type LlmProvider, + type ModelInfo, +} from '@omadia/llm-provider'; + +import { resolveSubAgentModel } from '../packages/harness-orchestrator/src/registry/subAgentTools.js'; + +const OPUS: ModelInfo = { + id: 'anthropic:claude-opus-4-8', + provider: 'anthropic', + modelId: 'claude-opus-4-8', + label: 'Claude Opus 4.8', + class: 'frontier', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['opus'], +}; +const GPT: ModelInfo = { + id: 'openai:gpt-5.5', + provider: 'openai', + modelId: 'gpt-5.5', + label: 'GPT-5.5', + class: 'frontier', + maxTokens: 16384, + contextWindow: 400000, + vision: true, + aliases: [], +}; + +const DEFAULT_MODEL = 'claude-sonnet-4-6'; + +function deps(providerId?: string): { + provider: LlmProvider; + defaultModel: string; +} { + return { + provider: (providerId ? { id: providerId } : {}) as LlmProvider, + defaultModel: DEFAULT_MODEL, + }; +} + +beforeEach(() => { + clearExternalModels(); + registerExternalModels([OPUS, GPT]); +}); + +test('null / empty / whitespace ref → parent default model (inherit)', () => { + assert.equal(resolveSubAgentModel(null, deps('anthropic')), DEFAULT_MODEL); + assert.equal(resolveSubAgentModel(undefined, deps('anthropic')), DEFAULT_MODEL); + assert.equal(resolveSubAgentModel('', deps('anthropic')), DEFAULT_MODEL); + assert.equal(resolveSubAgentModel(' ', deps('anthropic')), DEFAULT_MODEL); +}); + +test('provider-qualified id, alias, and bare id all resolve to the bare modelId', () => { + assert.equal( + resolveSubAgentModel('anthropic:claude-opus-4-8', deps('anthropic')), + 'claude-opus-4-8', + ); + assert.equal(resolveSubAgentModel('opus', deps('anthropic')), 'claude-opus-4-8'); + assert.equal( + resolveSubAgentModel('claude-opus-4-8', deps('anthropic')), + 'claude-opus-4-8', + ); +}); + +test('cross-provider pick → parent default (would 404 on the wrong adapter)', () => { + assert.equal( + resolveSubAgentModel('openai:gpt-5.5', deps('anthropic')), + DEFAULT_MODEL, + 'openai model on an anthropic-bound parent is dropped', + ); +}); + +test('registry-unknown ref → passed through raw (curated registry is not the universe of valid ids; writes are validation-guarded)', () => { + // A non-empty ref the registry does not list is passed through unchanged — + // the API may still serve it (e.g. an undated id). New writes are blocked by + // the `validateModelRef` guard, so this only matters for stale configs. + assert.equal( + resolveSubAgentModel('made-up-model-9', deps('anthropic')), + 'made-up-model-9', + ); +}); + +test('no active provider id → resolves against the anthropic default, no cross-provider drop', () => { + // A deps with no provider id (older boot / stub) still resolves a known ref to + // its bare modelId; the cross-provider guard is simply skipped. + assert.equal(resolveSubAgentModel('opus', deps()), 'claude-opus-4-8'); +}); diff --git a/web-ui/app/_lib/agentBuilder.ts b/web-ui/app/_lib/agentBuilder.ts index 188e9b1a..708a0d37 100644 --- a/web-ui/app/_lib/agentBuilder.ts +++ b/web-ui/app/_lib/agentBuilder.ts @@ -98,6 +98,13 @@ export interface AgentNode { privacyProfile: PrivacyProfile; status: NodeStatus; modelRouting: ModelRoutingConfig | null; + /** + * Resolved orchestrator model the registry currently runs this Agent on + * (per-Agent overlay applied to the platform default). `null` when the + * registry has not yet built the Agent (in-memory bootstrap / Agent + * disabled). Issue #296 acceptance #4. + */ + effectiveModel: string | null; position: CanvasPosition | null; } diff --git a/web-ui/app/_lib/builderTypes.ts b/web-ui/app/_lib/builderTypes.ts index 3910bca7..ab2e3ea2 100644 --- a/web-ui/app/_lib/builderTypes.ts +++ b/web-ui/app/_lib/builderTypes.ts @@ -22,7 +22,13 @@ export interface QualityConfig { export type BuilderModelId = string; export interface BuilderModelInfo { + /** Provider-qualified id (e.g. `anthropic:claude-opus-4-8`) — the value the + * picker writes. */ id: BuilderModelId; + /** Bare vendor model id (e.g. `claude-opus-4-8`). Persisted configs from + * before the provider-qualified picker store this form, so the Inspector + * matches against it to avoid flagging a valid model as "stale". */ + model_id: string; label: string; provider: string; model_class: string; diff --git a/web-ui/app/admin/builder/panels/InspectorPanel.tsx b/web-ui/app/admin/builder/panels/InspectorPanel.tsx index daf7f2d5..3bd876e6 100644 --- a/web-ui/app/admin/builder/panels/InspectorPanel.tsx +++ b/web-ui/app/admin/builder/panels/InspectorPanel.tsx @@ -1,7 +1,7 @@ 'use client'; import { useTranslations } from 'next-intl'; -import { useState } from 'react'; +import { useEffect, useState } from 'react'; import { discoverMcpTools, @@ -16,6 +16,8 @@ import { type SkillNode, type SubAgentNode, } from '../../../_lib/agentBuilder'; +import { listBuilderModels } from '../../../_lib/api'; +import type { BuilderModelInfo } from '../../../_lib/builderTypes'; import { Button } from '@/app/_components/ui/Button'; import type { BuilderNodeData } from '../nodes/types'; import { Field, inputCls, SaveButton } from './InspectorControls'; @@ -118,6 +120,7 @@ function AgentEditor({ const [triage, setTriage] = useState(r?.triage ?? ''); const [simple, setSimple] = useState(r?.simple ?? ''); const { pending, error, run } = useSaver(); + const catalog = useModelCatalog(); async function save(): Promise { await run(async () => { @@ -132,9 +135,24 @@ function AgentEditor({ }); } + // Picker placeholder for `main`: while the catalog is loading we show + // "Loading models…"; afterwards prefer the platform default ("(platform + // default: X)") and fall back to a neutral "(default)" so a loaded catalog + // without a declared default never displays the stale loading copy. + const mainPlaceholder = catalog.loading + ? t('inspector.modelPickerLoading') + : catalog.defaultId + ? t('inspector.platformDefault', { model: catalog.defaultId }) + : t('inspector.modelDefault'); + return (

{agent.name}

+ {agent.effectiveModel && ( +

+ {t('inspector.effectiveModel', { model: agent.effectiveModel })} +

+ )} setMain(e.target.value)} className={inputCls} /> + {mode === 'triage' && ( <> - setTriage(e.target.value)} className={inputCls} /> + - setSimple(e.target.value)} className={inputCls} /> + )} - void save()} pending={pending} label={t('inspector.save')} /> + + void save()} + pending={pending || catalog.loading} + label={t('inspector.save')} + />
); } +interface ModelCatalogState { + models: BuilderModelInfo[]; + defaultId: string | null; + loading: boolean; + error: string | null; +} + +function useModelCatalog(): ModelCatalogState { + const [state, setState] = useState({ + models: [], + defaultId: null, + loading: true, + error: null, + }); + useEffect(() => { + let alive = true; + void (async () => { + try { + const res = await listBuilderModels(); + if (!alive) return; + setState({ + models: res.models, + defaultId: res.default ?? null, + loading: false, + error: null, + }); + } catch (err) { + if (!alive) return; + setState({ + models: [], + defaultId: null, + loading: false, + error: err instanceof Error ? err.message : String(err), + }); + } + })(); + return () => { + alive = false; + }; + }, []); + return state; +} + +function ModelSelect({ + value, + onChange, + catalog, + placeholder, +}: { + value: string; + onChange: (next: string) => void; + catalog: ModelCatalogState; + placeholder: string; +}): React.ReactElement { + // Stale-binding: a persisted value that no installed provider serves anymore. + // Keep it visible (disabled) so the operator sees what is set and can pick a + // replacement instead of having the field silently snap to a different model. + // Match the provider-qualified id, the bare vendor id (pre-picker configs and + // the platform default persist this form), and aliases — only a value that + // resolves to NO registered model is genuinely stale. + const known = catalog.models.some( + (m) => m.id === value || m.model_id === value || m.aliases.includes(value), + ); + const showStale = value !== '' && !known && !catalog.loading; + return ( + + ); +} + // ── Sub-agent ────────────────────────────────────────────────────────────── function SubAgentEditor({ slug, @@ -179,6 +303,10 @@ function SubAgentEditor({ const [model, setModel] = useState(subAgent.model ?? ''); const [prompt, setPrompt] = useState(subAgent.systemPromptOverride ?? ''); const { pending, error, run } = useSaver(); + // Issue #296 follow-up — sub-agent picker uses the same registry-driven + // dropdown as the parent agent so an operator cannot pin a sub-agent to a + // model id no installed provider serves. + const catalog = useModelCatalog(); async function save(): Promise { await run(async () => { @@ -197,7 +325,12 @@ function SubAgentEditor({ setName(e.target.value)} className={inputCls} /> - setModel(e.target.value)} className={inputCls} /> +