diff --git a/console/Cargo.lock b/console/Cargo.lock index 9f9b8ec9..e110ab9a 100644 --- a/console/Cargo.lock +++ b/console/Cargo.lock @@ -257,7 +257,7 @@ checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "console" -version = "0.1.1" +version = "0.1.2" dependencies = [ "anyhow", "async-trait", diff --git a/console/web/src/App.tsx b/console/web/src/App.tsx index 03f0551e..cb884aaa 100644 --- a/console/web/src/App.tsx +++ b/console/web/src/App.tsx @@ -1,11 +1,4 @@ -import { - lazy, - Suspense, - useCallback, - useEffect, - useRef, - useState, -} from 'react' +import { lazy, Suspense, useCallback, useEffect, useRef, useState } from 'react' import { ChatDock } from '@/components/chat/ChatDock' import { Dialog, @@ -20,7 +13,10 @@ import { useChatDock } from '@/hooks/use-chat-dock' import { useHashRoute, type View } from '@/hooks/use-hash-route' import { useTheme } from '@/hooks/use-theme' import { type DockSignal, getDockSignal } from '@/lib/chat-activity' -import { ConversationsProvider, useConversationsCtx } from '@/lib/conversations-context' +import { + ConversationsProvider, + useConversationsCtx, +} from '@/lib/conversations-context' import { cn } from '@/lib/utils' import { Configuration } from '@/pages/Configuration' import { Traces } from '@/pages/Traces' @@ -126,10 +122,7 @@ export function App() { - + ) diff --git a/console/web/src/components/chat/AutoAcceptToggle.tsx b/console/web/src/components/chat/AutoAcceptToggle.tsx index 99db2bf5..55cd6550 100644 --- a/console/web/src/components/chat/AutoAcceptToggle.tsx +++ b/console/web/src/components/chat/AutoAcceptToggle.tsx @@ -76,10 +76,7 @@ export function AutoAcceptToggle({ > auto-accept: {value ? 'on' : 'off'} - + {value ? 'Auto-accept is on. Approval prompts for safe calls (reads, lookups, listings) are resolved automatically. Destructive or state-mutating calls still require a click.' : 'Auto-accept is off. 
Every approval prompt waits for an explicit click.'} diff --git a/console/web/src/components/chat/ChatView.tsx b/console/web/src/components/chat/ChatView.tsx index 78b1c54a..d29ffa46 100644 --- a/console/web/src/components/chat/ChatView.tsx +++ b/console/web/src/components/chat/ChatView.tsx @@ -403,7 +403,10 @@ export function ChatView({ // instead of leaving the response looking like it just // ran out of words. Pre-fix this event didn't exist and // the same condition produced a silently truncated reply. - const noticeContent = formatStopReason(event.reason, event.message) + const noticeContent = formatStopReason( + event.reason, + event.message, + ) const notice: SystemMessage = { id: uid(), role: 'system', diff --git a/console/web/src/components/chat/Composer.tsx b/console/web/src/components/chat/Composer.tsx index 671013f3..996d96e7 100644 --- a/console/web/src/components/chat/Composer.tsx +++ b/console/web/src/components/chat/Composer.tsx @@ -1,8 +1,8 @@ import type { LexicalEditor } from 'lexical' import { useCallback, useRef, useState } from 'react' import { Button } from '@/components/ui/Button' -import type { Attachment, Mode, ModelId, ModelOption } from '@/types/chat' import type { FunctionEntry } from '@/lib/functions' +import type { Attachment, Mode, ModelId, ModelOption } from '@/types/chat' import { AttachmentButton } from './AttachmentButton' import { AttachmentChip } from './AttachmentChip' import { AutoAcceptToggle } from './AutoAcceptToggle' diff --git a/console/web/src/components/chat/FunctionCallGroup.tsx b/console/web/src/components/chat/FunctionCallGroup.tsx index 50d26ca3..e5739635 100644 --- a/console/web/src/components/chat/FunctionCallGroup.tsx +++ b/console/web/src/components/chat/FunctionCallGroup.tsx @@ -27,7 +27,12 @@ interface FunctionCallGroupProps { * `unknown`, so this guard stays narrow on purpose. 
*/ function isErrorOutput(v: unknown): boolean { - return !!v && typeof v === 'object' && !Array.isArray(v) && 'error' in (v as Record) + return ( + !!v && + typeof v === 'object' && + !Array.isArray(v) && + 'error' in (v as Record) + ) } type Tone = 'warn' | 'accent' | 'alert' | 'ink' @@ -54,7 +59,8 @@ function deriveStatus(messages: FunctionCallMessageType[]): GroupStatus { pulse: false, label: ( <> - permission to run ƒ{' '} + permission to run{' '} + ƒ{' '} {pending.functionId} ), @@ -69,8 +75,10 @@ function deriveStatus(messages: FunctionCallMessageType[]): GroupStatus { pulse: true, label: ( <> - running function {runningIdx + 1} of{' '} - {total}: ƒ{' '} + running function{' '} + {runningIdx + 1} of{' '} + {total}:{' '} + ƒ{' '} {running.functionId} ), @@ -84,7 +92,8 @@ function deriveStatus(messages: FunctionCallMessageType[]): GroupStatus { pulse: false, label: ( <> - {failed} {failed === 1 ? 'function' : 'functions'} failed + {failed}{' '} + {failed === 1 ? 'function' : 'functions'} failed {failed < total ? ( {' '} @@ -102,7 +111,8 @@ function deriveStatus(messages: FunctionCallMessageType[]): GroupStatus { pulse: false, label: ( <> - ran {total} functions for {sum}ms + ran {total} functions for{' '} + {sum}ms ), } @@ -113,7 +123,9 @@ function deriveStatus(messages: FunctionCallMessageType[]): GroupStatus { * where the user can't infer what's happening from the one-line header. 
*/ function hasConcerningChild(messages: FunctionCallMessageType[]): boolean { - return messages.some((m) => m.pendingApproval || m.running || isErrorOutput(m.output)) + return messages.some( + (m) => m.pendingApproval || m.running || isErrorOutput(m.output), + ) } export function FunctionCallGroup({ @@ -151,12 +163,21 @@ export function FunctionCallGroup({ )} > - - {status.label} + + + {status.label} + diff --git a/console/web/src/components/chat/FunctionCallMessage.tsx b/console/web/src/components/chat/FunctionCallMessage.tsx index dc863c07..6199b608 100644 --- a/console/web/src/components/chat/FunctionCallMessage.tsx +++ b/console/web/src/components/chat/FunctionCallMessage.tsx @@ -1,6 +1,11 @@ import { useEffect, useState } from 'react' +import { + SandboxFunctionIdLabel, + SandboxToolView, +} from '@/components/chat/sandbox' import { Button } from '@/components/ui/Button' import { StatusDot } from '@/components/ui/StatusDot' +import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/Tabs' import { JsonHighlight } from '@/lib/syntax' import { cn } from '@/lib/utils' import type { FunctionCallMessage as FunctionCallMessageType } from '@/types/chat' @@ -84,9 +89,18 @@ export function FunctionCallMessage({ const pending = !!message.pendingApproval const running = !!message.running const [open, setOpen] = useState(!!defaultOpen || pending) + const [tab, setTab] = useState<'terminal' | 'json'>('terminal') const [submitting, setSubmitting] = useState<'approve' | 'deny' | null>(null) const [submitError, setSubmitError] = useState(null) + const sandboxPreview = SandboxToolView.tryRenderPreview(message) + const sandboxTerminal = !pending ? 
SandboxToolView.tryRender(message) : null + const hasSandboxTerminal = sandboxTerminal != null + const showRequestPaneAbove = + !(pending && sandboxPreview) && + !(running && hasSandboxTerminal) && + !(!pending && !running && hasSandboxTerminal) + const runResolve = async (kind: 'approve' | 'deny') => { const handler = kind === 'approve' ? onApprove : onDeny if (!handler || submitting) return @@ -140,7 +154,7 @@ export function FunctionCallMessage({ <>ran )} ƒ{' '} - {message.functionId} + {!pending && !running && typeof message.durationMs === 'number' ? ( {' '} @@ -163,9 +177,41 @@ export function FunctionCallMessage({ {open ? (
- + {pending && sandboxPreview ? ( +
{sandboxPreview}
+ ) : showRequestPaneAbove ? ( + + ) : null} + {running && !pending ? ( + hasSandboxTerminal ? ( +
{sandboxTerminal}
+ ) : ( + + ) + ) : null} {!pending && !running ? ( - + hasSandboxTerminal ? ( + setTab(v as 'terminal' | 'json')} + className="border-t border-rule-2" + > + + terminal + raw json + + {sandboxTerminal} + + + + + + ) : ( + <> + + + + ) ) : null}
) : null} diff --git a/console/web/src/components/chat/LexicalShell.tsx b/console/web/src/components/chat/LexicalShell.tsx index 926f5a33..7204f3a4 100644 --- a/console/web/src/components/chat/LexicalShell.tsx +++ b/console/web/src/components/chat/LexicalShell.tsx @@ -174,7 +174,10 @@ export function LexicalShell({ - + diff --git a/console/web/src/components/chat/MessageList.tsx b/console/web/src/components/chat/MessageList.tsx index 7f35db22..a4ed2597 100644 --- a/console/web/src/components/chat/MessageList.tsx +++ b/console/web/src/components/chat/MessageList.tsx @@ -138,10 +138,7 @@ export function MessageList({ const listPad = density === 'dock' ? 'px-4 py-6' : 'px-9 py-8' return ( -
+
{items.map((item) => item.kind === 'message' ? ( @@ -172,9 +169,7 @@ export function MessageList({ function EmptyState({ density }: { density: 'route' | 'dock' }) { const emptyPad = density === 'dock' ? 'px-4' : 'px-9' return ( -
+
new session diff --git a/console/web/src/components/chat/ModelPicker.tsx b/console/web/src/components/chat/ModelPicker.tsx index 07ee7109..b4d9c1e6 100644 --- a/console/web/src/components/chat/ModelPicker.tsx +++ b/console/web/src/components/chat/ModelPicker.tsx @@ -51,9 +51,8 @@ export function ModelPicker({ loading, className, }: ModelPickerProps) { - const [settingsProvider, setSettingsProvider] = useState( - null, - ) + const [settingsProvider, setSettingsProvider] = + useState(null) const pickerOptions = options.length > 0 ? options : [{ id: value, label: value }] diff --git a/console/web/src/components/chat/sandbox/CodeHighlight.tsx b/console/web/src/components/chat/sandbox/CodeHighlight.tsx new file mode 100644 index 00000000..517c8315 --- /dev/null +++ b/console/web/src/components/chat/sandbox/CodeHighlight.tsx @@ -0,0 +1,75 @@ +import { Highlight, Prism, type PrismTheme } from 'prism-react-renderer' +import { cn } from '@/lib/utils' + +const monoTheme: PrismTheme = { + plain: { color: 'var(--color-ink)' }, + styles: [ + { + types: ['comment', 'prolog', 'doctype', 'cdata'], + style: { color: 'var(--color-ink-ghost)', fontStyle: 'italic' }, + }, + { + types: ['string', 'attr-value', 'regex'], + style: { color: 'var(--color-ink-faint)' }, + }, + { + types: ['number', 'boolean', 'keyword', 'null'], + style: { color: 'var(--color-accent)', fontStyle: 'italic' }, + }, + { + types: ['function', 'class-name', 'builtin'], + style: { color: 'var(--color-ink)' }, + }, + { + types: ['punctuation', 'operator'], + style: { color: 'var(--color-ink-ghost)' }, + }, + ], +} + +interface CodeHighlightProps { + code: string + language?: string + className?: string +} + +export function CodeHighlight({ + code, + language = 'text', + className, +}: CodeHighlightProps) { + const lang = Prism.languages[language] ? language : 'text' + return ( + + {({ tokens, getLineProps, getTokenProps, className: hlClass, style }) => ( +
+          
+            {tokens.map((line, lineIdx) => {
+              const lineProps = getLineProps({ line })
+              /* Prism tokenization is deterministic for a given `code`;
+                 positional+content keys stay stable across renders. */
+              const lineKey = `${lineIdx}:${line.length}`
+              return (
+                
+                  {line.map((token, tokenIdx) => {
+                    const tokenProps = getTokenProps({ token })
+                    const tokenKey = `${tokenIdx}:${token.types.join('.')}:${token.content}`
+                    return 
+                  })}
+                  {lineIdx < tokens.length - 1 ? '\n' : ''}
+                
+              )
+            })}
+          
+        
+ )} +
+ ) +} diff --git a/console/web/src/components/chat/sandbox/CreateView.tsx b/console/web/src/components/chat/sandbox/CreateView.tsx new file mode 100644 index 00000000..68c945a4 --- /dev/null +++ b/console/web/src/components/chat/sandbox/CreateView.tsx @@ -0,0 +1,68 @@ +import { normaliseEnv, truncateMiddle } from './format' +import { + createRequestSchema, + createResponseSchema, + safeParseResponse, +} from './parsers' +import { Chip } from './terminal/Terminal' + +interface CreateViewProps { + input: unknown + output: unknown + running?: boolean +} + +export function CreateView({ input, output, running }: CreateViewProps) { + const req = createRequestSchema.safeParse(input) + if (!req.success) return null + const respData = + output != null ? safeParseResponse(createResponseSchema, output) : null + const env = normaliseEnv(req.data.env) + + return ( +
+
+
+ + + {running ? 'creating sandbox…' : 'created sandbox'} + {respData ? ( + + {truncateMiddle(respData.sandbox_id, 24)} + + ) : null} +
+
+ {respData?.image ?? req.data.image} + {typeof req.data.cpus === 'number' ? ( + {req.data.cpus} + ) : null} + {typeof req.data.memory_mb === 'number' ? ( + {`${req.data.memory_mb} MiB`} + ) : null} + {req.data.name ? {req.data.name} : null} + {typeof req.data.network === 'boolean' ? ( + {req.data.network ? 'on' : 'off'} + ) : null} + {typeof req.data.idle_timeout_secs === 'number' ? ( + {`${req.data.idle_timeout_secs}s`} + ) : null} +
+ {env.length > 0 ? ( +
+ + env + + {env.map(([k, v]) => ( + + {k}={v} + + ))} +
+ ) : null} +
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/ErrorView.tsx b/console/web/src/components/chat/sandbox/ErrorView.tsx new file mode 100644 index 00000000..fa9fad17 --- /dev/null +++ b/console/web/src/components/chat/sandbox/ErrorView.tsx @@ -0,0 +1,139 @@ +import { AnsiOutput } from '@/components/chat/sandbox/terminal/AnsiOutput' +import { Badge } from '@/components/ui/Badge' +import { + execResponseSchema, + type SandboxErrorDisplay, + type SandboxErrorWire, + type SandboxInvocationError, + safeParseResponse, +} from './parsers' + +interface ErrorViewProps { + error: SandboxErrorWire +} + +interface InvocationErrorViewProps { + error: SandboxInvocationError +} + +interface SandboxErrorViewProps { + display: SandboxErrorDisplay +} + +/** + * Visualises the flat `{ type, code, message, docs_url, retryable, + * fix, fix_note }` shape produced by `SandboxError::to_payload`. The + * card is a warn-toned slab so it stands out from successful tool + * cards without being as loud as the alert-red used for transport + * errors. `S200` (exec timeout) deliberately leaks through here + * rather than into `ExecView`: it carries no `ExecResponse`, so the + * terminal chrome would be misleading. + */ +function execStreamsFromFix(error: SandboxErrorWire) { + if (error.code !== 'S200' || error.fix == null) return null + const parsed = safeParseResponse(execResponseSchema, error.fix) + if (!parsed) return null + const { stdout, stderr } = parsed + if (!stdout && !stderr) return null + return { stdout, stderr } +} + +export function ErrorView({ error }: ErrorViewProps) { + const retryable = error.retryable === true + const streams = execStreamsFromFix(error) + return ( +
+
+
+ + {error.code} + + + {error.type} + + {retryable ? ( + + retryable + + ) : null} +
+ +
+          {error.message}
+        
+ + {error.fix_note ? ( +
+ {error.fix_note} +
+ ) : null} + + {error.docs_url ? ( + + docs ↗ + + ) : null} + + {streams ? ( +
+ +
+ ) : null} +
+
+ ) +} + +export function InvocationErrorView({ error }: InvocationErrorViewProps) { + const badge = error.deniedBy ?? 'error' + const showDetailText = + error.detailText && + error.detailText !== error.message && + error.detailText !== error.reason + + return ( +
+
+
+ + {badge} + + + {error.title} + +
+ + {error.functionId ? ( +
+ + function + {' '} + {error.functionId} +
+ ) : null} + +
+          {error.message}
+        
+ + {showDetailText ? ( +
+            {error.detailText}
+          
+ ) : null} +
+
+ ) +} + +export function SandboxErrorView({ display }: SandboxErrorViewProps) { + if (display.variant === 'wire') { + return + } + return +} diff --git a/console/web/src/components/chat/sandbox/ExecView.tsx b/console/web/src/components/chat/sandbox/ExecView.tsx new file mode 100644 index 00000000..b46d5448 --- /dev/null +++ b/console/web/src/components/chat/sandbox/ExecView.tsx @@ -0,0 +1,80 @@ +import { Prompt } from '@/components/ui/Prompt' +import { formatExecCommand, pillForExit, truncateMiddle } from './format' +import { + type ExecRequest, + type ExecResponse, + execRequestSchema, + execResponseSchema, + safeParseResponse, +} from './parsers' +import { AnsiOutput } from './terminal/AnsiOutput' +import { Chip, FooterPill, Terminal } from './terminal/Terminal' + +interface ExecViewProps { + input: unknown + output: unknown + running?: boolean +} + +export function ExecView({ input, output, running }: ExecViewProps) { + const req = execRequestSchema.safeParse(input) + if (!req.success) return null + const respData = + output != null ? safeParseResponse(execResponseSchema, output) : null + return ( + } + footer={respData ? : null} + > + + + ) +} + +/** Compact `$ cmd args` preview used in the pending-approval state. */ +export function ExecPreview({ input }: { input: unknown }) { + const req = execRequestSchema.safeParse(input) + if (!req.success) return null + return ( +
+ + + {formatExecCommand(req.data)} + + + + +
+ ) +} + +function ExecChips({ req }: { req: ExecRequest }) { + return ( + <> + {truncateMiddle(req.sandbox_id, 12)} + {req.workdir ? {req.workdir} : null} + {typeof req.timeout_ms === 'number' ? ( + {`${req.timeout_ms}ms`} + ) : null} + + ) +} + +function ExecFooter({ resp }: { resp: ExecResponse }) { + const exit = pillForExit(resp.exit_code) + return ( + <> + {exit.label} + {`${resp.duration_ms}ms`} + {resp.timed_out ? timed out : null} + {/* 1.0 MiB cap matches `INLINE_BUFFER_CAP` in fs/read.rs; exec is + buffered upstream with a comparable size, so flag when the + payload looks like it hit the lid. */} + {resp.stdout.length >= 1024 * 1024 ? ( + stdout cap (1.0 MiB) likely reached + ) : null} + + ) +} diff --git a/console/web/src/components/chat/sandbox/FsChmodView.tsx b/console/web/src/components/chat/sandbox/FsChmodView.tsx new file mode 100644 index 00000000..6a6ba282 --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsChmodView.tsx @@ -0,0 +1,42 @@ +import { formatMode } from './format' +import { + fsChmodRequestSchema, + fsChmodResponseSchema, + safeParseResponse, +} from './parsers' +import { Chip } from './terminal/Terminal' + +interface FsChmodViewProps { + input: unknown + output: unknown +} + +export function FsChmodView({ input, output }: FsChmodViewProps) { + const req = fsChmodRequestSchema.safeParse(input) + if (!req.success) return null + const resp = safeParseResponse(fsChmodResponseSchema, output) + if (!resp) return null + const ownership = + typeof req.data.uid === 'number' || typeof req.data.gid === 'number' + ? `${req.data.uid ?? '_'}:${req.data.gid ?? '_'}` + : null + + return ( +
+
+
+ chmod + {req.data.path} + + {req.data.mode} + ({formatMode(req.data.mode)}) +
+
+ {ownership ? {ownership} : null} + {req.data.recursive ? true : null} + {resp.updated} +
+
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/FsGrepView.tsx b/console/web/src/components/chat/sandbox/FsGrepView.tsx new file mode 100644 index 00000000..f18b371d --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsGrepView.tsx @@ -0,0 +1,130 @@ +import type * as React from 'react' +import { + fsGrepRequestSchema, + fsGrepResponseSchema, + safeParseResponse, +} from './parsers' +import { Chip, FooterPill } from './terminal/Terminal' + +interface FsGrepViewProps { + input: unknown + output: unknown +} + +export function FsGrepView({ input, output }: FsGrepViewProps) { + const req = fsGrepRequestSchema.safeParse(input) + if (!req.success) return null + const resp = safeParseResponse(fsGrepResponseSchema, output) + if (!resp) return null + const { matches, truncated } = resp + + return ( +
+
+ {req.data.path} + {req.data.pattern} + {req.data.ignore_case ? case-insensitive : null} + 0 ? 'default' : 'warn'}> + {`${matches.length} ${matches.length === 1 ? 'match' : 'matches'}`} + + {truncated ? truncated : null} +
+ + {matches.length === 0 ? ( +
+ · no matches +
+ ) : ( +
+ {matches.map((m) => ( +
+
+ {m.path} + : + {m.line} +
+
+                
+                  {renderWithHighlight(
+                    m.content,
+                    req.data.pattern,
+                    !!req.data.ignore_case,
+                  )}
+                
+              
+
+ ))} +
+ )} +
+ ) +} + +/** Best-effort substring/regex highlight. The daemon uses the Rust + `regex` crate; JS regex is a superset for the simple cases agents + use (TODO|FIXME, identifiers). Falls back to substring matching + if the pattern doesn't compile as a JS regex. */ +function renderWithHighlight( + line: string, + pattern: string, + ignoreCase: boolean, +): React.ReactNode { + if (!pattern) return line + let re: RegExp | null = null + try { + re = new RegExp(pattern, ignoreCase ? 'gi' : 'g') + } catch { + re = null + } + if (re) { + const parts: React.ReactNode[] = [] + let last = 0 + let n = 0 + for (const hit of line.matchAll(re)) { + const start = hit.index ?? 0 + const text = hit[0] + // Skip zero-width matches that would otherwise loop forever. + if (text.length === 0) continue + if (start > last) parts.push(line.slice(last, start)) + parts.push( + + {text} + , + ) + last = start + text.length + n++ + if (n > 200) break + } + if (last < line.length) parts.push(line.slice(last)) + return parts + } + // Substring fallback for patterns the JS regex engine rejects. + const needle = ignoreCase ? pattern.toLowerCase() : pattern + const hay = ignoreCase ? 
line.toLowerCase() : line + const parts: React.ReactNode[] = [] + let i = 0 + let n = 0 + while (i < line.length) { + const j = hay.indexOf(needle, i) + if (j === -1) { + parts.push(line.slice(i)) + break + } + if (j > i) parts.push(line.slice(i, j)) + parts.push( + + {line.slice(j, j + pattern.length)} + , + ) + i = j + pattern.length + } + return parts +} diff --git a/console/web/src/components/chat/sandbox/FsLsView.tsx b/console/web/src/components/chat/sandbox/FsLsView.tsx new file mode 100644 index 00000000..b85b3b0e --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsLsView.tsx @@ -0,0 +1,74 @@ +import { File, FileText, Folder, Link as LinkIcon } from 'lucide-react' +import { formatBytes, formatMode, formatMtime } from './format' +import { + fsLsRequestSchema, + fsLsResponseSchema, + safeParseResponse, +} from './parsers' +import { Chip } from './terminal/Terminal' + +interface FsLsViewProps { + input: unknown + output: unknown +} + +export function FsLsView({ input, output }: FsLsViewProps) { + const req = fsLsRequestSchema.safeParse(input) + if (!req.success) return null + const resp = safeParseResponse(fsLsResponseSchema, output) + if (!resp) return null + const entries = resp.entries + + return ( +
+
+ {req.data.path} + {entries.length} +
+ {entries.length === 0 ? ( +
+ · directory is empty +
+ ) : ( + + + {entries.map((e) => { + const Icon = e.is_symlink + ? LinkIcon + : e.is_dir + ? Folder + : iconForFile(e.name) + return ( + + + + + + + + ) + })} + +
+ + {e.name} + {e.is_dir ? '—' : formatBytes(e.size)} + + {`${e.is_dir ? 'd' : '-'}${formatMode(e.mode)}`} + + {formatMtime(e.mtime)} +
+ )} +
+ ) +} + +function iconForFile(name: string) { + const lower = name.toLowerCase() + if (/\.(md|txt|json|yml|yaml|toml|csv|log)$/.test(lower)) return FileText + if (/\.(js|jsx|ts|tsx|py|rs|go|rb|sh|bash)$/.test(lower)) return FileText + return File +} diff --git a/console/web/src/components/chat/sandbox/FsMkdirView.tsx b/console/web/src/components/chat/sandbox/FsMkdirView.tsx new file mode 100644 index 00000000..c33a97a0 --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsMkdirView.tsx @@ -0,0 +1,36 @@ +import { + fsMkdirRequestSchema, + fsMkdirResponseSchema, + safeParseResponse, +} from './parsers' +import { Chip } from './terminal/Terminal' + +interface FsMkdirViewProps { + input: unknown + output: unknown +} + +export function FsMkdirView({ input, output }: FsMkdirViewProps) { + const req = fsMkdirRequestSchema.safeParse(input) + if (!req.success) return null + const resp = safeParseResponse(fsMkdirResponseSchema, output) + if (!resp) return null + const created = resp.created + + return ( +
+
+
+ + {created ? '+ created ' : '· exists '} + + {req.data.path} +
+
+ {req.data.mode ?? '0755'} + {req.data.parents ? true : null} +
+
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/FsMvView.tsx b/console/web/src/components/chat/sandbox/FsMvView.tsx new file mode 100644 index 00000000..124a94a6 --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsMvView.tsx @@ -0,0 +1,39 @@ +import { + fsMvRequestSchema, + fsMvResponseSchema, + safeParseResponse, +} from './parsers' +import { Chip } from './terminal/Terminal' + +interface FsMvViewProps { + input: unknown + output: unknown +} + +export function FsMvView({ input, output }: FsMvViewProps) { + const req = fsMvRequestSchema.safeParse(input) + if (!req.success) return null + const resp = safeParseResponse(fsMvResponseSchema, output) + if (!resp) return null + const moved = resp.moved + + return ( +
+
+
+ + {moved ? 'mv' : '·'} + + {req.data.src} + + {req.data.dst} +
+ {req.data.overwrite ? ( +
+ true +
+ ) : null} +
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/FsReadView.tsx b/console/web/src/components/chat/sandbox/FsReadView.tsx new file mode 100644 index 00000000..fe34d74f --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsReadView.tsx @@ -0,0 +1,70 @@ +import { CodeHighlight } from '@/lib/syntax' +import { + formatBytes, + formatMode, + formatMtime, + inferLangFromPath, + truncateMiddle, +} from './format' +import { + fsReadRequestSchema, + fsReadResponseSchema, + safeParseResponse, + streamChannelRefSchema, +} from './parsers' +import { Chip } from './terminal/Terminal' + +interface FsReadViewProps { + input: unknown + output: unknown +} + +export function FsReadView({ input, output }: FsReadViewProps) { + const req = fsReadRequestSchema.safeParse(input) + if (!req.success) return null + const resp = safeParseResponse(fsReadResponseSchema, output) + if (!resp) return null + const inline = typeof resp.content === 'string' ? resp.content : null + const stream = + inline === null ? streamChannelRefSchema.safeParse(resp.content) : null + const lang = inferLangFromPath(req.data.path) + + return ( +
+
+ + file + + {req.data.path} +
+ + {inline !== null ? ( + lang ? ( + + ) : ( +
+            {inline}
+          
+ ) + ) : stream?.success ? ( +
+ streaming via channel + + {truncateMiddle(stream.data.channel_id, 18)} + + ({stream.data.direction}) +
+ ) : ( +
+ · empty +
+ )} + +
+ {formatBytes(resp.size)} + {formatMode(resp.mode)} + {formatMtime(resp.mtime)} +
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/FsRmView.tsx b/console/web/src/components/chat/sandbox/FsRmView.tsx new file mode 100644 index 00000000..edb83518 --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsRmView.tsx @@ -0,0 +1,39 @@ +import { + fsRmRequestSchema, + fsRmResponseSchema, + safeParseResponse, +} from './parsers' +import { Chip } from './terminal/Terminal' + +interface FsRmViewProps { + input: unknown + output: unknown +} + +export function FsRmView({ input, output }: FsRmViewProps) { + const req = fsRmRequestSchema.safeParse(input) + if (!req.success) return null + const resp = safeParseResponse(fsRmResponseSchema, output) + if (!resp) return null + const removed = resp.removed + + return ( +
+
+
+ + {removed ? '− removed ' : '· not removed '} + + {req.data.path} +
+ {req.data.recursive ? ( +
+ + true + +
+ ) : null} +
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/FsSedView.tsx b/console/web/src/components/chat/sandbox/FsSedView.tsx new file mode 100644 index 00000000..182b5285 --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsSedView.tsx @@ -0,0 +1,101 @@ +import { TriangleAlert } from 'lucide-react' +import { + Tooltip, + TooltipContent, + TooltipTrigger, +} from '@/components/ui/Tooltip' +import { + fsSedRequestSchema, + fsSedResponseSchema, + safeParseResponse, +} from './parsers' +import { Chip, FooterPill } from './terminal/Terminal' + +interface FsSedViewProps { + input: unknown + output: unknown +} + +export function FsSedView({ input, output }: FsSedViewProps) { + const req = fsSedRequestSchema.safeParse(input) + if (!req.success) return null + const resp = safeParseResponse(fsSedResponseSchema, output) + if (!resp) return null + const { results, total_replacements } = resp + const target = + req.data.path ?? + (req.data.files?.length ? `${req.data.files.length} files` : '—') + + return ( +
+
+ {target} + {req.data.pattern} + {req.data.replacement || "''"} + {req.data.regex === false ? literal : null} + {req.data.first_only ? first-only : null} + {req.data.ignore_case ? case-insensitive : null} +
+ + {results.length === 0 ? ( +
+ · no files touched +
+ ) : ( + + + + + + + + + + {results.map((r) => ( + + + + + + ))} + + + + + +
path + replacements + status
{r.path} + {r.replacements} + + {r.success ? ( + ok + ) : r.error ? ( + + + + + err + + + {r.error} + + ) : ( + err + )} +
+ total + + 0 ? 'accent' : 'default'} + > + {`${total_replacements} replacements`} + +
+ )} +
+ ) +} diff --git a/console/web/src/components/chat/sandbox/FsStatView.tsx b/console/web/src/components/chat/sandbox/FsStatView.tsx new file mode 100644 index 00000000..479f86e0 --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsStatView.tsx @@ -0,0 +1,37 @@ +import { formatBytes, formatMode, formatMtime } from './format' +import { + fsStatRequestSchema, + fsStatResponseSchema, + safeParseResponse, +} from './parsers' +import { Chip, FooterPill } from './terminal/Terminal' + +interface FsStatViewProps { + input: unknown + output: unknown +} + +export function FsStatView({ input, output }: FsStatViewProps) { + const req = fsStatRequestSchema.safeParse(input) + if (!req.success) return null + const e = safeParseResponse(fsStatResponseSchema, output) + if (!e) return null + + return ( +
+
+
+ stat + {req.data.path} +
+
+ {e.is_dir ? '—' : formatBytes(e.size)} + {`${e.is_dir ? 'd' : '-'}${formatMode(e.mode)}`} + {formatMtime(e.mtime)} + {e.is_dir ? dir : null} + {e.is_symlink ? symlink : null} +
+
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/FsWriteView.tsx b/console/web/src/components/chat/sandbox/FsWriteView.tsx new file mode 100644 index 00000000..3fa67a35 --- /dev/null +++ b/console/web/src/components/chat/sandbox/FsWriteView.tsx @@ -0,0 +1,48 @@ +import { formatBytes } from './format' +import { + fsWriteRequestSchema, + fsWriteResponseSchema, + safeParseResponse, + streamChannelRefSchema, +} from './parsers' +import { Chip, FooterPill } from './terminal/Terminal' + +interface FsWriteViewProps { + input: unknown + output: unknown +} + +export function FsWriteView({ input, output }: FsWriteViewProps) { + const req = fsWriteRequestSchema.safeParse(input) + if (!req.success) return null + const resp = safeParseResponse(fsWriteResponseSchema, output) + if (!resp) return null + const streamed = req.data.content + ? streamChannelRefSchema.safeParse(req.data.content).success + : false + const usedB64 = !!req.data.content_b64 + + return ( +
+
+
+ + wrote{' '} + + {formatBytes(resp.bytes_written)} + {' '} + to {resp.path} +
+
+ {req.data.mode ?? '0644'} + {req.data.parents ? true : null} + {streamed ? ( + uploaded via channel + ) : null} + {usedB64 ? ( + base64 inline + ) : null} +
+
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/ListView.tsx b/console/web/src/components/chat/sandbox/ListView.tsx new file mode 100644 index 00000000..c7952660 --- /dev/null +++ b/console/web/src/components/chat/sandbox/ListView.tsx @@ -0,0 +1,76 @@ +import { Inbox } from 'lucide-react' +import { EmptyState } from '@/components/ui/EmptyState' +import { StatusDot } from '@/components/ui/StatusDot' +import { formatAgeSecs, truncateMiddle } from './format' +import { listResponseSchema, safeParseResponse } from './parsers' + +interface ListViewProps { + output: unknown +} + +export function ListView({ output }: ListViewProps) { + const parsed = safeParseResponse(listResponseSchema, output) + if (!parsed) return null + const sandboxes = parsed.sandboxes + + if (sandboxes.length === 0) { + return ( +
+ +
+ ) + } + + return ( +
+ + + + + + + + + + + + + {sandboxes.map((s) => ( + + + + + + + + + ))} + +
sandboxnameimageageexecstate
+ + {truncateMiddle(s.sandbox_id, 14)} + + {s.name ?? '—'}{s.image} + {formatAgeSecs(s.age_secs)} + + + + {s.stopped ? ( + stopped + ) : ( + live + )} +
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/RunView.tsx b/console/web/src/components/chat/sandbox/RunView.tsx new file mode 100644 index 00000000..d2ea85a3 --- /dev/null +++ b/console/web/src/components/chat/sandbox/RunView.tsx @@ -0,0 +1,149 @@ +import { useState } from 'react' +import { Prompt } from '@/components/ui/Prompt' +import { CodeHighlight } from '@/lib/syntax' +import { cn } from '@/lib/utils' +import { langFromRunLang, pillForExit, truncateMiddle } from './format' +import { + type RunRequest, + type RunResponse, + runRequestSchema, + runResponseSchema, + safeParseResponse, +} from './parsers' +import { AnsiOutput } from './terminal/AnsiOutput' +import { Chip, FooterPill, Terminal } from './terminal/Terminal' + +interface RunViewProps { + input: unknown + output: unknown + running?: boolean +} + +export function RunView({ input, output, running }: RunViewProps) { + const req = runRequestSchema.safeParse(input) + if (!req.success) return null + const respData = + output != null ? safeParseResponse(runResponseSchema, output) : null + return ( + } + footer={respData ? : null} + > + + + + ) +} + +export function RunPreview({ input }: { input: unknown }) { + const req = runRequestSchema.safeParse(input) + if (!req.success) return null + return ( +
+ + + {`run ${req.data.lang} /tmp/run.${extFor(req.data.lang)}`} + + + + +
+ ) +} + +function CodePreview({ req }: { req: RunRequest }) { + const [open, setOpen] = useState(false) + const lang = langFromRunLang(req.lang) + const lineCount = req.code.split('\n').length + return ( +
+ + {open ? ( + lang ? ( + + ) : ( +
+            {req.code}
+          
+ ) + ) : null} +
+ ) +} + +function RunChips({ + req, + resp, +}: { + req: RunRequest + resp: RunResponse | null +}) { + return ( + <> + {req.image} + {req.lang} + {req.keep_sandbox ? {'true'} : null} + {resp?.sandbox_id ? ( + {truncateMiddle(resp.sandbox_id, 12)} + ) : null} + + ) +} + +function RunFooter({ resp }: { resp: RunResponse }) { + const exit = pillForExit(resp.exit_code) + return ( + <> + {exit.label} + {`${resp.duration_ms}ms`} + {resp.timed_out ? timed out : null} + {resp.sandbox_id ? ( + kept alive + ) : ( + auto-stopped + )} + + ) +} + +function interpreterFor(lang: string): string { + const l = lang.toLowerCase() + if (l === 'node' || l === 'js' || l === 'javascript') return 'node' + if (l === 'python' || l === 'py') return 'python3' + if (l === 'shell' || l === 'sh' || l === 'bash') return '/bin/sh' + return lang +} + +function extFor(lang: string): string { + const l = lang.toLowerCase() + if (l === 'node' || l === 'js' || l === 'javascript') return 'js' + if (l === 'python' || l === 'py') return 'py' + if (l === 'shell' || l === 'sh' || l === 'bash') return 'sh' + return 'txt' +} diff --git a/console/web/src/components/chat/sandbox/StopView.tsx b/console/web/src/components/chat/sandbox/StopView.tsx new file mode 100644 index 00000000..68f5c127 --- /dev/null +++ b/console/web/src/components/chat/sandbox/StopView.tsx @@ -0,0 +1,42 @@ +import { truncateMiddle } from './format' +import { + safeParseResponse, + stopRequestSchema, + stopResponseSchema, +} from './parsers' +import { Chip, FooterPill } from './terminal/Terminal' + +interface StopViewProps { + input: unknown + output: unknown + running?: boolean +} + +export function StopView({ input, output, running }: StopViewProps) { + const req = stopRequestSchema.safeParse(input) + if (!req.success) return null + const respData = + output != null ? safeParseResponse(stopResponseSchema, output) : null + + return ( +
+
+
+ × + {running ? 'stopping sandbox…' : 'stopped sandbox'} + + {truncateMiddle(respData?.sandbox_id ?? req.data.sandbox_id, 24)} + +
+
+ {req.data.wait ? true : null} + {respData ? ( + + {respData.stopped ? 'stopped' : 'not stopped'} + + ) : null} +
+
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/__tests__/parsers.test.ts b/console/web/src/components/chat/sandbox/__tests__/parsers.test.ts new file mode 100644 index 00000000..c652d9e4 --- /dev/null +++ b/console/web/src/components/chat/sandbox/__tests__/parsers.test.ts @@ -0,0 +1,544 @@ +import { describe, expect, it } from 'vitest' +import { + createRequestSchema, + createResponseSchema, + execRequestSchema, + execResponseSchema, + fsChmodRequestSchema, + fsChmodResponseSchema, + fsGrepRequestSchema, + fsGrepResponseSchema, + fsLsRequestSchema, + fsLsResponseSchema, + fsMkdirRequestSchema, + fsMkdirResponseSchema, + fsMvRequestSchema, + fsMvResponseSchema, + fsReadResponseSchema, + fsRmRequestSchema, + fsRmResponseSchema, + fsSedRequestSchema, + fsSedResponseSchema, + fsStatRequestSchema, + fsStatResponseSchema, + fsWriteRequestSchema, + fsWriteResponseSchema, + extractFirstJsonObject, + isSandboxErrorWire, + listResponseSchema, + parseSandboxErrorDisplay, + runRequestSchema, + runResponseSchema, + sandboxErrorWireSchema, + stopRequestSchema, + stopResponseSchema, + streamChannelRefSchema, + unwrapEnvelope, +} from '../parsers' + +const SB = '00000000-0000-0000-0000-000000000000' + +/** Build a `{ content, details, terminate }` envelope identical to + what `harness/src/turn-orchestrator/agent-trigger.ts` produces. 
*/ +function wrap(details: T) { + return { + content: [{ type: 'text', text: JSON.stringify(details) }], + details, + terminate: false, + } +} + +describe('unwrapEnvelope', () => { + it('peels the harness envelope', () => { + const payload = { sandbox_id: SB, stopped: true } + expect(unwrapEnvelope(wrap(payload))).toEqual(payload) + }) + + it('passes through raw payloads unchanged', () => { + const payload = { sandbox_id: SB, stopped: true } + expect(unwrapEnvelope(payload)).toBe(payload) + }) + + it('is idempotent on flat payloads', () => { + const payload = { foo: 1 } + expect(unwrapEnvelope(unwrapEnvelope(payload))).toBe(payload) + }) + + it('passes through primitives and arrays', () => { + expect(unwrapEnvelope(null)).toBe(null) + expect(unwrapEnvelope('hi')).toBe('hi') + const arr = [1, 2, 3] + expect(unwrapEnvelope(arr)).toBe(arr) + }) + + it('does not peel objects that just happen to have a `content` field', () => { + // The discriminator requires both `Array.isArray(content)` and the + // presence of `details`; bare `content` strings/objects must pass + // through as raw payloads. 
+ const payload = { content: 'inline string', size: 5 } + expect(unwrapEnvelope(payload)).toBe(payload) + }) +}) + +describe('SandboxErrorWire schema', () => { + it('accepts a fully populated S200 timeout payload', () => { + const ok = sandboxErrorWireSchema.safeParse({ + type: 'execution', + code: 'S200', + message: 'exec timed out after 1500 ms', + docs_url: 'https://example/S200', + retryable: false, + fix: null, + fix_note: 'raise timeout_ms', + }) + expect(ok.success).toBe(true) + }) + + it('rejects payloads whose code does not match /^S\\d{3}$/', () => { + const bad = sandboxErrorWireSchema.safeParse({ + type: 'execution', + code: 'X404', + message: 'no', + }) + expect(bad.success).toBe(false) + }) + + it('isSandboxErrorWire narrows correctly', () => { + expect( + isSandboxErrorWire({ type: 'config', code: 'S100', message: 'no' }), + ).toBe(true) + expect(isSandboxErrorWire({ stdout: '' })).toBe(false) + }) + + it('detects SandboxErrorWire inside harness envelope details', () => { + const wire = { type: 'execution', code: 'S200', message: 'timed out' } + expect(isSandboxErrorWire(wrap(wire))).toBe(true) + }) + + it('detects handler_error tagged payloads', () => { + expect( + isSandboxErrorWire({ + error: 'handler_error', + type: 'filesystem', + code: 'S210', + message: 'path is required', + }), + ).toBe(true) + }) +}) + +describe('parseSandboxErrorDisplay', () => { + it('parses flat SandboxErrorWire', () => { + const out = parseSandboxErrorDisplay({ + type: 'execution', + code: 'S200', + message: 'timed out', + }) + expect(out).toEqual({ + variant: 'wire', + error: { + type: 'execution', + code: 'S200', + message: 'timed out', + }, + }) + }) + + it('parses harness envelope with SandboxErrorWire in details', () => { + const wire = { + type: 'filesystem', + code: 'S210', + message: 'path is required', + docs_url: 'https://example/S210', + } + const out = parseSandboxErrorDisplay(wrap(wire)) + expect(out?.variant).toBe('wire') + if (out?.variant === 'wire') { + 
expect(out.error.code).toBe('S210') + } + }) + + it('parses translate function_error envelope with denial details', () => { + const out = parseSandboxErrorDisplay({ + error: { + kind: 'function_error', + message: + 'trigger_failed: IIIInvocationError: invocation_failed: handler error', + details: { + schema_version: 1, + status: 'denied', + denied_by: 'gate_unavailable', + function_id: 'sandbox::fs::write', + reason: 'trigger_failed: policy unreachable', + }, + content: [{ type: 'text', text: 'trigger_failed: policy unreachable' }], + }, + }) + expect(out?.variant).toBe('invocation') + if (out?.variant === 'invocation') { + expect(out.error.deniedBy).toBe('gate_unavailable') + expect(out.error.functionId).toBe('sandbox::fs::write') + expect(out.error.title).toBe('Gate unavailable') + expect(out.error.message).toBe('trigger_failed: policy unreachable') + } + }) + + it('parses handler_error details inside function_error envelope', () => { + const out = parseSandboxErrorDisplay({ + error: { + kind: 'function_error', + message: 'path is required', + details: { + error: 'handler_error', + type: 'filesystem', + code: 'S210', + message: 'path is required', + docs_url: 'https://example/S210', + }, + content: [{ type: 'text', text: '{"code":"S210","message":"path is required"}' }], + }, + }) + expect(out?.variant).toBe('wire') + if (out?.variant === 'wire') { + expect(out.error.code).toBe('S210') + } + }) + + it('extracts embedded SandboxErrorWire JSON from error message text', () => { + const envelope = { + code: 'S220', + type: 'filesystem', + message: 'permission denied', + } + const out = parseSandboxErrorDisplay({ + error: { + kind: 'function_error', + message: `trigger_failed: handler error: ${JSON.stringify(envelope)}`, + details: { + schema_version: 1, + status: 'denied', + denied_by: 'gate_unavailable', + function_id: 'sandbox::fs::write', + reason: `trigger_failed: ${JSON.stringify(envelope)}`, + }, + content: [], + }, + }) + expect(out?.variant).toBe('wire') + if 
(out?.variant === 'wire') { + expect(out.error.code).toBe('S220') + } + }) +}) + +describe('extractFirstJsonObject', () => { + it('pulls the first balanced object from noisy text', () => { + const embedded = extractFirstJsonObject( + 'trigger_failed: handler error: {"code":"S210","type":"filesystem","message":"bad path"} tail', + ) + expect(embedded).toMatchObject({ code: 'S210', message: 'bad path' }) + }) +}) + +describe('lifecycle schemas', () => { + it('execRequest accepts the canonical example', () => { + const ok = execRequestSchema.safeParse({ + sandbox_id: SB, + cmd: 'node', + args: ['/home/app/index.js'], + env: { NODE_ENV: 'production' }, + timeout_ms: 300_000, + }) + expect(ok.success).toBe(true) + }) + + it('execRequest accepts argv-only shape', () => { + const ok = execRequestSchema.safeParse({ + sandbox_id: SB, + argv: ['node', '/home/app/index.js'], + }) + expect(ok.success).toBe(true) + }) + + it('execResponse round-trips through envelope unwrap', () => { + const resp = { + stdout: 'hi\n', + stderr: '', + exit_code: 0, + timed_out: false, + duration_ms: 12, + success: true, + } + const wrapped = wrap(resp) + const parsed = execResponseSchema.safeParse(unwrapEnvelope(wrapped)) + expect(parsed.success).toBe(true) + if (parsed.success) expect(parsed.data.exit_code).toBe(0) + }) + + it('execResponse accepts null exit_code (timed out)', () => { + const ok = execResponseSchema.safeParse({ + stdout: '', + stderr: '', + exit_code: null, + timed_out: true, + duration_ms: 100, + success: false, + }) + expect(ok.success).toBe(true) + }) + + it('runRequest accepts python lang + keep_sandbox', () => { + const ok = runRequestSchema.safeParse({ + image: 'python', + code: 'print(1)', + lang: 'python', + keep_sandbox: true, + }) + expect(ok.success).toBe(true) + }) + + it('runResponse accepts optional sandbox_id', () => { + const ok = runResponseSchema.safeParse({ + stdout: '', + stderr: '', + exit_code: 0, + timed_out: false, + duration_ms: 1, + success: true, + 
sandbox_id: SB, + }) + expect(ok.success).toBe(true) + }) + + it('createRequest + response parse the canonical example', () => { + expect( + createRequestSchema.safeParse({ + image: 'node', + memory_mb: 512, + env: { NODE_ENV: 'production' }, + idle_timeout_secs: 600, + }).success, + ).toBe(true) + expect( + createResponseSchema.safeParse({ sandbox_id: SB, image: 'node' }).success, + ).toBe(true) + }) + + it('stopRequest tolerates missing wait', () => { + expect(stopRequestSchema.safeParse({ sandbox_id: SB }).success).toBe(true) + expect( + stopResponseSchema.safeParse({ sandbox_id: SB, stopped: true }).success, + ).toBe(true) + }) + + it('listResponse accepts an empty list', () => { + expect(listResponseSchema.safeParse({ sandboxes: [] }).success).toBe(true) + }) + + it('listResponse parses a populated table', () => { + const ok = listResponseSchema.safeParse({ + sandboxes: [ + { + sandbox_id: SB, + name: 'worker', + image: 'node', + age_secs: 60, + exec_in_progress: false, + stopped: false, + }, + ], + }) + expect(ok.success).toBe(true) + }) +}) + +describe('fs schemas', () => { + it('fs::ls request + response', () => { + expect( + fsLsRequestSchema.safeParse({ sandbox_id: SB, path: '/' }).success, + ).toBe(true) + expect( + fsLsResponseSchema.safeParse({ + entries: [ + { + name: 'a', + is_dir: false, + size: 1, + mode: '0644', + mtime: 0, + is_symlink: false, + }, + ], + }).success, + ).toBe(true) + }) + + it('fs::stat request + response', () => { + expect( + fsStatRequestSchema.safeParse({ sandbox_id: SB, path: '/a' }).success, + ).toBe(true) + expect( + fsStatResponseSchema.safeParse({ + name: 'a', + is_dir: false, + size: 1, + mode: '0644', + mtime: 0, + is_symlink: false, + }).success, + ).toBe(true) + }) + + it('fs::mkdir defaults', () => { + expect( + fsMkdirRequestSchema.safeParse({ sandbox_id: SB, path: '/d' }).success, + ).toBe(true) + expect(fsMkdirResponseSchema.safeParse({ created: true }).success).toBe( + true, + ) + }) + + it('fs::read accepts 
inline string content', () => { + const ok = fsReadResponseSchema.safeParse({ + content: 'hello', + size: 5, + mode: '0644', + mtime: 0, + }) + expect(ok.success).toBe(true) + }) + + it('fs::read accepts StreamChannelRef content', () => { + const ok = fsReadResponseSchema.safeParse({ + content: { channel_id: 'ch1', access_key: 'key', direction: 'read' }, + size: 100, + mode: '0644', + mtime: 0, + }) + expect(ok.success).toBe(true) + }) + + it('fs::write accepts UTF-8 content', () => { + const ok = fsWriteRequestSchema.safeParse({ + sandbox_id: SB, + path: '/a', + content: 'console.log(1)\n', + }) + expect(ok.success).toBe(true) + expect( + fsWriteResponseSchema.safeParse({ bytes_written: 15, path: '/a' }) + .success, + ).toBe(true) + }) + + it('fs::write accepts StreamChannelRef content', () => { + const ok = fsWriteRequestSchema.safeParse({ + sandbox_id: SB, + path: '/a', + content: { channel_id: 'ch1', access_key: 'key', direction: 'write' }, + }) + expect(ok.success).toBe(true) + }) + + it('fs::rm tolerates default recursive', () => { + expect( + fsRmRequestSchema.safeParse({ sandbox_id: SB, path: '/a' }).success, + ).toBe(true) + expect(fsRmResponseSchema.safeParse({ removed: true }).success).toBe(true) + }) + + it('fs::mv', () => { + expect( + fsMvRequestSchema.safeParse({ sandbox_id: SB, src: '/a', dst: '/b' }) + .success, + ).toBe(true) + expect(fsMvResponseSchema.safeParse({ moved: true }).success).toBe(true) + }) + + it('fs::chmod with uid/gid', () => { + const ok = fsChmodRequestSchema.safeParse({ + sandbox_id: SB, + path: '/a', + mode: '0755', + uid: 1000, + gid: 1000, + recursive: true, + }) + expect(ok.success).toBe(true) + expect(fsChmodResponseSchema.safeParse({ updated: 3 }).success).toBe(true) + }) + + it('fs::grep request + response with truncation flag', () => { + expect( + fsGrepRequestSchema.safeParse({ + sandbox_id: SB, + path: '/src', + pattern: 'TODO|FIXME', + recursive: true, + include_glob: ['**/*.ts'], + }).success, + ).toBe(true) + 
expect( + fsGrepResponseSchema.safeParse({ + matches: [{ path: '/a.ts', line: 1, content: '// TODO' }], + truncated: true, + }).success, + ).toBe(true) + }) + + it('fs::grep response normalises legacy `file` alias to `path`', () => { + const parsed = fsGrepResponseSchema.safeParse({ + matches: [{ file: '/legacy.ts', line: 7, content: 'TODO' }], + truncated: false, + }) + expect(parsed.success).toBe(true) + if (parsed.success) { + expect(parsed.data.matches[0].path).toBe('/legacy.ts') + } + }) + + it('fs::sed request + response', () => { + expect( + fsSedRequestSchema.safeParse({ + sandbox_id: SB, + path: '/src', + pattern: 'foo', + replacement: 'bar', + }).success, + ).toBe(true) + expect( + fsSedResponseSchema.safeParse({ + results: [{ path: '/a.ts', replacements: 2, success: true }], + total_replacements: 2, + }).success, + ).toBe(true) + }) + + it('fs::sed response surfaces per-file errors', () => { + const parsed = fsSedResponseSchema.safeParse({ + results: [ + { + path: '/a.ts', + replacements: 0, + success: false, + error: 'permission denied', + }, + ], + total_replacements: 0, + }) + expect(parsed.success).toBe(true) + if (parsed.success) { + expect(parsed.data.results[0].error).toBe('permission denied') + } + }) + + it('streamChannelRef rejects unknown directions', () => { + expect( + streamChannelRefSchema.safeParse({ + channel_id: 'x', + access_key: 'y', + direction: 'sideways', + }).success, + ).toBe(false) + }) +}) diff --git a/console/web/src/components/chat/sandbox/format.ts b/console/web/src/components/chat/sandbox/format.ts new file mode 100644 index 00000000..dc707e51 --- /dev/null +++ b/console/web/src/components/chat/sandbox/format.ts @@ -0,0 +1,224 @@ +/* Pure formatting helpers shared by every sandbox renderer. No React, + no DOM access — these are deterministic transforms over the parsed + sandbox payloads. Kept colocated with the renderers so the + per-tool views stay terse. 
*/ + +import type { EnvShape } from './parsers' + +/** `1024` → `"1.0 KiB"`. Pin to KiB/MiB/GiB so the unit matches + the daemon's `INLINE_BUFFER_CAP = 1 MiB` constant in `fs/read.rs`. */ +export function formatBytes(bytes: number): string { + if (!Number.isFinite(bytes) || bytes < 0) return '—' + if (bytes < 1024) return `${bytes} B` + const kib = bytes / 1024 + if (kib < 1024) return `${kib.toFixed(kib < 10 ? 1 : 0)} KiB` + const mib = kib / 1024 + if (mib < 1024) return `${mib.toFixed(mib < 10 ? 1 : 0)} MiB` + const gib = mib / 1024 + return `${gib.toFixed(gib < 10 ? 1 : 0)} GiB` +} + +/** Octal mode string `"0755"` → POSIX `"rwxr-xr-x"`. For directories + the caller can prepend the leading `d` (e.g. `${isDir ? 'd' : '-'}${formatMode(mode)}`). */ +export function formatMode(mode: string): string { + // Tolerate `"0755"`, `"755"`, leading `"o"`, or junk. Extract the last + // 3 octal digits and decode each; fall back to the original string if + // it doesn't parse. + const digits = mode.match(/[0-7]{3}$/)?.[0] + if (!digits) return mode + const bits = ['r', 'w', 'x'] as const + let out = '' + for (const ch of digits) { + const n = Number.parseInt(ch, 10) + for (let i = 0; i < 3; i++) { + out += n & (4 >> i) ? bits[i] : '-' + } + } + return out +} + +/** Unix seconds → short relative time ("3m ago", "2d ago"). `mtime` + from the daemon is seconds since epoch (or 0 for unset/unknown). */ +export function formatMtime(unixSecs: number, now = Date.now()): string { + if (!unixSecs || unixSecs <= 0) return '—' + const deltaSecs = Math.max(0, Math.floor(now / 1000 - unixSecs)) + if (deltaSecs < 60) return deltaSecs <= 1 ? 
'just now' : `${deltaSecs}s ago` + const mins = Math.floor(deltaSecs / 60) + if (mins < 60) return `${mins}m ago` + const hours = Math.floor(mins / 60) + if (hours < 24) return `${hours}h ago` + const days = Math.floor(hours / 24) + if (days < 30) return `${days}d ago` + const months = Math.floor(days / 30) + if (months < 12) return `${months}mo ago` + return `${Math.floor(months / 12)}y ago` +} + +/** Truncate the middle of a long path/identifier so head and tail stay + visible. `"a/very/long/path"` → `"a/very/…/path"`. */ +export function truncateMiddle(value: string, maxLen = 28): string { + if (value.length <= maxLen) return value + const head = Math.ceil((maxLen - 1) / 2) + const tail = Math.floor((maxLen - 1) / 2) + return `${value.slice(0, head)}…${value.slice(value.length - tail)}` +} + +/** Humanize an age in seconds. Used by `sandbox::list`. */ +export function formatAgeSecs(secs: number): string { + if (!Number.isFinite(secs) || secs < 0) return '—' + if (secs < 60) return `${secs}s` + const mins = Math.floor(secs / 60) + if (mins < 60) return `${mins}m` + const hours = Math.floor(mins / 60) + if (hours < 24) return `${hours}h` + const days = Math.floor(hours / 24) + return `${days}d` +} + +/** Pick a tone for an exit code pill. Null/missing → warn ("unknown"). */ +export function pillForExit(exitCode: number | null | undefined): { + label: string + tone: 'accent' | 'warn' | 'alert' | 'default' +} { + if (exitCode === 0) return { label: 'exit 0', tone: 'accent' } + if (exitCode === null || exitCode === undefined) { + return { label: 'no exit', tone: 'warn' } + } + return { label: `exit ${exitCode}`, tone: 'alert' } +} + +/** Infer a syntax-highlight language from a file path. Returns `null` + when nothing useful matches — callers should fall back to a plain + `
`. Names mirror `prism-react-renderer` defaults plus the
+    handful we manually register in `@/lib/syntax`. */
+export function inferLangFromPath(path: string): string | null {
+  const lowered = path.toLowerCase()
+  // Trailing `.ext` made of lowercase letters/digits only; a dot inside a
+  // directory name never matches because `/` is outside the class.
+  const ext = /\.([a-z0-9]+)$/.exec(lowered)?.[1]
+  if (!ext) {
+    // Extension-less build files are highlighted as shell.
+    const looksLikeBuildFile = ['dockerfile', 'makefile'].some(
+      (name) => lowered.endsWith(name) || lowered.includes(`/${name}`),
+    )
+    return looksLikeBuildFile ? 'bash' : null
+  }
+  return HIGHLIGHT_LANG_BY_EXT.get(ext) ?? null
+}
+
+/* Extension → highlight-language table consulted by `inferLangFromPath`.
+   A `Map` rather than an object literal so an extension such as
+   `constructor` can never resolve to an inherited property. */
+const HIGHLIGHT_LANG_BY_EXT: ReadonlyMap<string, string> = new Map([
+  ['js', 'javascript'],
+  ['mjs', 'javascript'],
+  ['cjs', 'javascript'],
+  ['jsx', 'jsx'],
+  ['ts', 'typescript'],
+  ['tsx', 'tsx'],
+  ['json', 'json'],
+  ['jsonc', 'json'],
+  ['py', 'python'],
+  ['pyi', 'python'],
+  ['sh', 'bash'],
+  ['bash', 'bash'],
+  ['zsh', 'bash'],
+  ['rs', 'rust'],
+  ['go', 'go'],
+  ['rb', 'ruby'],
+  ['html', 'markup'],
+  ['htm', 'markup'],
+  ['css', 'css'],
+  ['md', 'markdown'],
+  ['mdx', 'markdown'],
+  ['yml', 'yaml'],
+  ['yaml', 'yaml'],
+  ['toml', 'toml'],
+])
+
+/** Translate the `lang` of a `sandbox::run` request into the name of a
+    syntax-highlight grammar. Matching is case-insensitive; anything
+    outside the known aliases yields `null`. */
+export function langFromRunLang(lang: string): string | null {
+  switch (lang.toLowerCase()) {
+    case 'node':
+    case 'js':
+    case 'javascript':
+      return 'javascript'
+    case 'python':
+    case 'py':
+      return 'python'
+    case 'shell':
+    case 'sh':
+    case 'bash':
+      return 'bash'
+    case 'typescript':
+    case 'ts':
+      return 'typescript'
+    default:
+      return null
+  }
+}
+
+/** Make one argv slot safe to show (and paste) as part of a shell
+    command line. The empty string becomes `''`; a token built solely
+    from letters, digits and `_@%+=:,./-` is returned untouched;
+    everything else is wrapped in single quotes, with each embedded
+    single quote rewritten as the POSIX `'\''` sequence. */
+export function quoteShellArg(arg: string): string {
+  if (arg.length === 0) return "''"
+  const isBareToken = /^[A-Za-z0-9_@%+=:,./-]+$/.test(arg)
+  if (isBareToken) return arg
+  // Close the quote, emit an escaped quote, reopen the quote.
+  const escaped = arg.split("'").join("'\\''")
+  return "'" + escaped + "'"
+}
+
+/** Build the single-line command shown in the terminal prompt for an
+    ExecRequest. A non-empty `argv` takes precedence and is rendered with
+    every slot quoted. Otherwise `cmd` is emitted verbatim (a shell-line
+    `cmd` is left alone because the daemon shlex-splits it server-side)
+    followed by the quoted `args`, if there are any. */
+export function formatExecCommand(req: {
+  cmd?: string | null
+  args?: string[] | null
+  argv?: string[] | null
+}): string {
+  const { argv, cmd, args } = req
+  if (argv != null && argv.length > 0) {
+    return argv.map(quoteShellArg).join(' ')
+  }
+  const command = cmd ?? ''
+  if (args == null || args.length === 0) return command
+  return `${command} ${args.map(quoteShellArg).join(' ')}`
+}
+
+/** Alias used by terminal renderers. */
+export const formatCommandLine = formatExecCommand
+
+// Stray description of `normaliseEnv` (defined further down in this
+// file): it normalises an `EnvShape` to an array of `[key, value]`
+// tuples, sorted by key (the daemon's BTreeMap ordering).
+/** Type guard for the wire shape of `engine/src/protocol.rs::StreamChannelRef`.
+    Returns `true` only when `value` is a non-null object whose
+    `channel_id`, `access_key` and `direction` are all strings, so every
+    field promised by the narrowed type has actually been checked. */
+export function isStreamChannelRef(
+  value: unknown,
+): value is { channel_id: string; access_key: string; direction: string } {
+  if (typeof value !== 'object' || value === null) return false
+  // Safe widening: each field is inspected with `typeof` before it is trusted.
+  const candidate = value as Record<string, unknown>
+  return (
+    typeof candidate.channel_id === 'string' &&
+    typeof candidate.access_key === 'string' &&
+    typeof candidate.direction === 'string'
+  )
+}
+
+/* 1 MiB, expressed in bytes. */
+const INLINE_BUFFER_CAP = 1 << 20
+
+/** Produce the "cap reached" note once `byteLen` is at or above
+    `INLINE_BUFFER_CAP`; below that (or for `NaN`) there is nothing to
+    report and the result is `null`. */
+export function streamCapNote(byteLen: number): string | null {
+  return byteLen >= INLINE_BUFFER_CAP ? 'stdout 1.0 MiB cap reached' : null
+}
+
+/** Normalise an `EnvShape` to an array of `[key, value]` tuples sorted by
+    key.
+
+    - `undefined` (or any falsy value) yields `[]`.
+    - The array form holds `KEY=value` strings: each is split at the first
+      `=`, and an entry without `=` becomes `[entry, '']`.
+    - The object form is expanded with `Object.entries`.
+
+    Keys are ordered by plain code-unit comparison instead of
+    `localeCompare`. That keeps the order independent of the runtime
+    locale and, for ASCII keys, identical to the byte-wise ordering of the
+    daemon's BTreeMap (uppercase before `_` before lowercase). */
+export function normaliseEnv(env: EnvShape | undefined): [string, string][] {
+  if (!env) return []
+  const byCodeUnit = (
+    [a]: [string, string],
+    [b]: [string, string],
+  ): number => (a < b ? -1 : a > b ? 1 : 0)
+  if (Array.isArray(env)) {
+    return env
+      .map<[string, string]>((kv) => {
+        const eq = kv.indexOf('=')
+        if (eq < 0) return [kv, '']
+        return [kv.slice(0, eq), kv.slice(eq + 1)]
+      })
+      .sort(byCodeUnit)
+  }
+  // `Object.entries` returns a fresh array, so sorting in place is safe.
+  return Object.entries(env).sort(byCodeUnit)
+}
diff --git a/console/web/src/components/chat/sandbox/index.tsx b/console/web/src/components/chat/sandbox/index.tsx
new file mode 100644
index 00000000..4c108b8a
--- /dev/null
+++ b/console/web/src/components/chat/sandbox/index.tsx
@@ -0,0 +1,139 @@
+import type { FunctionCallMessage } from '@/types/chat'
+import { CreateView } from './CreateView'
+import { SandboxErrorView } from './ErrorView'
+import { ExecPreview, ExecView } from './ExecView'
+import { FsChmodView } from './FsChmodView'
+import { FsGrepView } from './FsGrepView'
+import { FsLsView } from './FsLsView'
+import { FsMkdirView } from './FsMkdirView'
+import { FsMvView } from './FsMvView'
+import { FsReadView } from './FsReadView'
+import { FsRmView } from './FsRmView'
+import { FsSedView } from './FsSedView'
+import { FsStatView } from './FsStatView'
+import { FsWriteView } from './FsWriteView'
+import { ListView } from './ListView'
+import { parseSandboxErrorDisplay, unwrapEnvelope } from './parsers'
+import { RunPreview, RunView } from './RunView'
+import { StopView } from './StopView'
+
+/* The known sandbox::* set. Every id is spelled out in full (nothing is
+   derived from a prefix or a regex) so the dispatcher's "is this a
+   sandbox call?" check can never match more than intended. */
+const SANDBOX_FN_IDS: ReadonlySet<string> = new Set([
+  // filesystem
+  'sandbox::fs::chmod',
+  'sandbox::fs::grep',
+  'sandbox::fs::ls',
+  'sandbox::fs::mkdir',
+  'sandbox::fs::mv',
+  'sandbox::fs::read',
+  'sandbox::fs::rm',
+  'sandbox::fs::sed',
+  'sandbox::fs::stat',
+  'sandbox::fs::write',
+  // lifecycle
+  'sandbox::create',
+  'sandbox::exec',
+  'sandbox::list',
+  'sandbox::run',
+  'sandbox::stop',
+])
+
+/** `true` only when `functionId` is exactly one of the ids listed in
+    `SANDBOX_FN_IDS`. */
+export function isSandboxFunction(functionId: string): boolean {
+  const known = SANDBOX_FN_IDS.has(functionId)
+  return known
+}
+
+/* Public surface mirrors the plan exactly. Both helpers (presumably the
+   `tryRender` / `tryRenderPreview` pair further down, not the label
+   component that follows) return `null` for unknown function ids or
+   unparseable payloads so the caller can silently fall back to the
+   existing JSON view. */
+
+/** Label for a function id. An id that does not start with `sandbox::`
+    is emitted as-is; otherwise the `sandbox::` namespace and the
+    remainder of the id (`tail`) are emitted as two separate pieces.
+    NOTE(review): the element markup around these pieces is not visible
+    in this copy of the patch — confirm against the real file. */
+export function SandboxFunctionIdLabel({ functionId }: { functionId: string }) {
+  if (!functionId.startsWith('sandbox::')) {
+    return {functionId}
+  }
+  // Everything after the `sandbox::` prefix.
+  const tail = functionId.slice('sandbox::'.length)
+  return (
+    <>
+      sandbox::
+      {tail}
+    
+  )
+}
+
+/** Dispatch a sandbox function-call message to its per-tool view.
+
+    Returns `null` when the id is not a known sandbox function or the
+    message is still awaiting approval. Otherwise the input (and the
+    output, when present) is unwrapped from its envelope. If the call is
+    no longer running and the raw output parses as a sandbox error, the
+    error display takes over; failing that, the view is chosen by
+    `functionId`.
+    NOTE(review): the elements returned by each branch are not visible in
+    this copy of the patch — confirm the props against the real file. */
+function tryRender(message: FunctionCallMessage): React.ReactNode | null {
+  if (!isSandboxFunction(message.functionId)) return null
+  if (message.pendingApproval) return null
+
+  // The done-state view is what tryRender owns; the pending preview
+  // lives in tryRenderPreview. Running-state cards are rendered by
+  // the per-tool view with `running=true` so the shell chrome stays
+  // identical and only the body swaps to the executing-shimmer.
+  const input = unwrapEnvelope(message.input)
+  const rawOutput = message.output
+  // `undefined` (not `null`) stands for "no output yet".
+  const output = rawOutput != null ? unwrapEnvelope(rawOutput) : undefined
+  const running = !!message.running
+
+  // Error detection is handed the raw output, not the unwrapped one, and
+  // is skipped entirely while the call is still running.
+  const errorDisplay =
+    !running && rawOutput != null
+      ? parseSandboxErrorDisplay(rawOutput)
+      : null
+  if (errorDisplay) {
+    return 
+  }
+
+  switch (message.functionId) {
+    case 'sandbox::exec':
+      return 
+    case 'sandbox::run':
+      return 
+    case 'sandbox::create':
+      return 
+    case 'sandbox::stop':
+      return 
+    case 'sandbox::list':
+      return 
+    case 'sandbox::fs::ls':
+      return 
+    case 'sandbox::fs::stat':
+      return 
+    case 'sandbox::fs::read':
+      return 
+    case 'sandbox::fs::write':
+      return 
+    case 'sandbox::fs::mkdir':
+      return 
+    case 'sandbox::fs::rm':
+      return 
+    case 'sandbox::fs::mv':
+      return 
+    case 'sandbox::fs::chmod':
+      return 
+    case 'sandbox::fs::grep':
+      return 
+    case 'sandbox::fs::sed':
+      return 
+    default:
+      return null
+  }
+}
+
+/** Preview counterpart of `tryRender`. Only `sandbox::exec` and
+    `sandbox::run` have a preview branch; every other id, sandbox or not,
+    yields `null`. Only the unwrapped input is read — the output is never
+    consulted here.
+    NOTE(review): the elements returned by the two branches are not
+    visible in this copy of the patch — confirm against the real file. */
+function tryRenderPreview(
+  message: FunctionCallMessage,
+): React.ReactNode | null {
+  if (!isSandboxFunction(message.functionId)) return null
+  const input = unwrapEnvelope(message.input)
+  switch (message.functionId) {
+    case 'sandbox::exec':
+      return 
+    case 'sandbox::run':
+      return 
+    default:
+      return null
+  }
+}
+
+/** Bundle of the sandbox rendering helpers exported as one object:
+    the id check, the main renderer (also exposed under the
+    `tryRenderRunning` name — both keys point at the same function) and
+    the preview renderer. */
+export const SandboxToolView = {
+  isSandboxFunction,
+  tryRender,
+  /** Alias kept for FCM symmetry; running state lives inside `tryRender`. */
+  tryRenderRunning: tryRender,
+  tryRenderPreview,
+}
diff --git a/console/web/src/components/chat/sandbox/parsers.ts b/console/web/src/components/chat/sandbox/parsers.ts
new file mode 100644
index 00000000..812dd239
--- /dev/null
+++ b/console/web/src/components/chat/sandbox/parsers.ts
@@ -0,0 +1,628 @@
+import { z } from 'zod'
+
+/* Zod schemas mirroring the sandbox::* request/response shapes from
+   `motia/crates/iii-worker/src/sandbox_daemon/*` and the shared
+   `iii-shell-proto` (FsEntry, FsMatch, FsSedFileResult).
+
+   The Rust handlers route via `RegisterFunction::new_async` with
+   `JsonSchema` deserialisers, so wire payloads are always plain JSON
+   matching these shapes. Every schema is intentionally non-strict
+   (no `.strict()`); forward-compat is preserved for unknown future
+   fields and the renderers only read what they need. */
+
+// ---------------------------------------------------------------------------
+// Shared building blocks
+// ---------------------------------------------------------------------------
+
+/** `engine/src/protocol.rs::StreamChannelRef` (untagged JSON object). */
+export const streamChannelRefSchema = z.object({
+  channel_id: z.string(),
+  access_key: z.string(),
+  direction: z.enum(['read', 'write']),
+})
+export type StreamChannelRef = z.infer<typeof streamChannelRefSchema>
+
+/** Either an inline UTF-8 body or a streaming channel ref. */
+export const fileContentSchema = z.union([z.string(), streamChannelRefSchema])
+export type FileContent = z.infer<typeof fileContentSchema>
+
+/** `iii-shell-proto::FsEntry`. */
+export const fsEntrySchema = z.object({
+  name: z.string(),
+  is_dir: z.boolean(),
+  size: z.number(),
+  mode: z.string(),
+  mtime: z.number(),
+  is_symlink: z.boolean(),
+})
+export type FsEntry = z.infer<typeof fsEntrySchema>
+
+/** `iii-shell-proto::FsMatch`. `path` is canonical; older guests sent
+    `file` — the `.transform()` below folds that legacy key into `path`. */
+export const fsMatchSchema = z
+  .object({
+    path: z.string().optional(),
+    file: z.string().optional(),
+    line: z.number(),
+    content: z.string(),
+  })
+  .transform((m) => ({
+    path: m.path ?? m.file ?? '',
+    line: m.line,
+    content: m.content,
+  }))
+export type FsMatch = z.infer<typeof fsMatchSchema>
+
+/** `iii-shell-proto::FsSedFileResult`. Same `path`/`file` folding; a missing `error` becomes `null`. */
+export const fsSedFileResultSchema = z
+  .object({
+    path: z.string().optional(),
+    file: z.string().optional(),
+    replacements: z.number(),
+    success: z.boolean(),
+    error: z.string().nullable().optional(),
+  })
+  .transform((r) => ({
+    path: r.path ?? r.file ?? '',
+    replacements: r.replacements,
+    success: r.success,
+    error: r.error ?? null,
+  }))
+export type FsSedFileResult = z.infer<typeof fsSedFileResultSchema>
+
+/** EnvShape from exec.rs — accepts either Vec<"K=V"> or {K:V}. */
+export const envShapeSchema = z.union([
+  z.array(z.string()),
+  z.record(z.string(), z.string()),
+])
+export type EnvShape = z.infer<typeof envShapeSchema>
+
+// ---------------------------------------------------------------------------
+// SandboxErrorWire — the Stripe-style flat error payload from errors.rs.
+// ---------------------------------------------------------------------------
+
+export const sandboxErrorWireSchema = z.object({
+  type: z.string(),
+  code: z.string().regex(/^S\d{3}$/),
+  message: z.string(),
+  docs_url: z.string().optional(),
+  retryable: z.boolean().optional(),
+  fix: z.unknown().optional(),
+  fix_note: z.string().nullable().optional(),
+})
+export type SandboxErrorWire = z.infer<typeof sandboxErrorWireSchema>
+
+// ---------------------------------------------------------------------------
+// Lifecycle
+// ---------------------------------------------------------------------------
+
+export const execRequestSchema = z.object({
+  sandbox_id: z.string(),
+  cmd: z.string().optional(),
+  args: z.array(z.string()).optional(),
+  argv: z.array(z.string()).optional(),
+  stdin: z.string().nullable().optional(),
+  env: envShapeSchema.optional(),
+  timeout_ms: z.number().nullable().optional(),
+  workdir: z.string().nullable().optional(),
+})
+export type ExecRequest = z.infer<typeof execRequestSchema>
+
+export const execResponseSchema = z.object({
+  stdout: z.string(),
+  stderr: z.string(),
+  exit_code: z.number().nullable(),
+  timed_out: z.boolean(),
+  duration_ms: z.number(),
+  success: z.boolean(),
+})
+export type ExecResponse = z.infer<typeof execResponseSchema>
+
+export const runFileSchema = z.object({
+  path: z.string(),
+  content: z.string(),
+})
+
+export const runRequestSchema = z.object({
+  image: z.string(),
+  code: z.string(),
+  lang: z.string(),
+  files: z.array(runFileSchema).optional(),
+  env: envShapeSchema.optional(),
+  stdin: z.string().nullable().optional(),
+  timeout_ms: z.number().nullable().optional(),
+  keep_sandbox: z.boolean().optional(),
+})
+export type RunRequest = z.infer<typeof runRequestSchema>
+
+export const runResponseSchema = z.object({
+  stdout: z.string(),
+  stderr: z.string(),
+  exit_code: z.number().nullable(),
+  timed_out: z.boolean(),
+  duration_ms: z.number(),
+  success: z.boolean(),
+  sandbox_id: z.string().nullable().optional(),
+})
+export type RunResponse = z.infer<typeof runResponseSchema>
+
+export const createRequestSchema = z.object({
+  image: z.string(),
+  cpus: z.number().nullable().optional(),
+  memory_mb: z.number().nullable().optional(),
+  name: z.string().nullable().optional(),
+  network: z.boolean().nullable().optional(),
+  idle_timeout_secs: z.number().nullable().optional(),
+  env: envShapeSchema.optional(),
+})
+export type CreateRequest = z.infer<typeof createRequestSchema>
+
+export const createResponseSchema = z.object({
+  sandbox_id: z.string(),
+  image: z.string(),
+})
+export type CreateResponse = z.infer<typeof createResponseSchema>
+
+export const stopRequestSchema = z.object({
+  sandbox_id: z.string(),
+  wait: z.boolean().optional(),
+})
+export type StopRequest = z.infer<typeof stopRequestSchema>
+
+export const stopResponseSchema = z.object({
+  sandbox_id: z.string(),
+  stopped: z.boolean(),
+})
+export type StopResponse = z.infer<typeof stopResponseSchema>
+
+export const listRequestSchema = z.object({}).passthrough()
+export type ListRequest = z.infer<typeof listRequestSchema>
+
+export const sandboxSummarySchema = z.object({
+  sandbox_id: z.string(),
+  name: z.string().nullable().optional(),
+  image: z.string(),
+  age_secs: z.number(),
+  exec_in_progress: z.boolean(),
+  stopped: z.boolean(),
+})
+export type SandboxSummary = z.infer<typeof sandboxSummarySchema>
+
+export const listResponseSchema = z.object({
+  sandboxes: z.array(sandboxSummarySchema),
+})
+export type ListResponse = z.infer<typeof listResponseSchema>
+
+// ---------------------------------------------------------------------------
+// Filesystem
+// ---------------------------------------------------------------------------
+
+export const fsLsRequestSchema = z.object({
+  sandbox_id: z.string(),
+  path: z.string(),
+})
+export type FsLsRequest = z.infer<typeof fsLsRequestSchema>
+
+export const fsLsResponseSchema = z.object({
+  entries: z.array(fsEntrySchema),
+})
+export type FsLsResponse = z.infer<typeof fsLsResponseSchema>
+
+export const fsStatRequestSchema = z.object({
+  sandbox_id: z.string(),
+  path: z.string(),
+})
+export type FsStatRequest = z.infer<typeof fsStatRequestSchema>
+
+export const fsStatResponseSchema = z.object({
+  name: z.string(),
+  is_dir: z.boolean(),
+  size: z.number(),
+  mode: z.string(),
+  mtime: z.number(),
+  is_symlink: z.boolean(),
+})
+export type FsStatResponse = z.infer<typeof fsStatResponseSchema>
+
+export const fsMkdirRequestSchema = z.object({
+  sandbox_id: z.string(),
+  path: z.string(),
+  mode: z.string().optional(),
+  parents: z.boolean().optional(),
+})
+export type FsMkdirRequest = z.infer<typeof fsMkdirRequestSchema>
+
+export const fsMkdirResponseSchema = z.object({
+  created: z.boolean(),
+})
+export type FsMkdirResponse = z.infer<typeof fsMkdirResponseSchema>
+
+export const fsWriteRequestSchema = z.object({
+  sandbox_id: z.string(),
+  path: z.string(),
+  mode: z.string().optional(),
+  parents: z.boolean().optional(),
+  content: fileContentSchema.nullable().optional(),
+  content_b64: z.string().nullable().optional(),
+})
+export type FsWriteRequest = z.infer<typeof fsWriteRequestSchema>
+
+export const fsWriteResponseSchema = z.object({
+  bytes_written: z.number(),
+  path: z.string(),
+})
+export type FsWriteResponse = z.infer<typeof fsWriteResponseSchema>
+
+export const fsReadRequestSchema = z.object({
+  sandbox_id: z.string(),
+  path: z.string(),
+})
+export type FsReadRequest = z.infer<typeof fsReadRequestSchema>
+
+export const fsReadResponseSchema = z.object({
+  content: fileContentSchema,
+  size: z.number(),
+  mode: z.string(),
+  mtime: z.number(),
+})
+export type FsReadResponse = z.infer<typeof fsReadResponseSchema>
+
+export const fsRmRequestSchema = z.object({
+  sandbox_id: z.string(),
+  path: z.string(),
+  recursive: z.boolean().optional(),
+})
+export type FsRmRequest = z.infer<typeof fsRmRequestSchema>
+
+export const fsRmResponseSchema = z.object({
+  removed: z.boolean(),
+})
+export type FsRmResponse = z.infer<typeof fsRmResponseSchema>
+
+export const fsMvRequestSchema = z.object({
+  sandbox_id: z.string(),
+  src: z.string(),
+  dst: z.string(),
+  overwrite: z.boolean().optional(),
+})
+export type FsMvRequest = z.infer<typeof fsMvRequestSchema>
+
+export const fsMvResponseSchema = z.object({
+  moved: z.boolean(),
+})
+export type FsMvResponse = z.infer<typeof fsMvResponseSchema>
+
+export const fsChmodRequestSchema = z.object({
+  sandbox_id: z.string(),
+  path: z.string(),
+  mode: z.string(),
+  uid: z.number().nullable().optional(),
+  gid: z.number().nullable().optional(),
+  recursive: z.boolean().optional(),
+})
+export type FsChmodRequest = z.infer<typeof fsChmodRequestSchema>
+
+export const fsChmodResponseSchema = z.object({
+  updated: z.number(),
+})
+export type FsChmodResponse = z.infer<typeof fsChmodResponseSchema>
+
+export const fsGrepRequestSchema = z.object({
+  sandbox_id: z.string(),
+  path: z.string(),
+  pattern: z.string(),
+  recursive: z.boolean().optional(),
+  ignore_case: z.boolean().optional(),
+  include_glob: z.array(z.string()).optional(),
+  exclude_glob: z.array(z.string()).optional(),
+  max_matches: z.number().optional(),
+  max_line_bytes: z.number().optional(),
+})
+export type FsGrepRequest = z.infer<typeof fsGrepRequestSchema>
+
+export const fsGrepResponseSchema = z.object({
+  matches: z.array(fsMatchSchema),
+  truncated: z.boolean(),
+})
+export type FsGrepResponse = z.infer<typeof fsGrepResponseSchema>
+
+export const fsSedRequestSchema = z.object({
+  sandbox_id: z.string(),
+  files: z.array(z.string()).optional(),
+  path: z.string().nullable().optional(),
+  recursive: z.boolean().optional(),
+  include_glob: z.array(z.string()).optional(),
+  exclude_glob: z.array(z.string()).optional(),
+  pattern: z.string(),
+  replacement: z.string(),
+  regex: z.boolean().optional(),
+  first_only: z.boolean().optional(),
+  ignore_case: z.boolean().optional(),
+})
+export type FsSedRequest = z.infer<typeof fsSedRequestSchema>
+
+export const fsSedResponseSchema = z.object({
+  results: z.array(fsSedFileResultSchema),
+  total_replacements: z.number(),
+})
+export type FsSedResponse = z.infer<typeof fsSedResponseSchema>
+
+// ---------------------------------------------------------------------------
+// Envelope unwrap
+// ---------------------------------------------------------------------------
+
+/**
+ * The harness `agent-trigger.ts` wraps every tool result in a
+ * `{ content: ContentBlock[], details: unknown, terminate: boolean }`
+ * envelope before relaying it to the agent. The console receives the
+ * same shape via the engine's function_call output stream.
+ *
+ * This peels the wrapper so renderers operate on the flat sandbox
+ * response: when `value` is a non-array object whose `content` is an
+ * array and which has a `details` key, `details` is returned. Any
+ * other value (primitives, `null`, arrays, already-flat payloads) is
+ * returned unchanged, so calling it on a payload that never went
+ * through the envelope is a no-op.
+ */
+export function unwrapEnvelope(value: unknown): unknown {
+  if (!value || typeof value !== 'object' || Array.isArray(value)) {
+    return value
+  }
+  const obj = value as Record<string, unknown>
+  if (Array.isArray(obj.content) && 'details' in obj) {
+    return obj.details
+  }
+  return value
+}
+
+const denialEnvelopeSchema = z.object({
+  schema_version: z.number().optional(),
+  status: z.string().optional(),
+  denied_by: z.string().optional(),
+  function_id: z.string().optional(),
+  reason: z.string().optional(),
+})
+export type DenialEnvelopeWire = z.infer<typeof denialEnvelopeSchema>
+
+const functionErrorEnvelopeSchema = z.object({
+  kind: z.string(),
+  message: z.string(),
+  details: z.unknown().optional(),
+  content: z.array(z.unknown()).optional(),
+})
+
+export type SandboxInvocationError = {
+  title: string
+  message: string
+  functionId?: string
+  deniedBy?: string
+  reason?: string
+  detailText?: string
+}
+
+export type SandboxErrorDisplay =
+  | { variant: 'wire'; error: SandboxErrorWire }
+  | { variant: 'invocation'; error: SandboxInvocationError }
+/** Joins the non-empty `text` of every `{ type: 'text' }` block with newlines; `undefined` when there is none. */
+function contentBlocksText(content: unknown): string | undefined {
+  if (!Array.isArray(content)) return undefined
+  const parts: string[] = []
+  for (const block of content) {
+    if (!block || typeof block !== 'object') continue
+    const obj = block as Record<string, unknown>
+    if (obj.type === 'text' && typeof obj.text === 'string' && obj.text.length > 0) {
+      parts.push(obj.text)
+    }
+  }
+  return parts.length > 0 ? parts.join('\n') : undefined
+}
+/** Pull the first balanced `{…}` JSON object out of a string. Braces inside
+    JSON string literals (and escaped quotes) do not affect the balance. */
+export function extractFirstJsonObject(text: string): unknown | null {
+  const start = text.indexOf('{')
+  if (start === -1) return null
+  let depth = 0
+  let inString = false
+  for (let i = start; i < text.length; i++) {
+    const ch = text[i]
+    if (inString && ch === '\\') i++ // skip the escaped character
+    else if (ch === '"') inString = !inString
+    else if (!inString && ch === '{') depth++
+    else if (!inString && ch === '}' && --depth === 0) {
+      try {
+        return JSON.parse(text.slice(start, i + 1))
+      } catch {
+        return null
+      }
+    }
+  }
+  return null
+}
+/** Parses `value` (envelope-unwrapped) as `SandboxErrorWire`, falling back to handler-tagged payloads; `null` otherwise. */
+function tryParseWire(value: unknown): SandboxErrorWire | null {
+  const parsed = sandboxErrorWireSchema.safeParse(unwrapEnvelope(value))
+  if (parsed.success) return parsed.data
+  // Fallback: re-validate the raw object, minus its `error` tag, when it is tagged `handler_error` or carries an `Sxxx` code.
+  if (!value || typeof value !== 'object' || Array.isArray(value)) return null
+  const obj = value as Record<string, unknown>
+  if (obj.error === 'handler_error' || (typeof obj.code === 'string' && /^S\d{3}$/.test(obj.code))) {
+    const { error: _tag, ...rest } = obj
+    const tagged = sandboxErrorWireSchema.safeParse(rest)
+    if (tagged.success) return tagged.data
+  }
+  return null
+}
+/** Returns the denial envelope found in `value` (unwrapped first); `null` unless `status` is 'denied' or `denied_by` is set. */
+function tryParseDenial(value: unknown): DenialEnvelopeWire | null {
+  const result = denialEnvelopeSchema.safeParse(unwrapEnvelope(value))
+  if (!result.success) return null
+  const { status, denied_by: deniedBy } = result.data
+  return status === 'denied' || deniedBy ? result.data : null
+}
+/** Maps a denial envelope onto the invocation-error display model. The message
+    is `reason`, then `fallbackMessage`, then a generic default. */
+function denialToInvocation(
+  denial: DenialEnvelopeWire,
+  fallbackMessage?: string,
+  detailText?: string,
+): SandboxInvocationError {
+  const { denied_by: deniedBy, reason, status } = denial
+  let title = status === 'denied' ? 'Denied' : 'Invocation failed'
+  // A recognised `denied_by` overrides the generic status-based title.
+  if (deniedBy === 'gate_unavailable') {
+    title = 'Gate unavailable'
+  } else if (deniedBy === 'permissions') {
+    title = 'Permission denied'
+  } else if (deniedBy === 'user') {
+    title = 'Denied by user'
+  }
+  const message = reason ?? fallbackMessage ?? 'The sandbox call could not complete.'
+  return {
+    title,
+    message,
+    functionId: denial.function_id,
+    deniedBy,
+    reason,
+    detailText,
+  }
+}
+/** Builds the display error for a function-error envelope, preferring a denial nested in `details` when one parses. */
+function invocationFromFunctionError(
+  envelope: z.infer<typeof functionErrorEnvelopeSchema>,
+): SandboxInvocationError | null {
+  const detailText = contentBlocksText(envelope.content)
+  const denial = envelope.details != null ? tryParseDenial(envelope.details) : null
+  if (denial) {
+    return denialToInvocation(denial, envelope.message, detailText)
+  }
+  return {
+    title: 'Invocation failed',
+    message: envelope.message,
+    detailText,
+  }
+}
+/** Gathers the distinct values to probe for an error: `value`, its unwrapped form, and parts of `value.error`. */
+function collectErrorCandidates(value: unknown): unknown[] {
+  const seen = new Set<unknown>()
+  const out: unknown[] = []
+  const push = (candidate: unknown) => {
+    if (seen.has(candidate)) return
+    seen.add(candidate)
+    out.push(candidate)
+  }
+
+  push(value)
+  push(unwrapEnvelope(value))
+
+  if (value && typeof value === 'object' && !Array.isArray(value)) {
+    const obj = value as Record<string, unknown>
+    if (obj.error && typeof obj.error === 'object') {
+      const err = obj.error as Record<string, unknown>
+      push(err)
+      if ('details' in err) push(err.details)
+      if (typeof err.message === 'string') push(err.message)
+      const text = contentBlocksText(err.content)
+      if (text) push(text)
+    }
+  }
+
+  return out
+}
+
+/**
+ * Normalise every failure shape the console may receive for sandbox calls.
+ * Checked in order: `SandboxErrorWire` (flat, harness-wrapped, handler-tagged
+ * or embedded as JSON in a string), the `{ error: { kind, message, details,
+ * content } }` function-error envelope, then bare denial envelopes.
+ */
+export function parseSandboxErrorDisplay(
+  value: unknown,
+): SandboxErrorDisplay | null {
+  const candidates = collectErrorCandidates(value)
+
+  for (const candidate of candidates) {
+    const wire = tryParseWire(candidate)
+    if (wire) return { variant: 'wire', error: wire }
+
+    if (typeof candidate === 'string') {
+      const embedded = extractFirstJsonObject(candidate)
+      if (embedded != null) {
+        const wireFromText = tryParseWire(embedded)
+        if (wireFromText) return { variant: 'wire', error: wireFromText }
+      }
+    }
+  }
+  // The envelope goes before the generic denial scan so a denial in `error.details` keeps the envelope message and content text.
+  if (value && typeof value === 'object' && !Array.isArray(value)) {
+    const parsed = functionErrorEnvelopeSchema.safeParse(
+      (value as Record<string, unknown>).error,
+    )
+    if (parsed.success && parsed.data.kind === 'function_error') {
+      const invocation = invocationFromFunctionError(parsed.data)
+      if (invocation) {
+        return { variant: 'invocation', error: invocation }
+      }
+    }
+  }
+
+  for (const candidate of candidates) {
+    const denial = tryParseDenial(candidate)
+    if (denial) {
+      return {
+        variant: 'invocation',
+        error: denialToInvocation(denial),
+      }
+    }
+  }
+
+  return null
+}
+/** True when `tryParseWire` finds a `SandboxErrorWire` in `value`. NOTE(review): that
+    includes envelope-wrapped payloads, for which the `value is` narrowing is not exact. */
+export function isSandboxErrorWire(value: unknown): value is SandboxErrorWire {
+  return tryParseWire(value) !== null
+}
+/** Every `sandbox::*` function id accepted by `isSandboxFunctionId`. */
+export const SANDBOX_FUNCTION_IDS = [
+  'sandbox::create',
+  'sandbox::exec',
+  'sandbox::stop',
+  'sandbox::list',
+  'sandbox::run',
+  'sandbox::fs::ls',
+  'sandbox::fs::stat',
+  'sandbox::fs::mkdir',
+  'sandbox::fs::write',
+  'sandbox::fs::read',
+  'sandbox::fs::rm',
+  'sandbox::fs::chmod',
+  'sandbox::fs::mv',
+  'sandbox::fs::grep',
+  'sandbox::fs::sed',
+] as const
+
+export type SandboxFunctionId = (typeof SANDBOX_FUNCTION_IDS)[number]
+/** Type guard: whether `id` is one of the entries of `SANDBOX_FUNCTION_IDS`. */
+export function isSandboxFunctionId(id: string): id is SandboxFunctionId {
+  return SANDBOX_FUNCTION_IDS.some((known) => known === id)
+}
+/** Exported wrapper around `tryParseWire`: the `SandboxErrorWire` found in `value`, or `null`. */
+export function parseSandboxError(value: unknown): SandboxErrorWire | null {
+  return tryParseWire(value)
+}
+/** Validates a request payload against `schema`; a nullish `value` is parsed as `{}`. Returns `null` when validation fails. */
+export function safeParseRequest<T>(
+  schema: z.ZodType<T>,
+  value: unknown,
+): T | null {
+  const parsed = schema.safeParse(value ?? {})
+  return parsed.success ? parsed.data : null
+}
+/** Validates a response payload against `schema` after `unwrapEnvelope`; returns `null` when validation fails. */
+export function safeParseResponse<T>(
+  schema: z.ZodType<T>,
+  value: unknown,
+): T | null {
+  const parsed = schema.safeParse(unwrapEnvelope(value))
+  return parsed.success ? parsed.data : null
+}
diff --git a/console/web/src/components/chat/sandbox/shared.tsx b/console/web/src/components/chat/sandbox/shared.tsx
new file mode 100644
index 00000000..2c9cf002
--- /dev/null
+++ b/console/web/src/components/chat/sandbox/shared.tsx
@@ -0,0 +1,65 @@
+import type { ReactNode } from 'react'
+import { Badge } from '@/components/ui/Badge'
+import { cn } from '@/lib/utils'
+/** NOTE(review): the element that should wrap `{children}` (and presumably receive `className`) is blank here — confirm the markup. */
+export function Chip({
+  children,
+  className,
+}: {
+  children: ReactNode
+  className?: string
+}) {
+  return (
+    
+      {children}
+    
+  )
+}
+
+export function MetaRow({ children }: { children: ReactNode }) {
+  return (
+    
+ {children} +
+ ) +} + +export function StatusPill({ + label, + variant = 'default', +}: { + label: string + variant?: 'default' | 'warn' | 'alert' | 'accent' +}) { + return ( + + {label} + + ) +} + +export function ActionLine({ + symbol, + children, + tone = 'accent', +}: { + symbol: string + children: ReactNode + tone?: 'accent' | 'warn' | 'ink' +}) { + const toneClass = + tone === 'warn' ? 'text-warn' : tone === 'ink' ? 'text-ink' : 'text-accent' + return ( +
+ + {symbol} + +
{children}
+
+ ) +} diff --git a/console/web/src/components/chat/sandbox/terminal/AnsiOutput.tsx b/console/web/src/components/chat/sandbox/terminal/AnsiOutput.tsx new file mode 100644 index 00000000..1c4d4ec7 --- /dev/null +++ b/console/web/src/components/chat/sandbox/terminal/AnsiOutput.tsx @@ -0,0 +1,69 @@ +import { cn } from '@/lib/utils' + +interface AnsiOutputProps { + stdout?: string + stderr?: string + className?: string +} + +/** + * Stream-coloured stdout/stderr pane. Sandbox exec is buffered (see + * `adapters.rs::run` — no incremental writes are surfaced), so we + * just stack stdout above stderr with distinct tones rather than + * try to interleave by timestamp (which the wire shape doesn't + * carry). ANSI escape parsing is explicitly out of scope per the + * plan; we strip the escape introducer to avoid leaking the raw + * `\x1b[` bytes into the rendered text. + */ +export function AnsiOutput({ stdout, stderr, className }: AnsiOutputProps) { + const hasStdout = !!stdout && stdout.length > 0 + const hasStderr = !!stderr && stderr.length > 0 + + if (!hasStdout && !hasStderr) { + return ( +
+ · no output +
+ ) + } + + return ( +
+ {hasStdout ? ( +
+          {stripAnsi(stdout ?? '')}
+        
+ ) : null} + {hasStdout && hasStderr ?
: null} + {hasStderr ? ( +
+          {stripAnsi(stderr ?? '')}
+        
+ ) : null} +
+ ) +} + +/* Single-pass strip of CSI/OSC escape sequences. Keeps the visible + text intact (newlines, tabs) so monospace alignment in tools like + `ls -l` / `cargo test` survives. The two control-char ranges are + intentional: 0x1B is the ANSI ESC introducer (CSI/OSC) and 0x07 is + the BEL terminator for OSC sequences. */ +// biome-ignore lint/suspicious/noControlCharactersInRegex: stripping ANSI escape sequences. +const CSI_REGEX = /\x1b\[[0-9;?]*[ -/]*[@-~]/g +// biome-ignore lint/suspicious/noControlCharactersInRegex: stripping ANSI OSC (ESC ] … BEL) sequences. +const OSC_REGEX = /\x1b\][^\x07]*\x07/g + +function stripAnsi(s: string): string { + return s.replace(CSI_REGEX, '').replace(OSC_REGEX, '') +} diff --git a/console/web/src/components/chat/sandbox/terminal/Terminal.tsx b/console/web/src/components/chat/sandbox/terminal/Terminal.tsx new file mode 100644 index 00000000..cb542218 --- /dev/null +++ b/console/web/src/components/chat/sandbox/terminal/Terminal.tsx @@ -0,0 +1,111 @@ +import type * as React from 'react' +import { Badge } from '@/components/ui/Badge' +import { Prompt } from '@/components/ui/Prompt' +import { cn } from '@/lib/utils' + +/** A small mono pill used for header chips (image / workdir / lang). + Distinct from `Badge` (which is text-only) — Chip carries a key + and value pair separated by a hair line. */ +interface ChipProps { + label?: string + children: React.ReactNode + className?: string +} + +export function Chip({ label, children, className }: ChipProps) { + return ( + + {label ? ( + + {label} + + ) : null} + {children} + + ) +} + +interface TerminalProps { + /** Single-line command shown to the right of the `$` prompt. */ + command?: React.ReactNode + /** Pulsing prompt + `executing…` shimmer in place of stdout. */ + running?: boolean + /** Optional chip row above the body. */ + chips?: React.ReactNode + /** Body region — usually `` or a code block. 
*/ + children?: React.ReactNode + /** Footer pill row (exit code, duration, timed-out). */ + footer?: React.ReactNode + className?: string +} + +/** + * Shared terminal chrome. The exec card sets `command`, `chips`, + * ``, and `footer`. `sandbox::run` reuses the chrome + * with an extra code-preview pane in the body slot. `fs::read` + * borrows just the header + body styling (no command line). + */ +export function Terminal({ + command, + running, + chips, + children, + footer, + className, +}: TerminalProps) { + return ( +
+ {command !== undefined ? ( +
+ + + {command} + + {chips ? ( + + {chips} + + ) : null} +
+ ) : chips ? ( +
+ {chips} +
+ ) : null} + + {running ? ( +
+ executing… +
+ ) : ( + children + )} + + {footer ? ( +
+ {footer} +
+ ) : null} +
+ ) +} + +/** Standardised footer pill — like a Badge but always tabular-nums. */ +interface FooterPillProps { + tone?: 'accent' | 'warn' | 'alert' | 'default' + children: React.ReactNode +} + +export function FooterPill({ tone = 'default', children }: FooterPillProps) { + return ( + + {children} + + ) +} diff --git a/console/web/src/components/providers/ProviderRow.tsx b/console/web/src/components/providers/ProviderRow.tsx index 8824456d..de83664f 100644 --- a/console/web/src/components/providers/ProviderRow.tsx +++ b/console/web/src/components/providers/ProviderRow.tsx @@ -1,11 +1,11 @@ import { Settings } from 'lucide-react' import { useEffect, useState } from 'react' +import { ProviderSettingsDialog } from '@/components/providers/ProviderSettingsDialog' import { type ActiveProvider, isLocalProvider, PROVIDER_DISPLAY, } from '@/components/providers/provider-registry' -import { ProviderSettingsDialog } from '@/components/providers/ProviderSettingsDialog' import { StatusBadge } from '@/components/providers/StatusBadge' import { useAuthStatus, useProviderConfig } from '@/hooks/use-providers' import { cn } from '@/lib/utils' diff --git a/console/web/src/components/providers/ProviderSettingsDialog.tsx b/console/web/src/components/providers/ProviderSettingsDialog.tsx index edf5465b..ba422d48 100644 --- a/console/web/src/components/providers/ProviderSettingsDialog.tsx +++ b/console/web/src/components/providers/ProviderSettingsDialog.tsx @@ -451,9 +451,7 @@ export function ProviderSettingsDialog({ // receipt appears below the destructive zone with the key field // potentially off-screen — operator reads "type a new key above" // with nothing above to type into. - const scrollContainer = dialogBodyRef.current?.closest( - '[role="dialog"]', - ) + const scrollContainer = dialogBodyRef.current?.closest('[role="dialog"]') if (scrollContainer) scrollContainer.scrollTop = 0 } catch { // Error surfaces via the remove error memo; keep armed for retry. 
@@ -565,10 +563,9 @@ export function ProviderSettingsDialog({ {status?.source === 'environment' ? ( keyDirty ? (

- note: {envVar} is set in the environment and takes - precedence at runtime. saving stores this key as a - fallback — it will take effect once the env var is - unset. + note: {envVar} is set in the environment and takes precedence + at runtime. saving stores this key as a fallback — it will + take effect once the env var is unset.

) : (

@@ -618,11 +615,7 @@ export function ProviderSettingsDialog({ configQuery.isLoading ? 'loading stored overrides...' : undefined } > - {showAdvanced ? ( - - ) : ( - - )} + {showAdvanced ? : } {configQuery.isLoading ? 'loading overrides...' : showAdvanced @@ -670,8 +663,8 @@ export function ProviderSettingsDialog({ >

ceiling enforced by the harness on tokens the model may - generate per request. if the model's own max is lower, - the model wins. + generate per request. if the model's own max is lower, the + model wins.

{isLocalProvider(provider) ? (

- local provider · the {localBaseUrlEnv(provider)} env var, if set, - overrides this on startup. + local provider · the {localBaseUrlEnv(provider)} env var, if + set, overrides this on startup.

) : null} {overridesError ? ( @@ -794,7 +787,6 @@ export function ProviderSettingsDialog({ ) : null}
) : null} -
{/* Pinned region: undo receipts and surface errors live directly @@ -836,11 +828,9 @@ export function ProviderSettingsDialog({ onClick={undoReset} className="font-mono text-[11px] text-accent hover:text-ink transition-colors" > - undo · {Math.max( - 0, - Math.ceil((undo.expiresAt - nowTick) / 1000), - )} - s left + undo ·{' '} + {Math.max(0, Math.ceil((undo.expiresAt - nowTick) / 1000))}s + left ) : null}
) : null} @@ -144,10 +140,9 @@ export function Configuration({ theme, onThemeChange }: ConfigurationProps) { [stored] set here ·{' '} [env-var] from environment variable (takes precedence) ·{' '} - [local] no key needed - ·{' '} - [overridden] routes via - custom endpoint or max-tokens cap + [local] no key needed ·{' '} + [overridden] routes via custom + endpoint or max-tokens cap

keyboard shortcuts: ↑↓ navigate rows · press 1– diff --git a/console/web/src/pages/Examples/sections/message-variants.tsx b/console/web/src/pages/Examples/sections/message-variants.tsx index d29c482d..0cf4719e 100644 --- a/console/web/src/pages/Examples/sections/message-variants.tsx +++ b/console/web/src/pages/Examples/sections/message-variants.tsx @@ -10,6 +10,7 @@ import type { UserMessage, } from '@/types/chat' import { Section, VariantCard } from '../Section' +import { sandboxFixtures } from './sandbox-fixtures' const sampleAttachments: Attachment[] = [ { @@ -67,7 +68,8 @@ const assistantStreaming: AssistantMessage = { role: 'assistant', model: 'openai::gpt-5', mode: 'ask', - content: 'sure — a btree-backed index gives you both `O(log n)` lookups and cheap range', + content: + 'sure — a btree-backed index gives you both `O(log n)` lookups and cheap range', streaming: true, createdAt: Date.now(), } @@ -85,7 +87,8 @@ const assistantThinking: AssistantMessage = { const thoughtBrief: ThoughtType = { id: 't1', role: 'thought', - content: 'this is a one-line clarification. trivial to resolve, no branching to consider.', + content: + 'this is a one-line clarification. trivial to resolve, no branching to consider.', durationMs: 800, createdAt: Date.now(), } @@ -306,9 +309,21 @@ export function MessageVariantsSection() { - + + + {sandboxFixtures.map((fixture) => ( + + + + ))}

) diff --git a/console/web/src/pages/Examples/sections/sandbox-fixtures.ts b/console/web/src/pages/Examples/sections/sandbox-fixtures.ts new file mode 100644 index 00000000..444d2281 --- /dev/null +++ b/console/web/src/pages/Examples/sections/sandbox-fixtures.ts @@ -0,0 +1,373 @@ +import type { FunctionCallMessage } from '@/types/chat' + +const now = Date.now() +const SB = 'sb_abc123def456' + +/** Harness agent envelope (half of fixtures use this). */ +export function wrapHarness(details: unknown) { + return { + content: [ + { type: 'text' as const, text: JSON.stringify(details, null, 2) }, + ], + details, + terminate: true, + } +} + +function base( + id: string, + functionId: string, + input: unknown, + output?: unknown, + extra?: Partial, +): FunctionCallMessage { + return { + id, + role: 'function-call', + functionId, + input, + output, + durationMs: 240, + createdAt: now, + ...extra, + } +} + +export const sandboxExecDone = base( + 'sb-exec', + 'sandbox::exec', + { + sandbox_id: SB, + cmd: 'ls', + args: ['-la', '/workspace'], + workdir: '/workspace', + timeout_ms: 30_000, + }, + wrapHarness({ + stdout: + 'total 8\ndrwxr-xr-x 2 root root 4096 May 26 10:00 .\ndrwxr-xr-x 1 root root 4096 May 26 09:58 ..\n-rw-r--r-- 1 root root 12 May 26 10:00 README.md\n', + stderr: '', + exit_code: 0, + timed_out: false, + success: true, + duration_ms: 142, + }), +) + +export const sandboxExecRunning = base( + 'sb-exec-running', + 'sandbox::exec', + { + sandbox_id: SB, + cmd: 'npm', + args: ['test'], + workdir: '/workspace', + }, + undefined, + { running: true }, +) + +export const sandboxExecPending = base( + 'sb-exec-pending', + 'sandbox::exec', + { + sandbox_id: SB, + cmd: 'rm', + args: ['-rf', '/tmp/scratch'], + }, + undefined, + { pendingApproval: true }, +) + +export const sandboxRunDone = base( + 'sb-run', + 'sandbox::run', + { + image: 'motia/node:20', + lang: 'node', + code: 'console.log("hello from sandbox")\n', + keep_sandbox: false, + timeout_ms: 60_000, + }, + { + 
stdout: 'hello from sandbox\n', + stderr: '', + exit_code: 0, + timed_out: false, + success: true, + duration_ms: 890, + sandbox_id: SB, + }, +) + +export const sandboxCreateDone = base( + 'sb-create', + 'sandbox::create', + { + image: 'motia/node:20', + cpus: 2, + memory_mb: 512, + name: 'dev-shell', + network: true, + idle_timeout_secs: 300, + env: { NODE_ENV: 'development' }, + }, + wrapHarness({ + sandbox_id: SB, + image: 'motia/node:20', + }), +) + +export const sandboxStopDone = base( + 'sb-stop', + 'sandbox::stop', + { sandbox_id: SB, wait: true }, + { sandbox_id: SB, stopped: true }, +) + +export const sandboxListDone = base( + 'sb-list', + 'sandbox::list', + {}, + wrapHarness({ + sandboxes: [ + { + sandbox_id: SB, + name: 'dev-shell', + image: 'motia/node:20', + age_secs: 120, + exec_in_progress: false, + stopped: false, + }, + { + sandbox_id: 'sb_old999', + name: null, + image: 'motia/python:3.12', + age_secs: 3600, + exec_in_progress: true, + stopped: true, + }, + ], + }), +) + +export const sandboxFsLsDone = base( + 'sb-ls', + 'sandbox::fs::ls', + { sandbox_id: SB, path: '/workspace' }, + { + entries: [ + { + name: 'src', + is_dir: true, + size: 4096, + mode: '0755', + mtime: Math.floor(now / 1000) - 180, + is_symlink: false, + }, + { + name: 'package.json', + is_dir: false, + size: 412, + mode: '0644', + mtime: Math.floor(now / 1000) - 60, + is_symlink: false, + }, + ], + }, +) + +export const sandboxFsStatDone = base( + 'sb-stat', + 'sandbox::fs::stat', + { sandbox_id: SB, path: '/workspace/package.json' }, + wrapHarness({ + name: 'package.json', + is_dir: false, + size: 412, + mode: '0644', + mtime: Math.floor(now / 1000) - 60, + is_symlink: false, + }), +) + +export const sandboxFsReadDone = base( + 'sb-read', + 'sandbox::fs::read', + { sandbox_id: SB, path: '/workspace/src/index.ts' }, + { + content: 'export const main = () => console.log("ok")\n', + size: 48, + mode: '0644', + mtime: Math.floor(now / 1000) - 30, + }, +) + +export const 
sandboxFsWriteDone = base( + 'sb-write', + 'sandbox::fs::write', + { + sandbox_id: SB, + path: '/workspace/out.txt', + content: 'hello', + parents: true, + }, + wrapHarness({ bytes_written: 5, path: '/workspace/out.txt' }), +) + +export const sandboxFsMkdirDone = base( + 'sb-mkdir', + 'sandbox::fs::mkdir', + { + sandbox_id: SB, + path: '/workspace/nested/dir', + parents: true, + mode: '0755', + }, + { created: true }, +) + +export const sandboxFsRmDone = base( + 'sb-rm', + 'sandbox::fs::rm', + { sandbox_id: SB, path: '/workspace/tmp', recursive: true }, + wrapHarness({ removed: true }), +) + +export const sandboxFsMvDone = base( + 'sb-mv', + 'sandbox::fs::mv', + { + sandbox_id: SB, + src: '/workspace/old.txt', + dst: '/workspace/new.txt', + overwrite: true, + }, + { moved: true }, +) + +export const sandboxFsChmodDone = base( + 'sb-chmod', + 'sandbox::fs::chmod', + { sandbox_id: SB, path: '/workspace/script.sh', mode: '0755' }, + { updated: 1 }, +) + +export const sandboxFsGrepDone = base( + 'sb-grep', + 'sandbox::fs::grep', + { + sandbox_id: SB, + path: '/workspace', + pattern: 'TODO', + recursive: true, + ignore_case: true, + }, + wrapHarness({ + matches: [ + { + path: '/workspace/src/app.ts', + line: 14, + content: ' // TODO: wire sandbox UI', + }, + { + path: '/workspace/README.md', + line: 3, + content: '- TODO document env vars', + }, + ], + truncated: false, + }), +) + +export const sandboxFsSedDone = base( + 'sb-sed', + 'sandbox::fs::sed', + { + sandbox_id: SB, + files: ['/workspace/a.txt', '/workspace/b.txt'], + pattern: 'foo', + replacement: 'bar', + }, + { + results: [ + { path: '/workspace/a.txt', replacements: 2, success: true }, + { + path: '/workspace/b.txt', + replacements: 0, + success: false, + error: 'permission denied', + }, + ], + total_replacements: 2, + }, +) + +export const sandboxExecError = base( + 'sb-exec-err', + 'sandbox::exec', + { sandbox_id: SB, cmd: 'sleep', args: ['60'], timeout_ms: 100 }, + wrapHarness({ + type: 'sandbox_error', + 
code: 'S200', + message: 'command timed out after 100ms', + docs_url: 'https://docs.example/s200', + retryable: true, + fix_note: 'increase timeout_ms or simplify the command', + fix: { + stdout: 'partial output\n', + stderr: '', + exit_code: null, + timed_out: true, + duration_ms: 100, + }, + }), +) + +export const sandboxFsWriteGateError = base( + 'sb-fs-write-gate', + 'sandbox::fs::write', + { sandbox_id: SB, path: '/workspace/out.txt', content: 'hello\n' }, + { + error: { + kind: 'function_error', + message: + 'trigger_failed: IIIInvocationError: invocation_failed: handler error', + details: { + schema_version: 1, + status: 'denied', + denied_by: 'gate_unavailable', + function_id: 'sandbox::fs::write', + reason: 'trigger_failed: approval gate unreachable', + }, + content: [ + { + type: 'text', + text: 'trigger_failed: approval gate unreachable', + }, + ], + }, + }, +) + +export const sandboxFixtures = [ + sandboxExecDone, + sandboxExecRunning, + sandboxExecPending, + sandboxRunDone, + sandboxCreateDone, + sandboxStopDone, + sandboxListDone, + sandboxFsLsDone, + sandboxFsStatDone, + sandboxFsReadDone, + sandboxFsWriteDone, + sandboxFsMkdirDone, + sandboxFsRmDone, + sandboxFsMvDone, + sandboxFsChmodDone, + sandboxFsGrepDone, + sandboxFsSedDone, + sandboxExecError, + sandboxFsWriteGateError, +] as const diff --git a/console/web/src/pages/Playground/scenarios/happy-ask.ts b/console/web/src/pages/Playground/scenarios/happy-ask.ts index 31ca8508..3bc7ba9e 100644 --- a/console/web/src/pages/Playground/scenarios/happy-ask.ts +++ b/console/web/src/pages/Playground/scenarios/happy-ask.ts @@ -18,6 +18,9 @@ short answer: it depends on whether you're optimizing for **read** speed or pick the one whose worst case matches your hottest path. 
you can always swap later — the interface is the thing that matters.` -export const happyAsk = makeBackend('happy-ask', async function* (_prompt, _mode, _model, opts) { - yield* streamAssistant(BODY, { signal: opts?.signal }) -}) +export const happyAsk = makeBackend( + 'happy-ask', + async function* (_prompt, _mode, _model, opts) { + yield* streamAssistant(BODY, { signal: opts?.signal }) + }, +) diff --git a/console/web/src/pages/Playground/scenarios/index.ts b/console/web/src/pages/Playground/scenarios/index.ts index b1c0b144..467417eb 100644 --- a/console/web/src/pages/Playground/scenarios/index.ts +++ b/console/web/src/pages/Playground/scenarios/index.ts @@ -12,7 +12,12 @@ import { multiFunctionAgent } from './multi-function-agent' import { pendingApproval } from './pending-approval' import { slowTokens } from './slow-tokens' -export type ScenarioGroup = 'happy paths' | 'timing' | 'failure modes' | 'markdown' | 'agent' +export type ScenarioGroup = + | 'happy paths' + | 'timing' + | 'failure modes' + | 'markdown' + | 'agent' export interface PlaygroundScenario { id: string @@ -52,7 +57,8 @@ export const SCENARIOS: PlaygroundScenario[] = [ { id: 'slow-tokens', label: 'slow tokens', - description: '~200ms between assistant tokens — watch for renderer flicker.', + description: + '~200ms between assistant tokens — watch for renderer flicker.', group: 'timing', preferredMode: 'ask', backend: slowTokens, @@ -68,7 +74,8 @@ export const SCENARIOS: PlaygroundScenario[] = [ { id: 'abort-mid-thought', label: 'abort mid-thought', - description: 'half a thought, then throws AbortError. ChatView should clean up and stay responsive.', + description: + 'half a thought, then throws AbortError. 
ChatView should clean up and stay responsive.', group: 'failure modes', preferredMode: 'plan', backend: abortMidThought, @@ -76,7 +83,8 @@ export const SCENARIOS: PlaygroundScenario[] = [ { id: 'error-on-fcall', label: 'error on fcall', - description: 'function call ends with an error payload (rate_limited) instead of data.', + description: + 'function call ends with an error payload (rate_limited) instead of data.', group: 'failure modes', preferredMode: 'agent', backend: errorOnFcall, @@ -84,7 +92,8 @@ export const SCENARIOS: PlaygroundScenario[] = [ { id: 'multi-function-agent', label: 'multi-function agent', - description: 'three sequential function calls before the assistant body — surfaces fcall pointer reuse.', + description: + 'three sequential function calls before the assistant body — surfaces fcall pointer reuse.', group: 'agent', preferredMode: 'agent', backend: multiFunctionAgent, @@ -92,7 +101,8 @@ export const SCENARIOS: PlaygroundScenario[] = [ { id: 'pending-approval', label: 'pending approval', - description: 'fcall that requires user approval; auto-resolves after a delay so you can watch the lifecycle.', + description: + 'fcall that requires user approval; auto-resolves after a delay so you can watch the lifecycle.', group: 'agent', preferredMode: 'agent', backend: pendingApproval, @@ -100,7 +110,8 @@ export const SCENARIOS: PlaygroundScenario[] = [ { id: 'long-markdown', label: 'long markdown', - description: '~4kB body: headings, lists, tables, fenced code in 3 langs, blockquotes, task lists.', + description: + '~4kB body: headings, lists, tables, fenced code in 3 langs, blockquotes, task lists.', group: 'markdown', preferredMode: 'ask', backend: longMarkdown, @@ -108,14 +119,21 @@ export const SCENARIOS: PlaygroundScenario[] = [ { id: 'markdown-stress', label: 'markdown stress', - description: 'pathological markdown: nested lists, footnotes, autolinks, hard breaks, busy tables.', + description: + 'pathological markdown: nested lists, 
footnotes, autolinks, hard breaks, busy tables.', group: 'markdown', preferredMode: 'ask', backend: markdownStress, }, ] -export const SCENARIO_GROUPS: ScenarioGroup[] = ['happy paths', 'agent', 'failure modes', 'timing', 'markdown'] +export const SCENARIO_GROUPS: ScenarioGroup[] = [ + 'happy paths', + 'agent', + 'failure modes', + 'timing', + 'markdown', +] export function findScenario(id: string): PlaygroundScenario | undefined { return SCENARIOS.find((s) => s.id === id) diff --git a/console/web/src/pages/Playground/scenarios/multi-function-agent.ts b/console/web/src/pages/Playground/scenarios/multi-function-agent.ts index b62dc902..6e550a9d 100644 --- a/console/web/src/pages/Playground/scenarios/multi-function-agent.ts +++ b/console/web/src/pages/Playground/scenarios/multi-function-agent.ts @@ -1,4 +1,9 @@ -import { makeBackend, streamAssistant, streamFcall, streamThought } from './helpers' +import { + makeBackend, + streamAssistant, + streamFcall, + streamThought, +} from './helpers' const THOUGHT = `multi-step agent: i'll list workers, then inspect the healthiest one, then echo a probe through it. each function call is sequential @@ -21,36 +26,39 @@ ready for the next instruction.` * bug in ChatView (the consumer must clear its fcallId after each fcall-end * so the next fcall-start gets a fresh slot). 
*/ -export const multiFunctionAgent = makeBackend('multi-function-agent', async function* (_prompt, _mode, _model, opts) { - const signal = opts?.signal - yield* streamThought(THOUGHT, { signal }) - yield* streamFcall({ - functionId: 'engine::list', - input: {}, - output: { - workers: ['worker-1', 'worker-3', 'worker-7'], - }, - waitMs: 450, - signal, - }) - yield* streamFcall({ - functionId: 'engine::info', - input: { id: 'worker-7' }, - output: { - id: 'worker-7', - load: 0.12, - version: '0.4.1', - skills: ['echo', 'tokenize', 'embed'], - }, - waitMs: 500, - signal, - }) - yield* streamFcall({ - functionId: 'engine::echo', - input: { workerId: 'worker-7', text: 'ping' }, - output: { text: 'ping' }, - waitMs: 350, - signal, - }) - yield* streamAssistant(BODY, { signal }) -}) +export const multiFunctionAgent = makeBackend( + 'multi-function-agent', + async function* (_prompt, _mode, _model, opts) { + const signal = opts?.signal + yield* streamThought(THOUGHT, { signal }) + yield* streamFcall({ + functionId: 'engine::list', + input: {}, + output: { + workers: ['worker-1', 'worker-3', 'worker-7'], + }, + waitMs: 450, + signal, + }) + yield* streamFcall({ + functionId: 'engine::info', + input: { id: 'worker-7' }, + output: { + id: 'worker-7', + load: 0.12, + version: '0.4.1', + skills: ['echo', 'tokenize', 'embed'], + }, + waitMs: 500, + signal, + }) + yield* streamFcall({ + functionId: 'engine::echo', + input: { workerId: 'worker-7', text: 'ping' }, + output: { text: 'ping' }, + waitMs: 350, + signal, + }) + yield* streamAssistant(BODY, { signal }) + }, +) diff --git a/console/web/src/pages/Traces/components/SessionDetailPanel.tsx b/console/web/src/pages/Traces/components/SessionDetailPanel.tsx index ae291d99..b716a9a3 100644 --- a/console/web/src/pages/Traces/components/SessionDetailPanel.tsx +++ b/console/web/src/pages/Traces/components/SessionDetailPanel.tsx @@ -149,7 +149,9 @@ export function SessionDetailPanel({ // guess. 
const expectedSpans = traceCount === 1 ? totalSpans : undefined const autoOpen = - idx === 0 && (expectedSpans === undefined || expectedSpans < AUTO_OPEN_SPAN_LIMIT) + idx === 0 && + (expectedSpans === undefined || + expectedSpans < AUTO_OPEN_SPAN_LIMIT) return ( { if (queryStartRef.current === null) return - setElapsedSec(Math.floor((performance.now() - queryStartRef.current) / 1000)) + setElapsedSec( + Math.floor((performance.now() - queryStartRef.current) / 1000), + ) }, 1000) return () => clearInterval(interval) }, [isLoading]) diff --git a/console/web/src/pages/Traces/components/WaterfallChart.tsx b/console/web/src/pages/Traces/components/WaterfallChart.tsx index cf39fd43..c69d6ad2 100644 --- a/console/web/src/pages/Traces/components/WaterfallChart.tsx +++ b/console/web/src/pages/Traces/components/WaterfallChart.tsx @@ -65,7 +65,6 @@ import { } from '@/components/ui/Tooltip' import { cn } from '@/lib/utils' import { useShowEngineRouting } from '../hooks/useShowEngineRouting' -import { IconToggleButton } from './IconToggleButton' import { formatSpanLabel, getSpanKindIndicator, @@ -74,6 +73,7 @@ import { import { buildSpanTree, type FlatSpanRow, flattenTree } from '../lib/spanTree' import type { VisualizationSpan, WaterfallData } from '../lib/traceTransform' import { formatDuration } from '../lib/traceUtils' +import { IconToggleButton } from './IconToggleButton' interface WaterfallChartProps { data: WaterfallData diff --git a/console/web/src/pages/Traces/hooks/useResizablePanels.ts b/console/web/src/pages/Traces/hooks/useResizablePanels.ts index 8937f994..258aa1ad 100644 --- a/console/web/src/pages/Traces/hooks/useResizablePanels.ts +++ b/console/web/src/pages/Traces/hooks/useResizablePanels.ts @@ -201,10 +201,7 @@ export function useResizablePanels({ PANEL_MIN_WIDTH, cw - PANEL_NEIGHBOR_MIN_WIDTH - handles - otherForTrace, ) - const nextTrace = Math.max( - PANEL_MIN_WIDTH, - Math.min(maxTrace, p.trace), - ) + const nextTrace = Math.max(PANEL_MIN_WIDTH, 
Math.min(maxTrace, p.trace)) let nextSpan = p.span if (hasSpan) { const maxSpan = Math.max( diff --git a/console/web/src/pages/Traces/lib/traceTransform.test.ts b/console/web/src/pages/Traces/lib/traceTransform.test.ts index 815793d4..76e99aef 100644 --- a/console/web/src/pages/Traces/lib/traceTransform.test.ts +++ b/console/web/src/pages/Traces/lib/traceTransform.test.ts @@ -6,7 +6,11 @@ import { describe, expect, it } from 'vitest' import type { SpanTreeNode } from '../api/traces' -import { calculateDurationMs, toMs, treeToWaterfallData } from './traceTransform' +import { + calculateDurationMs, + toMs, + treeToWaterfallData, +} from './traceTransform' const NS_PER_MS = 1_000_000 // NANO_THRESHOLD in the module is Jan 1 2100 in ms (4102444800000). diff --git a/console/web/src/pages/Traces/lib/traceTransform.ts b/console/web/src/pages/Traces/lib/traceTransform.ts index b91d7c54..45c10ab4 100644 --- a/console/web/src/pages/Traces/lib/traceTransform.ts +++ b/console/web/src/pages/Traces/lib/traceTransform.ts @@ -60,7 +60,11 @@ function calculateDepths(spans: StoredSpan[]): Map { const chain: StoredSpan[] = [] const visiting = new Set() let cursor: StoredSpan | undefined = seed - while (cursor !== undefined && !depths.has(cursor.span_id) && !visiting.has(cursor.span_id)) { + while ( + cursor !== undefined && + !depths.has(cursor.span_id) && + !visiting.has(cursor.span_id) + ) { visiting.add(cursor.span_id) chain.push(cursor) const parentId: string | undefined = cursor.parent_span_id @@ -68,7 +72,9 @@ function calculateDepths(spans: StoredSpan[]): Map { } // Base depth: 0 if we hit a root (no parent in the set or a cycle). let base = - cursor !== undefined && depths.has(cursor.span_id) ? (depths.get(cursor.span_id) ?? 0) : -1 + cursor !== undefined && depths.has(cursor.span_id) + ? (depths.get(cursor.span_id) ?? 
0) + : -1 for (let i = chain.length - 1; i >= 0; i--) { base += 1 depths.set(chain[i].span_id, base) diff --git a/console/web/src/types/chat.ts b/console/web/src/types/chat.ts index a4e306e8..4f77926f 100644 --- a/console/web/src/types/chat.ts +++ b/console/web/src/types/chat.ts @@ -114,7 +114,12 @@ export interface SystemMessage extends BaseMessage { tokensBefore?: number } -export type Message = UserMessage | AssistantMessage | ThoughtMessage | FunctionCallMessage | SystemMessage +export type Message = + | UserMessage + | AssistantMessage + | ThoughtMessage + | FunctionCallMessage + | SystemMessage /** * Loose patch shape passed to updateMessage(). Lists every patchable field @@ -164,7 +169,12 @@ export interface Conversation { updatedAt: number } -const KNOWN_ROLES: ReadonlySet = new Set(['user', 'assistant', 'thought', 'function-call']) +const KNOWN_ROLES: ReadonlySet = new Set([ + 'user', + 'assistant', + 'thought', + 'function-call', +]) export function isKnownRole(role: unknown): role is Role { return typeof role === 'string' && KNOWN_ROLES.has(role as Role) diff --git a/database/Cargo.lock b/database/Cargo.lock index 85a97347..1a1b1557 100644 --- a/database/Cargo.lock +++ b/database/Cargo.lock @@ -596,7 +596,7 @@ checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" [[package]] name = "database" -version = "0.2.1" +version = "0.2.2" dependencies = [ "anyhow", "async-trait", diff --git a/harness/src/provider-llamacpp/discover.ts b/harness/src/provider-llamacpp/discover.ts index 9f9b2487..5e6ff69b 100644 --- a/harness/src/provider-llamacpp/discover.ts +++ b/harness/src/provider-llamacpp/discover.ts @@ -8,9 +8,12 @@ * - llama-server runs exactly ONE model at process startup (set via * `-m model.gguf`), so the endpoint returns at most one entry. There * is no concept of "downloaded but not loaded". - * - There is no native v0 endpoint exposing context length / arch. 
- * Discovery only learns the model id; everything else falls back to - * the embedded catalog placeholder (`llamacpp-local`). + * - There is no v0-style endpoint that surfaces per-model context + * length in `/v1/models`, but the server-wide `GET /props` endpoint + * does expose `n_ctx` (the value passed via `-c` / `--ctx-size` at + * startup), shared across all slots. We fetch it alongside the + * models list and use it to populate `context_window`; otherwise we + * fall back to the embedded catalog placeholder (`llamacpp-local`). * * Best-effort: failures (server offline, malformed JSON, register RPC * errors) are logged and swallowed — the worker still boots, and the @@ -62,6 +65,25 @@ export function modelsUrl(chatUrl: string): string { return `${trimmed}/models`; } +/** + * Derive the server-wide `/props` endpoint from a chat-completions URL. + * + * `/props` lives at the server root (not under `/v1`), so we strip both + * a `…/chat/completions` suffix and a trailing `/v1` (or any other path + * segment), then append `/props`. + * + * Examples: + * http://host:8080/v1/chat/completions → http://host:8080/props + * http://host:8080/v1/ → http://host:8080/props + * http://host:8080/custom/path → http://host:8080/custom/path/props + */ +export function propsUrl(chatUrl: string): string { + const withoutChat = chatUrl.replace(/\/chat\/completions\/?$/, ''); + const withoutV1 = withoutChat.replace(/\/v1\/?$/, ''); + const trimmed = withoutV1.endsWith('/') ? withoutV1.slice(0, -1) : withoutV1; + return `${trimmed}/props`; +} + /** * Fetch and parse the loaded model list. Returns an empty array on any * failure (caller decides whether to surface it). @@ -109,19 +131,91 @@ async function fetchModelIds( return ids; } +/** + * `GET /props` response shape (only the fields we read). `n_ctx` is the + * server-wide context size from `-c` / `--ctx-size` at startup and is + * shared across slots — applies to whichever single model llama-server + * has loaded. 
Older llama-server builds only expose `n_ctx` nested in + * `default_generation_settings`; newer builds also expose it at the top + * level. We read both and prefer the top-level field when present. + */ +type LlamaProps = { + n_ctx?: unknown; + default_generation_settings?: unknown; +}; + +function isPositiveInteger(value: unknown): value is number { + return typeof value === 'number' && Number.isFinite(value) && value > 0; +} + +function readNestedNCtx(value: unknown): number | null { + if (!value || typeof value !== 'object') return null; + const v = (value as Record).n_ctx; + return isPositiveInteger(v) ? v : null; +} + +/** + * Fetch the server-wide context window from `GET /props`. Returns the + * positive integer `n_ctx`, or `null` on any failure (server offline, + * non-2xx, malformed JSON, missing/non-positive field). Best-effort — + * a `null` return causes the caller to fall back to DEFAULT_CONTEXT_WINDOW. + */ +async function fetchPropsContextWindow( + endpoint: string, + headers: Record, +): Promise { + let resp: Response; + try { + resp = await fetchWithTimeout(endpoint, { method: 'GET', headers }, DISCOVERY_TIMEOUT_MS); + } catch (err) { + logger.warn('llamacpp discovery: /props fetch failed', { + url: endpoint, + err: String(err), + }); + return null; + } + if (!resp.ok) { + logger.warn('llamacpp discovery: /props non-2xx response', { + url: endpoint, + status: resp.status, + }); + return null; + } + let parsed: LlamaProps; + try { + parsed = (await resp.json()) as LlamaProps; + } catch (err) { + logger.warn('llamacpp discovery: /props malformed JSON response', { + url: endpoint, + err: String(err), + }); + return null; + } + if (!parsed || typeof parsed !== 'object') return null; + if (isPositiveInteger(parsed.n_ctx)) return parsed.n_ctx; + const nested = readNestedNCtx(parsed.default_generation_settings); + if (nested !== null) return nested; + logger.warn('llamacpp discovery: /props response missing usable n_ctx', { + url: endpoint, + }); + 
return null; +} + /** * Build a placeholder-shaped Model row for each id returned by * /v1/models. Field defaults match the `llamacpp-local` catalog * placeholder so capability gating (supports_tools etc.) works for - * arbitrary user-loaded models. + * arbitrary user-loaded models. `contextWindow` is the value from + * `/props.n_ctx` when available; callers pass DEFAULT_CONTEXT_WINDOW + * as a fallback. */ -function toCatalogModel(id: string): Model { +function toCatalogModel(id: string, contextWindow: number): Model { return { id, provider: 'llamacpp', api: 'openai-completions', display_name: id, - context_window: DEFAULT_CONTEXT_WINDOW, + context_window: contextWindow, max_output_tokens: DEFAULT_MAX_OUTPUT_TOKENS, supports_thinking: false, supports_xhigh: false, @@ -136,8 +230,20 @@ export async function discoverLoadedModel( chatUrl: string, headers: Record, ): Promise { - const ids = await fetchModelIds(modelsUrl(chatUrl), headers); - return ids.map(toCatalogModel); + const [ids, ctx] = await Promise.all([ + fetchModelIds(modelsUrl(chatUrl), headers), + fetchPropsContextWindow(propsUrl(chatUrl), headers), + ]); + const contextWindow = ctx ?? DEFAULT_CONTEXT_WINDOW; + // Visible at INFO so operators can tell from the harness log whether + // /props reported the real n_ctx or we fell through to the default — + // distinguishes "served context too small" from "discovery couldn't + // see n_ctx" without needing to attach a debugger. + logger.info('llamacpp discovery: resolved context window', { + context_window: contextWindow, + source: ctx === null ? 'default_fallback' : 'props_n_ctx', + }); + return ids.map((id) => toCatalogModel(id, contextWindow)); } /** diff --git a/harness/src/runtime/iii.ts b/harness/src/runtime/iii.ts index 220da8b3..ae630d86 100644 --- a/harness/src/runtime/iii.ts +++ b/harness/src/runtime/iii.ts @@ -4,7 +4,7 @@ * mock the SDK in tests. 
*/ -export { registerWorker, TriggerAction } from 'iii-sdk'; +export { registerWorker, TriggerAction, IIIInvocationError } from 'iii-sdk'; export type { ISdk, Channel, diff --git a/harness/src/turn-orchestrator/agent-trigger.ts b/harness/src/turn-orchestrator/agent-trigger.ts index 76f3a545..6912b545 100644 --- a/harness/src/turn-orchestrator/agent-trigger.ts +++ b/harness/src/turn-orchestrator/agent-trigger.ts @@ -7,7 +7,7 @@ * used by both the hook gate and pre-approved resume execution. */ -import type { ISdk } from '../runtime/iii.js'; +import { IIIInvocationError, type ISdk } from '../runtime/iii.js'; import { z } from 'zod'; import type { ContentBlock } from '../types/content.js'; import type { FunctionCall, FunctionResult } from '../types/function.js'; @@ -104,6 +104,41 @@ function isFunctionNotFound(err: unknown): boolean { return false; } +/** + * If `err` is an {@link IIIInvocationError} whose `.message` carries a + * structured wire payload like + * `{"code":"S210","type":"...","message":"...","docs_url":"...","retryable":bool,...}`, + * return that payload. Otherwise return `null`. + * + * Several workers (notably `iii-worker`'s `sandbox::*`) serialize their + * domain errors as JSON via `Display` so the engine forwards them with the + * structured envelope intact. Without this extractor, those payloads get + * buried inside `gate_unavailable.reason` as `String(err)`, hiding the + * S-code, the docs URL, the `fix` hint, and any structured retry info from + * the calling agent. + * + * Only payloads with both a `code` string and a `message` string are + * accepted, so generic JSON values (numbers, plain strings, partial + * envelopes) still fall through to the `gate_unavailable` path. + */ +function extractStructuredHandlerError(err: unknown): Record | null { + if (!(err instanceof IIIInvocationError)) return null; + // `IIIInvocationError`'s Error.message is `"${code}: ${rawMessage}"`. The + // raw handler-emitted message lives after that prefix. 
+ const prefix = `${err.code}: `; + const raw = err.message.startsWith(prefix) ? err.message.slice(prefix.length) : err.message; + let parsed: unknown; + try { + parsed = JSON.parse(raw); + } catch { + return null; + } + if (!parsed || typeof parsed !== 'object') return null; + const obj = parsed as Record; + if (typeof obj.code !== 'string' || typeof obj.message !== 'string') return null; + return obj; +} + export function functionNotFoundHint(badFunctionId: string): string { if (!badFunctionId.includes('/')) { return 'load the relevant skill via directory::skills::get, or check the function id'; @@ -142,6 +177,15 @@ export async function triggerFunctionCall( hint: functionNotFoundHint(function_call.function_id), }); } + // Structured worker error (e.g. `sandbox::*` S-codes): forward the + // envelope verbatim so the agent gets code + docs_url + fix hint + // instead of a generic gate_unavailable wrapper. We tag it with + // `error: 'handler_error'` so `isErrorResult` (and any downstream + // "did this tool call fail?" gate) still classifies it correctly. 
+ const structured = extractStructuredHandlerError(err); + if (structured) { + return errorResult({ error: 'handler_error', ...structured }); + } return denialResult( gateUnavailableEnvelope(function_call.function_id, `trigger_failed: ${String(err)}`), ); diff --git a/harness/tests/provider-llamacpp/discover.test.ts b/harness/tests/provider-llamacpp/discover.test.ts index 33072105..c9fe8ba3 100644 --- a/harness/tests/provider-llamacpp/discover.test.ts +++ b/harness/tests/provider-llamacpp/discover.test.ts @@ -3,6 +3,7 @@ import { discoverAndRegister, discoverLoadedModel, modelsUrl, + propsUrl, registerDiscovered, } from '../../src/provider-llamacpp/discover.js'; import type { ISdk } from '../../src/runtime/iii.js'; @@ -34,6 +35,30 @@ describe('modelsUrl', () => { }); }); +describe('propsUrl', () => { + it('derives /props at the server root from the default chat URL', () => { + expect(propsUrl('http://localhost:8080/v1/chat/completions')).toBe( + 'http://localhost:8080/props', + ); + }); + + it('strips a trailing slash on the chat URL before deriving /props', () => { + expect(propsUrl('http://localhost:8080/v1/chat/completions/')).toBe( + 'http://localhost:8080/props', + ); + }); + + it('strips a bare /v1 suffix', () => { + expect(propsUrl('http://localhost:8080/v1')).toBe('http://localhost:8080/props'); + }); + + it('appends /props to a custom-path proxy without /v1 (mirrors modelsUrl)', () => { + expect(propsUrl('http://localhost:8080/custom/path')).toBe( + 'http://localhost:8080/custom/path/props', + ); + }); +}); + describe('discoverLoadedModel', () => { it('returns one Model for the single loaded llama-server entry', async () => { globalThis.fetch = vi.fn( @@ -85,6 +110,98 @@ describe('discoverLoadedModel', () => { const out = await discoverLoadedModel('http://localhost:8080/v1/chat/completions', {}); expect(out.map((m) => m.id)).toEqual(['valid']); }); + + it('populates context_window from /props.n_ctx when available', async () => { + globalThis.fetch = 
vi.fn(async (input: RequestInfo | URL) => { + const url = typeof input === 'string' ? input : input.toString(); + if (url.endsWith('/props')) { + return new Response(JSON.stringify({ n_ctx: 262_144 }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + } + return new Response(JSON.stringify({ data: [{ id: 'Qwen3-35B-A3B' }] }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + }) as typeof globalThis.fetch; + const out = await discoverLoadedModel('http://localhost:8080/v1/chat/completions', {}); + expect(out).toHaveLength(1); + expect(out[0]?.context_window).toBe(262_144); + }); + + it('falls back to the default context_window when /props is unavailable', async () => { + globalThis.fetch = vi.fn(async (input: RequestInfo | URL) => { + const url = typeof input === 'string' ? input : input.toString(); + if (url.endsWith('/props')) { + return new Response('boom', { status: 502 }); + } + return new Response(JSON.stringify({ data: [{ id: 'Qwen3-35B-A3B' }] }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + }) as typeof globalThis.fetch; + const out = await discoverLoadedModel('http://localhost:8080/v1/chat/completions', {}); + expect(out).toHaveLength(1); + expect(out[0]?.context_window).toBe(32_768); + }); + + it('reads context_window from /props.default_generation_settings.n_ctx (older llama-server)', async () => { + globalThis.fetch = vi.fn(async (input: RequestInfo | URL) => { + const url = typeof input === 'string' ? 
input : input.toString(); + if (url.endsWith('/props')) { + return new Response(JSON.stringify({ default_generation_settings: { n_ctx: 131_072 } }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + } + return new Response(JSON.stringify({ data: [{ id: 'Qwen3-35B-A3B' }] }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + }) as typeof globalThis.fetch; + const out = await discoverLoadedModel('http://localhost:8080/v1/chat/completions', {}); + expect(out[0]?.context_window).toBe(131_072); + }); + + it('prefers top-level /props.n_ctx over the nested default_generation_settings value', async () => { + globalThis.fetch = vi.fn(async (input: RequestInfo | URL) => { + const url = typeof input === 'string' ? input : input.toString(); + if (url.endsWith('/props')) { + return new Response( + JSON.stringify({ + n_ctx: 262_144, + default_generation_settings: { n_ctx: 4096 }, + }), + { status: 200, headers: { 'content-type': 'application/json' } }, + ); + } + return new Response(JSON.stringify({ data: [{ id: 'Qwen3-35B-A3B' }] }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + }) as typeof globalThis.fetch; + const out = await discoverLoadedModel('http://localhost:8080/v1/chat/completions', {}); + expect(out[0]?.context_window).toBe(262_144); + }); + + it('falls back to the default context_window when /props omits or invalidates n_ctx', async () => { + globalThis.fetch = vi.fn(async (input: RequestInfo | URL) => { + const url = typeof input === 'string' ? 
input : input.toString(); + if (url.endsWith('/props')) { + return new Response(JSON.stringify({ n_ctx: -1 }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + } + return new Response(JSON.stringify({ data: [{ id: 'Qwen3-35B-A3B' }] }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + }) as typeof globalThis.fetch; + const out = await discoverLoadedModel('http://localhost:8080/v1/chat/completions', {}); + expect(out[0]?.context_window).toBe(32_768); + }); }); describe('registerDiscovered', () => { diff --git a/harness/tests/turn-orchestrator/agent-trigger.test.ts b/harness/tests/turn-orchestrator/agent-trigger.test.ts index f5053dd6..ed73f488 100644 --- a/harness/tests/turn-orchestrator/agent-trigger.test.ts +++ b/harness/tests/turn-orchestrator/agent-trigger.test.ts @@ -1,5 +1,5 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; -import type { ISdk } from '../../src/runtime/iii.js'; +import { IIIInvocationError, type ISdk } from '../../src/runtime/iii.js'; import type { DispatchResult } from '../../src/turn-orchestrator/agent-trigger.js'; import { TOOL_NAME, @@ -95,6 +95,78 @@ describe('triggerFunctionCall', () => { function_id: 'shell::fs::write', }); }); + + it('surfaces structured S-code handler errors verbatim, not gate_unavailable', async () => { + // Mimic what `iii-worker`'s sandbox daemon emits: its `Display` impl + // serialises the error envelope as JSON, which the engine then forwards + // through `IIIInvocationError`. The harness must hand that envelope to + // the agent untouched so it sees `code`, `docs_url`, `fix`, etc. 
+ const envelope = { + code: 'S210', + type: 'filesystem', + message: 'path is required', + docs_url: 'https://example.invalid/README.md#S210', + retryable: false, + fix: 'pass an absolute `path` argument', + }; + const triggerError = new IIIInvocationError({ + code: 'HANDLER', + message: JSON.stringify(envelope), + function_id: 'sandbox::fs::write', + }); + const iii = { + trigger: vi.fn().mockRejectedValue(triggerError), + } as unknown as ISdk; + const result = await triggerFunctionCall(iii, { + id: 'fc-1', + function_id: 'sandbox::fs::write', + arguments: {}, + }); + expect(isErrorResult(result)).toBe(true); + // Envelope passes through verbatim alongside a `handler_error` + // discriminator that lets isErrorResult and any retry gate + // classify the result correctly. + expect(result.details).toMatchObject({ error: 'handler_error', ...envelope }); + expect(result.details).not.toMatchObject({ denied_by: 'gate_unavailable' }); + }); + + it('falls back to gate_unavailable when message is not structured JSON', async () => { + const triggerError = new IIIInvocationError({ + code: 'HANDLER', + message: 'opaque handler text', + function_id: 'sandbox::fs::write', + }); + const iii = { + trigger: vi.fn().mockRejectedValue(triggerError), + } as unknown as ISdk; + const result = await triggerFunctionCall(iii, { + id: 'fc-1', + function_id: 'sandbox::fs::write', + arguments: {}, + }); + expect(result.details).toMatchObject({ denied_by: 'gate_unavailable' }); + }); + + it('falls back to gate_unavailable when JSON message lacks code/message fields', async () => { + // A partial JSON payload (e.g. `{"hint": "..."}`) does NOT count as a + // structured envelope — only `{code, message, ...}` shapes get the + // verbatim treatment. Anything else stays in the gate path so we don't + // silently misroute unrelated wire shapes. 
+ const triggerError = new IIIInvocationError({ + code: 'HANDLER', + message: JSON.stringify({ hint: 'try again' }), + function_id: 'sandbox::fs::write', + }); + const iii = { + trigger: vi.fn().mockRejectedValue(triggerError), + } as unknown as ISdk; + const result = await triggerFunctionCall(iii, { + id: 'fc-1', + function_id: 'sandbox::fs::write', + arguments: {}, + }); + expect(result.details).toMatchObject({ denied_by: 'gate_unavailable' }); + }); }); describe('dispatchWithHook returns DispatchResult', () => { diff --git a/shell/Cargo.lock b/shell/Cargo.lock index e5bcfb47..8e868ce8 100644 --- a/shell/Cargo.lock +++ b/shell/Cargo.lock @@ -1519,7 +1519,7 @@ dependencies = [ [[package]] name = "shell" -version = "0.3.3" +version = "0.3.5" dependencies = [ "anyhow", "async-trait", diff --git a/storage/Cargo.lock b/storage/Cargo.lock index 2ce024b1..31c8a40b 100644 --- a/storage/Cargo.lock +++ b/storage/Cargo.lock @@ -3425,7 +3425,7 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "storage" -version = "0.1.0" +version = "0.1.1" dependencies = [ "anyhow", "async-trait",