diff --git a/apps/demo-frontend/public/app.js b/apps/demo-frontend/public/app.js index bf895ca1..bd0b3b20 100644 --- a/apps/demo-frontend/public/app.js +++ b/apps/demo-frontend/public/app.js @@ -10030,6 +10030,9 @@ function bindEvents() { } async function bootstrap() { + const initialTabId = readTabIdFromHash() ?? readStoredTabId(); + setActiveTab(initialTabId, { syncHash: false }); + const runtimeConfig = await loadRuntimeConfig(); if (runtimeConfig?.wsUrl) { el.wsUrl.value = runtimeConfig.wsUrl; @@ -10065,7 +10068,7 @@ async function bootstrap() { resetOperatorBoardView({ mode: readStoredOperatorBoardMode(), persistMode: false }); renderTaskList(); evaluateConstraints(); - setActiveTab(readStoredTabId()); + setActiveTab(readStoredTabId(), { syncHash: false }); setUiTaskFieldsVisibility(); initBackgroundVideoLoopBlend(); enhanceSelectControls(); diff --git a/apps/demo-frontend/public/styles.css b/apps/demo-frontend/public/styles.css index e6dad0c0..3b9ce1c2 100644 --- a/apps/demo-frontend/public/styles.css +++ b/apps/demo-frontend/public/styles.css @@ -738,9 +738,9 @@ textarea { } select { - appearance: none; - -webkit-appearance: none; - -moz-appearance: none; + appearance: none !important; + -webkit-appearance: none !important; + -moz-appearance: none !important; cursor: pointer; padding-right: 42px; --select-surface-start: var(--surface-control); @@ -751,6 +751,10 @@ select { linear-gradient(180deg, var(--select-surface-start), var(--select-surface-end)); } +select::-ms-expand { + display: none; +} + select:hover { --select-surface-start: var(--surface-control-hover); } @@ -1750,6 +1754,7 @@ textarea { min-height: 74px; padding-top: 11px; padding-bottom: 11px; + overflow: visible; } .panel-live-connection .action-group-primary > .export-menu { @@ -1759,11 +1764,11 @@ textarea { } .panel-live-connection .export-menu[open] { - z-index: 240; + z-index: 520; } .panel-live-connection .export-menu-list { - z-index: 260; + z-index: 540; } .action-group { @@ -1788,6 +1793,12 @@ textarea { .export-menu { position: relative; min-width: 0; + z-index: 140; + isolation: isolate; +} + +.export-menu[open] { + z-index: 420; } .export-menu > summary { @@ -1890,7 +1901,7 @@ textarea { position: absolute; right: 0; top: calc(100% + 8px); - z-index: 120; + z-index: 460; min-width: min(320px, calc(100vw - 56px)); padding: 8px; display: grid; @@ -2305,7 +2316,7 @@ button:focus-visible { .meta-row-status-live { margin-top: 14px; - padding: 8px; + padding: 10px; border: 1px solid color-mix(in oklch, var(--primary) 20%, var(--border-soft)); border-radius: calc(var(--radius) - 4px); background: @@ -2318,14 +2329,14 @@ button:focus-visible { display: grid; grid-template-columns: repeat(auto-fit, minmax(162px, 1fr)); grid-template-columns: repeat(4, minmax(0, 1fr)); - gap: 8px; + gap: 10px; overflow: visible; } .meta-row-status-live > div { min-width: 0; - min-height: 46px; - padding: 7px 10px; + min-height: 48px; + padding: 8px 11px; border-radius: calc(var(--radius) - 10px); display: inline-flex; align-items: center; @@ -2401,18 +2412,18 @@ button:focus-visible { } .meta-row-status-live strong { - color: color-mix(in oklch, white 98%, var(--foreground)); - font-size: 0.74rem; + color: color-mix(in oklch, white 99.6%, var(--foreground)); + font-size: 0.76rem; text-transform: none; - letter-spacing: 0.015em; + letter-spacing: 0.012em; white-space: nowrap; text-shadow: 0 1px 0 color-mix(in oklch, black 30%, transparent); } .meta-row-status-live > div > span:not(.status-pill) { - color: color-mix(in oklch, white 99.5%, var(--foreground)); - font-size: 0.82rem; - font-weight: 650; + color: color-mix(in oklch, white 99.8%, var(--foreground)); + font-size: 0.84rem; + font-weight: 670; line-height: 1.28; } @@ -2435,7 +2446,7 @@ button:focus-visible { max-width: 100%; padding: 4px 9px; border-radius: 999px; - border: 1px solid color-mix(in oklch, var(--primary) 32%, var(--border-soft)); + border: 1px solid color-mix(in oklch, var(--primary) 38%, var(--border-soft)); background: radial-gradient(160px 64px at 12% -34%, color-mix(in oklch, var(--primary) 10%, transparent), transparent 72%), linear-gradient( @@ -2453,7 +2464,7 @@ button:focus-visible { word-break: break-word; box-shadow: inset 0 1px 0 color-mix(in oklch, var(--foreground) 10%, transparent), - 0 0 0 1px color-mix(in oklch, var(--primary) 16%, transparent); + 0 0 0 1px color-mix(in oklch, var(--primary) 20%, transparent); } .meta-row > div { diff --git a/docs/judge-quickstart.md b/docs/judge-quickstart.md index ba860228..9d67e60c 100644 --- a/docs/judge-quickstart.md +++ b/docs/judge-quickstart.md @@ -6,7 +6,7 @@ Fast, judge-facing entry point for a 5-10 minute evaluation run. This project covers all three challenge categories in one platform: -1. Live Agent (realtime speech, interruption, translation, negotiation) +1. Live Agent (realtime speech, interruption, translation, negotiation, grounded research) 2. Creative Storyteller (text + audio + image + video narrative flow) 3. UI Navigator (computer-use style UI planning/execution with approval guardrails) @@ -55,6 +55,8 @@ Artifacts: 8. `artifacts/judge-visual-evidence/presentation.md` 9. `artifacts/demo-e2e/epic-summary.json` +If deploy/publish artifacts are present, `manifest.md` and `presentation.md` also surface compact deploy/publish provenance from `artifacts/deploy/railway-deploy-summary.json` and `artifacts/deploy/repo-publish-summary.json`. Ordinary local judge flows omit that section, and raw deploy/publish JSON is not embedded into the judge-facing markdown. + ## 3) Validate Release Readiness ```bash @@ -64,21 +66,30 @@ npm run verify:release ## 4) What Judges Should See in UI 1. Connection + assistant lifecycle (`idle/streaming/speaking`). -2. Live interruption, truncate/delete evidence, and gateway error correlation. +2. Live interruption, truncate/delete evidence, gateway error correlation, and optional `research` citations/source URLs. 3. Operator Console panels: - Live Bridge Status - Approvals Queue + - Workflow Runtime / Runtime Guardrails - Device Nodes Health / Updates + - Bootstrap Doctor / Browser Workers - Governance Policy Lifecycle - Skills Registry Lifecycle - Plugin Marketplace Lifecycle - Agent Usage Evidence - Cost & Tokens Evidence -4. Session export controls: +4. Operator support panels: + - `Runtime Drill Runner` for repo-owned dry-run/live recovery drills and `followUpContext` handoff. + - `Workflow Control Panel` for redacted assistive-router/runtime override posture. + - `Operator Session Ops` for saved `operatorPurpose`, session replay, and cross-agent discovery. + - `Bootstrap Doctor & Auth Profiles` for provider/auth-profile/device/fallback posture. + - `Browser Worker Control` for queue/checkpoint posture on long-running UI jobs. +5. Session export controls: - `Export Session -> Export Markdown` - `Export Session -> Export JSON` - `Export Session -> Export Audio (WAV)` -5. Story Timeline panel: + - Confirm exported Markdown/JSON include `runtimeGuardrailsSignalPaths`, `operatorPurpose`, `operatorSessionReplay`, and `operatorDiscovery`. +6. Story Timeline panel: - Confirm `Timeline State` KPI transitions (`0%` idle -> ready/pending) as story output arrives. - Segment scrubber/selector reflects `output.story.timeline` - Preview card shows segment text + `image/video/audio` refs @@ -100,6 +111,7 @@ npm run verify:release - Start mic, send live request, then trigger interruption. - Show truncate/delete/gateway-correlation evidence in Operator Console. - Mention roundtrip and interrupt KPI lanes in `artifacts/demo-e2e/badge-details.json`. + - If judges ask for grounded-research proof, switch to `intent=research` once and show citation-bearing `answer`, `citations`, and `sourceUrls`. 3. `02:15-03:30` Creative Storyteller category: - Send storyteller prompt. - Open `Story Timeline` panel and scrub segments. @@ -107,8 +119,10 @@ npm run verify:release 4. `03:30-04:45` UI Navigator category: - Send `ui_task` intent with grounding fields. - Show approval flow and damage-control verdict in Operator Console. + - Save a short purpose in `Operator Session Ops`, then open `Bootstrap Doctor & Auth Profiles` and `Browser Worker Control` once to show runtime posture before execution. - Confirm safety gates before execution. 5. `04:45-05:30` Evidence close: - Run `npm run demo:epic` (or fallback `npm run demo:e2e:visual:judge` if e2e/policy/badge were already executed). - Open `artifacts/judge-visual-evidence/presentation.md`. - Confirm all evidence lanes are `pass` in `artifacts/demo-e2e/badge-details.json`. + - Export session `JSON` or `Markdown` and confirm `runtimeGuardrailsSignalPaths`, `operatorPurpose`, `operatorSessionReplay`, and `operatorDiscovery`. diff --git a/docs/judge-visual-evidence.md b/docs/judge-visual-evidence.md index 47c89ab6..55d35a18 100644 --- a/docs/judge-visual-evidence.md +++ b/docs/judge-visual-evidence.md @@ -5,8 +5,9 @@ Create one reproducible visual bundle for judges: 1. Screenshot checklist status (present/missing). -2. Critical badge-evidence lane status (`pass/fail/unavailable`). +2. Critical badge-evidence lane status (`pass/fail/unavailable`), including runtime guardrails and provider provenance. 3. Single manifest for quick go/no-go before submission. +4. One-page presentation bundle with runtime guardrails snapshot, provider adapter snapshot, and compact deploy/publish provenance when optional Railway/repo-publish artifacts are available. ## Commands @@ -76,7 +77,9 @@ Defaults used by `scripts/judge-visual-evidence-pack.mjs`: 1. Badge details: `artifacts/demo-e2e/badge-details.json` 2. Demo summary: `artifacts/demo-e2e/summary.json` -3. Screenshot directory: `artifacts/judge-visual-evidence/screenshots` +3. Optional Railway deploy summary: `artifacts/deploy/railway-deploy-summary.json` +4. Optional repo publish summary: `artifacts/deploy/repo-publish-summary.json` +5. Screenshot directory: `artifacts/judge-visual-evidence/screenshots` Defaults used by `scripts/judge-visual-capture.mjs`: @@ -94,6 +97,8 @@ Defaults used by `scripts/judge-visual-capture.mjs`: 5. `artifacts/judge-visual-evidence/presentation.md` 6. `artifacts/demo-e2e/epic-summary.json` +`manifest.md` and `presentation.md` surface compact deploy/publish provenance from `railway-deploy-summary.json` / `repo-publish-summary.json` when those optional files are present. Ordinary local judge flows omit that section instead of filling the page with `unavailable` placeholders, and raw deploy/publish JSON is not embedded into the judge-facing markdown. + ## Required Screenshot Filenames Put files into `artifacts/judge-visual-evidence/screenshots`: @@ -120,4 +125,6 @@ Pack marks these as critical: 6. `pluginMarketplace` 7. `deviceNodes` 8. `agentUsage` -9. `deviceNodeUpdates` (derived from `deviceNodes` updates fields) +9. `runtimeGuardrailsSignalPaths` +10. `providerUsage` +11. `deviceNodeUpdates` (derived from `deviceNodes` updates fields) diff --git a/scripts/judge-presentation-bundle.mjs b/scripts/judge-presentation-bundle.mjs index d2ccc835..6ae4f503 100644 --- a/scripts/judge-presentation-bundle.mjs +++ b/scripts/judge-presentation-bundle.mjs @@ -9,6 +9,8 @@ function parseArgs(argv) { badge: "artifacts/demo-e2e/badge.json", badgeDetails: "artifacts/demo-e2e/badge-details.json", releaseEvidence: "artifacts/release-evidence/report.json", + railwayDeploySummary: "artifacts/deploy/railway-deploy-summary.json", + repoPublishSummary: "artifacts/deploy/repo-publish-summary.json", visualManifest: "artifacts/judge-visual-evidence/manifest.json", visualGallery: "artifacts/judge-visual-evidence/gallery.md", }; @@ -39,6 +41,14 @@ function parseArgs(argv) { options.releaseEvidence = String(argv[++index] ?? options.releaseEvidence); continue; } + if (arg === "--railwayDeploySummary") { + options.railwayDeploySummary = String(argv[++index] ?? options.railwayDeploySummary); + continue; + } + if (arg === "--repoPublishSummary") { + options.repoPublishSummary = String(argv[++index] ?? options.repoPublishSummary); + continue; + } if (arg === "--visualManifest") { options.visualManifest = String(argv[++index] ?? options.visualManifest); continue; @@ -89,6 +99,24 @@ function toStatus(value) { return "unavailable"; } +function toOptionalText(value) { + if (typeof value !== "string") { + return "unavailable"; + } + const normalized = value.trim(); + return normalized.length > 0 ? normalized : "unavailable"; +} + +function toEnabledLabel(value) { + if (value === true) { + return "enabled"; + } + if (value === false) { + return "disabled"; + } + return "n/a"; +} + function toRelativePath(fromFile, toFile) { const raw = relative(dirname(fromFile), toFile); return raw.split(sep).join("/"); @@ -137,6 +165,131 @@ function deriveDeviceNodeUpdatesStatus(visualManifest, badgeDetails) { return "unavailable"; } +function toProviderEntrySummary(entry) { + if (!entry || typeof entry !== "object") { + return null; + } + + return { + route: String(entry.route ?? "n/a"), + capability: String(entry.capability ?? "n/a"), + selectedProvider: String(entry.selectedProvider ?? "n/a"), + selectedModel: String(entry.selectedModel ?? "n/a"), + defaultProvider: String(entry.defaultProvider ?? "n/a"), + selectionReason: String(entry.selectionReason ?? "n/a"), + secondaryActive: entry.secondaryActive === true ? "yes" : "no", + }; +} + +function summarizePrimaryPath(primaryPath) { + if (!primaryPath || typeof primaryPath !== "object") { + return "n/a"; + } + + const title = String(primaryPath.title ?? "n/a"); + const kind = String(primaryPath.kind ?? "n/a"); + const phase = String(primaryPath.phase ?? "n/a"); + return `${title} (${kind} / ${phase})`; +} + +function sanitizeDeployProvenanceRows(rows) { + if (!Array.isArray(rows)) { + return []; + } + return rows + .map((row) => ({ + id: String(row?.id ?? "").trim(), + title: String(row?.title ?? "").trim(), + summary: String(row?.summary ?? "").trim(), + })) + .filter((row) => row.title.length > 0 && row.summary.length > 0); +} + +function buildDeployProvenanceRows(deployProvenance) { + const rows = []; + const railwayDeploy = deployProvenance.railwayDeploy; + const repoPublish = deployProvenance.repoPublish; + + if (railwayDeploy.present) { + rows.push({ + id: "railwayDeploy", + title: "Railway deploy", + summary: `status ${railwayDeploy.status}; deployment ${railwayDeploy.deploymentId}; public URL ${railwayDeploy.effectivePublicUrl}`, + }); + + const badgeParts = []; + if (railwayDeploy.badgeEndpoint !== "unavailable") { + badgeParts.push(`badge ${railwayDeploy.badgeEndpoint}`); + } + if (railwayDeploy.badgeDetailsEndpoint !== "unavailable") { + badgeParts.push(`badge-details ${railwayDeploy.badgeDetailsEndpoint}`); + } + if (badgeParts.length > 0) { + rows.push({ + id: "railwayBadge", + title: "Public badge", + summary: badgeParts.join("; "), + }); + } + } + + if (repoPublish.present) { + rows.push({ + id: "repoPublish", + title: "Repo publish", + summary: [ + `verification ${repoPublish.verificationScript}`, + repoPublish.releaseEvidenceValidated === "true" + ? "release evidence validated" + : "release evidence not validated", + `Railway deploy ${repoPublish.railwayDeployEnabledLabel}`, + `frontend deploy ${repoPublish.railwayFrontendDeployEnabledLabel}`, + ].join("; "), + }); + } + + return rows; +} + +function summarizeDeployProvenance(visualManifest, railwayDeploySummaryRead, repoPublishSummaryRead) { + const railwayDeploySummary = + railwayDeploySummaryRead.present && railwayDeploySummaryRead.parsed ? railwayDeploySummaryRead.value : null; + const repoPublishSummary = + repoPublishSummaryRead.present && repoPublishSummaryRead.parsed ? repoPublishSummaryRead.value : null; + const railwayPublicBadge = railwayDeploySummary?.checks?.publicBadge ?? {}; + const repoPublishVerification = repoPublishSummary?.verification ?? {}; + const repoPublishSteps = repoPublishSummary?.steps ?? {}; + + const deployProvenance = { + available: Boolean(railwayDeploySummary || repoPublishSummary), + rows: [], + railwayDeploy: { + present: railwayDeploySummaryRead.present && railwayDeploySummaryRead.parsed, + status: toOptionalText(railwayDeploySummary?.status), + deploymentId: toOptionalText(railwayDeploySummary?.deploymentId), + effectivePublicUrl: toOptionalText(railwayDeploySummary?.effectivePublicUrl), + badgeEndpoint: toOptionalText(railwayPublicBadge?.badgeEndpoint), + badgeDetailsEndpoint: toOptionalText(railwayPublicBadge?.badgeDetailsEndpoint), + }, + repoPublish: { + present: repoPublishSummaryRead.present && repoPublishSummaryRead.parsed, + verificationScript: toOptionalText(repoPublishVerification?.script), + releaseEvidenceValidated: repoPublishVerification?.releaseEvidenceArtifactsValidated === true ? "true" : "false", + railwayDeployEnabled: repoPublishSteps?.railwayDeployEnabled === true ? "true" : "false", + railwayFrontendDeployEnabled: repoPublishSteps?.railwayFrontendDeployEnabled === true ? "true" : "false", + railwayDeployEnabledLabel: toEnabledLabel(repoPublishSteps?.railwayDeployEnabled), + railwayFrontendDeployEnabledLabel: toEnabledLabel(repoPublishSteps?.railwayFrontendDeployEnabled), + }, + }; + + const rowsFromVisualManifest = sanitizeDeployProvenanceRows(visualManifest?.deployProvenance?.rows); + deployProvenance.rows = + rowsFromVisualManifest.length > 0 ? rowsFromVisualManifest : buildDeployProvenanceRows(deployProvenance); + deployProvenance.available = deployProvenance.rows.length > 0 || deployProvenance.available; + + return deployProvenance; +} + function toMarkdown(bundle) { const lines = []; lines.push("# Judge Presentation Bundle"); @@ -174,6 +327,37 @@ function toMarkdown(bundle) { lines.push(`- Tokens used total: ${bundle.tokensUsedTotal}`); lines.push(""); + lines.push("## Runtime Guardrails Snapshot"); + lines.push(""); + lines.push(`- Status: ${bundle.runtimeGuardrails.status}`); + lines.push(`- Summary: ${bundle.runtimeGuardrails.summaryStatus}`); + lines.push(`- Total paths: ${bundle.runtimeGuardrails.totalPaths}`); + lines.push(`- Primary path: ${bundle.runtimeGuardrails.primaryPathSummary}`); + lines.push(""); + + lines.push("## Provider Adapter Snapshot"); + lines.push(""); + lines.push(`- Status: ${bundle.providerUsage.status}`); + lines.push(`- Active secondary providers: ${bundle.providerUsage.activeSecondaryProviders}`); + lines.push(""); + lines.push("| Route | Capability | Provider | Model | Default Provider | Selection Reason | Secondary Active |"); + lines.push("|---|---|---|---|---|---|---|"); + for (const row of bundle.providerUsage.entries) { + lines.push( + `| ${row.route} | ${row.capability} | ${row.selectedProvider} | ${row.selectedModel} | ${row.defaultProvider} | ${row.selectionReason} | ${row.secondaryActive} |`, + ); + } + + if (bundle.deployProvenance.rows.length > 0) { + lines.push(""); + lines.push("## Deploy / Publish Provenance"); + lines.push(""); + for (const row of bundle.deployProvenance.rows) { + lines.push(`- ${row.title}: ${row.summary}`); + } + } + lines.push(""); + lines.push("## Visual Assets"); lines.push(""); lines.push(`- Visual manifest: [manifest.json](${bundle.artifacts.visualManifestRel})`); @@ -209,6 +393,8 @@ function main() { const badgePath = toAbsolutePath(options.badge); const badgeDetailsPath = toAbsolutePath(options.badgeDetails); const releaseEvidencePath = toAbsolutePath(options.releaseEvidence); + const railwayDeploySummaryPath = toAbsolutePath(options.railwayDeploySummary); + const repoPublishSummaryPath = toAbsolutePath(options.repoPublishSummary); const visualManifestPath = toAbsolutePath(options.visualManifest); const visualGalleryPath = toAbsolutePath(options.visualGallery); @@ -217,6 +403,8 @@ function main() { const badgeRead = readJsonIfExists(badgePath); const badgeDetailsRead = readJsonIfExists(badgeDetailsPath); const releaseEvidenceRead = readJsonIfExists(releaseEvidencePath); + const railwayDeploySummaryRead = readJsonIfExists(railwayDeploySummaryPath); + const repoPublishSummaryRead = readJsonIfExists(repoPublishSummaryPath); const visualManifestRead = readJsonIfExists(visualManifestPath); const summary = summaryRead.value ?? {}; @@ -225,6 +413,13 @@ function main() { const badgeDetails = badgeDetailsRead.value ?? {}; const visualManifest = visualManifestRead.value ?? {}; const releaseEvidence = releaseEvidenceRead.value ?? {}; + const runtimeGuardrails = badgeDetails?.evidence?.runtimeGuardrailsSignalPaths ?? {}; + const providerUsage = badgeDetails?.providerUsage ?? releaseEvidence?.providerUsage ?? {}; + const deployProvenance = summarizeDeployProvenance( + visualManifest, + railwayDeploySummaryRead, + repoPublishSummaryRead, + ); const categories = [ { @@ -253,6 +448,8 @@ function main() { { lane: "pluginMarketplace", status: toStatus(badgeDetails?.evidence?.pluginMarketplace?.status) }, { lane: "deviceNodes", status: toStatus(badgeDetails?.evidence?.deviceNodes?.status) }, { lane: "agentUsage", status: toStatus(badgeDetails?.evidence?.agentUsage?.status) }, + { lane: "runtimeGuardrailsSignalPaths", status: toStatus(runtimeGuardrails?.status) }, + { lane: "providerUsage", status: toStatus(providerUsage?.status) }, { lane: "deviceNodeUpdates", status: deriveDeviceNodeUpdatesStatus(visualManifest, badgeDetails) }, ]; @@ -263,15 +460,19 @@ function main() { const notes = []; for (const source of [ - { name: "summary", read: summaryRead }, - { name: "policy", read: policyRead }, - { name: "badge", read: badgeRead }, - { name: "badgeDetails", read: badgeDetailsRead }, - { name: "releaseEvidence", read: releaseEvidenceRead }, - { name: "visualManifest", read: visualManifestRead }, + { name: "summary", read: summaryRead, optional: false }, + { name: "policy", read: policyRead, optional: false }, + { name: "badge", read: badgeRead, optional: false }, + { name: "badgeDetails", read: badgeDetailsRead, optional: false }, + { name: "releaseEvidence", read: releaseEvidenceRead, optional: false }, + { name: "railwayDeploySummary", read: railwayDeploySummaryRead, optional: true }, + { name: "repoPublishSummary", read: repoPublishSummaryRead, optional: true }, + { name: "visualManifest", read: visualManifestRead, optional: false }, ]) { if (!source.read.present) { - notes.push(`${source.name} source is missing`); + if (!source.optional) { + notes.push(`${source.name} source is missing`); + } continue; } if (!source.read.parsed) { @@ -294,6 +495,20 @@ function main() { gatewayRoundTripMs: Number(summary?.kpis?.gatewayWsRoundTripMs ?? 0), costTotalUsd: Number(badgeDetails?.costEstimate?.totalUsd ?? summary?.costEstimate?.totalUsd ?? 0), tokensUsedTotal: Number(badgeDetails?.tokensUsed?.total ?? summary?.tokensUsed?.total ?? 0), + runtimeGuardrails: { + status: toStatus(runtimeGuardrails?.status), + summaryStatus: String(runtimeGuardrails?.summaryStatus ?? "n/a"), + totalPaths: Number(runtimeGuardrails?.totalPaths ?? 0), + primaryPathSummary: summarizePrimaryPath(runtimeGuardrails?.primaryPath), + }, + providerUsage: { + status: toStatus(providerUsage?.status), + activeSecondaryProviders: Number(providerUsage?.activeSecondaryProviders ?? 0), + entries: Array.isArray(providerUsage?.entries) + ? providerUsage.entries.map(toProviderEntrySummary).filter(Boolean) + : [], + }, + deployProvenance, categories, evidenceLanes, artifacts: { @@ -301,6 +516,8 @@ function main() { policyRel: toRelativePath(outputMarkdownPath, policyPath), badgeDetailsRel: toRelativePath(outputMarkdownPath, badgeDetailsPath), releaseEvidenceRel: toRelativePath(outputMarkdownPath, releaseEvidencePath), + railwayDeploySummaryRel: toRelativePath(outputMarkdownPath, railwayDeploySummaryPath), + repoPublishSummaryRel: toRelativePath(outputMarkdownPath, repoPublishSummaryPath), visualManifestRel: toRelativePath(outputMarkdownPath, visualManifestPath), visualChecklistRel: toRelativePath( outputMarkdownPath, diff --git a/scripts/judge-visual-evidence-pack.mjs b/scripts/judge-visual-evidence-pack.mjs index 6d1c9665..0eab3a6a 100644 --- a/scripts/judge-visual-evidence-pack.mjs +++ b/scripts/judge-visual-evidence-pack.mjs @@ -7,6 +7,8 @@ function parseArgs(argv) { outputMarkdown: "artifacts/judge-visual-evidence/manifest.md", badgeDetails: "artifacts/demo-e2e/badge-details.json", summary: "artifacts/demo-e2e/summary.json", + railwayDeploySummary: "artifacts/deploy/railway-deploy-summary.json", + repoPublishSummary: "artifacts/deploy/repo-publish-summary.json", screenshotDir: "artifacts/judge-visual-evidence/screenshots", strict: false, }; @@ -33,6 +35,14 @@ function parseArgs(argv) { options.summary = argv[++i]; continue; } + if (arg === "--railwayDeploySummary") { + options.railwayDeploySummary = argv[++i]; + continue; + } + if (arg === "--repoPublishSummary") { + options.repoPublishSummary = argv[++i]; + continue; + } if (arg === "--screenshotDir") { options.screenshotDir = argv[++i]; continue; @@ -83,6 +93,24 @@ function toStatusValue(value) { return "unavailable"; } +function toOptionalText(value) { + if (typeof value !== "string") { + return "unavailable"; + } + const normalized = value.trim(); + return normalized.length > 0 ? normalized : "unavailable"; +} + +function toEnabledLabel(value) { + if (value === true) { + return "enabled"; + } + if (value === false) { + return "disabled"; + } + return "n/a"; +} + function deriveDeviceNodeUpdatesStatus(deviceNodesEvidence) { if (!deviceNodesEvidence || typeof deviceNodesEvidence !== "object") { return "unavailable"; @@ -103,11 +131,93 @@ function deriveDeviceNodeUpdatesStatus(deviceNodesEvidence) { return "unavailable"; } +function buildDeployProvenanceRows(deployProvenance) { + const rows = []; + const railwayDeploy = deployProvenance.railwayDeploy; + const repoPublish = deployProvenance.repoPublish; + + if (railwayDeploy.available) { + rows.push({ + id: "railwayDeploy", + title: "Railway deploy", + summary: `status ${railwayDeploy.status}; deployment ${railwayDeploy.deploymentId}; public URL ${railwayDeploy.effectivePublicUrl}`, + }); + + const badgeParts = []; + if (railwayDeploy.badgeEndpoint !== "unavailable") { + badgeParts.push(`badge ${railwayDeploy.badgeEndpoint}`); + } + if (railwayDeploy.badgeDetailsEndpoint !== "unavailable") { + badgeParts.push(`badge-details ${railwayDeploy.badgeDetailsEndpoint}`); + } + if (badgeParts.length > 0) { + rows.push({ + id: "railwayBadge", + title: "Public badge", + summary: badgeParts.join("; "), + }); + } + } + + if (repoPublish.available) { + rows.push({ + id: "repoPublish", + title: "Repo publish", + summary: [ + `verification ${repoPublish.verificationScript}`, + repoPublish.releaseEvidenceValidated ? "release evidence validated" : "release evidence not validated", + `Railway deploy ${repoPublish.railwayDeployEnabledLabel}`, + `frontend deploy ${repoPublish.railwayFrontendDeployEnabledLabel}`, + ].join("; "), + }); + } + + return rows; +} + +function collectDeployProvenance(railwayDeploySummaryRead, repoPublishSummaryRead) { + const railwayDeploySummary = + railwayDeploySummaryRead.present && railwayDeploySummaryRead.parsed ? railwayDeploySummaryRead.value : null; + const repoPublishSummary = + repoPublishSummaryRead.present && repoPublishSummaryRead.parsed ? repoPublishSummaryRead.value : null; + const railwayChecks = railwayDeploySummary?.checks?.publicBadge ?? {}; + const repoPublishVerification = repoPublishSummary?.verification ?? {}; + const repoPublishSteps = repoPublishSummary?.steps ?? {}; + + const deployProvenance = { + available: Boolean(railwayDeploySummary || repoPublishSummary), + rows: [], + railwayDeploy: { + available: railwayDeploySummaryRead.present && railwayDeploySummaryRead.parsed, + status: toOptionalText(railwayDeploySummary?.status), + deploymentId: toOptionalText(railwayDeploySummary?.deploymentId), + effectivePublicUrl: toOptionalText(railwayDeploySummary?.effectivePublicUrl), + badgeEndpoint: toOptionalText(railwayChecks?.badgeEndpoint), + badgeDetailsEndpoint: toOptionalText(railwayChecks?.badgeDetailsEndpoint), + }, + repoPublish: { + available: repoPublishSummaryRead.present && repoPublishSummaryRead.parsed, + verificationScript: toOptionalText(repoPublishVerification?.script), + releaseEvidenceValidated: repoPublishVerification?.releaseEvidenceArtifactsValidated === true, + railwayDeployEnabled: repoPublishSteps?.railwayDeployEnabled === true, + railwayFrontendDeployEnabled: repoPublishSteps?.railwayFrontendDeployEnabled === true, + railwayDeployEnabledLabel: toEnabledLabel(repoPublishSteps?.railwayDeployEnabled), + railwayFrontendDeployEnabledLabel: toEnabledLabel(repoPublishSteps?.railwayFrontendDeployEnabled), + }, + }; + + deployProvenance.rows = buildDeployProvenanceRows(deployProvenance); + return deployProvenance; +} + function collectBadgeEvidence(badgeDetailsJson) { const evidence = badgeDetailsJson?.evidence && typeof badgeDetailsJson.evidence === "object" ? badgeDetailsJson.evidence : {}; const deviceNodesEvidence = evidence.deviceNodes && typeof evidence.deviceNodes === "object" ? evidence.deviceNodes : {}; + const providerUsage = badgeDetailsJson?.providerUsage && typeof badgeDetailsJson.providerUsage === "object" + ? badgeDetailsJson.providerUsage + : {}; return { operatorTurnTruncation: toStatusValue(evidence.operatorTurnTruncation?.status), @@ -118,6 +228,8 @@ function collectBadgeEvidence(badgeDetailsJson) { pluginMarketplace: toStatusValue(evidence.pluginMarketplace?.status), deviceNodes: toStatusValue(evidence.deviceNodes?.status), agentUsage: toStatusValue(evidence.agentUsage?.status), + runtimeGuardrailsSignalPaths: toStatusValue(evidence.runtimeGuardrailsSignalPaths?.status), + providerUsage: toStatusValue(providerUsage.status), deviceNodeUpdates: deriveDeviceNodeUpdatesStatus(deviceNodesEvidence), costEstimatePresent: badgeDetailsJson?.costEstimate && typeof badgeDetailsJson.costEstimate === "object", tokensUsedPresent: badgeDetailsJson?.tokensUsed && typeof badgeDetailsJson.tokensUsed === "object", @@ -239,6 +351,15 @@ function toMarkdown(manifest) { } lines.push(`| costEstimatePresent | ${manifest.badgeEvidence.costEstimatePresent} |`); lines.push(`| tokensUsedPresent | ${manifest.badgeEvidence.tokensUsedPresent} |`); + + if (manifest.deployProvenance.rows.length > 0) { + lines.push(""); + lines.push("## Deploy / Publish Provenance"); + lines.push(""); + for (const row of manifest.deployProvenance.rows) { + lines.push(`- ${row.title}: ${row.summary}`); + } + } return lines.join("\n"); } @@ -247,13 +368,18 @@ function main() { const badgeDetailsPath = toAbsolutePath(options.badgeDetails); const summaryPath = toAbsolutePath(options.summary); + const railwayDeploySummaryPath = toAbsolutePath(options.railwayDeploySummary); + const repoPublishSummaryPath = toAbsolutePath(options.repoPublishSummary); const screenshotDir = toAbsolutePath(options.screenshotDir); const outputJsonPath = toAbsolutePath(options.outputJson); const outputMarkdownPath = toAbsolutePath(options.outputMarkdown); const badgeDetailsRead = readJsonIfExists(badgeDetailsPath); const summaryRead = readJsonIfExists(summaryPath); + const railwayDeploySummaryRead = readJsonIfExists(railwayDeploySummaryPath); + const repoPublishSummaryRead = readJsonIfExists(repoPublishSummaryPath); const badgeEvidence = collectBadgeEvidence(badgeDetailsRead.value ?? {}); + const deployProvenance = collectDeployProvenance(railwayDeploySummaryRead, repoPublishSummaryRead); const checklist = evaluateChecklist(screenshotDir, buildChecklist()); const missingRequiredCaptures = checklist.filter((item) => item.present !== true).length; @@ -267,6 +393,8 @@ function main() { "pluginMarketplace", "deviceNodes", "agentUsage", + "runtimeGuardrailsSignalPaths", + "providerUsage", "deviceNodeUpdates", ]; @@ -290,17 +418,27 @@ function main() { summaryPresent: summaryRead.present, summaryParsed: summaryRead.parsed, summaryParseError: summaryRead.parseError, + railwayDeploySummaryPath, + railwayDeploySummaryPresent: railwayDeploySummaryRead.present, + railwayDeploySummaryParsed: railwayDeploySummaryRead.parsed, + railwayDeploySummaryParseError: railwayDeploySummaryRead.parseError, + repoPublishSummaryPath, + repoPublishSummaryPresent: repoPublishSummaryRead.present, + repoPublishSummaryParsed: repoPublishSummaryRead.parsed, + repoPublishSummaryParseError: repoPublishSummaryRead.parseError, screenshotDir, }, strictMode: options.strict === true, criticalBadgeLanes, badgeEvidence, + deployProvenance, screenshotChecklist: checklist, summary: { requiredCaptures: checklist.length, presentCaptures: checklist.length - missingRequiredCaptures, missingRequiredCaptures, missingCriticalBadgeEvidence, + deployProvenanceRows: deployProvenance.rows.length, }, overallStatus, }; diff --git a/tests/unit/frontend-session-export-controls-alignment.test.ts b/tests/unit/frontend-session-export-controls-alignment.test.ts index af2f81f5..332faf29 100644 --- a/tests/unit/frontend-session-export-controls-alignment.test.ts +++ b/tests/unit/frontend-session-export-controls-alignment.test.ts @@ -83,9 +83,13 @@ test("demo frontend wires session export controls and runtime helpers", () => { const requiredStyleTokens = [ ".panel-live-connection {", ".panel-live-connection .action-group-primary > .export-menu {", + ".panel-live-connection .export-menu[open] {", + "z-index: 520;", ".panel-live-connection .export-menu-list {", + "z-index: 540;", + ".export-menu[open] {", ".export-menu-list {", - "z-index: 120;", + "z-index: 460;", ".export-menu-item:disabled {", "cursor: not-allowed;", ".export-menu-item:disabled .export-menu-item-icon {", diff --git a/tests/unit/frontend-tabbed-layout-alignment.test.ts b/tests/unit/frontend-tabbed-layout-alignment.test.ts index aecc2e5b..d6244795 100644 --- a/tests/unit/frontend-tabbed-layout-alignment.test.ts +++ b/tests/unit/frontend-tabbed-layout-alignment.test.ts @@ -66,7 +66,9 @@ test("demo frontend groups panels into tabbed layout with live tab default", () "window.localStorage?.setItem(TAB_STORAGE_KEY, resolvedTabId);", "window.addEventListener(\"hashchange\", () => {", "setActiveTab(hashTabId, { syncHash: false });", - "setActiveTab(readStoredTabId());", + "const initialTabId = readTabIdFromHash() ?? readStoredTabId();", + "setActiveTab(initialTabId, { syncHash: false });", + "setActiveTab(readStoredTabId(), { syncHash: false });", ]; for (const token of requiredRuntimeTokens) { assert.ok(appSource.includes(token), `frontend runtime missing tab token: ${token}`); diff --git a/tests/unit/judge-presentation-bundle-alignment.test.ts b/tests/unit/judge-presentation-bundle-alignment.test.ts index 759fc3ef..9ecb5023 100644 --- a/tests/unit/judge-presentation-bundle-alignment.test.ts +++ b/tests/unit/judge-presentation-bundle-alignment.test.ts @@ -5,17 +5,13 @@ import test from "node:test"; test("judge presentation bundle script is wired across package scripts and docs", () => { const packagePath = resolve(process.cwd(), "package.json"); - const readmePath = resolve(process.cwd(), "README.md"); const visualDocPath = resolve(process.cwd(), "docs", "judge-visual-evidence.md"); const quickstartPath = resolve(process.cwd(), "docs", "judge-quickstart.md"); - const runbookPath = resolve(process.cwd(), "docs", "challenge-demo-runbook.md"); const scriptPath = resolve(process.cwd(), "scripts", "judge-presentation-bundle.mjs"); const pkg = JSON.parse(readFileSync(packagePath, "utf8")) as { scripts?: Record }; - const readme = readFileSync(readmePath, "utf8"); const visualDoc = readFileSync(visualDocPath, "utf8"); const quickstart = readFileSync(quickstartPath, "utf8"); - const runbook = readFileSync(runbookPath, "utf8"); const script = readFileSync(scriptPath, "utf8"); assert.equal( @@ -31,27 +27,44 @@ test("judge presentation bundle script is wired across package scripts and docs" const docTokens = [ "npm run demo:e2e:visual:bundle", - "npm run demo:e2e:visual:judge", "presentation.md", + "railway-deploy-summary.json", + "repo-publish-summary.json", ]; for (const token of docTokens) { - assert.ok(readme.includes(token), `README missing presentation token: ${token}`); assert.ok(visualDoc.includes(token), `judge visual evidence doc missing presentation token: ${token}`); } + assert.ok( + visualDoc.includes("compact deploy/publish provenance"), + "judge visual evidence doc missing compact provenance wording", + ); assert.ok( quickstart.includes("npm run demo:e2e:visual:bundle"), "judge quickstart missing visual bundle command", ); - assert.ok(runbook.includes("npm run demo:e2e:visual:bundle"), "runbook missing visual bundle command"); - assert.ok(runbook.includes("npm run demo:e2e:visual:judge"), "runbook missing visual judge command"); + assert.ok( + quickstart.includes("artifacts/judge-visual-evidence/presentation.md"), + "judge quickstart missing presentation artifact path", + ); const scriptTokens = [ "Judge Presentation Bundle", "Challenge Category Coverage", "Critical Evidence Lanes", + "Runtime Guardrails Snapshot", + "Provider Adapter Snapshot", + "Deploy / Publish Provenance", "release-evidence/report.json", + "railway-deploy-summary.json", + "repo-publish-summary.json", "manifest.json", "gallery.md", + "runtimeGuardrailsSignalPaths", + "providerUsage", + "railwayDeploySummary", + "repoPublishSummary", + "sanitizeDeployProvenanceRows", + "buildDeployProvenanceRows", "pluginMarketplace", "deviceNodeUpdates", ]; diff --git a/tests/unit/judge-presentation-bundle-script.test.ts b/tests/unit/judge-presentation-bundle-script.test.ts new file mode 100644 index 00000000..98dc89ba --- /dev/null +++ b/tests/unit/judge-presentation-bundle-script.test.ts @@ -0,0 +1,492 @@ +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { spawnSync } from "node:child_process"; +import { join, resolve } from "node:path"; +import { tmpdir } from "node:os"; +import test from "node:test"; + +function runPresentationBundle(args) { + const scriptPath = resolve(process.cwd(), "scripts", "judge-presentation-bundle.mjs"); + return spawnSync(process.execPath, [scriptPath, ...args], { + cwd: process.cwd(), + encoding: "utf8", + }); +} + +test("judge presentation bundle includes runtime guardrails and provider adapter snapshots", () => { + const baseDir = mkdtempSync(join(tmpdir(), "mla-presentation-bundle-")); + const summaryPath = join(baseDir, "summary.json"); + const policyPath = join(baseDir, "policy-check.json"); + const badgePath = join(baseDir, "badge.json"); + const badgeDetailsPath = join(baseDir, "badge-details.json"); + const releaseEvidencePath = join(baseDir, "report.json"); + const railwayDeploySummaryPath = join(baseDir, "railway-deploy-summary.json"); + const repoPublishSummaryPath = join(baseDir, "repo-publish-summary.json"); + const visualManifestPath = join(baseDir, "manifest.json"); + const visualGalleryPath = join(baseDir, "gallery.md"); + const outputMarkdownPath = join(baseDir, "presentation.md"); + + writeFileSync( + summaryPath, + JSON.stringify( + { + scenarios: [ + { name: "live.negotiation", status: "passed" }, + { name: "storyteller.pipeline", status: "passed" }, + { name: "ui.approval.approve_resume", status: "passed" }, + ], + kpis: { + gatewayWsRoundTripMs: 37, + }, + }, + null, + 2, + ), + ); + writeFileSync( + policyPath, + JSON.stringify( + { + ok: true, + checks: 281, + violations: [], + }, + null, + 2, + ), + ); + writeFileSync( + badgePath, + JSON.stringify( + { + label: "Demo KPI Gate", + message: "pass | 281 checks | 37ms ws", + color: "brightgreen", + }, + null, + 2, + ), + ); + writeFileSync( + badgeDetailsPath, + JSON.stringify( + { + costEstimate: { totalUsd: 0.53 }, + tokensUsed: { total: 22640 }, + providerUsage: { + status: "pass", + activeSecondaryProviders: 1, + entries: [ + { + route: "storyteller-agent", + capability: "tts", + selectedProvider: "deepgram", + selectedModel: "aura-2", + defaultProvider: "gemini_api", + selectionReason: "provider_override", + secondaryActive: true, + }, + ], + }, + evidence: { + operatorTurnTruncation: { status: "pass" }, + operatorTurnDelete: { status: "pass" }, + operatorDamageControl: { status: "pass" }, + governancePolicy: { status: "pass" }, + skillsRegistry: { status: "pass" }, + pluginMarketplace: { status: "pass" }, + deviceNodes: { + status: "pass", + updatesValidated: true, + updatesHasUpsert: true, + updatesHasHeartbeat: true, + updatesApiValidated: true, + updatesTotal: 3, + }, + agentUsage: { status: "pass" }, + runtimeGuardrailsSignalPaths: { + status: "pass", + summaryStatus: "critical signals=2", + totalPaths: 2, + primaryPath: { + title: "Recovery drill - ui-executor-sandbox-audit", + kind: "runtime_drill", + phase: "recovery", + }, + }, + }, + }, + null, + 2, + ), + ); + writeFileSync( + releaseEvidencePath, + JSON.stringify( + { + statuses: { + runtimeGuardrailsSignalPathsStatus: "pass", + }, + }, + null, + 2, + ), + ); + writeFileSync( + railwayDeploySummaryPath, + JSON.stringify( + { + status: "success", + deploymentId: "railway-deploy-123", + effectivePublicUrl: "https://live-agent.example.test", + checks: { + publicBadge: { + badgeEndpoint: "https://live-agent.example.test/demo-e2e/badge.json", + badgeDetailsEndpoint: "https://live-agent.example.test/demo-e2e/badge-details.json", + }, + }, + }, + null, + 2, + ), + ); + writeFileSync( + repoPublishSummaryPath, + JSON.stringify( + { + verification: { + script: "verify:release", + releaseEvidenceArtifactsValidated: true, + }, + steps: { + railwayDeployEnabled: true, + railwayFrontendDeployEnabled: false, + }, + }, + null, + 2, + ), + ); + writeFileSync( + visualManifestPath, + JSON.stringify( + { + badgeEvidence: { + deviceNodeUpdates: "pass", + }, + }, + null, + 2, + ), + ); + mkdirSync(baseDir, { recursive: true }); + writeFileSync(visualGalleryPath, "# Gallery\n"); + + const result = runPresentationBundle([ + "--summary", + summaryPath, + "--policy", + policyPath, + "--badge", + badgePath, + "--badgeDetails", + badgeDetailsPath, + "--releaseEvidence", + releaseEvidencePath, + "--railwayDeploySummary", + railwayDeploySummaryPath, + "--repoPublishSummary", + repoPublishSummaryPath, + "--visualManifest", + visualManifestPath, + "--visualGallery", + visualGalleryPath, + "--outputMarkdown", + outputMarkdownPath, + ]); + + assert.equal(result.status, 0, `expected presentation bundle to pass, stderr=${result.stderr}`); + const markdown = readFileSync(outputMarkdownPath, "utf8"); + + for (const token of [ + "Runtime Guardrails Snapshot", + "Provider Adapter Snapshot", + "Deploy / Publish Provenance", + "runtimeGuardrailsSignalPaths", + "providerUsage", + "critical signals=2", + "Recovery drill - ui-executor-sandbox-audit (runtime_drill / recovery)", + "storyteller-agent", + "deepgram", + "aura-2", + "provider_override", + "Railway deploy: status success; deployment railway-deploy-123; public URL https://live-agent.example.test", + "Public badge: badge https://live-agent.example.test/demo-e2e/badge.json; badge-details https://live-agent.example.test/demo-e2e/badge-details.json", + "Repo publish: verification verify:release; release evidence validated; Railway deploy enabled; frontend deploy disabled", + "railway-deploy-123", + "https://live-agent.example.test/demo-e2e/badge.json", + "verify:release", + ]) { + assert.ok(markdown.includes(token), `presentation markdown missing token: ${token}`); + } + assert.ok( + !markdown.includes("Railway deploy summary present:"), + "presentation markdown should avoid verbose deploy presence lines", + ); + assert.ok( + !markdown.includes("Railway deploy summary: [railway-deploy-summary.json]("), + "presentation markdown should not link raw railway deploy summary JSON", + ); + assert.ok( + !markdown.includes("Repo publish summary: [repo-publish-summary.json]("), + "presentation markdown should not link raw repo publish summary JSON", + ); +}); + +test("judge presentation bundle reuses compact deploy provenance from visual manifest when raw summaries are absent", () => { + const baseDir = mkdtempSync(join(tmpdir(), "mla-presentation-bundle-aligned-")); + const summaryPath = join(baseDir, "summary.json"); + const policyPath = join(baseDir, "policy-check.json"); + const badgePath = join(baseDir, "badge.json"); + const badgeDetailsPath = join(baseDir, "badge-details.json"); + const releaseEvidencePath = join(baseDir, "report.json"); + const visualManifestPath = join(baseDir, "manifest.json"); + const visualGalleryPath = join(baseDir, "gallery.md"); + const outputMarkdownPath = join(baseDir, "presentation.md"); + + writeFileSync( + summaryPath, + JSON.stringify( + { + scenarios: [ + { name: "live.negotiation", status: "passed" }, + { name: "storyteller.pipeline", status: "passed" }, + { name: "ui.approval.approve_resume", status: "passed" }, + ], + }, + null, + 2, + ), + ); + writeFileSync(policyPath, JSON.stringify({ ok: true, checks: 12, violations: [] }, null, 2)); + writeFileSync( + badgePath, + JSON.stringify({ label: "Demo KPI Gate", message: "pass", color: "brightgreen" }, null, 2), + ); + writeFileSync( + badgeDetailsPath, + JSON.stringify( + { + providerUsage: { + status: "pass", + activeSecondaryProviders: 0, + entries: [], + }, + evidence: { + operatorTurnTruncation: { status: "pass" }, + operatorTurnDelete: { status: "pass" }, + operatorDamageControl: { status: "pass" }, + governancePolicy: { status: "pass" }, + skillsRegistry: { status: "pass" }, + pluginMarketplace: { status: "pass" }, + deviceNodes: { + status: "pass", + updatesValidated: true, + updatesHasUpsert: true, + updatesHasHeartbeat: true, + updatesApiValidated: true, + updatesTotal: 2, + }, + agentUsage: { status: "pass" }, + runtimeGuardrailsSignalPaths: { + status: "pass", + summaryStatus: "critical signals=1", + totalPaths: 1, + }, + }, + }, + null, + 2, + ), + ); + writeFileSync(releaseEvidencePath, JSON.stringify({ statuses: {} }, null, 2)); + writeFileSync( + visualManifestPath, + JSON.stringify( + { + badgeEvidence: { + deviceNodeUpdates: "pass", + }, + deployProvenance: { + rows: [ + { + id: "railwayDeploy", + title: "Railway deploy", + summary: "status success; deployment railway-aligned-456; public URL https://judge.example.test", + }, + { + id: "repoPublish", + title: "Repo publish", + summary: "verification verify:release; release evidence validated; Railway deploy enabled; frontend deploy disabled", + }, + ], + }, + }, + null, + 2, + ), + ); + writeFileSync(visualGalleryPath, "# Gallery\n"); + + const result = runPresentationBundle([ + "--summary", + summaryPath, + "--policy", + policyPath, + "--badge", + badgePath, + "--badgeDetails", + badgeDetailsPath, + "--releaseEvidence", + releaseEvidencePath, + "--visualManifest", + visualManifestPath, + "--visualGallery", + visualGalleryPath, + "--outputMarkdown", + outputMarkdownPath, + ]); + + assert.equal(result.status, 0, `expected presentation bundle to pass without raw deploy summaries, stderr=${result.stderr}`); + const markdown = readFileSync(outputMarkdownPath, "utf8"); + + assert.ok(markdown.includes("## Deploy / Publish Provenance")); + assert.ok(markdown.includes("Railway deploy: status success; deployment railway-aligned-456; public URL https://judge.example.test")); + assert.ok(markdown.includes("Repo publish: verification verify:release; release evidence validated; Railway deploy enabled; frontend deploy disabled")); + assert.ok(!markdown.includes("railway-deploy-summary.json"), "raw deploy summary JSON should stay out of judge-facing markdown"); + assert.ok(!markdown.includes("repo-publish-summary.json"), "raw repo publish JSON should stay out of judge-facing markdown"); + assert.ok(!markdown.includes("railwayDeploySummary source is missing"), "optional missing deploy summary should not add notes"); + assert.ok(!markdown.includes("repoPublishSummary source is missing"), "optional missing publish summary should not add notes"); +}); + +test("judge presentation bundle omits optional provenance section for ordinary local judge flows", () => { + const baseDir = mkdtempSync(join(tmpdir(), "mla-presentation-bundle-local-")); + const summaryPath = join(baseDir, "summary.json"); + const policyPath = join(baseDir, "policy-check.json"); + const badgePath = join(baseDir, "badge.json"); + const badgeDetailsPath = join(baseDir, "badge-details.json"); + const releaseEvidencePath = join(baseDir, "report.json"); + const visualManifestPath = join(baseDir, "manifest.json"); + const visualGalleryPath = join(baseDir, "gallery.md"); + const outputMarkdownPath = join(baseDir, "presentation.md"); + + writeFileSync( + summaryPath, + JSON.stringify( + { + scenarios: [ + { name: "live.negotiation", status: "passed" }, + { name: "storyteller.pipeline", status: "passed" }, + { name: "ui.approval.approve_resume", status: "passed" }, + ], + }, + null, + 2, + ), + ); + writeFileSync(policyPath, JSON.stringify({ ok: true, checks: 18, violations: [] }, null, 2)); + writeFileSync( + badgePath, + JSON.stringify({ label: "Demo KPI Gate", message: "pass", color: "brightgreen" }, null, 2), + ); + writeFileSync( + badgeDetailsPath, + JSON.stringify( + { + providerUsage: { + status: "pass", + activeSecondaryProviders: 1, + entries: [ + { + route: "storyteller-agent", + capability: "tts", + selectedProvider: "deepgram", + selectedModel: "aura-2", + defaultProvider: "gemini_api", + selectionReason: "provider_override", + secondaryActive: true, + }, + ], + }, + evidence: { + operatorTurnTruncation: { status: "pass" }, + operatorTurnDelete: { status: "pass" }, + operatorDamageControl: { status: "pass" }, + governancePolicy: { status: "pass" }, + skillsRegistry: { status: "pass" }, + pluginMarketplace: { status: "pass" }, + deviceNodes: { + status: "pass", + updatesValidated: true, + updatesHasUpsert: true, + updatesHasHeartbeat: true, + updatesApiValidated: true, + updatesTotal: 2, + }, + agentUsage: { status: "pass" }, + runtimeGuardrailsSignalPaths: { + status: "pass", + summaryStatus: "critical signals=1", + totalPaths: 1, + }, + }, + }, + null, + 2, + ), + ); + writeFileSync(releaseEvidencePath, JSON.stringify({ statuses: {} }, null, 2)); + writeFileSync( + visualManifestPath, + JSON.stringify( + { + badgeEvidence: { + deviceNodeUpdates: "pass", + }, + deployProvenance: { + rows: [], + }, + }, + null, + 2, + ), + ); + writeFileSync(visualGalleryPath, "# Gallery\n"); + + const result = runPresentationBundle([ + "--summary", + summaryPath, + "--policy", + policyPath, + "--badge", + badgePath, + "--badgeDetails", + badgeDetailsPath, + "--releaseEvidence", + releaseEvidencePath, + "--visualManifest", + visualManifestPath, + "--visualGallery", + visualGalleryPath, + "--outputMarkdown", + outputMarkdownPath, + ]); + + assert.equal(result.status, 0, `expected presentation bundle to pass for local judge flow, stderr=${result.stderr}`); + const markdown = readFileSync(outputMarkdownPath, "utf8"); + + assert.ok(!markdown.includes("## Deploy / Publish Provenance"), "optional provenance section should be omitted"); + assert.ok(!markdown.includes("unavailable"), "optional provenance should not add unavailable noise"); + assert.ok(!markdown.includes("railway-deploy-summary.json"), "missing deploy summary should not appear as a raw artifact link"); + assert.ok(!markdown.includes("repo-publish-summary.json"), "missing repo publish summary should not appear as a raw artifact link"); +}); diff --git a/tests/unit/judge-quickstart-docs-alignment.test.ts b/tests/unit/judge-quickstart-docs-alignment.test.ts index 363d951b..65ee822b 100644 --- a/tests/unit/judge-quickstart-docs-alignment.test.ts +++ b/tests/unit/judge-quickstart-docs-alignment.test.ts @@ -3,18 +3,20 @@ import { readFileSync } from "node:fs"; import { resolve } from "node:path"; import test from "node:test"; -test("readme exposes judge quick path and quickstart doc link", () => { - const readmePath = resolve(process.cwd(), "README.md"); - const source = readFileSync(readmePath, "utf8"); +test("judge quickstart exposes judge quick path and evidence entry points", () => { + const quickstartPath = resolve(process.cwd(), "docs", "judge-quickstart.md"); + const source = readFileSync(quickstartPath, "utf8"); const requiredTokens = [ - "Judge Quickstart: `docs/judge-quickstart.md`", - "## Judge Quick Path", - "npm run demo:e2e:fast && npm run demo:e2e:policy", + "# Judge Quickstart", + "Fast, judge-facing entry point for a 5-10 minute evaluation run.", + "npm run demo:e2e:fast", + "npm run demo:e2e:policy", "artifacts/demo-e2e/badge-details.json", + "intent=research", ]; for (const token of requiredTokens) { - assert.ok(source.includes(token), `README missing judge quick path token: ${token}`); + assert.ok(source.includes(token), `judge quickstart missing quick path token: ${token}`); } }); @@ -36,6 +38,17 @@ test("judge quickstart document includes core judge commands and categories", () "Export Session -> Export Markdown", "Export Session -> Export JSON", "Export Session -> Export Audio (WAV)", + "Operator Session Ops", + "Bootstrap Doctor & Auth Profiles", + "Browser Worker Control", + "Runtime Drill Runner", + "Workflow Control Panel", + "operatorPurpose", + "operatorSessionReplay", + "operatorDiscovery", + "runtimeGuardrailsSignalPaths", + "intent=research", + "sourceUrls", "docs/challenge-demo-runbook.md", "Demo Script by Minute (5-6 min)", "00:00-00:45", diff --git a/tests/unit/judge-visual-evidence-pack-alignment.test.ts b/tests/unit/judge-visual-evidence-pack-alignment.test.ts index 22d36b05..bf45d5d2 100644 --- a/tests/unit/judge-visual-evidence-pack-alignment.test.ts +++ b/tests/unit/judge-visual-evidence-pack-alignment.test.ts @@ -63,6 +63,11 @@ test("judge visual evidence pack is wired across package scripts, docs, and runb "operator-console-evidence.png", "observability-dashboard.png", "operatorTurnTruncation", + "runtimeGuardrailsSignalPaths", + "providerUsage", + "railway-deploy-summary.json", + "repo-publish-summary.json", + "compact deploy/publish provenance", "pluginMarketplace", "deviceNodeUpdates", ]; @@ -75,10 +80,17 @@ test("judge visual evidence pack is wired across package scripts, docs, and runb "overallStatus", "screenshotChecklist", "criticalBadgeLanes", + "runtimeGuardrailsSignalPaths", + "providerUsage", + "railwayDeploySummary", + "repoPublishSummary", + "buildDeployProvenanceRows", + "Deploy / Publish Provenance", "pluginMarketplace", "deviceNodeUpdates", "costEstimatePresent", "tokensUsedPresent", + "deployProvenanceRows", ]; for (const token of requiredScriptTokens) { assert.ok(script.includes(token), `judge visual evidence script missing token: ${token}`); diff --git a/tests/unit/judge-visual-evidence-pack-script.test.ts b/tests/unit/judge-visual-evidence-pack-script.test.ts index 3f7ba5bc..9ad38b90 100644 --- a/tests/unit/judge-visual-evidence-pack-script.test.ts +++ b/tests/unit/judge-visual-evidence-pack-script.test.ts @@ -27,10 +27,28 @@ function makeBadgeDetails(statusOverrides = {}) { pluginMarketplace: "pass", deviceNodes: "pass", agentUsage: "pass", + runtimeGuardrailsSignalPaths: "pass", + providerUsage: "pass", ...statusOverrides, }; return { + providerUsage: { + status: laneStatus.providerUsage, + validated: laneStatus.providerUsage === "pass", + activeSecondaryProviders: 1, + entries: [ + { + route: "storyteller-agent", + capability: "tts", + selectedProvider: "deepgram", + selectedModel: "aura-2", + defaultProvider: "gemini_api", + selectionReason: "provider_override", + secondaryActive: true, + }, + ], + }, evidence: { operatorTurnTruncation: { status: laneStatus.operatorTurnTruncation }, operatorTurnDelete: { status: laneStatus.operatorTurnDelete }, @@ -38,6 +56,11 @@ function makeBadgeDetails(statusOverrides = {}) { governancePolicy: { status: laneStatus.governancePolicy }, skillsRegistry: { status: laneStatus.skillsRegistry }, pluginMarketplace: { status: laneStatus.pluginMarketplace }, + runtimeGuardrailsSignalPaths: { + status: laneStatus.runtimeGuardrailsSignalPaths, + summaryStatus: "critical signals=1", + totalPaths: 1, + }, deviceNodes: { status: laneStatus.deviceNodes, updatesValidated: true, @@ -68,6 +91,8 @@ function runVisualPack(args) { test("judge visual evidence pack strict mode passes when required captures and badge lanes are present", () => { const baseDir = mkdtempSync(join(tmpdir(), "mla-visual-pack-pass-")); const screenshotsDir = join(baseDir, "screenshots"); + const railwayDeploySummaryPath = join(baseDir, "railway-deploy-summary.json"); + const repoPublishSummaryPath = join(baseDir, "repo-publish-summary.json"); mkdirSync(screenshotsDir, { recursive: true }); for (const fileName of REQUIRED_SCREENSHOTS) { @@ -81,6 +106,41 @@ test("judge visual evidence pack strict mode passes when required captures and b writeFileSync(badgePath, JSON.stringify(makeBadgeDetails(), null, 2)); writeFileSync(summaryPath, JSON.stringify({ ok: true }, null, 2)); + writeFileSync( + railwayDeploySummaryPath, + JSON.stringify( + { + status: "success", + deploymentId: "railway-deploy-123", + effectivePublicUrl: "https://live-agent.example.test", + checks: { + publicBadge: { + badgeEndpoint: "https://live-agent.example.test/demo-e2e/badge.json", + badgeDetailsEndpoint: "https://live-agent.example.test/demo-e2e/badge-details.json", + }, + }, + }, + null, + 2, + ), + ); + writeFileSync( + repoPublishSummaryPath, + JSON.stringify( + { + verification: { + script: "verify:release", + releaseEvidenceArtifactsValidated: true, + }, + steps: { + railwayDeployEnabled: true, + railwayFrontendDeployEnabled: false, + }, + }, + null, + 2, + ), + ); const result = runVisualPack([ "--strict", @@ -88,6 +148,10 @@ test("judge visual evidence pack strict mode passes when required captures and b badgePath, "--summary", summaryPath, + "--railwayDeploySummary", + railwayDeploySummaryPath, + "--repoPublishSummary", + repoPublishSummaryPath, "--screenshotDir", screenshotsDir, "--outputJson", @@ -102,7 +166,27 @@ test("judge visual evidence pack strict mode passes when required captures and b assert.equal(manifest.summary.missingRequiredCaptures, 0); assert.equal(manifest.summary.missingCriticalBadgeEvidence, 0); assert.equal(manifest.badgeEvidence.pluginMarketplace, "pass"); + assert.equal(manifest.badgeEvidence.runtimeGuardrailsSignalPaths, "pass"); + assert.equal(manifest.badgeEvidence.providerUsage, "pass"); assert.equal(manifest.badgeEvidence.deviceNodeUpdates, "pass"); + assert.equal(manifest.deployProvenance.railwayDeploy.status, "success"); + assert.equal(manifest.deployProvenance.railwayDeploy.available, true); + assert.equal(manifest.deployProvenance.railwayDeploy.deploymentId, "railway-deploy-123"); + assert.equal(manifest.deployProvenance.repoPublish.verificationScript, "verify:release"); + assert.equal(manifest.deployProvenance.repoPublish.available, true); + assert.equal(manifest.deployProvenance.repoPublish.releaseEvidenceValidated, true); + assert.equal(manifest.summary.deployProvenanceRows, 3); + + const markdown = readFileSync(outMd, "utf8"); + for (const token of [ + "## Deploy / Publish Provenance", + "Railway deploy: status success; deployment railway-deploy-123; public URL https://live-agent.example.test", + "Public badge: badge https://live-agent.example.test/demo-e2e/badge.json; badge-details https://live-agent.example.test/demo-e2e/badge-details.json", + "Repo publish: verification verify:release; release evidence validated; Railway deploy enabled; frontend deploy disabled", + ]) { + assert.ok(markdown.includes(token), `visual pack markdown missing token: ${token}`); + } + assert.ok(!markdown.includes("Railway deploy summary present:"), "markdown should avoid verbose presence lines"); }); test("judge visual evidence pack strict mode fails when captures are missing", () => { @@ -141,3 +225,46 @@ test("judge visual evidence pack strict mode fails when captures are missing", ( assert.equal(manifest.overallStatus, "fail"); assert.ok(manifest.summary.missingRequiredCaptures >= 1); }); + +test("judge visual evidence pack keeps deploy provenance optional for local strict flows", () => { + const baseDir = mkdtempSync(join(tmpdir(), "mla-visual-pack-optional-")); + const screenshotsDir = join(baseDir, "screenshots"); + mkdirSync(screenshotsDir, { recursive: true }); + + for (const fileName of REQUIRED_SCREENSHOTS) { + writeFileSync(join(screenshotsDir, fileName), Buffer.from("fake-png-bytes")); + } + + const badgePath = join(baseDir, "badge-details.json"); + const summaryPath = join(baseDir, "summary.json"); + const outJson = join(baseDir, "manifest.json"); + const outMd = join(baseDir, "manifest.md"); + + writeFileSync(badgePath, JSON.stringify(makeBadgeDetails(), null, 2)); + writeFileSync(summaryPath, JSON.stringify({ ok: true }, null, 2)); + + const result = runVisualPack([ + "--strict", + "--badgeDetails", + badgePath, + "--summary", + summaryPath, + "--screenshotDir", + screenshotsDir, + "--outputJson", + outJson, + "--outputMarkdown", + outMd, + ]); + + assert.equal(result.status, 0, `expected strict visual pack to pass without optional deploy summaries, stderr=${result.stderr}`); + const manifest = JSON.parse(readFileSync(outJson, "utf8")); + const markdown = readFileSync(outMd, "utf8"); + + assert.equal(manifest.overallStatus, "pass"); + assert.equal(manifest.summary.deployProvenanceRows, 0); + assert.deepEqual(manifest.deployProvenance.rows, []); + assert.equal(manifest.deployProvenance.available, false); + assert.ok(!markdown.includes("## Deploy / Publish Provenance"), "optional provenance section should be omitted when absent"); + assert.ok(!markdown.includes("unavailable"), "optional provenance should not add unavailable noise to markdown"); +});