From 1278371a869d77925b8dbd9268b2496a3943f86d Mon Sep 17 00:00:00 2001 From: Codex Bot Date: Wed, 4 Mar 2026 21:45:08 +0500 Subject: [PATCH 1/4] Polish tab bootstrap and dropdown layering in demo frontend --- apps/demo-frontend/public/app.js | 5 ++- apps/demo-frontend/public/styles.css | 43 ++++++++++++------- ...-session-export-controls-alignment.test.ts | 3 +- .../frontend-tabbed-layout-alignment.test.ts | 4 +- 4 files changed, 36 insertions(+), 19 deletions(-) diff --git a/apps/demo-frontend/public/app.js b/apps/demo-frontend/public/app.js index bf895ca1..bd0b3b20 100644 --- a/apps/demo-frontend/public/app.js +++ b/apps/demo-frontend/public/app.js @@ -10030,6 +10030,9 @@ function bindEvents() { } async function bootstrap() { + const initialTabId = readTabIdFromHash() ?? readStoredTabId(); + setActiveTab(initialTabId, { syncHash: false }); + const runtimeConfig = await loadRuntimeConfig(); if (runtimeConfig?.wsUrl) { el.wsUrl.value = runtimeConfig.wsUrl; @@ -10065,7 +10068,7 @@ async function bootstrap() { resetOperatorBoardView({ mode: readStoredOperatorBoardMode(), persistMode: false }); renderTaskList(); evaluateConstraints(); - setActiveTab(readStoredTabId()); + setActiveTab(readStoredTabId(), { syncHash: false }); setUiTaskFieldsVisibility(); initBackgroundVideoLoopBlend(); enhanceSelectControls(); diff --git a/apps/demo-frontend/public/styles.css b/apps/demo-frontend/public/styles.css index e6dad0c0..31146b42 100644 --- a/apps/demo-frontend/public/styles.css +++ b/apps/demo-frontend/public/styles.css @@ -738,9 +738,9 @@ textarea { } select { - appearance: none; - -webkit-appearance: none; - -moz-appearance: none; + appearance: none !important; + -webkit-appearance: none !important; + -moz-appearance: none !important; cursor: pointer; padding-right: 42px; --select-surface-start: var(--surface-control); @@ -751,6 +751,10 @@ select { linear-gradient(180deg, var(--select-surface-start), var(--select-surface-end)); } +select::-ms-expand { + display: none; +} + select:hover { --select-surface-start: var(--surface-control-hover); } @@ -1750,6 +1754,7 @@ textarea { min-height: 74px; padding-top: 11px; padding-bottom: 11px; + overflow: visible; } .panel-live-connection .action-group-primary > .export-menu { @@ -1788,6 +1793,12 @@ textarea { .export-menu { position: relative; min-width: 0; + z-index: 140; + isolation: isolate; +} + +.export-menu[open] { + z-index: 420; } .export-menu > summary { @@ -1890,7 +1901,7 @@ textarea { position: absolute; right: 0; top: calc(100% + 8px); - z-index: 120; + z-index: 460; min-width: min(320px, calc(100vw - 56px)); padding: 8px; display: grid; @@ -2305,7 +2316,7 @@ button:focus-visible { .meta-row-status-live { margin-top: 14px; - padding: 8px; + padding: 10px; border: 1px solid color-mix(in oklch, var(--primary) 20%, var(--border-soft)); border-radius: calc(var(--radius) - 4px); background: @@ -2318,14 +2329,14 @@ button:focus-visible { display: grid; grid-template-columns: repeat(auto-fit, minmax(162px, 1fr)); grid-template-columns: repeat(4, minmax(0, 1fr)); - gap: 8px; + gap: 10px; overflow: visible; } .meta-row-status-live > div { min-width: 0; - min-height: 46px; - padding: 7px 10px; + min-height: 48px; + padding: 8px 11px; border-radius: calc(var(--radius) - 10px); display: inline-flex; align-items: center; @@ -2401,18 +2412,18 @@ button:focus-visible { } .meta-row-status-live strong { - color: color-mix(in oklch, white 98%, var(--foreground)); - font-size: 0.74rem; + color: color-mix(in oklch, white 99.6%, var(--foreground)); + font-size: 0.76rem; text-transform: none; - letter-spacing: 0.015em; + letter-spacing: 0.012em; white-space: nowrap; text-shadow: 0 1px 0 color-mix(in oklch, black 30%, transparent); } .meta-row-status-live > div > span:not(.status-pill) { - color: color-mix(in oklch, white 99.5%, var(--foreground)); - font-size: 0.82rem; - font-weight: 650; + color: color-mix(in oklch, white 99.8%, var(--foreground)); + font-size: 0.84rem; + font-weight: 670; line-height: 1.28; } @@ -2435,7 +2446,7 @@ button:focus-visible { max-width: 100%; padding: 4px 9px; border-radius: 999px; - border: 1px solid color-mix(in oklch, var(--primary) 32%, var(--border-soft)); + border: 1px solid color-mix(in oklch, var(--primary) 38%, var(--border-soft)); background: radial-gradient(160px 64px at 12% -34%, color-mix(in oklch, var(--primary) 10%, transparent), transparent 72%), linear-gradient( @@ -2453,7 +2464,7 @@ button:focus-visible { word-break: break-word; box-shadow: inset 0 1px 0 color-mix(in oklch, var(--foreground) 10%, transparent), - 0 0 0 1px color-mix(in oklch, var(--primary) 16%, transparent); + 0 0 0 1px color-mix(in oklch, var(--primary) 20%, transparent); } .meta-row > div { diff --git a/tests/unit/frontend-session-export-controls-alignment.test.ts b/tests/unit/frontend-session-export-controls-alignment.test.ts index af2f81f5..40b4cac0 100644 --- a/tests/unit/frontend-session-export-controls-alignment.test.ts +++ b/tests/unit/frontend-session-export-controls-alignment.test.ts @@ -84,8 +84,9 @@ test("demo frontend wires session export controls and runtime helpers", () => { ".panel-live-connection {", ".panel-live-connection .action-group-primary > .export-menu {", ".panel-live-connection .export-menu-list {", + ".export-menu[open] {", ".export-menu-list {", - "z-index: 120;", + "z-index: 460;", ".export-menu-item:disabled {", "cursor: not-allowed;", ".export-menu-item:disabled .export-menu-item-icon {", diff --git a/tests/unit/frontend-tabbed-layout-alignment.test.ts b/tests/unit/frontend-tabbed-layout-alignment.test.ts index aecc2e5b..d6244795 100644 --- a/tests/unit/frontend-tabbed-layout-alignment.test.ts +++ b/tests/unit/frontend-tabbed-layout-alignment.test.ts @@ -66,7 +66,9 @@ test("demo frontend groups panels into tabbed layout with live tab default", () "window.localStorage?.setItem(TAB_STORAGE_KEY, resolvedTabId);", "window.addEventListener(\"hashchange\", () => {", "setActiveTab(hashTabId, { syncHash: false });", - "setActiveTab(readStoredTabId());", + "const initialTabId = readTabIdFromHash() ?? readStoredTabId();", + "setActiveTab(initialTabId, { syncHash: false });", + "setActiveTab(readStoredTabId(), { syncHash: false });", ]; for (const token of requiredRuntimeTokens) { assert.ok(appSource.includes(token), `frontend runtime missing tab token: ${token}`); From ddc18b6c7a98df38bd6febf89825bc6cd4b22996 Mon Sep 17 00:00:00 2001 From: Codex Bot Date: Wed, 4 Mar 2026 21:54:30 +0500 Subject: [PATCH 2/4] Raise frontend export dropdown stacking priority --- apps/demo-frontend/public/styles.css | 4 ++-- tests/unit/frontend-session-export-controls-alignment.test.ts | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/demo-frontend/public/styles.css b/apps/demo-frontend/public/styles.css index 31146b42..3b9ce1c2 100644 --- a/apps/demo-frontend/public/styles.css +++ b/apps/demo-frontend/public/styles.css @@ -1764,11 +1764,11 @@ textarea { } .panel-live-connection .export-menu[open] { - z-index: 240; + z-index: 520; } .panel-live-connection .export-menu-list { - z-index: 260; + z-index: 540; } .action-group { diff --git a/tests/unit/frontend-session-export-controls-alignment.test.ts b/tests/unit/frontend-session-export-controls-alignment.test.ts index 40b4cac0..332faf29 100644 --- a/tests/unit/frontend-session-export-controls-alignment.test.ts +++ b/tests/unit/frontend-session-export-controls-alignment.test.ts @@ -83,7 +83,10 @@ test("demo frontend wires session export controls and runtime helpers", () => { const requiredStyleTokens = [ ".panel-live-connection {", ".panel-live-connection .action-group-primary > .export-menu {", + ".panel-live-connection .export-menu[open] {", + "z-index: 520;", ".panel-live-connection .export-menu-list {", + "z-index: 540;", ".export-menu[open] {", ".export-menu-list {", "z-index: 460;", From 4450f5f87ce619b0d3135265a39e2b986938cbd9 Mon Sep 17 00:00:00 2001 From: Codex Bot Date: Sun, 8 Mar 2026 12:53:46 +0500 Subject: [PATCH 3/4] Align judge bundle provenance with optional deploy/publish evidence --- docs/judge-quickstart.md | 22 +- docs/judge-visual-evidence.md | 13 +- scripts/judge-presentation-bundle.mjs | 231 +++++++- scripts/judge-visual-evidence-pack.mjs | 138 +++++ scripts/release-readiness.ps1 | 416 ++++++++++++++- ...udge-presentation-bundle-alignment.test.ts | 17 + .../judge-presentation-bundle-script.test.ts | 492 ++++++++++++++++++ .../judge-quickstart-docs-alignment.test.ts | 12 + ...dge-visual-evidence-pack-alignment.test.ts | 12 + .../judge-visual-evidence-pack-script.test.ts | 127 +++++ tests/unit/skills-catalog.test.ts | 281 ++++++++++ 11 files changed, 1743 insertions(+), 18 deletions(-) create mode 100644 tests/unit/judge-presentation-bundle-script.test.ts create mode 100644 tests/unit/skills-catalog.test.ts diff --git a/docs/judge-quickstart.md b/docs/judge-quickstart.md index ba860228..9d67e60c 100644 --- a/docs/judge-quickstart.md +++ b/docs/judge-quickstart.md @@ -6,7 +6,7 @@ Fast, judge-facing entry point for a 5-10 minute evaluation run. This project covers all three challenge categories in one platform: -1. Live Agent (realtime speech, interruption, translation, negotiation) +1. Live Agent (realtime speech, interruption, translation, negotiation, grounded research) 2. Creative Storyteller (text + audio + image + video narrative flow) 3. UI Navigator (computer-use style UI planning/execution with approval guardrails) @@ -55,6 +55,8 @@ Artifacts: 8. `artifacts/judge-visual-evidence/presentation.md` 9. `artifacts/demo-e2e/epic-summary.json` +If deploy/publish artifacts are present, `manifest.md` and `presentation.md` also surface compact deploy/publish provenance from `artifacts/deploy/railway-deploy-summary.json` and `artifacts/deploy/repo-publish-summary.json`. Ordinary local judge flows omit that section, and raw deploy/publish JSON is not embedded into the judge-facing markdown. + ## 3) Validate Release Readiness ```bash @@ -64,21 +66,30 @@ npm run verify:release ## 4) What Judges Should See in UI 1. Connection + assistant lifecycle (`idle/streaming/speaking`). -2. Live interruption, truncate/delete evidence, and gateway error correlation. +2. Live interruption, truncate/delete evidence, gateway error correlation, and optional `research` citations/source URLs. 3. Operator Console panels: - Live Bridge Status - Approvals Queue + - Workflow Runtime / Runtime Guardrails - Device Nodes Health / Updates + - Bootstrap Doctor / Browser Workers - Governance Policy Lifecycle - Skills Registry Lifecycle - Plugin Marketplace Lifecycle - Agent Usage Evidence - Cost & Tokens Evidence -4. Session export controls: +4. Operator support panels: + - `Runtime Drill Runner` for repo-owned dry-run/live recovery drills and `followUpContext` handoff. + - `Workflow Control Panel` for redacted assistive-router/runtime override posture. + - `Operator Session Ops` for saved `operatorPurpose`, session replay, and cross-agent discovery. + - `Bootstrap Doctor & Auth Profiles` for provider/auth-profile/device/fallback posture. + - `Browser Worker Control` for queue/checkpoint posture on long-running UI jobs. +5. Session export controls: - `Export Session -> Export Markdown` - `Export Session -> Export JSON` - `Export Session -> Export Audio (WAV)` -5. Story Timeline panel: + - Confirm exported Markdown/JSON include `runtimeGuardrailsSignalPaths`, `operatorPurpose`, `operatorSessionReplay`, and `operatorDiscovery`. +6. Story Timeline panel: - Confirm `Timeline State` KPI transitions (`0%` idle -> ready/pending) as story output arrives. - Segment scrubber/selector reflects `output.story.timeline` - Preview card shows segment text + `image/video/audio` refs @@ -100,6 +111,7 @@ npm run verify:release - Start mic, send live request, then trigger interruption. - Show truncate/delete/gateway-correlation evidence in Operator Console. - Mention roundtrip and interrupt KPI lanes in `artifacts/demo-e2e/badge-details.json`. + - If judges ask for grounded-research proof, switch to `intent=research` once and show citation-bearing `answer`, `citations`, and `sourceUrls`. 3. `02:15-03:30` Creative Storyteller category: - Send storyteller prompt. - Open `Story Timeline` panel and scrub segments. @@ -107,8 +119,10 @@ npm run verify:release 4. `03:30-04:45` UI Navigator category: - Send `ui_task` intent with grounding fields. - Show approval flow and damage-control verdict in Operator Console. + - Save a short purpose in `Operator Session Ops`, then open `Bootstrap Doctor & Auth Profiles` and `Browser Worker Control` once to show runtime posture before execution. - Confirm safety gates before execution. 5. `04:45-05:30` Evidence close: - Run `npm run demo:epic` (or fallback `npm run demo:e2e:visual:judge` if e2e/policy/badge were already executed). - Open `artifacts/judge-visual-evidence/presentation.md`. - Confirm all evidence lanes are `pass` in `artifacts/demo-e2e/badge-details.json`. + - Export session `JSON` or `Markdown` and confirm `runtimeGuardrailsSignalPaths`, `operatorPurpose`, `operatorSessionReplay`, and `operatorDiscovery`. diff --git a/docs/judge-visual-evidence.md b/docs/judge-visual-evidence.md index 47c89ab6..55d35a18 100644 --- a/docs/judge-visual-evidence.md +++ b/docs/judge-visual-evidence.md @@ -5,8 +5,9 @@ Create one reproducible visual bundle for judges: 1. Screenshot checklist status (present/missing). -2. Critical badge-evidence lane status (`pass/fail/unavailable`). +2. Critical badge-evidence lane status (`pass/fail/unavailable`), including runtime guardrails and provider provenance. 3. Single manifest for quick go/no-go before submission. +4. One-page presentation bundle with runtime guardrails snapshot, provider adapter snapshot, and compact deploy/publish provenance when optional Railway/repo-publish artifacts are available. ## Commands @@ -76,7 +77,9 @@ Defaults used by `scripts/judge-visual-evidence-pack.mjs`: 1. Badge details: `artifacts/demo-e2e/badge-details.json` 2. Demo summary: `artifacts/demo-e2e/summary.json` -3. Screenshot directory: `artifacts/judge-visual-evidence/screenshots` +3. Optional Railway deploy summary: `artifacts/deploy/railway-deploy-summary.json` +4. Optional repo publish summary: `artifacts/deploy/repo-publish-summary.json` +5. Screenshot directory: `artifacts/judge-visual-evidence/screenshots` Defaults used by `scripts/judge-visual-capture.mjs`: @@ -94,6 +97,8 @@ Defaults used by `scripts/judge-visual-capture.mjs`: 5. `artifacts/judge-visual-evidence/presentation.md` 6. `artifacts/demo-e2e/epic-summary.json` +`manifest.md` and `presentation.md` surface compact deploy/publish provenance from `railway-deploy-summary.json` / `repo-publish-summary.json` when those optional files are present. Ordinary local judge flows omit that section instead of filling the page with `unavailable` placeholders, and raw deploy/publish JSON is not embedded into the judge-facing markdown. + ## Required Screenshot Filenames Put files into `artifacts/judge-visual-evidence/screenshots`: @@ -120,4 +125,6 @@ Pack marks these as critical: 6. `pluginMarketplace` 7. `deviceNodes` 8. `agentUsage` -9. `deviceNodeUpdates` (derived from `deviceNodes` updates fields) +9. `runtimeGuardrailsSignalPaths` +10. `providerUsage` +11. `deviceNodeUpdates` (derived from `deviceNodes` updates fields) diff --git a/scripts/judge-presentation-bundle.mjs b/scripts/judge-presentation-bundle.mjs index d2ccc835..6ae4f503 100644 --- a/scripts/judge-presentation-bundle.mjs +++ b/scripts/judge-presentation-bundle.mjs @@ -9,6 +9,8 @@ function parseArgs(argv) { badge: "artifacts/demo-e2e/badge.json", badgeDetails: "artifacts/demo-e2e/badge-details.json", releaseEvidence: "artifacts/release-evidence/report.json", + railwayDeploySummary: "artifacts/deploy/railway-deploy-summary.json", + repoPublishSummary: "artifacts/deploy/repo-publish-summary.json", visualManifest: "artifacts/judge-visual-evidence/manifest.json", visualGallery: "artifacts/judge-visual-evidence/gallery.md", }; @@ -39,6 +41,14 @@ function parseArgs(argv) { options.releaseEvidence = String(argv[++index] ?? options.releaseEvidence); continue; } + if (arg === "--railwayDeploySummary") { + options.railwayDeploySummary = String(argv[++index] ?? options.railwayDeploySummary); + continue; + } + if (arg === "--repoPublishSummary") { + options.repoPublishSummary = String(argv[++index] ?? options.repoPublishSummary); + continue; + } if (arg === "--visualManifest") { options.visualManifest = String(argv[++index] ?? options.visualManifest); continue; @@ -89,6 +99,24 @@ function toStatus(value) { return "unavailable"; } +function toOptionalText(value) { + if (typeof value !== "string") { + return "unavailable"; + } + const normalized = value.trim(); + return normalized.length > 0 ? normalized : "unavailable"; +} + +function toEnabledLabel(value) { + if (value === true) { + return "enabled"; + } + if (value === false) { + return "disabled"; + } + return "n/a"; +} + function toRelativePath(fromFile, toFile) { const raw = relative(dirname(fromFile), toFile); return raw.split(sep).join("/"); @@ -137,6 +165,131 @@ function deriveDeviceNodeUpdatesStatus(visualManifest, badgeDetails) { return "unavailable"; } +function toProviderEntrySummary(entry) { + if (!entry || typeof entry !== "object") { + return null; + } + + return { + route: String(entry.route ?? "n/a"), + capability: String(entry.capability ?? "n/a"), + selectedProvider: String(entry.selectedProvider ?? "n/a"), + selectedModel: String(entry.selectedModel ?? "n/a"), + defaultProvider: String(entry.defaultProvider ?? "n/a"), + selectionReason: String(entry.selectionReason ?? "n/a"), + secondaryActive: entry.secondaryActive === true ? "yes" : "no", + }; +} + +function summarizePrimaryPath(primaryPath) { + if (!primaryPath || typeof primaryPath !== "object") { + return "n/a"; + } + + const title = String(primaryPath.title ?? "n/a"); + const kind = String(primaryPath.kind ?? "n/a"); + const phase = String(primaryPath.phase ?? "n/a"); + return `${title} (${kind} / ${phase})`; +} + +function sanitizeDeployProvenanceRows(rows) { + if (!Array.isArray(rows)) { + return []; + } + return rows + .map((row) => ({ + id: String(row?.id ?? "").trim(), + title: String(row?.title ?? "").trim(), + summary: String(row?.summary ?? "").trim(), + })) + .filter((row) => row.title.length > 0 && row.summary.length > 0); +} + +function buildDeployProvenanceRows(deployProvenance) { + const rows = []; + const railwayDeploy = deployProvenance.railwayDeploy; + const repoPublish = deployProvenance.repoPublish; + + if (railwayDeploy.present) { + rows.push({ + id: "railwayDeploy", + title: "Railway deploy", + summary: `status ${railwayDeploy.status}; deployment ${railwayDeploy.deploymentId}; public URL ${railwayDeploy.effectivePublicUrl}`, + }); + + const badgeParts = []; + if (railwayDeploy.badgeEndpoint !== "unavailable") { + badgeParts.push(`badge ${railwayDeploy.badgeEndpoint}`); + } + if (railwayDeploy.badgeDetailsEndpoint !== "unavailable") { + badgeParts.push(`badge-details ${railwayDeploy.badgeDetailsEndpoint}`); + } + if (badgeParts.length > 0) { + rows.push({ + id: "railwayBadge", + title: "Public badge", + summary: badgeParts.join("; "), + }); + } + } + + if (repoPublish.present) { + rows.push({ + id: "repoPublish", + title: "Repo publish", + summary: [ + `verification ${repoPublish.verificationScript}`, + repoPublish.releaseEvidenceValidated === "true" + ? "release evidence validated" + : "release evidence not validated", + `Railway deploy ${repoPublish.railwayDeployEnabledLabel}`, + `frontend deploy ${repoPublish.railwayFrontendDeployEnabledLabel}`, + ].join("; "), + }); + } + + return rows; +} + +function summarizeDeployProvenance(visualManifest, railwayDeploySummaryRead, repoPublishSummaryRead) { + const railwayDeploySummary = + railwayDeploySummaryRead.present && railwayDeploySummaryRead.parsed ? railwayDeploySummaryRead.value : null; + const repoPublishSummary = + repoPublishSummaryRead.present && repoPublishSummaryRead.parsed ? repoPublishSummaryRead.value : null; + const railwayPublicBadge = railwayDeploySummary?.checks?.publicBadge ?? {}; + const repoPublishVerification = repoPublishSummary?.verification ?? {}; + const repoPublishSteps = repoPublishSummary?.steps ?? {}; + + const deployProvenance = { + available: Boolean(railwayDeploySummary || repoPublishSummary), + rows: [], + railwayDeploy: { + present: railwayDeploySummaryRead.present && railwayDeploySummaryRead.parsed, + status: toOptionalText(railwayDeploySummary?.status), + deploymentId: toOptionalText(railwayDeploySummary?.deploymentId), + effectivePublicUrl: toOptionalText(railwayDeploySummary?.effectivePublicUrl), + badgeEndpoint: toOptionalText(railwayPublicBadge?.badgeEndpoint), + badgeDetailsEndpoint: toOptionalText(railwayPublicBadge?.badgeDetailsEndpoint), + }, + repoPublish: { + present: repoPublishSummaryRead.present && repoPublishSummaryRead.parsed, + verificationScript: toOptionalText(repoPublishVerification?.script), + releaseEvidenceValidated: repoPublishVerification?.releaseEvidenceArtifactsValidated === true ? "true" : "false", + railwayDeployEnabled: repoPublishSteps?.railwayDeployEnabled === true ? "true" : "false", + railwayFrontendDeployEnabled: repoPublishSteps?.railwayFrontendDeployEnabled === true ? "true" : "false", + railwayDeployEnabledLabel: toEnabledLabel(repoPublishSteps?.railwayDeployEnabled), + railwayFrontendDeployEnabledLabel: toEnabledLabel(repoPublishSteps?.railwayFrontendDeployEnabled), + }, + }; + + const rowsFromVisualManifest = sanitizeDeployProvenanceRows(visualManifest?.deployProvenance?.rows); + deployProvenance.rows = + rowsFromVisualManifest.length > 0 ? rowsFromVisualManifest : buildDeployProvenanceRows(deployProvenance); + deployProvenance.available = deployProvenance.rows.length > 0 || deployProvenance.available; + + return deployProvenance; +} + function toMarkdown(bundle) { const lines = []; lines.push("# Judge Presentation Bundle"); @@ -174,6 +327,37 @@ function toMarkdown(bundle) { lines.push(`- Tokens used total: ${bundle.tokensUsedTotal}`); lines.push(""); + lines.push("## Runtime Guardrails Snapshot"); + lines.push(""); + lines.push(`- Status: ${bundle.runtimeGuardrails.status}`); + lines.push(`- Summary: ${bundle.runtimeGuardrails.summaryStatus}`); + lines.push(`- Total paths: ${bundle.runtimeGuardrails.totalPaths}`); + lines.push(`- Primary path: ${bundle.runtimeGuardrails.primaryPathSummary}`); + lines.push(""); + + lines.push("## Provider Adapter Snapshot"); + lines.push(""); + lines.push(`- Status: ${bundle.providerUsage.status}`); + lines.push(`- Active secondary providers: ${bundle.providerUsage.activeSecondaryProviders}`); + lines.push(""); + lines.push("| Route | Capability | Provider | Model | Default Provider | Selection Reason | Secondary Active |"); + lines.push("|---|---|---|---|---|---|---|"); + for (const row of bundle.providerUsage.entries) { + lines.push( + `| ${row.route} | ${row.capability} | ${row.selectedProvider} | ${row.selectedModel} | ${row.defaultProvider} | ${row.selectionReason} | ${row.secondaryActive} |`, + ); + } + + if (bundle.deployProvenance.rows.length > 0) { + lines.push(""); + lines.push("## Deploy / Publish Provenance"); + lines.push(""); + for (const row of bundle.deployProvenance.rows) { + lines.push(`- ${row.title}: ${row.summary}`); + } + } + lines.push(""); + lines.push("## Visual Assets"); lines.push(""); lines.push(`- Visual manifest: [manifest.json](${bundle.artifacts.visualManifestRel})`); @@ -209,6 +393,8 @@ function main() { const badgePath = toAbsolutePath(options.badge); const badgeDetailsPath = toAbsolutePath(options.badgeDetails); const releaseEvidencePath = toAbsolutePath(options.releaseEvidence); + const railwayDeploySummaryPath = toAbsolutePath(options.railwayDeploySummary); + const repoPublishSummaryPath = toAbsolutePath(options.repoPublishSummary); const visualManifestPath = toAbsolutePath(options.visualManifest); const visualGalleryPath = toAbsolutePath(options.visualGallery); @@ -217,6 +403,8 @@ function main() { const badgeRead = readJsonIfExists(badgePath); const badgeDetailsRead = readJsonIfExists(badgeDetailsPath); const releaseEvidenceRead = readJsonIfExists(releaseEvidencePath); + const railwayDeploySummaryRead = readJsonIfExists(railwayDeploySummaryPath); + const repoPublishSummaryRead = readJsonIfExists(repoPublishSummaryPath); const visualManifestRead = readJsonIfExists(visualManifestPath); const summary = summaryRead.value ?? {}; @@ -225,6 +413,13 @@ function main() { const badgeDetails = badgeDetailsRead.value ?? {}; const visualManifest = visualManifestRead.value ?? {}; const releaseEvidence = releaseEvidenceRead.value ?? {}; + const runtimeGuardrails = badgeDetails?.evidence?.runtimeGuardrailsSignalPaths ?? {}; + const providerUsage = badgeDetails?.providerUsage ?? releaseEvidence?.providerUsage ?? {}; + const deployProvenance = summarizeDeployProvenance( + visualManifest, + railwayDeploySummaryRead, + repoPublishSummaryRead, + ); const categories = [ { @@ -253,6 +448,8 @@ function main() { { lane: "pluginMarketplace", status: toStatus(badgeDetails?.evidence?.pluginMarketplace?.status) }, { lane: "deviceNodes", status: toStatus(badgeDetails?.evidence?.deviceNodes?.status) }, { lane: "agentUsage", status: toStatus(badgeDetails?.evidence?.agentUsage?.status) }, + { lane: "runtimeGuardrailsSignalPaths", status: toStatus(runtimeGuardrails?.status) }, + { lane: "providerUsage", status: toStatus(providerUsage?.status) }, { lane: "deviceNodeUpdates", status: deriveDeviceNodeUpdatesStatus(visualManifest, badgeDetails) }, ]; @@ -263,15 +460,19 @@ function main() { const notes = []; for (const source of [ - { name: "summary", read: summaryRead }, - { name: "policy", read: policyRead }, - { name: "badge", read: badgeRead }, - { name: "badgeDetails", read: badgeDetailsRead }, - { name: "releaseEvidence", read: releaseEvidenceRead }, - { name: "visualManifest", read: visualManifestRead }, + { name: "summary", read: summaryRead, optional: false }, + { name: "policy", read: policyRead, optional: false }, + { name: "badge", read: badgeRead, optional: false }, + { name: "badgeDetails", read: badgeDetailsRead, optional: false }, + { name: "releaseEvidence", read: releaseEvidenceRead, optional: false }, + { name: "railwayDeploySummary", read: railwayDeploySummaryRead, optional: true }, + { name: "repoPublishSummary", read: repoPublishSummaryRead, optional: true }, + { name: "visualManifest", read: visualManifestRead, optional: false }, ]) { if (!source.read.present) { - notes.push(`${source.name} source is missing`); + if (!source.optional) { + notes.push(`${source.name} source is missing`); + } continue; } if (!source.read.parsed) { @@ -294,6 +495,20 @@ function main() { gatewayRoundTripMs: Number(summary?.kpis?.gatewayWsRoundTripMs ?? 0), costTotalUsd: Number(badgeDetails?.costEstimate?.totalUsd ?? summary?.costEstimate?.totalUsd ?? 0), tokensUsedTotal: Number(badgeDetails?.tokensUsed?.total ?? summary?.tokensUsed?.total ?? 0), + runtimeGuardrails: { + status: toStatus(runtimeGuardrails?.status), + summaryStatus: String(runtimeGuardrails?.summaryStatus ?? "n/a"), + totalPaths: Number(runtimeGuardrails?.totalPaths ?? 0), + primaryPathSummary: summarizePrimaryPath(runtimeGuardrails?.primaryPath), + }, + providerUsage: { + status: toStatus(providerUsage?.status), + activeSecondaryProviders: Number(providerUsage?.activeSecondaryProviders ?? 0), + entries: Array.isArray(providerUsage?.entries) + ? providerUsage.entries.map(toProviderEntrySummary).filter(Boolean) + : [], + }, + deployProvenance, categories, evidenceLanes, artifacts: { @@ -301,6 +516,8 @@ function main() { policyRel: toRelativePath(outputMarkdownPath, policyPath), badgeDetailsRel: toRelativePath(outputMarkdownPath, badgeDetailsPath), releaseEvidenceRel: toRelativePath(outputMarkdownPath, releaseEvidencePath), + railwayDeploySummaryRel: toRelativePath(outputMarkdownPath, railwayDeploySummaryPath), + repoPublishSummaryRel: toRelativePath(outputMarkdownPath, repoPublishSummaryPath), visualManifestRel: toRelativePath(outputMarkdownPath, visualManifestPath), visualChecklistRel: toRelativePath( outputMarkdownPath, diff --git a/scripts/judge-visual-evidence-pack.mjs b/scripts/judge-visual-evidence-pack.mjs index 6d1c9665..0eab3a6a 100644 --- a/scripts/judge-visual-evidence-pack.mjs +++ b/scripts/judge-visual-evidence-pack.mjs @@ -7,6 +7,8 @@ function parseArgs(argv) { outputMarkdown: "artifacts/judge-visual-evidence/manifest.md", badgeDetails: "artifacts/demo-e2e/badge-details.json", summary: "artifacts/demo-e2e/summary.json", + railwayDeploySummary: "artifacts/deploy/railway-deploy-summary.json", + repoPublishSummary: "artifacts/deploy/repo-publish-summary.json", screenshotDir: "artifacts/judge-visual-evidence/screenshots", strict: false, }; @@ -33,6 +35,14 @@ function parseArgs(argv) { options.summary = argv[++i]; continue; } + if (arg === "--railwayDeploySummary") { + options.railwayDeploySummary = argv[++i]; + continue; + } + if (arg === "--repoPublishSummary") { + options.repoPublishSummary = argv[++i]; + continue; + } if (arg === "--screenshotDir") { options.screenshotDir = argv[++i]; continue; @@ -83,6 +93,24 @@ function toStatusValue(value) { return "unavailable"; } +function toOptionalText(value) { + if (typeof value !== "string") { + return "unavailable"; + } + const normalized = value.trim(); + return normalized.length > 0 ? normalized : "unavailable"; +} + +function toEnabledLabel(value) { + if (value === true) { + return "enabled"; + } + if (value === false) { + return "disabled"; + } + return "n/a"; +} + function deriveDeviceNodeUpdatesStatus(deviceNodesEvidence) { if (!deviceNodesEvidence || typeof deviceNodesEvidence !== "object") { return "unavailable"; @@ -103,11 +131,93 @@ function deriveDeviceNodeUpdatesStatus(deviceNodesEvidence) { return "unavailable"; } +function buildDeployProvenanceRows(deployProvenance) { + const rows = []; + const railwayDeploy = deployProvenance.railwayDeploy; + const repoPublish = deployProvenance.repoPublish; + + if (railwayDeploy.available) { + rows.push({ + id: "railwayDeploy", + title: "Railway deploy", + summary: `status ${railwayDeploy.status}; deployment ${railwayDeploy.deploymentId}; public URL ${railwayDeploy.effectivePublicUrl}`, + }); + + const badgeParts = []; + if (railwayDeploy.badgeEndpoint !== "unavailable") { + badgeParts.push(`badge ${railwayDeploy.badgeEndpoint}`); + } + if (railwayDeploy.badgeDetailsEndpoint !== "unavailable") { + badgeParts.push(`badge-details ${railwayDeploy.badgeDetailsEndpoint}`); + } + if (badgeParts.length > 0) { + rows.push({ + id: "railwayBadge", + title: "Public badge", + summary: badgeParts.join("; "), + }); + } + } + + if (repoPublish.available) { + rows.push({ + id: "repoPublish", + title: "Repo publish", + summary: [ + `verification ${repoPublish.verificationScript}`, + repoPublish.releaseEvidenceValidated ? "release evidence validated" : "release evidence not validated", + `Railway deploy ${repoPublish.railwayDeployEnabledLabel}`, + `frontend deploy ${repoPublish.railwayFrontendDeployEnabledLabel}`, + ].join("; "), + }); + } + + return rows; +} + +function collectDeployProvenance(railwayDeploySummaryRead, repoPublishSummaryRead) { + const railwayDeploySummary = + railwayDeploySummaryRead.present && railwayDeploySummaryRead.parsed ? railwayDeploySummaryRead.value : null; + const repoPublishSummary = + repoPublishSummaryRead.present && repoPublishSummaryRead.parsed ? repoPublishSummaryRead.value : null; + const railwayChecks = railwayDeploySummary?.checks?.publicBadge ?? {}; + const repoPublishVerification = repoPublishSummary?.verification ?? {}; + const repoPublishSteps = repoPublishSummary?.steps ?? {}; + + const deployProvenance = { + available: Boolean(railwayDeploySummary || repoPublishSummary), + rows: [], + railwayDeploy: { + available: railwayDeploySummaryRead.present && railwayDeploySummaryRead.parsed, + status: toOptionalText(railwayDeploySummary?.status), + deploymentId: toOptionalText(railwayDeploySummary?.deploymentId), + effectivePublicUrl: toOptionalText(railwayDeploySummary?.effectivePublicUrl), + badgeEndpoint: toOptionalText(railwayChecks?.badgeEndpoint), + badgeDetailsEndpoint: toOptionalText(railwayChecks?.badgeDetailsEndpoint), + }, + repoPublish: { + available: repoPublishSummaryRead.present && repoPublishSummaryRead.parsed, + verificationScript: toOptionalText(repoPublishVerification?.script), + releaseEvidenceValidated: repoPublishVerification?.releaseEvidenceArtifactsValidated === true, + railwayDeployEnabled: repoPublishSteps?.railwayDeployEnabled === true, + railwayFrontendDeployEnabled: repoPublishSteps?.railwayFrontendDeployEnabled === true, + railwayDeployEnabledLabel: toEnabledLabel(repoPublishSteps?.railwayDeployEnabled), + railwayFrontendDeployEnabledLabel: toEnabledLabel(repoPublishSteps?.railwayFrontendDeployEnabled), + }, + }; + + deployProvenance.rows = buildDeployProvenanceRows(deployProvenance); + return deployProvenance; +} + function collectBadgeEvidence(badgeDetailsJson) { const evidence = badgeDetailsJson?.evidence && typeof badgeDetailsJson.evidence === "object" ? badgeDetailsJson.evidence : {}; const deviceNodesEvidence = evidence.deviceNodes && typeof evidence.deviceNodes === "object" ? evidence.deviceNodes : {}; + const providerUsage = badgeDetailsJson?.providerUsage && typeof badgeDetailsJson.providerUsage === "object" + ? badgeDetailsJson.providerUsage + : {}; return { operatorTurnTruncation: toStatusValue(evidence.operatorTurnTruncation?.status), @@ -118,6 +228,8 @@ function collectBadgeEvidence(badgeDetailsJson) { pluginMarketplace: toStatusValue(evidence.pluginMarketplace?.status), deviceNodes: toStatusValue(evidence.deviceNodes?.status), agentUsage: toStatusValue(evidence.agentUsage?.status), + runtimeGuardrailsSignalPaths: toStatusValue(evidence.runtimeGuardrailsSignalPaths?.status), + providerUsage: toStatusValue(providerUsage.status), deviceNodeUpdates: deriveDeviceNodeUpdatesStatus(deviceNodesEvidence), costEstimatePresent: badgeDetailsJson?.costEstimate && typeof badgeDetailsJson.costEstimate === "object", tokensUsedPresent: badgeDetailsJson?.tokensUsed && typeof badgeDetailsJson.tokensUsed === "object", @@ -239,6 +351,15 @@ function toMarkdown(manifest) { } lines.push(`| costEstimatePresent | ${manifest.badgeEvidence.costEstimatePresent} |`); lines.push(`| tokensUsedPresent | ${manifest.badgeEvidence.tokensUsedPresent} |`); + + if (manifest.deployProvenance.rows.length > 0) { + lines.push(""); + lines.push("## Deploy / Publish Provenance"); + lines.push(""); + for (const row of manifest.deployProvenance.rows) { + lines.push(`- ${row.title}: ${row.summary}`); + } + } return lines.join("\n"); } @@ -247,13 +368,18 @@ function main() { const badgeDetailsPath = toAbsolutePath(options.badgeDetails); const summaryPath = toAbsolutePath(options.summary); + const railwayDeploySummaryPath = toAbsolutePath(options.railwayDeploySummary); + const repoPublishSummaryPath = toAbsolutePath(options.repoPublishSummary); const screenshotDir = toAbsolutePath(options.screenshotDir); const outputJsonPath = toAbsolutePath(options.outputJson); const outputMarkdownPath = toAbsolutePath(options.outputMarkdown); const badgeDetailsRead = readJsonIfExists(badgeDetailsPath); const summaryRead = readJsonIfExists(summaryPath); + const railwayDeploySummaryRead = readJsonIfExists(railwayDeploySummaryPath); + const repoPublishSummaryRead = readJsonIfExists(repoPublishSummaryPath); const badgeEvidence = collectBadgeEvidence(badgeDetailsRead.value ?? {}); + const deployProvenance = collectDeployProvenance(railwayDeploySummaryRead, repoPublishSummaryRead); const checklist = evaluateChecklist(screenshotDir, buildChecklist()); const missingRequiredCaptures = checklist.filter((item) => item.present !== true).length; @@ -267,6 +393,8 @@ function main() { "pluginMarketplace", "deviceNodes", "agentUsage", + "runtimeGuardrailsSignalPaths", + "providerUsage", "deviceNodeUpdates", ]; @@ -290,17 +418,27 @@ function main() { summaryPresent: summaryRead.present, summaryParsed: summaryRead.parsed, summaryParseError: summaryRead.parseError, + railwayDeploySummaryPath, + railwayDeploySummaryPresent: railwayDeploySummaryRead.present, + railwayDeploySummaryParsed: railwayDeploySummaryRead.parsed, + railwayDeploySummaryParseError: railwayDeploySummaryRead.parseError, + repoPublishSummaryPath, + repoPublishSummaryPresent: repoPublishSummaryRead.present, + repoPublishSummaryParsed: repoPublishSummaryRead.parsed, + repoPublishSummaryParseError: repoPublishSummaryRead.parseError, screenshotDir, }, strictMode: options.strict === true, criticalBadgeLanes, badgeEvidence, + deployProvenance, screenshotChecklist: checklist, summary: { requiredCaptures: checklist.length, presentCaptures: checklist.length - missingRequiredCaptures, missingRequiredCaptures, missingCriticalBadgeEvidence, + deployProvenanceRows: deployProvenance.rows.length, }, overallStatus, }; diff --git a/scripts/release-readiness.ps1 b/scripts/release-readiness.ps1 index af36ec81..bfaeb328 100644 --- a/scripts/release-readiness.ps1 +++ b/scripts/release-readiness.ps1 @@ -33,7 +33,9 @@ param( [string]$PerfPolicyPath = "artifacts/perf-load/policy-check.json", [string]$SourceRunManifestPath = "artifacts/release-artifact-revalidation/source-run.json", [string]$ReleaseEvidenceReportPath = "artifacts/release-evidence/report.json", - [string]$ReleaseEvidenceReportMarkdownPath = "artifacts/release-evidence/report.md" + [string]$ReleaseEvidenceReportMarkdownPath = "artifacts/release-evidence/report.md", + [string]$ReleaseEvidenceManifestPath = "artifacts/release-evidence/manifest.json", + [string]$ReleaseEvidenceManifestMarkdownPath = "artifacts/release-evidence/manifest.md" ) $ErrorActionPreference = "Stop" @@ -98,6 +100,95 @@ function To-BoolOrNull([object]$Value) { return $null } +function Get-ObjectPropertyValue { + param( + [Parameter(Mandatory = $false)] + [object]$Object, + [Parameter(Mandatory = $true)] + [string]$Name + ) + + if ($null -eq $Object) { + return $null + } + + $property = $Object.PSObject.Properties[$Name] + if ($null -eq $property) { + return $null + } + + return $property.Value +} + +function Get-OptionalNonEmptyStringPropertyValue { + param( + [Parameter(Mandatory = $false)] + [object]$Object, + [Parameter(Mandatory = $true)] + [string]$Name, + [Parameter(Mandatory = $true)] + [string]$ContextLabel + ) + + $value = Get-ObjectPropertyValue -Object $Object -Name $Name + if ($null -eq $value) { + return $null + } + + $stringValue = [string]$value + if ([string]::IsNullOrWhiteSpace($stringValue)) { + Fail ($ContextLabel + "." + $Name + " expected non-empty string when provided") + } + + return $stringValue +} + +function Get-OptionalBooleanPropertyValue { + param( + [Parameter(Mandatory = $false)] + [object]$Object, + [Parameter(Mandatory = $true)] + [string]$Name, + [Parameter(Mandatory = $true)] + [string]$ContextLabel + ) + + $value = Get-ObjectPropertyValue -Object $Object -Name $Name + if ($null -eq $value) { + return $null + } + + $boolValue = To-BoolOrNull $value + if ($null -eq $boolValue) { + Fail ($ContextLabel + "." + $Name + " expected boolean when provided") + } + + return $boolValue +} + +function Get-OptionalNonNegativeNumberPropertyValue { + param( + [Parameter(Mandatory = $false)] + [object]$Object, + [Parameter(Mandatory = $true)] + [string]$Name, + [Parameter(Mandatory = $true)] + [string]$ContextLabel + ) + + $value = Get-ObjectPropertyValue -Object $Object -Name $Name + if ($null -eq $value) { + return [double]::NaN + } + + $numberValue = To-NumberOrNaN $value + if ([double]::IsNaN($numberValue) -or $numberValue -lt 0) { + Fail ($ContextLabel + "." + $Name + " expected >= 0 when provided") + } + + return $numberValue +} + function Fail([string]$Message) { Write-Error $Message exit 1 @@ -168,10 +259,11 @@ if (-not $SkipProfileSmoke) { if ((-not $SkipDemoE2E) -and (-not $SkipDemoRun)) { $runFastDemo = $UseFastDemoE2E -or (-not $SkipBuild) $scenarioRetryArgs = "-ScenarioRetryMaxAttempts $DemoScenarioRetryMaxAttempts -ScenarioRetryBackoffMs $DemoScenarioRetryBackoffMs" + $serviceRestartArgs = "-RestartHealthyServices" $demoCommand = if ($runFastDemo) { - "npm run demo:e2e:fast -- -StartupTimeoutSec $DemoStartupTimeoutSec -RequestTimeoutSec $DemoRequestTimeoutSec $scenarioRetryArgs" + "npm run demo:e2e:fast -- -StartupTimeoutSec $DemoStartupTimeoutSec -RequestTimeoutSec $DemoRequestTimeoutSec $scenarioRetryArgs $serviceRestartArgs" } else { - "npm run demo:e2e -- -StartupTimeoutSec $DemoStartupTimeoutSec -RequestTimeoutSec $DemoRequestTimeoutSec $scenarioRetryArgs" + "npm run demo:e2e -- -StartupTimeoutSec $DemoStartupTimeoutSec -RequestTimeoutSec $DemoRequestTimeoutSec $scenarioRetryArgs $serviceRestartArgs" } Run-StepWithRetry "Run demo e2e" $demoCommand $DemoRunMaxAttempts $DemoRunRetryBackoffMs } @@ -206,7 +298,7 @@ if (Test-Path $BadgeDetailsPath) { } Write-Host "[release-check] Build release evidence report" - & powershell -NoProfile -ExecutionPolicy Bypass -File $releaseEvidenceScriptPath -BadgeDetailsPath $BadgeDetailsPath -OutputJsonPath $ReleaseEvidenceReportPath -OutputMarkdownPath $ReleaseEvidenceReportMarkdownPath + & powershell -NoProfile -ExecutionPolicy Bypass -File $releaseEvidenceScriptPath -BadgeDetailsPath $BadgeDetailsPath -OutputJsonPath $ReleaseEvidenceReportPath -OutputMarkdownPath $ReleaseEvidenceReportMarkdownPath -OutputManifestJsonPath $ReleaseEvidenceManifestPath -OutputManifestMarkdownPath $ReleaseEvidenceManifestMarkdownPath if ($LASTEXITCODE -ne 0) { Fail "Step failed: Build release evidence report" } @@ -237,6 +329,8 @@ if ($IsArtifactOnlyMode) { if (Test-Path $BadgeDetailsPath) { $requiredFiles += $ReleaseEvidenceReportPath $requiredFiles += $ReleaseEvidenceReportMarkdownPath + $requiredFiles += $ReleaseEvidenceManifestPath + $requiredFiles += $ReleaseEvidenceManifestMarkdownPath } $missing = @($requiredFiles | Where-Object { -not (Test-Path $_) }) @@ -422,6 +516,105 @@ if ($IsArtifactOnlyMode -and (Test-Path $SourceRunManifestPath)) { ) } + $manifestRuntimeGuardrailsSignalPathsStatusRaw = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsStatus + $manifestRuntimeGuardrailsSignalPathsStatus = $manifestRuntimeGuardrailsSignalPathsStatusRaw.ToLowerInvariant() + if ($manifestRuntimeGuardrailsSignalPathsStatus -ne "pass") { + Fail ( + "source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsStatus expected pass, actual " + + $manifestRuntimeGuardrailsSignalPathsStatusRaw + ) + } + + $manifestRuntimeGuardrailsSignalPathsSummaryStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsSummaryStatus + if ([string]::IsNullOrWhiteSpace($manifestRuntimeGuardrailsSignalPathsSummaryStatus)) { + Fail ("source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsSummaryStatus is required") + } + + $manifestRuntimeGuardrailsSignalPathsTotalPaths = To-NumberOrNaN $manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsTotalPaths + if ([double]::IsNaN($manifestRuntimeGuardrailsSignalPathsTotalPaths) -or $manifestRuntimeGuardrailsSignalPathsTotalPaths -lt 0) { + Fail ( + "source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsTotalPaths expected >= 0, actual " + + $manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsTotalPaths + ) + } + + $manifestRuntimeGuardrailsSignalPathsPrimaryPath = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath" + if ($manifestRuntimeGuardrailsSignalPathsTotalPaths -eq 0) { + if ($null -ne $manifestRuntimeGuardrailsSignalPathsPrimaryPath) { + Fail ("source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath expected null when totalPaths=0") + } + } + else { + if ($null -eq $manifestRuntimeGuardrailsSignalPathsPrimaryPath) { + Fail ("source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath is required when totalPaths > 0") + } + + $manifestRuntimeGuardrailsSignalPathsPrimaryPathTitle = [string](Get-ObjectPropertyValue -Object $manifestRuntimeGuardrailsSignalPathsPrimaryPath -Name "title") + $manifestRuntimeGuardrailsSignalPathsPrimaryPathKind = [string](Get-ObjectPropertyValue -Object $manifestRuntimeGuardrailsSignalPathsPrimaryPath -Name "kind") + $manifestRuntimeGuardrailsSignalPathsPrimaryPathSummaryText = [string](Get-ObjectPropertyValue -Object $manifestRuntimeGuardrailsSignalPathsPrimaryPath -Name "summaryText") + if ( + [string]::IsNullOrWhiteSpace($manifestRuntimeGuardrailsSignalPathsPrimaryPathTitle) -or + [string]::IsNullOrWhiteSpace($manifestRuntimeGuardrailsSignalPathsPrimaryPathKind) -or + [string]::IsNullOrWhiteSpace($manifestRuntimeGuardrailsSignalPathsPrimaryPathSummaryText) + ) { + Fail ( + "source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath expected title/kind/summaryText when totalPaths > 0" + ) + } + } + + $manifestProviderUsageStatusRaw = [string]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageStatus + $manifestProviderUsageStatus = $manifestProviderUsageStatusRaw.ToLowerInvariant() + if ($manifestProviderUsageStatus -ne "pass") { + Fail ( + "source run manifest evidenceSnapshot.badgeEvidenceProviderUsageStatus expected pass, actual " + + $manifestProviderUsageStatusRaw + ) + } + + $manifestProviderUsageValidated = [bool]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageValidated + if (-not $manifestProviderUsageValidated) { + Fail ("source run manifest evidenceSnapshot.badgeEvidenceProviderUsageValidated expected true") + } + + $manifestProviderUsageActiveSecondaryProviders = To-NumberOrNaN $manifestEvidenceSnapshot.badgeEvidenceProviderUsageActiveSecondaryProviders + if ([double]::IsNaN($manifestProviderUsageActiveSecondaryProviders) -or $manifestProviderUsageActiveSecondaryProviders -lt 0) { + Fail ( + "source run manifest evidenceSnapshot.badgeEvidenceProviderUsageActiveSecondaryProviders expected >= 0, actual " + + $manifestEvidenceSnapshot.badgeEvidenceProviderUsageActiveSecondaryProviders + ) + } + + $manifestProviderUsageEntriesCount = To-NumberOrNaN $manifestEvidenceSnapshot.badgeEvidenceProviderUsageEntriesCount + if ([double]::IsNaN($manifestProviderUsageEntriesCount) -or $manifestProviderUsageEntriesCount -lt 1) { + Fail ( + "source run manifest evidenceSnapshot.badgeEvidenceProviderUsageEntriesCount expected >= 1, actual " + + $manifestEvidenceSnapshot.badgeEvidenceProviderUsageEntriesCount + ) + } + + $manifestProviderUsagePrimaryEntry = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "badgeEvidenceProviderUsagePrimaryEntry" + if ($null -eq $manifestProviderUsagePrimaryEntry) { + Fail ("source run manifest evidenceSnapshot.badgeEvidenceProviderUsagePrimaryEntry is required when entriesCount > 0") + } + + $manifestProviderUsagePrimaryEntryRoute = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "route") + $manifestProviderUsagePrimaryEntryCapability = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "capability") + $manifestProviderUsagePrimaryEntrySelectedProvider = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectedProvider") + $manifestProviderUsagePrimaryEntrySelectedModel = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectedModel") + $manifestProviderUsagePrimaryEntrySelectionReason = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectionReason") + if ( + [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntryRoute) -or + [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntryCapability) -or + [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntrySelectedProvider) -or + [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntrySelectedModel) -or + [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntrySelectionReason) + ) { + Fail ( + "source run manifest evidenceSnapshot.badgeEvidenceProviderUsagePrimaryEntry expected route/capability/selectedProvider/selectedModel/selectionReason when entriesCount > 0" + ) + } + $manifestDeviceNodeUpdatesStatusRaw = [string]$manifestEvidenceSnapshot.badgeEvidenceDeviceNodeUpdatesStatus $manifestDeviceNodeUpdatesStatus = $manifestDeviceNodeUpdatesStatusRaw.ToLowerInvariant() if ($manifestDeviceNodeUpdatesStatus -ne "pass") { @@ -431,6 +624,105 @@ if ($IsArtifactOnlyMode -and (Test-Path $SourceRunManifestPath)) { ) } + $manifestRailwayDeploySummaryPresent = To-BoolOrNull $manifestEvidenceSnapshot.railwayDeploySummaryPresent + if ($null -eq $manifestRailwayDeploySummaryPresent) { + $manifestRailwayDeploySummaryPresent = $false + } + if ($manifestRailwayDeploySummaryPresent) { + $manifestEvidenceContextLabel = "source run manifest evidenceSnapshot" + $manifestRailwayDeploySummaryStatus = [string]$manifestEvidenceSnapshot.railwayDeploySummaryStatus + if ([string]::IsNullOrWhiteSpace($manifestRailwayDeploySummaryStatus)) { + Fail ("source run manifest evidenceSnapshot.railwayDeploySummaryStatus is required when railwayDeploySummaryPresent=true") + } + + $manifestRailwayDeploySummaryDeploymentId = [string]$manifestEvidenceSnapshot.railwayDeploySummaryDeploymentId + if ([string]::IsNullOrWhiteSpace($manifestRailwayDeploySummaryDeploymentId)) { + Fail ("source run manifest evidenceSnapshot.railwayDeploySummaryDeploymentId is required when railwayDeploySummaryPresent=true") + } + + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEffectivePublicUrl" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryBadgeEndpoint" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryBadgeDetailsEndpoint" -ContextLabel $manifestEvidenceContextLabel + # Contract: source run manifest evidenceSnapshot.railwayDeploySummaryProjectId expected non-empty string when provided + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryProjectId" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryService" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEnvironment" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEffectiveStartCommand" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryConfigSource" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorExpectedUiUrl" -ContextLabel $manifestEvidenceContextLabel + + # Contract: source run manifest evidenceSnapshot.railwayDeploySummaryRootDescriptorAttempted expected boolean when provided + $manifestRailwayDeploySummaryRootDescriptorAttempted = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorAttempted" -ContextLabel $manifestEvidenceContextLabel + $manifestRailwayDeploySummaryRootDescriptorSkipped = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorSkipped" -ContextLabel $manifestEvidenceContextLabel + if ( + ($null -ne $manifestRailwayDeploySummaryRootDescriptorAttempted) -and + ($null -ne $manifestRailwayDeploySummaryRootDescriptorSkipped) -and + $manifestRailwayDeploySummaryRootDescriptorAttempted -and + $manifestRailwayDeploySummaryRootDescriptorSkipped + ) { + Fail ("source run manifest evidenceSnapshot.railwayDeploySummaryRootDescriptorAttempted and railwayDeploySummaryRootDescriptorSkipped cannot both be true") + } + + $manifestRailwayDeploySummaryPublicBadgeAttempted = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryPublicBadgeAttempted" -ContextLabel $manifestEvidenceContextLabel + $manifestRailwayDeploySummaryPublicBadgeSkipped = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryPublicBadgeSkipped" -ContextLabel $manifestEvidenceContextLabel + if ( + ($null -ne $manifestRailwayDeploySummaryPublicBadgeAttempted) -and + ($null -ne $manifestRailwayDeploySummaryPublicBadgeSkipped) -and + $manifestRailwayDeploySummaryPublicBadgeAttempted -and + $manifestRailwayDeploySummaryPublicBadgeSkipped + ) { + Fail ("source run manifest evidenceSnapshot.railwayDeploySummaryPublicBadgeAttempted and railwayDeploySummaryPublicBadgeSkipped cannot both be true") + } + } + + $manifestRepoPublishSummaryPresent = To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryPresent + if ($null -eq $manifestRepoPublishSummaryPresent) { + $manifestRepoPublishSummaryPresent = $false + } + if ($manifestRepoPublishSummaryPresent) { + $manifestRepoPublishSummaryVerificationScript = [string]$manifestEvidenceSnapshot.repoPublishSummaryVerificationScript + if ([string]::IsNullOrWhiteSpace($manifestRepoPublishSummaryVerificationScript)) { + Fail ("source run manifest evidenceSnapshot.repoPublishSummaryVerificationScript is required when repoPublishSummaryPresent=true") + } + + $manifestRepoPublishSummaryReleaseEvidenceValidated = To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryReleaseEvidenceValidated + if ($manifestRepoPublishSummaryReleaseEvidenceValidated -ne $true) { + Fail ("source run manifest evidenceSnapshot.repoPublishSummaryReleaseEvidenceValidated expected true when repoPublishSummaryPresent=true") + } + + $manifestRepoPublishSummaryRailwayDeployEnabled = To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryRailwayDeployEnabled + if ($null -eq $manifestRepoPublishSummaryRailwayDeployEnabled) { + Fail ("source run manifest evidenceSnapshot.repoPublishSummaryRailwayDeployEnabled is required when repoPublishSummaryPresent=true") + } + + $manifestRepoPublishSummaryRailwayFrontendDeployEnabled = To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryRailwayFrontendDeployEnabled + if ($null -eq $manifestRepoPublishSummaryRailwayFrontendDeployEnabled) { + Fail ("source run manifest evidenceSnapshot.repoPublishSummaryRailwayFrontendDeployEnabled is required when repoPublishSummaryPresent=true") + } + + $manifestEvidenceContextLabel = "source run manifest evidenceSnapshot" + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryBranch" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRemoteName" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayPublicUrl" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayDemoFrontendPublicUrl" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactSelf" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactRailwayDeploySummary" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactReleaseEvidenceReportJson" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactReleaseEvidenceManifestJson" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactBadgeDetailsJson" -ContextLabel $manifestEvidenceContextLabel + + $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryVerificationSkipped" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryVerificationStrict" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryCommitEnabled" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryPushEnabled" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryPagesEnabled" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryBadgeCheckEnabled" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayNoWait" -ContextLabel $manifestEvidenceContextLabel + $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayFrontendNoWait" -ContextLabel $manifestEvidenceContextLabel + # Contract: source run manifest evidenceSnapshot.repoPublishSummaryReleaseEvidenceArtifactsCount expected >= 0 when provided + $null = Get-OptionalNonNegativeNumberPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryReleaseEvidenceArtifactsCount" -ContextLabel $manifestEvidenceContextLabel + } + $manifestDamageControlLatestVerdictRaw = [string]$manifestEvidenceSnapshot.operatorDamageControlLatestVerdict $manifestDamageControlLatestVerdict = $manifestDamageControlLatestVerdictRaw.ToLowerInvariant() $allowedOperatorDamageControlLatestVerdicts = @("allow", "ask", "block") @@ -1523,6 +1815,16 @@ if ((-not $SkipDemoE2E) -and (Test-Path $SummaryPath)) { Fail ("Critical KPI check failed: assistiveRouterMode expected deterministic|assistive_override|assistive_match|assistive_fallback, actual " + $assistiveRouterMode) } + $assistiveRouterProviderMetadataValidated = To-BoolOrNull $summary.kpis.assistiveRouterProviderMetadataValidated + if ($assistiveRouterProviderMetadataValidated -ne $true) { + Fail ("Critical KPI check failed: assistiveRouterProviderMetadataValidated expected True, actual " + $summary.kpis.assistiveRouterProviderMetadataValidated) + } + + $assistiveRouterProvider = [string]$summary.kpis.assistiveRouterProvider + if (@("gemini_api", "openai", "anthropic", "deepseek", "moonshot") -notcontains $assistiveRouterProvider) { + Fail ("Critical KPI check failed: assistiveRouterProvider expected gemini_api|openai|anthropic|deepseek|moonshot, actual " + $assistiveRouterProvider) + } + $transportModeValidated = To-BoolOrNull $summary.kpis.transportModeValidated if ($transportModeValidated -ne $true) { Fail ("Critical KPI check failed: transportModeValidated expected True, actual " + $summary.kpis.transportModeValidated) @@ -2037,7 +2339,60 @@ if ($IsArtifactOnlyMode -and (Test-Path $SourceRunManifestPath)) { $manifestPluginMarketplaceStatus = [string]$manifestEvidenceSnapshot.badgeEvidencePluginMarketplaceStatus $manifestDeviceNodesStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceDeviceNodesStatus $manifestAgentUsageStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceAgentUsageStatus + $manifestRuntimeGuardrailsSignalPathsStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsStatus + $manifestRuntimeGuardrailsSignalPathsSummaryStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsSummaryStatus + $manifestRuntimeGuardrailsSignalPathsTotalPaths = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsTotalPaths + $manifestRuntimeGuardrailsSignalPathsPrimaryPath = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath" + $manifestRuntimeGuardrailsSignalPathsPrimaryPathTitle = if ($null -ne $manifestRuntimeGuardrailsSignalPathsPrimaryPath) { [string](Get-ObjectPropertyValue -Object $manifestRuntimeGuardrailsSignalPathsPrimaryPath -Name "title") } else { "" } + $manifestProviderUsageStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageStatus + $manifestProviderUsageValidated = if ([bool]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageValidated) { "true" } else { "false" } + $manifestProviderUsageActiveSecondaryProviders = [string]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageActiveSecondaryProviders + $manifestProviderUsageEntriesCount = [string]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageEntriesCount + $manifestProviderUsagePrimaryEntry = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "badgeEvidenceProviderUsagePrimaryEntry" + $manifestProviderUsagePrimaryEntryRoute = if ($null -ne $manifestProviderUsagePrimaryEntry) { [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "route") } else { "" } + $manifestProviderUsagePrimaryEntryCapability = if ($null -ne $manifestProviderUsagePrimaryEntry) { [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "capability") } else { "" } + $manifestProviderUsagePrimaryEntrySelectedProvider = if ($null -ne $manifestProviderUsagePrimaryEntry) { [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectedProvider") } else { "" } + $manifestProviderUsagePrimaryEntrySelectedModel = if ($null -ne $manifestProviderUsagePrimaryEntry) { [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectedModel") } else { "" } $manifestDeviceNodeUpdatesStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceDeviceNodeUpdatesStatus + $manifestRailwayDeploySummaryPresent = if ((To-BoolOrNull $manifestEvidenceSnapshot.railwayDeploySummaryPresent) -eq $true) { "true" } else { "false" } + $manifestRailwayDeploySummaryStatus = [string]$manifestEvidenceSnapshot.railwayDeploySummaryStatus + $manifestRailwayDeploySummaryDeploymentId = [string]$manifestEvidenceSnapshot.railwayDeploySummaryDeploymentId + $manifestRailwayDeploySummaryEffectivePublicUrl = [string]$manifestEvidenceSnapshot.railwayDeploySummaryEffectivePublicUrl + $manifestRailwayDeploySummaryBadgeEndpoint = [string]$manifestEvidenceSnapshot.railwayDeploySummaryBadgeEndpoint + $manifestRailwayDeploySummaryBadgeDetailsEndpoint = [string]$manifestEvidenceSnapshot.railwayDeploySummaryBadgeDetailsEndpoint + $manifestRailwayDeploySummaryProjectId = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryProjectId") + $manifestRailwayDeploySummaryService = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryService") + $manifestRailwayDeploySummaryEnvironment = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEnvironment") + $manifestRailwayDeploySummaryEffectiveStartCommand = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEffectiveStartCommand") + $manifestRailwayDeploySummaryConfigSource = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryConfigSource") + $manifestRailwayDeploySummaryRootDescriptorAttempted = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorAttempted" + $manifestRailwayDeploySummaryRootDescriptorSkipped = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorSkipped" + $manifestRailwayDeploySummaryRootDescriptorExpectedUiUrl = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorExpectedUiUrl") + $manifestRailwayDeploySummaryPublicBadgeAttempted = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryPublicBadgeAttempted" + $manifestRailwayDeploySummaryPublicBadgeSkipped = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryPublicBadgeSkipped" + $manifestRepoPublishSummaryPresent = if ((To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryPresent) -eq $true) { "true" } else { "false" } + $manifestRepoPublishSummaryBranch = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryBranch") + $manifestRepoPublishSummaryRemoteName = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRemoteName") + $manifestRepoPublishSummaryVerificationScript = [string]$manifestEvidenceSnapshot.repoPublishSummaryVerificationScript + $manifestRepoPublishSummaryVerificationSkipped = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryVerificationSkipped" + $manifestRepoPublishSummaryVerificationStrict = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryVerificationStrict" + $manifestRepoPublishSummaryReleaseEvidenceValidated = if ((To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryReleaseEvidenceValidated) -eq $true) { "true" } else { "false" } + $manifestRepoPublishSummaryReleaseEvidenceArtifactsCount = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryReleaseEvidenceArtifactsCount") + $manifestRepoPublishSummaryCommitEnabled = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryCommitEnabled" + $manifestRepoPublishSummaryPushEnabled = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryPushEnabled" + $manifestRepoPublishSummaryPagesEnabled = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryPagesEnabled" + $manifestRepoPublishSummaryBadgeCheckEnabled = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryBadgeCheckEnabled" + $manifestRepoPublishSummaryRailwayDeployEnabled = if ((To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryRailwayDeployEnabled) -eq $true) { "true" } else { "false" } + $manifestRepoPublishSummaryRailwayFrontendDeployEnabled = if ((To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryRailwayFrontendDeployEnabled) -eq $true) { "true" } else { "false" } + $manifestRepoPublishSummaryRuntimeRailwayPublicUrl = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayPublicUrl") + $manifestRepoPublishSummaryRuntimeRailwayDemoFrontendPublicUrl = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayDemoFrontendPublicUrl") + $manifestRepoPublishSummaryRuntimeRailwayNoWait = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayNoWait" + $manifestRepoPublishSummaryRuntimeRailwayFrontendNoWait = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayFrontendNoWait" + $manifestRepoPublishSummaryArtifactSelf = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactSelf") + $manifestRepoPublishSummaryArtifactRailwayDeploySummary = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactRailwayDeploySummary") + $manifestRepoPublishSummaryArtifactReleaseEvidenceReportJson = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactReleaseEvidenceReportJson") + $manifestRepoPublishSummaryArtifactReleaseEvidenceManifestJson = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactReleaseEvidenceManifestJson") + $manifestRepoPublishSummaryArtifactBadgeDetailsJson = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactBadgeDetailsJson") $manifestDamageControlLatestVerdict = [string]$manifestEvidenceSnapshot.operatorDamageControlLatestVerdict $manifestDamageControlLatestSource = [string]$manifestEvidenceSnapshot.operatorDamageControlLatestSource Write-Host ( @@ -2053,9 +2408,62 @@ if ($IsArtifactOnlyMode -and (Test-Path $SourceRunManifestPath)) { ", plugin_marketplace_status=" + $manifestPluginMarketplaceStatus + ", device_nodes_status=" + $manifestDeviceNodesStatus + ", agent_usage_status=" + $manifestAgentUsageStatus + + ", runtime_guardrails_signal_paths_status=" + $manifestRuntimeGuardrailsSignalPathsStatus + + ", runtime_guardrails_signal_paths_summary_status=" + $manifestRuntimeGuardrailsSignalPathsSummaryStatus + + ", runtime_guardrails_signal_paths_total_paths=" + $manifestRuntimeGuardrailsSignalPathsTotalPaths + + ", runtime_guardrails_signal_paths_primary_path_title=" + $manifestRuntimeGuardrailsSignalPathsPrimaryPathTitle + + ", provider_usage_status=" + $manifestProviderUsageStatus + + ", provider_usage_validated=" + $manifestProviderUsageValidated + + ", provider_usage_active_secondary_providers=" + $manifestProviderUsageActiveSecondaryProviders + + ", provider_usage_entries_count=" + $manifestProviderUsageEntriesCount + + ", provider_usage_primary_entry_route=" + $manifestProviderUsagePrimaryEntryRoute + + ", provider_usage_primary_entry_capability=" + $manifestProviderUsagePrimaryEntryCapability + + ", provider_usage_primary_entry_selected_provider=" + $manifestProviderUsagePrimaryEntrySelectedProvider + + ", provider_usage_primary_entry_selected_model=" + $manifestProviderUsagePrimaryEntrySelectedModel + ", device_node_updates_status=" + $manifestDeviceNodeUpdatesStatus + + ", railway_deploy_summary_present=" + $manifestRailwayDeploySummaryPresent + + ", railway_deploy_summary_status=" + $manifestRailwayDeploySummaryStatus + + ", railway_deploy_summary_deployment_id=" + $manifestRailwayDeploySummaryDeploymentId + + ", railway_deploy_summary_public_url=" + $manifestRailwayDeploySummaryEffectivePublicUrl + + ", railway_deploy_summary_badge_endpoint=" + $manifestRailwayDeploySummaryBadgeEndpoint + + ", railway_deploy_summary_badge_details_endpoint=" + $manifestRailwayDeploySummaryBadgeDetailsEndpoint + + ", railway_deploy_summary_project_id=" + $manifestRailwayDeploySummaryProjectId + + ", railway_deploy_summary_service=" + $manifestRailwayDeploySummaryService + + ", railway_deploy_summary_environment=" + $manifestRailwayDeploySummaryEnvironment + + ", railway_deploy_summary_effective_start_command=" + $manifestRailwayDeploySummaryEffectiveStartCommand + + ", railway_deploy_summary_config_source=" + $manifestRailwayDeploySummaryConfigSource + + ", railway_deploy_summary_root_descriptor_attempted=" + $(if ($null -ne $manifestRailwayDeploySummaryRootDescriptorAttempted) { [string]$manifestRailwayDeploySummaryRootDescriptorAttempted } else { "" }) + + ", railway_deploy_summary_root_descriptor_skipped=" + $(if ($null -ne $manifestRailwayDeploySummaryRootDescriptorSkipped) { [string]$manifestRailwayDeploySummaryRootDescriptorSkipped } else { "" }) + + ", railway_deploy_summary_expected_ui_url=" + $manifestRailwayDeploySummaryRootDescriptorExpectedUiUrl + + ", railway_deploy_summary_public_badge_attempted=" + $(if ($null -ne $manifestRailwayDeploySummaryPublicBadgeAttempted) { [string]$manifestRailwayDeploySummaryPublicBadgeAttempted } else { "" }) + + ", railway_deploy_summary_public_badge_skipped=" + $(if ($null -ne $manifestRailwayDeploySummaryPublicBadgeSkipped) { [string]$manifestRailwayDeploySummaryPublicBadgeSkipped } else { "" }) + + ", repo_publish_summary_present=" + $manifestRepoPublishSummaryPresent + + ", repo_publish_summary_branch=" + $manifestRepoPublishSummaryBranch + + ", repo_publish_summary_remote_name=" + $manifestRepoPublishSummaryRemoteName + + ", repo_publish_summary_verification_script=" + $manifestRepoPublishSummaryVerificationScript + + ", repo_publish_summary_verification_skipped=" + $(if ($null -ne $manifestRepoPublishSummaryVerificationSkipped) { [string]$manifestRepoPublishSummaryVerificationSkipped } else { "" }) + + ", repo_publish_summary_verification_strict=" + $(if ($null -ne $manifestRepoPublishSummaryVerificationStrict) { [string]$manifestRepoPublishSummaryVerificationStrict } else { "" }) + + ", repo_publish_summary_release_evidence_validated=" + $manifestRepoPublishSummaryReleaseEvidenceValidated + + ", repo_publish_summary_release_evidence_artifacts_count=" + $manifestRepoPublishSummaryReleaseEvidenceArtifactsCount + + ", repo_publish_summary_commit_enabled=" + $(if ($null -ne $manifestRepoPublishSummaryCommitEnabled) { [string]$manifestRepoPublishSummaryCommitEnabled } else { "" }) + + ", repo_publish_summary_push_enabled=" + $(if ($null -ne $manifestRepoPublishSummaryPushEnabled) { [string]$manifestRepoPublishSummaryPushEnabled } else { "" }) + + ", repo_publish_summary_pages_enabled=" + $(if ($null -ne $manifestRepoPublishSummaryPagesEnabled) { [string]$manifestRepoPublishSummaryPagesEnabled } else { "" }) + + ", repo_publish_summary_badge_check_enabled=" + $(if ($null -ne $manifestRepoPublishSummaryBadgeCheckEnabled) { [string]$manifestRepoPublishSummaryBadgeCheckEnabled } else { "" }) + + ", repo_publish_summary_railway_deploy_enabled=" + $manifestRepoPublishSummaryRailwayDeployEnabled + + ", repo_publish_summary_railway_frontend_deploy_enabled=" + $manifestRepoPublishSummaryRailwayFrontendDeployEnabled + + ", repo_publish_summary_runtime_railway_public_url=" + $manifestRepoPublishSummaryRuntimeRailwayPublicUrl + + ", repo_publish_summary_runtime_railway_frontend_public_url=" + $manifestRepoPublishSummaryRuntimeRailwayDemoFrontendPublicUrl + + ", repo_publish_summary_runtime_railway_no_wait=" + $(if ($null -ne $manifestRepoPublishSummaryRuntimeRailwayNoWait) { [string]$manifestRepoPublishSummaryRuntimeRailwayNoWait } else { "" }) + + ", repo_publish_summary_runtime_railway_frontend_no_wait=" + $(if ($null -ne $manifestRepoPublishSummaryRuntimeRailwayFrontendNoWait) { [string]$manifestRepoPublishSummaryRuntimeRailwayFrontendNoWait } else { "" }) + + ", repo_publish_summary_artifact_self=" + $manifestRepoPublishSummaryArtifactSelf + + ", repo_publish_summary_artifact_railway_deploy_summary=" + $manifestRepoPublishSummaryArtifactRailwayDeploySummary + + ", repo_publish_summary_artifact_release_evidence_report_json=" + $manifestRepoPublishSummaryArtifactReleaseEvidenceReportJson + + ", repo_publish_summary_artifact_release_evidence_manifest_json=" + $manifestRepoPublishSummaryArtifactReleaseEvidenceManifestJson + + ", repo_publish_summary_artifact_badge_details_json=" + $manifestRepoPublishSummaryArtifactBadgeDetailsJson + ", operator_damage_control_latest_verdict=" + $manifestDamageControlLatestVerdict + ", operator_damage_control_latest_source=" + $manifestDamageControlLatestSource ) } } + + diff --git a/tests/unit/judge-presentation-bundle-alignment.test.ts b/tests/unit/judge-presentation-bundle-alignment.test.ts index 759fc3ef..6d06f535 100644 --- a/tests/unit/judge-presentation-bundle-alignment.test.ts +++ b/tests/unit/judge-presentation-bundle-alignment.test.ts @@ -33,11 +33,17 @@ test("judge presentation bundle script is wired across package scripts and docs" "npm run demo:e2e:visual:bundle", "npm run demo:e2e:visual:judge", "presentation.md", + "railway-deploy-summary.json", + "repo-publish-summary.json", ]; for (const token of docTokens) { assert.ok(readme.includes(token), `README missing presentation token: ${token}`); assert.ok(visualDoc.includes(token), `judge visual evidence doc missing presentation token: ${token}`); } + assert.ok( + visualDoc.includes("compact deploy/publish provenance"), + "judge visual evidence doc missing compact provenance wording", + ); assert.ok( quickstart.includes("npm run demo:e2e:visual:bundle"), "judge quickstart missing visual bundle command", @@ -49,9 +55,20 @@ test("judge presentation bundle script is wired across package scripts and docs" "Judge Presentation Bundle", "Challenge Category Coverage", "Critical Evidence Lanes", + "Runtime Guardrails Snapshot", + "Provider Adapter Snapshot", + "Deploy / Publish Provenance", "release-evidence/report.json", + "railway-deploy-summary.json", + "repo-publish-summary.json", "manifest.json", "gallery.md", + "runtimeGuardrailsSignalPaths", + "providerUsage", + "railwayDeploySummary", + "repoPublishSummary", + "sanitizeDeployProvenanceRows", + "buildDeployProvenanceRows", "pluginMarketplace", "deviceNodeUpdates", ]; diff --git a/tests/unit/judge-presentation-bundle-script.test.ts b/tests/unit/judge-presentation-bundle-script.test.ts new file mode 100644 index 00000000..98dc89ba --- /dev/null +++ b/tests/unit/judge-presentation-bundle-script.test.ts @@ -0,0 +1,492 @@ +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { spawnSync } from "node:child_process"; +import { join, resolve } from "node:path"; +import { tmpdir } from "node:os"; +import test from "node:test"; + +function runPresentationBundle(args) { + const scriptPath = resolve(process.cwd(), "scripts", "judge-presentation-bundle.mjs"); + return spawnSync(process.execPath, [scriptPath, ...args], { + cwd: process.cwd(), + encoding: "utf8", + }); +} + +test("judge presentation bundle includes runtime guardrails and provider adapter snapshots", () => { + const baseDir = mkdtempSync(join(tmpdir(), "mla-presentation-bundle-")); + const summaryPath = join(baseDir, "summary.json"); + const policyPath = join(baseDir, "policy-check.json"); + const badgePath = join(baseDir, "badge.json"); + const badgeDetailsPath = join(baseDir, "badge-details.json"); + const releaseEvidencePath = join(baseDir, "report.json"); + const railwayDeploySummaryPath = join(baseDir, "railway-deploy-summary.json"); + const repoPublishSummaryPath = join(baseDir, "repo-publish-summary.json"); + const visualManifestPath = join(baseDir, "manifest.json"); + const visualGalleryPath = join(baseDir, "gallery.md"); + const outputMarkdownPath = join(baseDir, "presentation.md"); + + writeFileSync( + summaryPath, + JSON.stringify( + { + scenarios: [ + { name: "live.negotiation", status: "passed" }, + { name: "storyteller.pipeline", status: "passed" }, + { name: "ui.approval.approve_resume", status: "passed" }, + ], + kpis: { + gatewayWsRoundTripMs: 37, + }, + }, + null, + 2, + ), + ); + writeFileSync( + policyPath, + JSON.stringify( + { + ok: true, + checks: 281, + violations: [], + }, + null, + 2, + ), + ); + writeFileSync( + badgePath, + JSON.stringify( + { + label: "Demo KPI Gate", + message: "pass | 281 checks | 37ms ws", + color: "brightgreen", + }, + null, + 2, + ), + ); + writeFileSync( + badgeDetailsPath, + JSON.stringify( + { + costEstimate: { totalUsd: 0.53 }, + tokensUsed: { total: 22640 }, + providerUsage: { + status: "pass", + activeSecondaryProviders: 1, + entries: [ + { + route: "storyteller-agent", + capability: "tts", + selectedProvider: "deepgram", + selectedModel: "aura-2", + defaultProvider: "gemini_api", + selectionReason: "provider_override", + secondaryActive: true, + }, + ], + }, + evidence: { + operatorTurnTruncation: { status: "pass" }, + operatorTurnDelete: { status: "pass" }, + operatorDamageControl: { status: "pass" }, + governancePolicy: { status: "pass" }, + skillsRegistry: { status: "pass" }, + pluginMarketplace: { status: "pass" }, + deviceNodes: { + status: "pass", + updatesValidated: true, + updatesHasUpsert: true, + updatesHasHeartbeat: true, + updatesApiValidated: true, + updatesTotal: 3, + }, + agentUsage: { status: "pass" }, + runtimeGuardrailsSignalPaths: { + status: "pass", + summaryStatus: "critical signals=2", + totalPaths: 2, + primaryPath: { + title: "Recovery drill - ui-executor-sandbox-audit", + kind: "runtime_drill", + phase: "recovery", + }, + }, + }, + }, + null, + 2, + ), + ); + writeFileSync( + releaseEvidencePath, + JSON.stringify( + { + statuses: { + runtimeGuardrailsSignalPathsStatus: "pass", + }, + }, + null, + 2, + ), + ); + writeFileSync( + railwayDeploySummaryPath, + JSON.stringify( + { + status: "success", + deploymentId: "railway-deploy-123", + effectivePublicUrl: "https://live-agent.example.test", + checks: { + publicBadge: { + badgeEndpoint: "https://live-agent.example.test/demo-e2e/badge.json", + badgeDetailsEndpoint: "https://live-agent.example.test/demo-e2e/badge-details.json", + }, + }, + }, + null, + 2, + ), + ); + writeFileSync( + repoPublishSummaryPath, + JSON.stringify( + { + verification: { + script: "verify:release", + releaseEvidenceArtifactsValidated: true, + }, + steps: { + railwayDeployEnabled: true, + railwayFrontendDeployEnabled: false, + }, + }, + null, + 2, + ), + ); + writeFileSync( + visualManifestPath, + JSON.stringify( + { + badgeEvidence: { + deviceNodeUpdates: "pass", + }, + }, + null, + 2, + ), + ); + mkdirSync(baseDir, { recursive: true }); + writeFileSync(visualGalleryPath, "# Gallery\n"); + + const result = runPresentationBundle([ + "--summary", + summaryPath, + "--policy", + policyPath, + "--badge", + badgePath, + "--badgeDetails", + badgeDetailsPath, + "--releaseEvidence", + releaseEvidencePath, + "--railwayDeploySummary", + railwayDeploySummaryPath, + "--repoPublishSummary", + repoPublishSummaryPath, + "--visualManifest", + visualManifestPath, + "--visualGallery", + visualGalleryPath, + "--outputMarkdown", + outputMarkdownPath, + ]); + + assert.equal(result.status, 0, `expected presentation bundle to pass, stderr=${result.stderr}`); + const markdown = readFileSync(outputMarkdownPath, "utf8"); + + for (const token of [ + "Runtime Guardrails Snapshot", + "Provider Adapter Snapshot", + "Deploy / Publish Provenance", + "runtimeGuardrailsSignalPaths", + "providerUsage", + "critical signals=2", + "Recovery drill - ui-executor-sandbox-audit (runtime_drill / recovery)", + "storyteller-agent", + "deepgram", + "aura-2", + "provider_override", + "Railway deploy: status success; deployment railway-deploy-123; public URL https://live-agent.example.test", + "Public badge: badge https://live-agent.example.test/demo-e2e/badge.json; badge-details https://live-agent.example.test/demo-e2e/badge-details.json", + "Repo publish: verification verify:release; release evidence validated; Railway deploy enabled; frontend deploy disabled", + "railway-deploy-123", + "https://live-agent.example.test/demo-e2e/badge.json", + "verify:release", + ]) { + assert.ok(markdown.includes(token), `presentation markdown missing token: ${token}`); + } + assert.ok( + !markdown.includes("Railway deploy summary present:"), + "presentation markdown should avoid verbose deploy presence lines", + ); + assert.ok( + !markdown.includes("Railway deploy summary: [railway-deploy-summary.json]("), + "presentation markdown should not link raw railway deploy summary JSON", + ); + assert.ok( + !markdown.includes("Repo publish summary: [repo-publish-summary.json]("), + "presentation markdown should not link raw repo publish summary JSON", + ); +}); + +test("judge presentation bundle reuses compact deploy provenance from visual manifest when raw summaries are absent", () => { + const baseDir = mkdtempSync(join(tmpdir(), "mla-presentation-bundle-aligned-")); + const summaryPath = join(baseDir, "summary.json"); + const policyPath = join(baseDir, "policy-check.json"); + const badgePath = join(baseDir, "badge.json"); + const badgeDetailsPath = join(baseDir, "badge-details.json"); + const releaseEvidencePath = join(baseDir, "report.json"); + const visualManifestPath = join(baseDir, "manifest.json"); + const visualGalleryPath = join(baseDir, "gallery.md"); + const outputMarkdownPath = join(baseDir, "presentation.md"); + + writeFileSync( + summaryPath, + JSON.stringify( + { + scenarios: [ + { name: "live.negotiation", status: "passed" }, + { name: "storyteller.pipeline", status: "passed" }, + { name: "ui.approval.approve_resume", status: "passed" }, + ], + }, + null, + 2, + ), + ); + writeFileSync(policyPath, JSON.stringify({ ok: true, checks: 12, violations: [] }, null, 2)); + writeFileSync( + badgePath, + JSON.stringify({ label: "Demo KPI Gate", message: "pass", color: "brightgreen" }, null, 2), + ); + writeFileSync( + badgeDetailsPath, + JSON.stringify( + { + providerUsage: { + status: "pass", + activeSecondaryProviders: 0, + entries: [], + }, + evidence: { + operatorTurnTruncation: { status: "pass" }, + operatorTurnDelete: { status: "pass" }, + operatorDamageControl: { status: "pass" }, + governancePolicy: { status: "pass" }, + skillsRegistry: { status: "pass" }, + pluginMarketplace: { status: "pass" }, + deviceNodes: { + status: "pass", + updatesValidated: true, + updatesHasUpsert: true, + updatesHasHeartbeat: true, + updatesApiValidated: true, + updatesTotal: 2, + }, + agentUsage: { status: "pass" }, + runtimeGuardrailsSignalPaths: { + status: "pass", + summaryStatus: "critical signals=1", + totalPaths: 1, + }, + }, + }, + null, + 2, + ), + ); + writeFileSync(releaseEvidencePath, JSON.stringify({ statuses: {} }, null, 2)); + writeFileSync( + visualManifestPath, + JSON.stringify( + { + badgeEvidence: { + deviceNodeUpdates: "pass", + }, + deployProvenance: { + rows: [ + { + id: "railwayDeploy", + title: "Railway deploy", + summary: "status success; deployment railway-aligned-456; public URL https://judge.example.test", + }, + { + id: "repoPublish", + title: "Repo publish", + summary: "verification verify:release; release evidence validated; Railway deploy enabled; frontend deploy disabled", + }, + ], + }, + }, + null, + 2, + ), + ); + writeFileSync(visualGalleryPath, "# Gallery\n"); + + const result = runPresentationBundle([ + "--summary", + summaryPath, + "--policy", + policyPath, + "--badge", + badgePath, + "--badgeDetails", + badgeDetailsPath, + "--releaseEvidence", + releaseEvidencePath, + "--visualManifest", + visualManifestPath, + "--visualGallery", + visualGalleryPath, + "--outputMarkdown", + outputMarkdownPath, + ]); + + assert.equal(result.status, 0, `expected presentation bundle to pass without raw deploy summaries, stderr=${result.stderr}`); + const markdown = readFileSync(outputMarkdownPath, "utf8"); + + assert.ok(markdown.includes("## Deploy / Publish Provenance")); + assert.ok(markdown.includes("Railway deploy: status success; deployment railway-aligned-456; public URL https://judge.example.test")); + assert.ok(markdown.includes("Repo publish: verification verify:release; release evidence validated; Railway deploy enabled; frontend deploy disabled")); + assert.ok(!markdown.includes("railway-deploy-summary.json"), "raw deploy summary JSON should stay out of judge-facing markdown"); + assert.ok(!markdown.includes("repo-publish-summary.json"), "raw repo publish JSON should stay out of judge-facing markdown"); + assert.ok(!markdown.includes("railwayDeploySummary source is missing"), "optional missing deploy summary should not add notes"); + assert.ok(!markdown.includes("repoPublishSummary source is missing"), "optional missing publish summary should not add notes"); +}); + +test("judge presentation bundle omits optional provenance section for ordinary local judge flows", () => { + const baseDir = mkdtempSync(join(tmpdir(), "mla-presentation-bundle-local-")); + const summaryPath = join(baseDir, "summary.json"); + const policyPath = join(baseDir, "policy-check.json"); + const badgePath = join(baseDir, "badge.json"); + const badgeDetailsPath = join(baseDir, "badge-details.json"); + const releaseEvidencePath = join(baseDir, "report.json"); + const visualManifestPath = join(baseDir, "manifest.json"); + const visualGalleryPath = join(baseDir, "gallery.md"); + const outputMarkdownPath = join(baseDir, "presentation.md"); + + writeFileSync( + summaryPath, + JSON.stringify( + { + scenarios: [ + { name: "live.negotiation", status: "passed" }, + { name: "storyteller.pipeline", status: "passed" }, + { name: "ui.approval.approve_resume", status: "passed" }, + ], + }, + null, + 2, + ), + ); + writeFileSync(policyPath, JSON.stringify({ ok: true, checks: 18, violations: [] }, null, 2)); + writeFileSync( + badgePath, + JSON.stringify({ label: "Demo KPI Gate", message: "pass", color: "brightgreen" }, null, 2), + ); + writeFileSync( + badgeDetailsPath, + JSON.stringify( + { + providerUsage: { + status: "pass", + activeSecondaryProviders: 1, + entries: [ + { + route: "storyteller-agent", + capability: "tts", + selectedProvider: "deepgram", + selectedModel: "aura-2", + defaultProvider: "gemini_api", + selectionReason: "provider_override", + secondaryActive: true, + }, + ], + }, + evidence: { + operatorTurnTruncation: { status: "pass" }, + operatorTurnDelete: { status: "pass" }, + operatorDamageControl: { status: "pass" }, + governancePolicy: { status: "pass" }, + skillsRegistry: { status: "pass" }, + pluginMarketplace: { status: "pass" }, + deviceNodes: { + status: "pass", + updatesValidated: true, + updatesHasUpsert: true, + updatesHasHeartbeat: true, + updatesApiValidated: true, + updatesTotal: 2, + }, + agentUsage: { status: "pass" }, + runtimeGuardrailsSignalPaths: { + status: "pass", + summaryStatus: "critical signals=1", + totalPaths: 1, + }, + }, + }, + null, + 2, + ), + ); + writeFileSync(releaseEvidencePath, JSON.stringify({ statuses: {} }, null, 2)); + writeFileSync( + visualManifestPath, + JSON.stringify( + { + badgeEvidence: { + deviceNodeUpdates: "pass", + }, + deployProvenance: { + rows: [], + }, + }, + null, + 2, + ), + ); + writeFileSync(visualGalleryPath, "# Gallery\n"); + + const result = runPresentationBundle([ + "--summary", + summaryPath, + "--policy", + policyPath, + "--badge", + badgePath, + "--badgeDetails", + badgeDetailsPath, + "--releaseEvidence", + releaseEvidencePath, + "--visualManifest", + visualManifestPath, + "--visualGallery", + visualGalleryPath, + "--outputMarkdown", + outputMarkdownPath, + ]); + + assert.equal(result.status, 0, `expected presentation bundle to pass for local judge flow, stderr=${result.stderr}`); + const markdown = readFileSync(outputMarkdownPath, "utf8"); + + assert.ok(!markdown.includes("## Deploy / Publish Provenance"), "optional provenance section should be omitted"); + assert.ok(!markdown.includes("unavailable"), "optional provenance should not add unavailable noise"); + assert.ok(!markdown.includes("railway-deploy-summary.json"), "missing deploy summary should not appear as a raw artifact link"); + assert.ok(!markdown.includes("repo-publish-summary.json"), "missing repo publish summary should not appear as a raw artifact link"); +}); diff --git a/tests/unit/judge-quickstart-docs-alignment.test.ts b/tests/unit/judge-quickstart-docs-alignment.test.ts index 363d951b..3b6340a3 100644 --- a/tests/unit/judge-quickstart-docs-alignment.test.ts +++ b/tests/unit/judge-quickstart-docs-alignment.test.ts @@ -12,6 +12,7 @@ test("readme exposes judge quick path and quickstart doc link", () => { "## Judge Quick Path", "npm run demo:e2e:fast && npm run demo:e2e:policy", "artifacts/demo-e2e/badge-details.json", + "Frontend `Intent Request` also supports `intent=research`", ]; for (const token of requiredTokens) { assert.ok(source.includes(token), `README missing judge quick path token: ${token}`); @@ -36,6 +37,17 @@ test("judge quickstart document includes core judge commands and categories", () "Export Session -> Export Markdown", "Export Session -> Export JSON", "Export Session -> Export Audio (WAV)", + "Operator Session Ops", + "Bootstrap Doctor & Auth Profiles", + "Browser Worker Control", + "Runtime Drill Runner", + "Workflow Control Panel", + "operatorPurpose", + "operatorSessionReplay", + "operatorDiscovery", + "runtimeGuardrailsSignalPaths", + "intent=research", + "sourceUrls", "docs/challenge-demo-runbook.md", "Demo Script by Minute (5-6 min)", "00:00-00:45", diff --git a/tests/unit/judge-visual-evidence-pack-alignment.test.ts b/tests/unit/judge-visual-evidence-pack-alignment.test.ts index 22d36b05..bf45d5d2 100644 --- a/tests/unit/judge-visual-evidence-pack-alignment.test.ts +++ b/tests/unit/judge-visual-evidence-pack-alignment.test.ts @@ -63,6 +63,11 @@ test("judge visual evidence pack is wired across package scripts, docs, and runb "operator-console-evidence.png", "observability-dashboard.png", "operatorTurnTruncation", + "runtimeGuardrailsSignalPaths", + "providerUsage", + "railway-deploy-summary.json", + "repo-publish-summary.json", + "compact deploy/publish provenance", "pluginMarketplace", "deviceNodeUpdates", ]; @@ -75,10 +80,17 @@ test("judge visual evidence pack is wired across package scripts, docs, and runb "overallStatus", "screenshotChecklist", "criticalBadgeLanes", + "runtimeGuardrailsSignalPaths", + "providerUsage", + "railwayDeploySummary", + "repoPublishSummary", + "buildDeployProvenanceRows", + "Deploy / Publish Provenance", "pluginMarketplace", "deviceNodeUpdates", "costEstimatePresent", "tokensUsedPresent", + "deployProvenanceRows", ]; for (const token of requiredScriptTokens) { assert.ok(script.includes(token), `judge visual evidence script missing token: ${token}`); diff --git a/tests/unit/judge-visual-evidence-pack-script.test.ts b/tests/unit/judge-visual-evidence-pack-script.test.ts index 3f7ba5bc..9ad38b90 100644 --- a/tests/unit/judge-visual-evidence-pack-script.test.ts +++ b/tests/unit/judge-visual-evidence-pack-script.test.ts @@ -27,10 +27,28 @@ function makeBadgeDetails(statusOverrides = {}) { pluginMarketplace: "pass", deviceNodes: "pass", agentUsage: "pass", + runtimeGuardrailsSignalPaths: "pass", + providerUsage: "pass", ...statusOverrides, }; return { + providerUsage: { + status: laneStatus.providerUsage, + validated: laneStatus.providerUsage === "pass", + activeSecondaryProviders: 1, + entries: [ + { + route: "storyteller-agent", + capability: "tts", + selectedProvider: "deepgram", + selectedModel: "aura-2", + defaultProvider: "gemini_api", + selectionReason: "provider_override", + secondaryActive: true, + }, + ], + }, evidence: { operatorTurnTruncation: { status: laneStatus.operatorTurnTruncation }, operatorTurnDelete: { status: laneStatus.operatorTurnDelete }, @@ -38,6 +56,11 @@ function makeBadgeDetails(statusOverrides = {}) { governancePolicy: { status: laneStatus.governancePolicy }, skillsRegistry: { status: laneStatus.skillsRegistry }, pluginMarketplace: { status: laneStatus.pluginMarketplace }, + runtimeGuardrailsSignalPaths: { + status: laneStatus.runtimeGuardrailsSignalPaths, + summaryStatus: "critical signals=1", + totalPaths: 1, + }, deviceNodes: { status: laneStatus.deviceNodes, updatesValidated: true, @@ -68,6 +91,8 @@ function runVisualPack(args) { test("judge visual evidence pack strict mode passes when required captures and badge lanes are present", () => { const baseDir = mkdtempSync(join(tmpdir(), "mla-visual-pack-pass-")); const screenshotsDir = join(baseDir, "screenshots"); + const railwayDeploySummaryPath = join(baseDir, "railway-deploy-summary.json"); + const repoPublishSummaryPath = join(baseDir, "repo-publish-summary.json"); mkdirSync(screenshotsDir, { recursive: true }); for (const fileName of REQUIRED_SCREENSHOTS) { @@ -81,6 +106,41 @@ test("judge visual evidence pack strict mode passes when required captures and b writeFileSync(badgePath, JSON.stringify(makeBadgeDetails(), null, 2)); writeFileSync(summaryPath, JSON.stringify({ ok: true }, null, 2)); + writeFileSync( + railwayDeploySummaryPath, + JSON.stringify( + { + status: "success", + deploymentId: "railway-deploy-123", + effectivePublicUrl: "https://live-agent.example.test", + checks: { + publicBadge: { + badgeEndpoint: "https://live-agent.example.test/demo-e2e/badge.json", + badgeDetailsEndpoint: "https://live-agent.example.test/demo-e2e/badge-details.json", + }, + }, + }, + null, + 2, + ), + ); + writeFileSync( + repoPublishSummaryPath, + JSON.stringify( + { + verification: { + script: "verify:release", + releaseEvidenceArtifactsValidated: true, + }, + steps: { + railwayDeployEnabled: true, + railwayFrontendDeployEnabled: false, + }, + }, + null, + 2, + ), + ); const result = runVisualPack([ "--strict", @@ -88,6 +148,10 @@ test("judge visual evidence pack strict mode passes when required captures and b badgePath, "--summary", summaryPath, + "--railwayDeploySummary", + railwayDeploySummaryPath, + "--repoPublishSummary", + repoPublishSummaryPath, "--screenshotDir", screenshotsDir, "--outputJson", @@ -102,7 +166,27 @@ test("judge visual evidence pack strict mode passes when required captures and b assert.equal(manifest.summary.missingRequiredCaptures, 0); assert.equal(manifest.summary.missingCriticalBadgeEvidence, 0); assert.equal(manifest.badgeEvidence.pluginMarketplace, "pass"); + assert.equal(manifest.badgeEvidence.runtimeGuardrailsSignalPaths, "pass"); + assert.equal(manifest.badgeEvidence.providerUsage, "pass"); assert.equal(manifest.badgeEvidence.deviceNodeUpdates, "pass"); + assert.equal(manifest.deployProvenance.railwayDeploy.status, "success"); + assert.equal(manifest.deployProvenance.railwayDeploy.available, true); + assert.equal(manifest.deployProvenance.railwayDeploy.deploymentId, "railway-deploy-123"); + assert.equal(manifest.deployProvenance.repoPublish.verificationScript, "verify:release"); + assert.equal(manifest.deployProvenance.repoPublish.available, true); + assert.equal(manifest.deployProvenance.repoPublish.releaseEvidenceValidated, true); + assert.equal(manifest.summary.deployProvenanceRows, 3); + + const markdown = readFileSync(outMd, "utf8"); + for (const token of [ + "## Deploy / Publish Provenance", + "Railway deploy: status success; deployment railway-deploy-123; public URL https://live-agent.example.test", + "Public badge: badge https://live-agent.example.test/demo-e2e/badge.json; badge-details https://live-agent.example.test/demo-e2e/badge-details.json", + "Repo publish: verification verify:release; release evidence validated; Railway deploy enabled; frontend deploy disabled", + ]) { + assert.ok(markdown.includes(token), `visual pack markdown missing token: ${token}`); + } + assert.ok(!markdown.includes("Railway deploy summary present:"), "markdown should avoid verbose presence lines"); }); test("judge visual evidence pack strict mode fails when captures are missing", () => { @@ -141,3 +225,46 @@ test("judge visual evidence pack strict mode fails when captures are missing", ( assert.equal(manifest.overallStatus, "fail"); assert.ok(manifest.summary.missingRequiredCaptures >= 1); }); + +test("judge visual evidence pack keeps deploy provenance optional for local strict flows", () => { + const baseDir = mkdtempSync(join(tmpdir(), "mla-visual-pack-optional-")); + const screenshotsDir = join(baseDir, "screenshots"); + mkdirSync(screenshotsDir, { recursive: true }); + + for (const fileName of REQUIRED_SCREENSHOTS) { + writeFileSync(join(screenshotsDir, fileName), Buffer.from("fake-png-bytes")); + } + + const badgePath = join(baseDir, "badge-details.json"); + const summaryPath = join(baseDir, "summary.json"); + const outJson = join(baseDir, "manifest.json"); + const outMd = join(baseDir, "manifest.md"); + + writeFileSync(badgePath, JSON.stringify(makeBadgeDetails(), null, 2)); + writeFileSync(summaryPath, JSON.stringify({ ok: true }, null, 2)); + + const result = runVisualPack([ + "--strict", + "--badgeDetails", + badgePath, + "--summary", + summaryPath, + "--screenshotDir", + screenshotsDir, + "--outputJson", + outJson, + "--outputMarkdown", + outMd, + ]); + + assert.equal(result.status, 0, `expected strict visual pack to pass without optional deploy summaries, stderr=${result.stderr}`); + const manifest = JSON.parse(readFileSync(outJson, "utf8")); + const markdown = readFileSync(outMd, "utf8"); + + assert.equal(manifest.overallStatus, "pass"); + assert.equal(manifest.summary.deployProvenanceRows, 0); + assert.deepEqual(manifest.deployProvenance.rows, []); + assert.equal(manifest.deployProvenance.available, false); + assert.ok(!markdown.includes("## Deploy / Publish Provenance"), "optional provenance section should be omitted when absent"); + assert.ok(!markdown.includes("unavailable"), "optional provenance should not add unavailable noise to markdown"); +}); diff --git a/tests/unit/skills-catalog.test.ts b/tests/unit/skills-catalog.test.ts new file mode 100644 index 00000000..5eaffc3d --- /dev/null +++ b/tests/unit/skills-catalog.test.ts @@ -0,0 +1,281 @@ +import assert from "node:assert/strict"; +import { mkdtemp, mkdir, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import test from "node:test"; +import { getSkillsCatalogSnapshot, getSkillsRuntimeCatalogSnapshot } from "../../shared/skills/src/index.js"; + +async function writeJson(path: string, value: unknown): Promise { + await writeFile(path, JSON.stringify(value, null, 2), "utf8"); +} + +test("skills catalog loads repo-owned personas and recipes with readiness overlay", async () => { + const rootDir = await mkdtemp(join(tmpdir(), "mla-skills-catalog-")); + try { + await mkdir(join(rootDir, "configs"), { recursive: true }); + await mkdir(join(rootDir, "skills", "workspace", "calendar-assistant"), { recursive: true }); + await writeJson(join(rootDir, "configs", "skills.catalog.json"), { + version: 3, + updatedAt: "2026-03-06T00:00:00.000Z", + personas: [ + { + id: "calendar-operator", + name: "Calendar Operator", + description: "Drive calendar automation demos.", + agentIds: ["live-agent"], + recommendedSkillIds: ["calendar-assistant"], + defaultRecipeId: "calendar-demo", + }, + ], + recipes: [ + { + id: "calendar-demo", + personaId: "calendar-operator", + name: "Calendar Demo", + description: "Run a scheduling demo.", + agentId: "live-agent", + intent: "conversation", + promptTemplate: "Schedule a meeting with two fallback options.", + recommendedSkillIds: ["calendar-assistant", "calendar-managed-demo"], + }, + ], + }); + await writeFile( + join(rootDir, "skills", "workspace", "calendar-assistant", "SKILL.md"), + [ + "# Calendar Assistant", + "id: calendar-assistant", + "scope: live-agent", + "prompt: Prefer scheduling clarity and offer two concrete slots.", + ].join("\n"), + "utf8", + ); + await writeJson(join(rootDir, "skills", "workspace", "calendar-assistant", "managed-skill-upsert.sample.json"), { + skillId: "calendar-managed-demo", + name: "Calendar Managed Demo", + prompt: "Validate managed-skill lifecycle.", + }); + + const snapshot = await getSkillsCatalogSnapshot({ + cwd: rootDir, + agentId: "live-agent", + activeSkillIds: ["calendar-assistant"], + env: {}, + }); + + assert.equal(snapshot.version, 3); + assert.equal(snapshot.source, "path"); + assert.equal(snapshot.personas.length, 1); + assert.equal(snapshot.recipes.length, 1); + assert.deepEqual(snapshot.personas[0]?.availableSkillIds, ["calendar-assistant"]); + assert.deepEqual(snapshot.personas[0]?.missingSkillIds, []); + assert.equal(snapshot.personas[0]?.ready, true); + assert.deepEqual(snapshot.recipes[0]?.missingSkillIds, ["calendar-managed-demo"]); + assert.equal(snapshot.recipes[0]?.ready, false); + assert.deepEqual(snapshot.personas[0]?.repoKnownSkillIds, ["calendar-assistant"]); + assert.deepEqual(snapshot.recipes[0]?.repoKnownSkillIds, ["calendar-assistant", "calendar-managed-demo"]); + assert.deepEqual(snapshot.repoKnownSkillIds, ["calendar-assistant", "calendar-managed-demo"]); + assert.equal(snapshot.warnings.length, 0); + } finally { + await rm(rootDir, { recursive: true, force: true }); + } +}); + +test("skills runtime catalog snapshot overlays active workspace skills onto recommendations", async () => { + const rootDir = await mkdtemp(join(tmpdir(), "mla-skills-runtime-catalog-")); + try { + await mkdir(join(rootDir, "configs"), { recursive: true }); + await mkdir(join(rootDir, "skills", "workspace", "calendar-assistant"), { recursive: true }); + await writeJson(join(rootDir, "configs", "skills.catalog.json"), { + version: 1, + personas: [ + { + id: "calendar-operator", + name: "Calendar Operator", + description: "Drive calendar automation demos.", + agentIds: ["live-agent"], + recommendedSkillIds: ["calendar-assistant"], + defaultRecipeId: "calendar-demo", + }, + ], + recipes: [ + { + id: "calendar-demo", + personaId: "calendar-operator", + name: "Calendar Demo", + description: "Run a scheduling demo.", + agentId: "live-agent", + intent: "conversation", + promptTemplate: "Schedule a meeting with two fallback options.", + recommendedSkillIds: ["calendar-assistant"], + }, + ], + }); + await writeFile( + join(rootDir, "skills", "workspace", "calendar-assistant", "SKILL.md"), + [ + "# Calendar Assistant", + "id: calendar-assistant", + "scope: live-agent", + "trustLevel: reviewed", + "prompt: Prefer scheduling clarity and offer two concrete slots.", + ].join("\n"), + "utf8", + ); + + const snapshot = await getSkillsRuntimeCatalogSnapshot({ + agentId: "live-agent", + cwd: rootDir, + env: { + SKILLS_RUNTIME_ENABLED: "true", + SKILLS_WORKSPACE_DIR: "skills/workspace", + SKILLS_BUNDLED_DIR: "skills/bundled", + SKILLS_SOURCE_PRECEDENCE: "workspace", + SKILLS_ALLOWED_SOURCES: "workspace", + SKILLS_CATALOG_PATH: "configs/skills.catalog.json", + }, + }); + + assert.equal(snapshot.runtime.activeSkills.length, 1); + assert.equal(snapshot.runtime.activeSkills[0]?.id, "calendar-assistant"); + assert.equal(snapshot.catalog.personas[0]?.ready, true); + assert.equal(snapshot.catalog.recipes[0]?.ready, true); + assert.deepEqual(snapshot.catalog.activeSkillIds, ["calendar-assistant"]); + assert.equal(snapshot.runtimeSummary.activeCount, 1); + } finally { + await rm(rootDir, { recursive: true, force: true }); + } +}); + +test("skills catalog returns invalid status and warning for malformed env json", async () => { + const snapshot = await getSkillsCatalogSnapshot({ + env: { + SKILLS_CATALOG_JSON: "{invalid", + }, + }); + + assert.equal(snapshot.source, "invalid"); + assert.equal(snapshot.personas.length, 0); + assert.equal(snapshot.recipes.length, 0); + assert.ok(snapshot.warnings.some((item) => item.includes("SKILLS_CATALOG_JSON"))); +}); + +test("skills catalog warns when personas or recipes reference unknown repo-owned skill ids", async () => { + const rootDir = await mkdtemp(join(tmpdir(), "mla-skills-catalog-warning-")); + try { + await mkdir(join(rootDir, "configs"), { recursive: true }); + await mkdir(join(rootDir, "skills", "workspace", "calendar-assistant"), { recursive: true }); + await writeJson(join(rootDir, "configs", "skills.catalog.json"), { + version: 1, + personas: [ + { + id: "calendar-operator", + name: "Calendar Operator", + description: "Drive calendar automation demos.", + agentIds: ["live-agent"], + recommendedSkillIds: ["calendar-assistant", "missing-skill"], + defaultRecipeId: "calendar-demo", + }, + ], + recipes: [ + { + id: "calendar-demo", + personaId: "calendar-operator", + name: "Calendar Demo", + description: "Run a scheduling demo.", + agentId: "live-agent", + intent: "conversation", + promptTemplate: "Schedule a meeting with two fallback options.", + recommendedSkillIds: ["missing-skill"], + }, + ], + }); + await writeFile( + join(rootDir, "skills", "workspace", "calendar-assistant", "SKILL.md"), + [ + "# Calendar Assistant", + "id: calendar-assistant", + "scope: live-agent", + "prompt: Prefer scheduling clarity and offer two concrete slots.", + ].join("\n"), + "utf8", + ); + + const snapshot = await getSkillsCatalogSnapshot({ + cwd: rootDir, + agentId: "live-agent", + env: {}, + }); + + assert.deepEqual(snapshot.personas[0]?.repoUnknownSkillIds, ["missing-skill"]); + assert.deepEqual(snapshot.recipes[0]?.repoUnknownSkillIds, ["missing-skill"]); + assert.ok(snapshot.warnings.some((item) => item.includes("Persona calendar-operator recommends unknown repo-owned skill ids: missing-skill."))); + assert.ok(snapshot.warnings.some((item) => item.includes("Recipe calendar-demo recommends unknown repo-owned skill ids: missing-skill."))); + } finally { + await rm(rootDir, { recursive: true, force: true }); + } +}); + +test("skills catalog convergence follows configured workspace and bundled skill directories", async () => { + const rootDir = await mkdtemp(join(tmpdir(), "mla-skills-catalog-custom-roots-")); + try { + const workspaceDir = join("custom-skills", "workspace"); + const bundledDir = join("custom-skills", "bundled"); + await mkdir(join(rootDir, workspaceDir, "calendar-assistant"), { recursive: true }); + await mkdir(join(rootDir, "configs"), { recursive: true }); + await writeJson(join(rootDir, "configs", "skills.catalog.json"), { + version: 1, + personas: [ + { + id: "calendar-operator", + name: "Calendar Operator", + description: "Drive calendar automation demos.", + agentIds: ["live-agent"], + recommendedSkillIds: ["calendar-assistant", "calendar-managed-demo"], + defaultRecipeId: "calendar-demo", + }, + ], + recipes: [ + { + id: "calendar-demo", + personaId: "calendar-operator", + name: "Calendar Demo", + description: "Run a scheduling demo.", + agentId: "live-agent", + intent: "conversation", + promptTemplate: "Schedule a meeting with two fallback options.", + recommendedSkillIds: ["calendar-managed-demo"], + }, + ], + }); + await writeFile( + join(rootDir, workspaceDir, "calendar-assistant", "SKILL.md"), + [ + "# Calendar Assistant", + "id: calendar-assistant", + "scope: live-agent", + "prompt: Prefer scheduling clarity and offer two concrete slots.", + ].join("\n"), + "utf8", + ); + await writeJson(join(rootDir, workspaceDir, "calendar-assistant", "managed-skill-upsert.sample.json"), { + skillId: "calendar-managed-demo", + name: "Calendar Managed Demo", + prompt: "Validate managed-skill lifecycle.", + }); + + const snapshot = await getSkillsCatalogSnapshot({ + cwd: rootDir, + agentId: "live-agent", + env: { + SKILLS_WORKSPACE_DIR: workspaceDir.replace(/\\/g, "/"), + SKILLS_BUNDLED_DIR: bundledDir.replace(/\\/g, "/"), + }, + }); + + assert.deepEqual(snapshot.repoKnownSkillIds, ["calendar-assistant", "calendar-managed-demo"]); + assert.equal(snapshot.warnings.length, 0); + } finally { + await rm(rootDir, { recursive: true, force: true }); + } +}); From ff8817e9bc5597cfdf834979f15340d559ef3fce Mon Sep 17 00:00:00 2001 From: Codex Bot Date: Sun, 8 Mar 2026 13:08:25 +0500 Subject: [PATCH 4/4] Trim PR to judge scope and fix alignment tests --- scripts/release-readiness.ps1 | 416 +----------------- ...udge-presentation-bundle-alignment.test.ts | 12 +- .../judge-quickstart-docs-alignment.test.ts | 17 +- tests/unit/skills-catalog.test.ts | 281 ------------ 4 files changed, 17 insertions(+), 709 deletions(-) delete mode 100644 tests/unit/skills-catalog.test.ts diff --git a/scripts/release-readiness.ps1 b/scripts/release-readiness.ps1 index bfaeb328..af36ec81 100644 --- a/scripts/release-readiness.ps1 +++ b/scripts/release-readiness.ps1 @@ -33,9 +33,7 @@ param( [string]$PerfPolicyPath = "artifacts/perf-load/policy-check.json", [string]$SourceRunManifestPath = "artifacts/release-artifact-revalidation/source-run.json", [string]$ReleaseEvidenceReportPath = "artifacts/release-evidence/report.json", - [string]$ReleaseEvidenceReportMarkdownPath = "artifacts/release-evidence/report.md", - [string]$ReleaseEvidenceManifestPath = "artifacts/release-evidence/manifest.json", - [string]$ReleaseEvidenceManifestMarkdownPath = "artifacts/release-evidence/manifest.md" + [string]$ReleaseEvidenceReportMarkdownPath = "artifacts/release-evidence/report.md" ) $ErrorActionPreference = "Stop" @@ -100,95 +98,6 @@ function To-BoolOrNull([object]$Value) { return $null } -function Get-ObjectPropertyValue { - param( - [Parameter(Mandatory = $false)] - [object]$Object, - [Parameter(Mandatory = $true)] - [string]$Name - ) - - if ($null -eq $Object) { - return $null - } - - $property = $Object.PSObject.Properties[$Name] - if ($null -eq $property) { - return $null - } - - return $property.Value -} - -function Get-OptionalNonEmptyStringPropertyValue { - param( - [Parameter(Mandatory = $false)] - [object]$Object, - [Parameter(Mandatory = $true)] - [string]$Name, - [Parameter(Mandatory = $true)] - [string]$ContextLabel - ) - - $value = Get-ObjectPropertyValue -Object $Object -Name $Name - if ($null -eq $value) { - return $null - } - - $stringValue = [string]$value - if ([string]::IsNullOrWhiteSpace($stringValue)) { - Fail ($ContextLabel + "." + $Name + " expected non-empty string when provided") - } - - return $stringValue -} - -function Get-OptionalBooleanPropertyValue { - param( - [Parameter(Mandatory = $false)] - [object]$Object, - [Parameter(Mandatory = $true)] - [string]$Name, - [Parameter(Mandatory = $true)] - [string]$ContextLabel - ) - - $value = Get-ObjectPropertyValue -Object $Object -Name $Name - if ($null -eq $value) { - return $null - } - - $boolValue = To-BoolOrNull $value - if ($null -eq $boolValue) { - Fail ($ContextLabel + "." + $Name + " expected boolean when provided") - } - - return $boolValue -} - -function Get-OptionalNonNegativeNumberPropertyValue { - param( - [Parameter(Mandatory = $false)] - [object]$Object, - [Parameter(Mandatory = $true)] - [string]$Name, - [Parameter(Mandatory = $true)] - [string]$ContextLabel - ) - - $value = Get-ObjectPropertyValue -Object $Object -Name $Name - if ($null -eq $value) { - return [double]::NaN - } - - $numberValue = To-NumberOrNaN $value - if ([double]::IsNaN($numberValue) -or $numberValue -lt 0) { - Fail ($ContextLabel + "." + $Name + " expected >= 0 when provided") - } - - return $numberValue -} - function Fail([string]$Message) { Write-Error $Message exit 1 @@ -259,11 +168,10 @@ if (-not $SkipProfileSmoke) { if ((-not $SkipDemoE2E) -and (-not $SkipDemoRun)) { $runFastDemo = $UseFastDemoE2E -or (-not $SkipBuild) $scenarioRetryArgs = "-ScenarioRetryMaxAttempts $DemoScenarioRetryMaxAttempts -ScenarioRetryBackoffMs $DemoScenarioRetryBackoffMs" - $serviceRestartArgs = "-RestartHealthyServices" $demoCommand = if ($runFastDemo) { - "npm run demo:e2e:fast -- -StartupTimeoutSec $DemoStartupTimeoutSec -RequestTimeoutSec $DemoRequestTimeoutSec $scenarioRetryArgs $serviceRestartArgs" + "npm run demo:e2e:fast -- -StartupTimeoutSec $DemoStartupTimeoutSec -RequestTimeoutSec $DemoRequestTimeoutSec $scenarioRetryArgs" } else { - "npm run demo:e2e -- -StartupTimeoutSec $DemoStartupTimeoutSec -RequestTimeoutSec $DemoRequestTimeoutSec $scenarioRetryArgs $serviceRestartArgs" + "npm run demo:e2e -- -StartupTimeoutSec $DemoStartupTimeoutSec -RequestTimeoutSec $DemoRequestTimeoutSec $scenarioRetryArgs" } Run-StepWithRetry "Run demo e2e" $demoCommand $DemoRunMaxAttempts $DemoRunRetryBackoffMs } @@ -298,7 +206,7 @@ if (Test-Path $BadgeDetailsPath) { } Write-Host "[release-check] Build release evidence report" - & powershell -NoProfile -ExecutionPolicy Bypass -File $releaseEvidenceScriptPath -BadgeDetailsPath $BadgeDetailsPath -OutputJsonPath $ReleaseEvidenceReportPath -OutputMarkdownPath $ReleaseEvidenceReportMarkdownPath -OutputManifestJsonPath $ReleaseEvidenceManifestPath -OutputManifestMarkdownPath $ReleaseEvidenceManifestMarkdownPath + & powershell -NoProfile -ExecutionPolicy Bypass -File $releaseEvidenceScriptPath -BadgeDetailsPath $BadgeDetailsPath -OutputJsonPath $ReleaseEvidenceReportPath -OutputMarkdownPath $ReleaseEvidenceReportMarkdownPath if ($LASTEXITCODE -ne 0) { Fail "Step failed: Build release evidence report" } @@ -329,8 +237,6 @@ if ($IsArtifactOnlyMode) { if (Test-Path $BadgeDetailsPath) { $requiredFiles += $ReleaseEvidenceReportPath $requiredFiles += $ReleaseEvidenceReportMarkdownPath - $requiredFiles += $ReleaseEvidenceManifestPath - $requiredFiles += $ReleaseEvidenceManifestMarkdownPath } $missing = @($requiredFiles | Where-Object { -not (Test-Path $_) }) @@ -516,105 +422,6 @@ if ($IsArtifactOnlyMode -and (Test-Path $SourceRunManifestPath)) { ) } - $manifestRuntimeGuardrailsSignalPathsStatusRaw = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsStatus - $manifestRuntimeGuardrailsSignalPathsStatus = $manifestRuntimeGuardrailsSignalPathsStatusRaw.ToLowerInvariant() - if ($manifestRuntimeGuardrailsSignalPathsStatus -ne "pass") { - Fail ( - "source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsStatus expected pass, actual " + - $manifestRuntimeGuardrailsSignalPathsStatusRaw - ) - } - - $manifestRuntimeGuardrailsSignalPathsSummaryStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsSummaryStatus - if ([string]::IsNullOrWhiteSpace($manifestRuntimeGuardrailsSignalPathsSummaryStatus)) { - Fail ("source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsSummaryStatus is required") - } - - $manifestRuntimeGuardrailsSignalPathsTotalPaths = To-NumberOrNaN $manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsTotalPaths - if ([double]::IsNaN($manifestRuntimeGuardrailsSignalPathsTotalPaths) -or $manifestRuntimeGuardrailsSignalPathsTotalPaths -lt 0) { - Fail ( - "source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsTotalPaths expected >= 0, actual " + - $manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsTotalPaths - ) - } - - $manifestRuntimeGuardrailsSignalPathsPrimaryPath = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath" - if ($manifestRuntimeGuardrailsSignalPathsTotalPaths -eq 0) { - if ($null -ne $manifestRuntimeGuardrailsSignalPathsPrimaryPath) { - Fail ("source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath expected null when totalPaths=0") - } - } - else { - if ($null -eq $manifestRuntimeGuardrailsSignalPathsPrimaryPath) { - Fail ("source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath is required when totalPaths > 0") - } - - $manifestRuntimeGuardrailsSignalPathsPrimaryPathTitle = [string](Get-ObjectPropertyValue -Object $manifestRuntimeGuardrailsSignalPathsPrimaryPath -Name "title") - $manifestRuntimeGuardrailsSignalPathsPrimaryPathKind = [string](Get-ObjectPropertyValue -Object $manifestRuntimeGuardrailsSignalPathsPrimaryPath -Name "kind") - $manifestRuntimeGuardrailsSignalPathsPrimaryPathSummaryText = [string](Get-ObjectPropertyValue -Object $manifestRuntimeGuardrailsSignalPathsPrimaryPath -Name "summaryText") - if ( - [string]::IsNullOrWhiteSpace($manifestRuntimeGuardrailsSignalPathsPrimaryPathTitle) -or - [string]::IsNullOrWhiteSpace($manifestRuntimeGuardrailsSignalPathsPrimaryPathKind) -or - [string]::IsNullOrWhiteSpace($manifestRuntimeGuardrailsSignalPathsPrimaryPathSummaryText) - ) { - Fail ( - "source run manifest evidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath expected title/kind/summaryText when totalPaths > 0" - ) - } - } - - $manifestProviderUsageStatusRaw = [string]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageStatus - $manifestProviderUsageStatus = $manifestProviderUsageStatusRaw.ToLowerInvariant() - if ($manifestProviderUsageStatus -ne "pass") { - Fail ( - "source run manifest evidenceSnapshot.badgeEvidenceProviderUsageStatus expected pass, actual " + - $manifestProviderUsageStatusRaw - ) - } - - $manifestProviderUsageValidated = [bool]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageValidated - if (-not $manifestProviderUsageValidated) { - Fail ("source run manifest evidenceSnapshot.badgeEvidenceProviderUsageValidated expected true") - } - - $manifestProviderUsageActiveSecondaryProviders = To-NumberOrNaN $manifestEvidenceSnapshot.badgeEvidenceProviderUsageActiveSecondaryProviders - if ([double]::IsNaN($manifestProviderUsageActiveSecondaryProviders) -or $manifestProviderUsageActiveSecondaryProviders -lt 0) { - Fail ( - "source run manifest evidenceSnapshot.badgeEvidenceProviderUsageActiveSecondaryProviders expected >= 0, actual " + - $manifestEvidenceSnapshot.badgeEvidenceProviderUsageActiveSecondaryProviders - ) - } - - $manifestProviderUsageEntriesCount = To-NumberOrNaN $manifestEvidenceSnapshot.badgeEvidenceProviderUsageEntriesCount - if ([double]::IsNaN($manifestProviderUsageEntriesCount) -or $manifestProviderUsageEntriesCount -lt 1) { - Fail ( - "source run manifest evidenceSnapshot.badgeEvidenceProviderUsageEntriesCount expected >= 1, actual " + - $manifestEvidenceSnapshot.badgeEvidenceProviderUsageEntriesCount - ) - } - - $manifestProviderUsagePrimaryEntry = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "badgeEvidenceProviderUsagePrimaryEntry" - if ($null -eq $manifestProviderUsagePrimaryEntry) { - Fail ("source run manifest evidenceSnapshot.badgeEvidenceProviderUsagePrimaryEntry is required when entriesCount > 0") - } - - $manifestProviderUsagePrimaryEntryRoute = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "route") - $manifestProviderUsagePrimaryEntryCapability = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "capability") - $manifestProviderUsagePrimaryEntrySelectedProvider = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectedProvider") - $manifestProviderUsagePrimaryEntrySelectedModel = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectedModel") - $manifestProviderUsagePrimaryEntrySelectionReason = [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectionReason") - if ( - [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntryRoute) -or - [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntryCapability) -or - [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntrySelectedProvider) -or - [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntrySelectedModel) -or - [string]::IsNullOrWhiteSpace($manifestProviderUsagePrimaryEntrySelectionReason) - ) { - Fail ( - "source run manifest evidenceSnapshot.badgeEvidenceProviderUsagePrimaryEntry expected route/capability/selectedProvider/selectedModel/selectionReason when entriesCount > 0" - ) - } - $manifestDeviceNodeUpdatesStatusRaw = [string]$manifestEvidenceSnapshot.badgeEvidenceDeviceNodeUpdatesStatus $manifestDeviceNodeUpdatesStatus = $manifestDeviceNodeUpdatesStatusRaw.ToLowerInvariant() if ($manifestDeviceNodeUpdatesStatus -ne "pass") { @@ -624,105 +431,6 @@ if ($IsArtifactOnlyMode -and (Test-Path $SourceRunManifestPath)) { ) } - $manifestRailwayDeploySummaryPresent = To-BoolOrNull $manifestEvidenceSnapshot.railwayDeploySummaryPresent - if ($null -eq $manifestRailwayDeploySummaryPresent) { - $manifestRailwayDeploySummaryPresent = $false - } - if ($manifestRailwayDeploySummaryPresent) { - $manifestEvidenceContextLabel = "source run manifest evidenceSnapshot" - $manifestRailwayDeploySummaryStatus = [string]$manifestEvidenceSnapshot.railwayDeploySummaryStatus - if ([string]::IsNullOrWhiteSpace($manifestRailwayDeploySummaryStatus)) { - Fail ("source run manifest evidenceSnapshot.railwayDeploySummaryStatus is required when railwayDeploySummaryPresent=true") - } - - $manifestRailwayDeploySummaryDeploymentId = [string]$manifestEvidenceSnapshot.railwayDeploySummaryDeploymentId - if ([string]::IsNullOrWhiteSpace($manifestRailwayDeploySummaryDeploymentId)) { - Fail ("source run manifest evidenceSnapshot.railwayDeploySummaryDeploymentId is required when railwayDeploySummaryPresent=true") - } - - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEffectivePublicUrl" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryBadgeEndpoint" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryBadgeDetailsEndpoint" -ContextLabel $manifestEvidenceContextLabel - # Contract: source run manifest evidenceSnapshot.railwayDeploySummaryProjectId expected non-empty string when provided - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryProjectId" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryService" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEnvironment" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEffectiveStartCommand" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryConfigSource" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorExpectedUiUrl" -ContextLabel $manifestEvidenceContextLabel - - # Contract: source run manifest evidenceSnapshot.railwayDeploySummaryRootDescriptorAttempted expected boolean when provided - $manifestRailwayDeploySummaryRootDescriptorAttempted = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorAttempted" -ContextLabel $manifestEvidenceContextLabel - $manifestRailwayDeploySummaryRootDescriptorSkipped = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorSkipped" -ContextLabel $manifestEvidenceContextLabel - if ( - ($null -ne $manifestRailwayDeploySummaryRootDescriptorAttempted) -and - ($null -ne $manifestRailwayDeploySummaryRootDescriptorSkipped) -and - $manifestRailwayDeploySummaryRootDescriptorAttempted -and - $manifestRailwayDeploySummaryRootDescriptorSkipped - ) { - Fail ("source run manifest evidenceSnapshot.railwayDeploySummaryRootDescriptorAttempted and railwayDeploySummaryRootDescriptorSkipped cannot both be true") - } - - $manifestRailwayDeploySummaryPublicBadgeAttempted = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryPublicBadgeAttempted" -ContextLabel $manifestEvidenceContextLabel - $manifestRailwayDeploySummaryPublicBadgeSkipped = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryPublicBadgeSkipped" -ContextLabel $manifestEvidenceContextLabel - if ( - ($null -ne $manifestRailwayDeploySummaryPublicBadgeAttempted) -and - ($null -ne $manifestRailwayDeploySummaryPublicBadgeSkipped) -and - $manifestRailwayDeploySummaryPublicBadgeAttempted -and - $manifestRailwayDeploySummaryPublicBadgeSkipped - ) { - Fail ("source run manifest evidenceSnapshot.railwayDeploySummaryPublicBadgeAttempted and railwayDeploySummaryPublicBadgeSkipped cannot both be true") - } - } - - $manifestRepoPublishSummaryPresent = To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryPresent - if ($null -eq $manifestRepoPublishSummaryPresent) { - $manifestRepoPublishSummaryPresent = $false - } - if ($manifestRepoPublishSummaryPresent) { - $manifestRepoPublishSummaryVerificationScript = [string]$manifestEvidenceSnapshot.repoPublishSummaryVerificationScript - if ([string]::IsNullOrWhiteSpace($manifestRepoPublishSummaryVerificationScript)) { - Fail ("source run manifest evidenceSnapshot.repoPublishSummaryVerificationScript is required when repoPublishSummaryPresent=true") - } - - $manifestRepoPublishSummaryReleaseEvidenceValidated = To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryReleaseEvidenceValidated - if ($manifestRepoPublishSummaryReleaseEvidenceValidated -ne $true) { - Fail ("source run manifest evidenceSnapshot.repoPublishSummaryReleaseEvidenceValidated expected true when repoPublishSummaryPresent=true") - } - - $manifestRepoPublishSummaryRailwayDeployEnabled = To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryRailwayDeployEnabled - if ($null -eq $manifestRepoPublishSummaryRailwayDeployEnabled) { - Fail ("source run manifest evidenceSnapshot.repoPublishSummaryRailwayDeployEnabled is required when repoPublishSummaryPresent=true") - } - - $manifestRepoPublishSummaryRailwayFrontendDeployEnabled = To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryRailwayFrontendDeployEnabled - if ($null -eq $manifestRepoPublishSummaryRailwayFrontendDeployEnabled) { - Fail ("source run manifest evidenceSnapshot.repoPublishSummaryRailwayFrontendDeployEnabled is required when repoPublishSummaryPresent=true") - } - - $manifestEvidenceContextLabel = "source run manifest evidenceSnapshot" - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryBranch" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRemoteName" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayPublicUrl" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayDemoFrontendPublicUrl" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactSelf" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactRailwayDeploySummary" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactReleaseEvidenceReportJson" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactReleaseEvidenceManifestJson" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalNonEmptyStringPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactBadgeDetailsJson" -ContextLabel $manifestEvidenceContextLabel - - $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryVerificationSkipped" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryVerificationStrict" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryCommitEnabled" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryPushEnabled" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryPagesEnabled" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryBadgeCheckEnabled" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayNoWait" -ContextLabel $manifestEvidenceContextLabel - $null = Get-OptionalBooleanPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayFrontendNoWait" -ContextLabel $manifestEvidenceContextLabel - # Contract: source run manifest evidenceSnapshot.repoPublishSummaryReleaseEvidenceArtifactsCount expected >= 0 when provided - $null = Get-OptionalNonNegativeNumberPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryReleaseEvidenceArtifactsCount" -ContextLabel $manifestEvidenceContextLabel - } - $manifestDamageControlLatestVerdictRaw = [string]$manifestEvidenceSnapshot.operatorDamageControlLatestVerdict $manifestDamageControlLatestVerdict = $manifestDamageControlLatestVerdictRaw.ToLowerInvariant() $allowedOperatorDamageControlLatestVerdicts = @("allow", "ask", "block") @@ -1815,16 +1523,6 @@ if ((-not $SkipDemoE2E) -and (Test-Path $SummaryPath)) { Fail ("Critical KPI check failed: assistiveRouterMode expected deterministic|assistive_override|assistive_match|assistive_fallback, actual " + $assistiveRouterMode) } - $assistiveRouterProviderMetadataValidated = To-BoolOrNull $summary.kpis.assistiveRouterProviderMetadataValidated - if ($assistiveRouterProviderMetadataValidated -ne $true) { - Fail ("Critical KPI check failed: assistiveRouterProviderMetadataValidated expected True, actual " + $summary.kpis.assistiveRouterProviderMetadataValidated) - } - - $assistiveRouterProvider = [string]$summary.kpis.assistiveRouterProvider - if (@("gemini_api", "openai", "anthropic", "deepseek", "moonshot") -notcontains $assistiveRouterProvider) { - Fail ("Critical KPI check failed: assistiveRouterProvider expected gemini_api|openai|anthropic|deepseek|moonshot, actual " + $assistiveRouterProvider) - } - $transportModeValidated = To-BoolOrNull $summary.kpis.transportModeValidated if ($transportModeValidated -ne $true) { Fail ("Critical KPI check failed: transportModeValidated expected True, actual " + $summary.kpis.transportModeValidated) @@ -2339,60 +2037,7 @@ if ($IsArtifactOnlyMode -and (Test-Path $SourceRunManifestPath)) { $manifestPluginMarketplaceStatus = [string]$manifestEvidenceSnapshot.badgeEvidencePluginMarketplaceStatus $manifestDeviceNodesStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceDeviceNodesStatus $manifestAgentUsageStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceAgentUsageStatus - $manifestRuntimeGuardrailsSignalPathsStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsStatus - $manifestRuntimeGuardrailsSignalPathsSummaryStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsSummaryStatus - $manifestRuntimeGuardrailsSignalPathsTotalPaths = [string]$manifestEvidenceSnapshot.badgeEvidenceRuntimeGuardrailsSignalPathsTotalPaths - $manifestRuntimeGuardrailsSignalPathsPrimaryPath = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "badgeEvidenceRuntimeGuardrailsSignalPathsPrimaryPath" - $manifestRuntimeGuardrailsSignalPathsPrimaryPathTitle = if ($null -ne $manifestRuntimeGuardrailsSignalPathsPrimaryPath) { [string](Get-ObjectPropertyValue -Object $manifestRuntimeGuardrailsSignalPathsPrimaryPath -Name "title") } else { "" } - $manifestProviderUsageStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageStatus - $manifestProviderUsageValidated = if ([bool]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageValidated) { "true" } else { "false" } - $manifestProviderUsageActiveSecondaryProviders = [string]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageActiveSecondaryProviders - $manifestProviderUsageEntriesCount = [string]$manifestEvidenceSnapshot.badgeEvidenceProviderUsageEntriesCount - $manifestProviderUsagePrimaryEntry = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "badgeEvidenceProviderUsagePrimaryEntry" - $manifestProviderUsagePrimaryEntryRoute = if ($null -ne $manifestProviderUsagePrimaryEntry) { [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "route") } else { "" } - $manifestProviderUsagePrimaryEntryCapability = if ($null -ne $manifestProviderUsagePrimaryEntry) { [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "capability") } else { "" } - $manifestProviderUsagePrimaryEntrySelectedProvider = if ($null -ne $manifestProviderUsagePrimaryEntry) { [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectedProvider") } else { "" } - $manifestProviderUsagePrimaryEntrySelectedModel = if ($null -ne $manifestProviderUsagePrimaryEntry) { [string](Get-ObjectPropertyValue -Object $manifestProviderUsagePrimaryEntry -Name "selectedModel") } else { "" } $manifestDeviceNodeUpdatesStatus = [string]$manifestEvidenceSnapshot.badgeEvidenceDeviceNodeUpdatesStatus - $manifestRailwayDeploySummaryPresent = if ((To-BoolOrNull $manifestEvidenceSnapshot.railwayDeploySummaryPresent) -eq $true) { "true" } else { "false" } - $manifestRailwayDeploySummaryStatus = [string]$manifestEvidenceSnapshot.railwayDeploySummaryStatus - $manifestRailwayDeploySummaryDeploymentId = [string]$manifestEvidenceSnapshot.railwayDeploySummaryDeploymentId - $manifestRailwayDeploySummaryEffectivePublicUrl = [string]$manifestEvidenceSnapshot.railwayDeploySummaryEffectivePublicUrl - $manifestRailwayDeploySummaryBadgeEndpoint = [string]$manifestEvidenceSnapshot.railwayDeploySummaryBadgeEndpoint - $manifestRailwayDeploySummaryBadgeDetailsEndpoint = [string]$manifestEvidenceSnapshot.railwayDeploySummaryBadgeDetailsEndpoint - $manifestRailwayDeploySummaryProjectId = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryProjectId") - $manifestRailwayDeploySummaryService = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryService") - $manifestRailwayDeploySummaryEnvironment = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEnvironment") - $manifestRailwayDeploySummaryEffectiveStartCommand = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryEffectiveStartCommand") - $manifestRailwayDeploySummaryConfigSource = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryConfigSource") - $manifestRailwayDeploySummaryRootDescriptorAttempted = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorAttempted" - $manifestRailwayDeploySummaryRootDescriptorSkipped = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorSkipped" - $manifestRailwayDeploySummaryRootDescriptorExpectedUiUrl = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryRootDescriptorExpectedUiUrl") - $manifestRailwayDeploySummaryPublicBadgeAttempted = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryPublicBadgeAttempted" - $manifestRailwayDeploySummaryPublicBadgeSkipped = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "railwayDeploySummaryPublicBadgeSkipped" - $manifestRepoPublishSummaryPresent = if ((To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryPresent) -eq $true) { "true" } else { "false" } - $manifestRepoPublishSummaryBranch = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryBranch") - $manifestRepoPublishSummaryRemoteName = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRemoteName") - $manifestRepoPublishSummaryVerificationScript = [string]$manifestEvidenceSnapshot.repoPublishSummaryVerificationScript - $manifestRepoPublishSummaryVerificationSkipped = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryVerificationSkipped" - $manifestRepoPublishSummaryVerificationStrict = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryVerificationStrict" - $manifestRepoPublishSummaryReleaseEvidenceValidated = if ((To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryReleaseEvidenceValidated) -eq $true) { "true" } else { "false" } - $manifestRepoPublishSummaryReleaseEvidenceArtifactsCount = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryReleaseEvidenceArtifactsCount") - $manifestRepoPublishSummaryCommitEnabled = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryCommitEnabled" - $manifestRepoPublishSummaryPushEnabled = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryPushEnabled" - $manifestRepoPublishSummaryPagesEnabled = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryPagesEnabled" - $manifestRepoPublishSummaryBadgeCheckEnabled = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryBadgeCheckEnabled" - $manifestRepoPublishSummaryRailwayDeployEnabled = if ((To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryRailwayDeployEnabled) -eq $true) { "true" } else { "false" } - $manifestRepoPublishSummaryRailwayFrontendDeployEnabled = if ((To-BoolOrNull $manifestEvidenceSnapshot.repoPublishSummaryRailwayFrontendDeployEnabled) -eq $true) { "true" } else { "false" } - $manifestRepoPublishSummaryRuntimeRailwayPublicUrl = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayPublicUrl") - $manifestRepoPublishSummaryRuntimeRailwayDemoFrontendPublicUrl = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayDemoFrontendPublicUrl") - $manifestRepoPublishSummaryRuntimeRailwayNoWait = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayNoWait" - $manifestRepoPublishSummaryRuntimeRailwayFrontendNoWait = Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryRuntimeRailwayFrontendNoWait" - $manifestRepoPublishSummaryArtifactSelf = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactSelf") - $manifestRepoPublishSummaryArtifactRailwayDeploySummary = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactRailwayDeploySummary") - $manifestRepoPublishSummaryArtifactReleaseEvidenceReportJson = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactReleaseEvidenceReportJson") - $manifestRepoPublishSummaryArtifactReleaseEvidenceManifestJson = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactReleaseEvidenceManifestJson") - $manifestRepoPublishSummaryArtifactBadgeDetailsJson = [string](Get-ObjectPropertyValue -Object $manifestEvidenceSnapshot -Name "repoPublishSummaryArtifactBadgeDetailsJson") $manifestDamageControlLatestVerdict = [string]$manifestEvidenceSnapshot.operatorDamageControlLatestVerdict $manifestDamageControlLatestSource = [string]$manifestEvidenceSnapshot.operatorDamageControlLatestSource Write-Host ( @@ -2408,62 +2053,9 @@ if ($IsArtifactOnlyMode -and (Test-Path $SourceRunManifestPath)) { ", plugin_marketplace_status=" + $manifestPluginMarketplaceStatus + ", device_nodes_status=" + $manifestDeviceNodesStatus + ", agent_usage_status=" + $manifestAgentUsageStatus + - ", runtime_guardrails_signal_paths_status=" + $manifestRuntimeGuardrailsSignalPathsStatus + - ", runtime_guardrails_signal_paths_summary_status=" + $manifestRuntimeGuardrailsSignalPathsSummaryStatus + - ", runtime_guardrails_signal_paths_total_paths=" + $manifestRuntimeGuardrailsSignalPathsTotalPaths + - ", runtime_guardrails_signal_paths_primary_path_title=" + $manifestRuntimeGuardrailsSignalPathsPrimaryPathTitle + - ", provider_usage_status=" + $manifestProviderUsageStatus + - ", provider_usage_validated=" + $manifestProviderUsageValidated + - ", provider_usage_active_secondary_providers=" + $manifestProviderUsageActiveSecondaryProviders + - ", provider_usage_entries_count=" + $manifestProviderUsageEntriesCount + - ", provider_usage_primary_entry_route=" + $manifestProviderUsagePrimaryEntryRoute + - ", provider_usage_primary_entry_capability=" + $manifestProviderUsagePrimaryEntryCapability + - ", provider_usage_primary_entry_selected_provider=" + $manifestProviderUsagePrimaryEntrySelectedProvider + - ", provider_usage_primary_entry_selected_model=" + $manifestProviderUsagePrimaryEntrySelectedModel + ", device_node_updates_status=" + $manifestDeviceNodeUpdatesStatus + - ", railway_deploy_summary_present=" + $manifestRailwayDeploySummaryPresent + - ", railway_deploy_summary_status=" + $manifestRailwayDeploySummaryStatus + - ", railway_deploy_summary_deployment_id=" + $manifestRailwayDeploySummaryDeploymentId + - ", railway_deploy_summary_public_url=" + $manifestRailwayDeploySummaryEffectivePublicUrl + - ", railway_deploy_summary_badge_endpoint=" + $manifestRailwayDeploySummaryBadgeEndpoint + - ", railway_deploy_summary_badge_details_endpoint=" + $manifestRailwayDeploySummaryBadgeDetailsEndpoint + - ", railway_deploy_summary_project_id=" + $manifestRailwayDeploySummaryProjectId + - ", railway_deploy_summary_service=" + $manifestRailwayDeploySummaryService + - ", railway_deploy_summary_environment=" + $manifestRailwayDeploySummaryEnvironment + - ", railway_deploy_summary_effective_start_command=" + $manifestRailwayDeploySummaryEffectiveStartCommand + - ", railway_deploy_summary_config_source=" + $manifestRailwayDeploySummaryConfigSource + - ", railway_deploy_summary_root_descriptor_attempted=" + $(if ($null -ne $manifestRailwayDeploySummaryRootDescriptorAttempted) { [string]$manifestRailwayDeploySummaryRootDescriptorAttempted } else { "" }) + - ", railway_deploy_summary_root_descriptor_skipped=" + $(if ($null -ne $manifestRailwayDeploySummaryRootDescriptorSkipped) { [string]$manifestRailwayDeploySummaryRootDescriptorSkipped } else { "" }) + - ", railway_deploy_summary_expected_ui_url=" + $manifestRailwayDeploySummaryRootDescriptorExpectedUiUrl + - ", railway_deploy_summary_public_badge_attempted=" + $(if ($null -ne $manifestRailwayDeploySummaryPublicBadgeAttempted) { [string]$manifestRailwayDeploySummaryPublicBadgeAttempted } else { "" }) + - ", railway_deploy_summary_public_badge_skipped=" + $(if ($null -ne $manifestRailwayDeploySummaryPublicBadgeSkipped) { [string]$manifestRailwayDeploySummaryPublicBadgeSkipped } else { "" }) + - ", repo_publish_summary_present=" + $manifestRepoPublishSummaryPresent + - ", repo_publish_summary_branch=" + $manifestRepoPublishSummaryBranch + - ", repo_publish_summary_remote_name=" + $manifestRepoPublishSummaryRemoteName + - ", repo_publish_summary_verification_script=" + $manifestRepoPublishSummaryVerificationScript + - ", repo_publish_summary_verification_skipped=" + $(if ($null -ne $manifestRepoPublishSummaryVerificationSkipped) { [string]$manifestRepoPublishSummaryVerificationSkipped } else { "" }) + - ", repo_publish_summary_verification_strict=" + $(if ($null -ne $manifestRepoPublishSummaryVerificationStrict) { [string]$manifestRepoPublishSummaryVerificationStrict } else { "" }) + - ", repo_publish_summary_release_evidence_validated=" + $manifestRepoPublishSummaryReleaseEvidenceValidated + - ", repo_publish_summary_release_evidence_artifacts_count=" + $manifestRepoPublishSummaryReleaseEvidenceArtifactsCount + - ", repo_publish_summary_commit_enabled=" + $(if ($null -ne $manifestRepoPublishSummaryCommitEnabled) { [string]$manifestRepoPublishSummaryCommitEnabled } else { "" }) + - ", repo_publish_summary_push_enabled=" + $(if ($null -ne $manifestRepoPublishSummaryPushEnabled) { [string]$manifestRepoPublishSummaryPushEnabled } else { "" }) + - ", repo_publish_summary_pages_enabled=" + $(if ($null -ne $manifestRepoPublishSummaryPagesEnabled) { [string]$manifestRepoPublishSummaryPagesEnabled } else { "" }) + - ", repo_publish_summary_badge_check_enabled=" + $(if ($null -ne $manifestRepoPublishSummaryBadgeCheckEnabled) { [string]$manifestRepoPublishSummaryBadgeCheckEnabled } else { "" }) + - ", repo_publish_summary_railway_deploy_enabled=" + $manifestRepoPublishSummaryRailwayDeployEnabled + - ", repo_publish_summary_railway_frontend_deploy_enabled=" + $manifestRepoPublishSummaryRailwayFrontendDeployEnabled + - ", repo_publish_summary_runtime_railway_public_url=" + $manifestRepoPublishSummaryRuntimeRailwayPublicUrl + - ", repo_publish_summary_runtime_railway_frontend_public_url=" + $manifestRepoPublishSummaryRuntimeRailwayDemoFrontendPublicUrl + - ", repo_publish_summary_runtime_railway_no_wait=" + $(if ($null -ne $manifestRepoPublishSummaryRuntimeRailwayNoWait) { [string]$manifestRepoPublishSummaryRuntimeRailwayNoWait } else { "" }) + - ", repo_publish_summary_runtime_railway_frontend_no_wait=" + $(if ($null -ne $manifestRepoPublishSummaryRuntimeRailwayFrontendNoWait) { [string]$manifestRepoPublishSummaryRuntimeRailwayFrontendNoWait } else { "" }) + - ", repo_publish_summary_artifact_self=" + $manifestRepoPublishSummaryArtifactSelf + - ", repo_publish_summary_artifact_railway_deploy_summary=" + $manifestRepoPublishSummaryArtifactRailwayDeploySummary + - ", repo_publish_summary_artifact_release_evidence_report_json=" + $manifestRepoPublishSummaryArtifactReleaseEvidenceReportJson + - ", repo_publish_summary_artifact_release_evidence_manifest_json=" + $manifestRepoPublishSummaryArtifactReleaseEvidenceManifestJson + - ", repo_publish_summary_artifact_badge_details_json=" + $manifestRepoPublishSummaryArtifactBadgeDetailsJson + ", operator_damage_control_latest_verdict=" + $manifestDamageControlLatestVerdict + ", operator_damage_control_latest_source=" + $manifestDamageControlLatestSource ) } } - - diff --git a/tests/unit/judge-presentation-bundle-alignment.test.ts b/tests/unit/judge-presentation-bundle-alignment.test.ts index 6d06f535..9ecb5023 100644 --- a/tests/unit/judge-presentation-bundle-alignment.test.ts +++ b/tests/unit/judge-presentation-bundle-alignment.test.ts @@ -5,17 +5,13 @@ import test from "node:test"; test("judge presentation bundle script is wired across package scripts and docs", () => { const packagePath = resolve(process.cwd(), "package.json"); - const readmePath = resolve(process.cwd(), "README.md"); const visualDocPath = resolve(process.cwd(), "docs", "judge-visual-evidence.md"); const quickstartPath = resolve(process.cwd(), "docs", "judge-quickstart.md"); - const runbookPath = resolve(process.cwd(), "docs", "challenge-demo-runbook.md"); const scriptPath = resolve(process.cwd(), "scripts", "judge-presentation-bundle.mjs"); const pkg = JSON.parse(readFileSync(packagePath, "utf8")) as { scripts?: Record }; - const readme = readFileSync(readmePath, "utf8"); const visualDoc = readFileSync(visualDocPath, "utf8"); const quickstart = readFileSync(quickstartPath, "utf8"); - const runbook = readFileSync(runbookPath, "utf8"); const script = readFileSync(scriptPath, "utf8"); assert.equal( @@ -31,13 +27,11 @@ test("judge presentation bundle script is wired across package scripts and docs" const docTokens = [ "npm run demo:e2e:visual:bundle", - "npm run demo:e2e:visual:judge", "presentation.md", "railway-deploy-summary.json", "repo-publish-summary.json", ]; for (const token of docTokens) { - assert.ok(readme.includes(token), `README missing presentation token: ${token}`); assert.ok(visualDoc.includes(token), `judge visual evidence doc missing presentation token: ${token}`); } assert.ok( @@ -48,8 +42,10 @@ test("judge presentation bundle script is wired across package scripts and docs" quickstart.includes("npm run demo:e2e:visual:bundle"), "judge quickstart missing visual bundle command", ); - assert.ok(runbook.includes("npm run demo:e2e:visual:bundle"), "runbook missing visual bundle command"); - assert.ok(runbook.includes("npm run demo:e2e:visual:judge"), "runbook missing visual judge command"); + assert.ok( + quickstart.includes("artifacts/judge-visual-evidence/presentation.md"), + "judge quickstart missing presentation artifact path", + ); const scriptTokens = [ "Judge Presentation Bundle", diff --git a/tests/unit/judge-quickstart-docs-alignment.test.ts b/tests/unit/judge-quickstart-docs-alignment.test.ts index 3b6340a3..65ee822b 100644 --- a/tests/unit/judge-quickstart-docs-alignment.test.ts +++ b/tests/unit/judge-quickstart-docs-alignment.test.ts @@ -3,19 +3,20 @@ import { readFileSync } from "node:fs"; import { resolve } from "node:path"; import test from "node:test"; -test("readme exposes judge quick path and quickstart doc link", () => { - const readmePath = resolve(process.cwd(), "README.md"); - const source = readFileSync(readmePath, "utf8"); +test("judge quickstart exposes judge quick path and evidence entry points", () => { + const quickstartPath = resolve(process.cwd(), "docs", "judge-quickstart.md"); + const source = readFileSync(quickstartPath, "utf8"); const requiredTokens = [ - "Judge Quickstart: `docs/judge-quickstart.md`", - "## Judge Quick Path", - "npm run demo:e2e:fast && npm run demo:e2e:policy", + "# Judge Quickstart", + "Fast, judge-facing entry point for a 5-10 minute evaluation run.", + "npm run demo:e2e:fast", + "npm run demo:e2e:policy", "artifacts/demo-e2e/badge-details.json", - "Frontend `Intent Request` also supports `intent=research`", + "intent=research", ]; for (const token of requiredTokens) { - assert.ok(source.includes(token), `README missing judge quick path token: ${token}`); + assert.ok(source.includes(token), `judge quickstart missing quick path token: ${token}`); } }); diff --git a/tests/unit/skills-catalog.test.ts b/tests/unit/skills-catalog.test.ts deleted file mode 100644 index 5eaffc3d..00000000 --- a/tests/unit/skills-catalog.test.ts +++ /dev/null @@ -1,281 +0,0 @@ -import assert from "node:assert/strict"; -import { mkdtemp, mkdir, rm, writeFile } from "node:fs/promises"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import test from "node:test"; -import { getSkillsCatalogSnapshot, getSkillsRuntimeCatalogSnapshot } from "../../shared/skills/src/index.js"; - -async function writeJson(path: string, value: unknown): Promise { - await writeFile(path, JSON.stringify(value, null, 2), "utf8"); -} - -test("skills catalog loads repo-owned personas and recipes with readiness overlay", async () => { - const rootDir = await mkdtemp(join(tmpdir(), "mla-skills-catalog-")); - try { - await mkdir(join(rootDir, "configs"), { recursive: true }); - await mkdir(join(rootDir, "skills", "workspace", "calendar-assistant"), { recursive: true }); - await writeJson(join(rootDir, "configs", "skills.catalog.json"), { - version: 3, - updatedAt: "2026-03-06T00:00:00.000Z", - personas: [ - { - id: "calendar-operator", - name: "Calendar Operator", - description: "Drive calendar automation demos.", - agentIds: ["live-agent"], - recommendedSkillIds: ["calendar-assistant"], - defaultRecipeId: "calendar-demo", - }, - ], - recipes: [ - { - id: "calendar-demo", - personaId: "calendar-operator", - name: "Calendar Demo", - description: "Run a scheduling demo.", - agentId: "live-agent", - intent: "conversation", - promptTemplate: "Schedule a meeting with two fallback options.", - recommendedSkillIds: ["calendar-assistant", "calendar-managed-demo"], - }, - ], - }); - await writeFile( - join(rootDir, "skills", "workspace", "calendar-assistant", "SKILL.md"), - [ - "# Calendar Assistant", - "id: calendar-assistant", - "scope: live-agent", - "prompt: Prefer scheduling clarity and offer two concrete slots.", - ].join("\n"), - "utf8", - ); - await writeJson(join(rootDir, "skills", "workspace", "calendar-assistant", "managed-skill-upsert.sample.json"), { - skillId: "calendar-managed-demo", - name: "Calendar Managed Demo", - prompt: "Validate managed-skill lifecycle.", - }); - - const snapshot = await getSkillsCatalogSnapshot({ - cwd: rootDir, - agentId: "live-agent", - activeSkillIds: ["calendar-assistant"], - env: {}, - }); - - assert.equal(snapshot.version, 3); - assert.equal(snapshot.source, "path"); - assert.equal(snapshot.personas.length, 1); - assert.equal(snapshot.recipes.length, 1); - assert.deepEqual(snapshot.personas[0]?.availableSkillIds, ["calendar-assistant"]); - assert.deepEqual(snapshot.personas[0]?.missingSkillIds, []); - assert.equal(snapshot.personas[0]?.ready, true); - assert.deepEqual(snapshot.recipes[0]?.missingSkillIds, ["calendar-managed-demo"]); - assert.equal(snapshot.recipes[0]?.ready, false); - assert.deepEqual(snapshot.personas[0]?.repoKnownSkillIds, ["calendar-assistant"]); - assert.deepEqual(snapshot.recipes[0]?.repoKnownSkillIds, ["calendar-assistant", "calendar-managed-demo"]); - assert.deepEqual(snapshot.repoKnownSkillIds, ["calendar-assistant", "calendar-managed-demo"]); - assert.equal(snapshot.warnings.length, 0); - } finally { - await rm(rootDir, { recursive: true, force: true }); - } -}); - -test("skills runtime catalog snapshot overlays active workspace skills onto recommendations", async () => { - const rootDir = await mkdtemp(join(tmpdir(), "mla-skills-runtime-catalog-")); - try { - await mkdir(join(rootDir, "configs"), { recursive: true }); - await mkdir(join(rootDir, "skills", "workspace", "calendar-assistant"), { recursive: true }); - await writeJson(join(rootDir, "configs", "skills.catalog.json"), { - version: 1, - personas: [ - { - id: "calendar-operator", - name: "Calendar Operator", - description: "Drive calendar automation demos.", - agentIds: ["live-agent"], - recommendedSkillIds: ["calendar-assistant"], - defaultRecipeId: "calendar-demo", - }, - ], - recipes: [ - { - id: "calendar-demo", - personaId: "calendar-operator", - name: "Calendar Demo", - description: "Run a scheduling demo.", - agentId: "live-agent", - intent: "conversation", - promptTemplate: "Schedule a meeting with two fallback options.", - recommendedSkillIds: ["calendar-assistant"], - }, - ], - }); - await writeFile( - join(rootDir, "skills", "workspace", "calendar-assistant", "SKILL.md"), - [ - "# Calendar Assistant", - "id: calendar-assistant", - "scope: live-agent", - "trustLevel: reviewed", - "prompt: Prefer scheduling clarity and offer two concrete slots.", - ].join("\n"), - "utf8", - ); - - const snapshot = await getSkillsRuntimeCatalogSnapshot({ - agentId: "live-agent", - cwd: rootDir, - env: { - SKILLS_RUNTIME_ENABLED: "true", - SKILLS_WORKSPACE_DIR: "skills/workspace", - SKILLS_BUNDLED_DIR: "skills/bundled", - SKILLS_SOURCE_PRECEDENCE: "workspace", - SKILLS_ALLOWED_SOURCES: "workspace", - SKILLS_CATALOG_PATH: "configs/skills.catalog.json", - }, - }); - - assert.equal(snapshot.runtime.activeSkills.length, 1); - assert.equal(snapshot.runtime.activeSkills[0]?.id, "calendar-assistant"); - assert.equal(snapshot.catalog.personas[0]?.ready, true); - assert.equal(snapshot.catalog.recipes[0]?.ready, true); - assert.deepEqual(snapshot.catalog.activeSkillIds, ["calendar-assistant"]); - assert.equal(snapshot.runtimeSummary.activeCount, 1); - } finally { - await rm(rootDir, { recursive: true, force: true }); - } -}); - -test("skills catalog returns invalid status and warning for malformed env json", async () => { - const snapshot = await getSkillsCatalogSnapshot({ - env: { - SKILLS_CATALOG_JSON: "{invalid", - }, - }); - - assert.equal(snapshot.source, "invalid"); - assert.equal(snapshot.personas.length, 0); - assert.equal(snapshot.recipes.length, 0); - assert.ok(snapshot.warnings.some((item) => item.includes("SKILLS_CATALOG_JSON"))); -}); - -test("skills catalog warns when personas or recipes reference unknown repo-owned skill ids", async () => { - const rootDir = await mkdtemp(join(tmpdir(), "mla-skills-catalog-warning-")); - try { - await mkdir(join(rootDir, "configs"), { recursive: true }); - await mkdir(join(rootDir, "skills", "workspace", "calendar-assistant"), { recursive: true }); - await writeJson(join(rootDir, "configs", "skills.catalog.json"), { - version: 1, - personas: [ - { - id: "calendar-operator", - name: "Calendar Operator", - description: "Drive calendar automation demos.", - agentIds: ["live-agent"], - recommendedSkillIds: ["calendar-assistant", "missing-skill"], - defaultRecipeId: "calendar-demo", - }, - ], - recipes: [ - { - id: "calendar-demo", - personaId: "calendar-operator", - name: "Calendar Demo", - description: "Run a scheduling demo.", - agentId: "live-agent", - intent: "conversation", - promptTemplate: "Schedule a meeting with two fallback options.", - recommendedSkillIds: ["missing-skill"], - }, - ], - }); - await writeFile( - join(rootDir, "skills", "workspace", "calendar-assistant", "SKILL.md"), - [ - "# Calendar Assistant", - "id: calendar-assistant", - "scope: live-agent", - "prompt: Prefer scheduling clarity and offer two concrete slots.", - ].join("\n"), - "utf8", - ); - - const snapshot = await getSkillsCatalogSnapshot({ - cwd: rootDir, - agentId: "live-agent", - env: {}, - }); - - assert.deepEqual(snapshot.personas[0]?.repoUnknownSkillIds, ["missing-skill"]); - assert.deepEqual(snapshot.recipes[0]?.repoUnknownSkillIds, ["missing-skill"]); - assert.ok(snapshot.warnings.some((item) => item.includes("Persona calendar-operator recommends unknown repo-owned skill ids: missing-skill."))); - assert.ok(snapshot.warnings.some((item) => item.includes("Recipe calendar-demo recommends unknown repo-owned skill ids: missing-skill."))); - } finally { - await rm(rootDir, { recursive: true, force: true }); - } -}); - -test("skills catalog convergence follows configured workspace and bundled skill directories", async () => { - const rootDir = await mkdtemp(join(tmpdir(), "mla-skills-catalog-custom-roots-")); - try { - const workspaceDir = join("custom-skills", "workspace"); - const bundledDir = join("custom-skills", "bundled"); - await mkdir(join(rootDir, workspaceDir, "calendar-assistant"), { recursive: true }); - await mkdir(join(rootDir, "configs"), { recursive: true }); - await writeJson(join(rootDir, "configs", "skills.catalog.json"), { - version: 1, - personas: [ - { - id: "calendar-operator", - name: "Calendar Operator", - description: "Drive calendar automation demos.", - agentIds: ["live-agent"], - recommendedSkillIds: ["calendar-assistant", "calendar-managed-demo"], - defaultRecipeId: "calendar-demo", - }, - ], - recipes: [ - { - id: "calendar-demo", - personaId: "calendar-operator", - name: "Calendar Demo", - description: "Run a scheduling demo.", - agentId: "live-agent", - intent: "conversation", - promptTemplate: "Schedule a meeting with two fallback options.", - recommendedSkillIds: ["calendar-managed-demo"], - }, - ], - }); - await writeFile( - join(rootDir, workspaceDir, "calendar-assistant", "SKILL.md"), - [ - "# Calendar Assistant", - "id: calendar-assistant", - "scope: live-agent", - "prompt: Prefer scheduling clarity and offer two concrete slots.", - ].join("\n"), - "utf8", - ); - await writeJson(join(rootDir, workspaceDir, "calendar-assistant", "managed-skill-upsert.sample.json"), { - skillId: "calendar-managed-demo", - name: "Calendar Managed Demo", - prompt: "Validate managed-skill lifecycle.", - }); - - const snapshot = await getSkillsCatalogSnapshot({ - cwd: rootDir, - agentId: "live-agent", - env: { - SKILLS_WORKSPACE_DIR: workspaceDir.replace(/\\/g, "/"), - SKILLS_BUNDLED_DIR: bundledDir.replace(/\\/g, "/"), - }, - }); - - assert.deepEqual(snapshot.repoKnownSkillIds, ["calendar-assistant", "calendar-managed-demo"]); - assert.equal(snapshot.warnings.length, 0); - } finally { - await rm(rootDir, { recursive: true, force: true }); - } -});