diff --git a/__tests__/gate.test.ts b/__tests__/gate.test.ts index 2144d7d..61aad5d 100644 --- a/__tests__/gate.test.ts +++ b/__tests__/gate.test.ts @@ -5,8 +5,10 @@ import { applySelfReviewDowngrade, parseQualityGrades, compareGrades, + parseCitations, + verifyCitations, } from '../src/gate.js'; -import type { GateVerdict, Grade } from '../src/gate.js'; +import type { GateVerdict, Grade, Citation, FileReader } from '../src/gate.js'; import type { TeamRole } from '../src/types.js'; // Helper: wrap a verdict body with the TRANSCRIPTS+CITATIONS+QUALITY_GRADES @@ -308,3 +310,124 @@ describe('compareGrades', () => { expect(d.maxDrift).toBe(4); }); }); + +describe('parseCitations', () => { + it('parses a block-scalar citation and stops at the next header', () => { + const out = [ + 'GATE_VERDICT: APPROVE', + '', + 'CITATIONS:', + ' - claim: auth resolves identity via Okta', + ' file: src/auth/middleware.ts', + ' line_range: 42-57', + ' quoted_fragment: |', + ' const { userId } = await oktaClient.users.getByEmail(claim.email);', + ' if (!userId) throw new AuthError("unknown identity");', + '', + 'QUALITY_GRADES:', + ' security: A-', + ].join('\n'); + const cits = parseCitations(out); + expect(cits).toHaveLength(1); + expect(cits[0].file).toBe('src/auth/middleware.ts'); + expect(cits[0].lineRange).toBe('42-57'); + expect(cits[0].claim).toContain('Okta'); + expect(cits[0].quotedFragment).toContain('oktaClient.users.getByEmail'); + expect(cits[0].quotedFragment).toContain('throw new AuthError'); + expect(cits[0].quotedFragment).not.toContain('QUALITY_GRADES'); + }); + + it('parses multiple citations in one block', () => { + const out = [ + 'CITATIONS:', + ' - claim: first', + ' file: a.ts', + ' line_range: 1-2', + ' quoted_fragment: |', + ' const a = 1;', + ' - claim: second', + ' file: b.ts', + ' line_range: 3-4', + ' quoted_fragment: |', + ' const b = 2;', + ].join('\n'); + const cits = parseCitations(out); + expect(cits.map((c) => c.file)).toEqual(['a.ts', 'b.ts']); + 
expect(cits[1].quotedFragment).toBe('const b = 2;'); + }); + + it('returns [] when no CITATIONS header is present', () => { + expect(parseCitations('GATE_VERDICT: APPROVE')).toEqual([]); + }); +}); + +describe('verifyCitations', () => { + const cit: Citation = { claim: 'x', file: 'a.ts', lineRange: '1-2', quotedFragment: 'const a = 1;' }; + + it('passes when the fragment appears in the cited file', () => { + const reader: FileReader = (f) => (f === 'a.ts' ? 'line0\nconst a = 1;\nline2' : null); + expect(verifyCitations([cit], reader)[0].ok).toBe(true); + }); + + it('fails as fragment-not-found when the fragment is absent (fabricated)', () => { + const reader: FileReader = () => 'totally unrelated content'; + const check = verifyCitations([cit], reader)[0]; + expect(check.ok).toBe(false); + expect(check.status).toBe('fragment-not-found'); + expect(check.reason).toContain('not found verbatim'); + }); + + it('reports file-unreadable (not fabrication) when the cited file does not exist', () => { + const reader: FileReader = () => null; + const check = verifyCitations([cit], reader)[0]; + expect(check.ok).toBe(false); + expect(check.status).toBe('file-unreadable'); + }); + + it('is whitespace/indentation tolerant (block-scalar dedent vs real indent)', () => { + const multi: Citation = { ...cit, quotedFragment: 'const a = 1;\nconst b = 2;' }; + const reader: FileReader = () => ' const a = 1;\n const b = 2;'; // indented in source + expect(verifyCitations([multi], reader)[0].ok).toBe(true); + }); + + it('requires the fragment lines to be contiguous and in order', () => { + const multi: Citation = { ...cit, quotedFragment: 'const a = 1;\nconst b = 2;' }; + const reader: FileReader = () => 'const a = 1;\nsomething else;\nconst b = 2;'; + expect(verifyCitations([multi], reader)[0].ok).toBe(false); + }); +}); + +describe('parseGateVerdict with citation verification', () => { + const out = withEvidence('GATE_VERDICT: APPROVE'); + const fragment = 'const { userId } = await 
oktaClient.users.getByEmail(claim.email);'; + const present: FileReader = (f) => (f === 'src/auth/middleware.ts' ? `foo\n${fragment}\nbar` : null); + const absent: FileReader = () => 'unrelated content with no such line'; + const missing: FileReader = () => null; + + it('keeps APPROVE when the citation verifies against the file', () => { + expect(parseGateVerdict('pr-reviewer', out, { readFile: present }).verdict).toBe('APPROVE'); + }); + + it('downgrades APPROVE to REJECT when the cited fragment is absent', () => { + const v = parseGateVerdict('pr-reviewer', out, { readFile: absent }); + expect(v.verdict).toBe('REJECT'); + expect(v.feedback).toContain('verbatim'); + expect(v.feedback).toContain('src/auth/middleware.ts'); + }); + + it('does NOT block APPROVE when the cited file is unreadable (path-convention/infra safe)', () => { + // A 404 or transient read failure must not be mistaken for fabrication — + // only a fragment absent from a file we DID read blocks. See gate.ts. + expect(parseGateVerdict('pr-reviewer', out, { readFile: missing }).verdict).toBe('APPROVE'); + }); + + it('is unchanged (no verification) when no readFile is supplied', () => { + // Backward-compat: the default call path keeps presence-only behavior. 
+ expect(parseGateVerdict('pr-reviewer', out).verdict).toBe('APPROVE'); + }); + + it('does not verify citations on a REJECT verdict', () => { + const rej = 'GATE_VERDICT: REJECT\nGATE_FEEDBACK: secrets in plaintext'; + expect(parseGateVerdict('qa-security', rej, { readFile: absent }).verdict).toBe('REJECT'); + }); +}); diff --git a/__tests__/git.test.ts b/__tests__/git.test.ts index d5c2736..8de57aa 100644 --- a/__tests__/git.test.ts +++ b/__tests__/git.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; -import { parseGitHubUrl, slugForBranch, createBranchIfMissing } from '../src/git.js'; +import { parseGitHubUrl, slugForBranch, createBranchIfMissing, fetchRepoFile } from '../src/git.js'; describe('parseGitHubUrl', () => { it('parses canonical https URL', () => { @@ -129,3 +129,62 @@ describe('createBranchIfMissing', () => { expect(fetchMock.mock.calls[0][1].headers.Authorization).toBe('Bearer my-token'); }); }); + +describe('fetchRepoFile', () => { + let fetchMock: ReturnType; + + beforeEach(() => { + fetchMock = vi.fn(); + global.fetch = fetchMock as unknown as typeof fetch; + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('decodes base64 file content on 200', async () => { + const body = 'const x = 1;\nconst y = 2;\n'; + fetchMock.mockResolvedValueOnce( + new Response( + JSON.stringify({ type: 'file', encoding: 'base64', content: Buffer.from(body).toString('base64') }), + { status: 200 }, + ), + ); + expect(await fetchRepoFile('tok', 'nanohype', 'protohype', 'src/x.ts', 'feat/almanac')).toBe(body); + }); + + it('returns null on 404 (file does not exist — a clean signal, not an error)', async () => { + fetchMock.mockResolvedValueOnce(new Response('not found', { status: 404 })); + expect(await fetchRepoFile('tok', 'o', 'r', 'missing.ts', 'feat/x')).toBeNull(); + }); + + it('throws on a non-404 error (auth / rate-limit)', async () => { + fetchMock.mockResolvedValueOnce(new Response('forbidden', { 
status: 403 })); + await expect(fetchRepoFile('tok', 'o', 'r', 'a.ts', 'feat/x')).rejects.toThrow(/GET contents a.ts failed \(403\)/); + }); + + it('throws on unsupported encoding (>1MB file returns encoding "none")', async () => { + fetchMock.mockResolvedValueOnce( + new Response(JSON.stringify({ type: 'file', encoding: 'none', content: '' }), { status: 200 }), + ); + await expect(fetchRepoFile('tok', 'o', 'r', 'big.bin', 'feat/x')).rejects.toThrow(/unsupported encoding/); + }); + + it('returns null when the path is a directory (array response, no type:"file")', async () => { + fetchMock.mockResolvedValueOnce(new Response(JSON.stringify([{ type: 'file', name: 'a.ts' }]), { status: 200 })); + expect(await fetchRepoFile('tok', 'o', 'r', 'src', 'feat/x')).toBeNull(); + }); + + it('url-encodes path segments + ref and sends the auth header', async () => { + fetchMock.mockResolvedValueOnce( + new Response(JSON.stringify({ type: 'file', encoding: 'base64', content: Buffer.from('x').toString('base64') }), { + status: 200, + }), + ); + await fetchRepoFile('my-token', 'nanohype', 'protohype', 'src/a b.ts', 'feat/almanac'); + const [url, init] = fetchMock.mock.calls[0]; + expect(url).toContain('/repos/nanohype/protohype/contents/src/a%20b.ts'); + expect(url).toContain('?ref=feat%2Falmanac'); + expect(init.headers.Authorization).toBe('Bearer my-token'); + }); +}); diff --git a/src/gate.ts b/src/gate.ts index a099844..36c7eee 100644 --- a/src/gate.ts +++ b/src/gate.ts @@ -66,7 +66,7 @@ function hasEvidenceBlock(output: string, header: 'TRANSCRIPTS' | 'CITATIONS'): * auto-downgrade to REJECT (EVIDENCE_CONTRACT enforcement at the pipeline layer). * - REJECT may ship without TRANSCRIPTS/CITATIONS — the point there is to fail fast. 
*/ -export function parseGateVerdict(role: TeamRole, output: string): GateVerdict { +export function parseGateVerdict(role: TeamRole, output: string, opts?: { readFile?: FileReader }): GateVerdict { const verdictMatch = output.match(VERDICT_RE); const feedbackMatch = output.match(FEEDBACK_RE); const feedback = feedbackMatch ? feedbackMatch[1].trim() : ''; @@ -104,11 +104,215 @@ export function parseGateVerdict(role: TeamRole, output: string): GateVerdict { grades, }; } + + // Verbatim citation verification — only when the caller can read the + // cited files (local cwd in sdk/claude-cli, or a GitHub-backed reader). + // Without a reader this is a no-op and behavior is unchanged. + // + // Conservative blocking policy: REJECT only on `fragment-not-found` — the + // file WAS read and the cited code is not in it, which is unambiguous + // fabrication (EVIDENCE_CONTRACT's "fragment that appears nowhere"). A + // `file-unreadable` result (e.g. a 404 from a path-convention mismatch or + // a transient fetch failure) and a `malformed`/unparseable citation are + // left non-blocking, so verification can only ever strengthen the gate — + // it never turns an infra hiccup or parser gap into a false REJECT. 
+ if (opts?.readFile) { + const fabricated = verifyCitations(parseCitations(output), opts.readFile).filter( + (c) => c.status === 'fragment-not-found', + ); + if (fabricated.length > 0) { + const detail = fabricated.map((f) => ` - ${f.citation.file}: ${f.reason}`).join('\n'); + return { + role, + verdict: 'REJECT', + feedback: `${role} emitted ${verdict} but citation verification failed per EVIDENCE_CONTRACT — every quoted_fragment must appear verbatim at the cited file:\n${detail}\nOriginal feedback: ${feedback || '(none)'}`, + grades, + }; + } + } } return { role, verdict, feedback, grades }; } +// ── Citation verification (EVIDENCE_CONTRACT) ────────────────────── +// +// A CITATIONS block carries {claim, file, line_range, quoted_fragment} +// tuples; the contract says each quoted_fragment must appear verbatim at the +// cited file. parseGateVerdict checks block *presence* unconditionally; given +// a FileReader it also confirms each fragment exists in the file and +// downgrades to REJECT on a fabricated citation (the "fragment that appears +// nowhere in the codebase" anti-pattern). The reader is injected so this +// module stays pure and transport-agnostic — the caller supplies local-fs or +// GitHub-backed access. Without a reader, verification is skipped. + +export interface Citation { + claim: string; + file: string; + lineRange: string; + quotedFragment: string; +} + +/** Returns the cited file's full text, or null when it cannot be read. 
/**
 * Reads a cited file. Returns the file's full text, or null when it cannot
 * be read. A reader may also throw; verifyCitations treats that the same as
 * an unreadable file.
 */
export type FileReader = (file: string) => string | null;

/** Outcome of checking one citation. */
export type CitationStatus = 'ok' | 'fragment-not-found' | 'file-unreadable' | 'malformed';

/** Result of verifying a single citation against the file it names. */
export interface CitationCheck {
  citation: Citation;
  ok: boolean; // true only when the fragment was found verbatim
  status: CitationStatus;
  reason?: string;
}

/**
 * True when the value following `quoted_fragment:` announces a block scalar
 * (or is empty) rather than carrying an inline value. Accepts `|` or `>` with
 * an optional chomping indicator (`+`/`-`) and/or indentation digit in either
 * order, so `|+`, `>-` and `|2-` are not mistaken for the fragment itself.
 */
function isBlockScalar(value: string): boolean {
  return /^(?:[|>](?:[+-][1-9]?|[1-9][+-]?)?)?$/.test(value.trim());
}

/**
 * Drop leading/trailing blank lines, then remove the smallest indentation
 * shared by the remaining non-blank lines and join with `\n`.
 */
function dedent(lines: string[]): string {
  const out = [...lines];
  while (out.length && out[0].trim() === '') out.shift();
  while (out.length && out[out.length - 1].trim() === '') out.pop();
  const indents = out.filter((l) => l.trim() !== '').map((l) => l.length - l.trimStart().length);
  const min = indents.length ? Math.min(...indents) : 0;
  return out.map((l) => l.slice(min)).join('\n');
}

/**
 * Remove one pair of wrapping quotes from a scalar. Only strips when the
 * value starts and ends with the SAME quote character and that character
 * does not occur in between — so code such as `import x from 'y'` or
 * `'a' + 'b'` keeps its quotes instead of being corrupted into text that can
 * no longer match the cited file.
 */
function stripQuotes(raw: string): string {
  const t = raw.trim();
  if (t.length < 2) return t;
  const q = t[0];
  if ((q !== '"' && q !== "'") || t[t.length - 1] !== q) return t;
  const inner = t.slice(1, -1);
  return inner.includes(q) ? t : inner;
}

/**
 * Parse the CITATIONS block into structured tuples. Hand-rolled (no YAML
 * dependency) and tolerant: it reads the `- claim/file/line_range` scalars
 * and the `quoted_fragment:` value (inline or block scalar), dedenting a
 * block-scalar body. The block ends at the next GATE_*, TRANSCRIPTS,
 * CITATIONS or QUALITY_GRADES header. Entries without a `file` are dropped.
 *
 * @param output Raw gate-role output; CRLF / CR line endings are accepted.
 * @returns The parsed citations, or [] when there is no CITATIONS header or
 *   no parseable entry.
 */
export function parseCitations(output: string): Citation[] {
  // Normalize line endings first: `.` never matches `\r`, so CRLF text would
  // otherwise defeat both the header regex and the per-line key regex.
  const text = output.replace(/\r\n?/g, '\n');
  const headerMatch = text.match(/^[ \t]*CITATIONS:[ \t]*$/im);
  if (!headerMatch || headerMatch.index === undefined) return [];
  const rest = text.slice(headerMatch.index + headerMatch[0].length);
  const nextHeader = rest.match(/^\s*(?:GATE_[A-Z]+|TRANSCRIPTS|CITATIONS|QUALITY_GRADES):/m);
  const block = nextHeader && nextHeader.index !== undefined ? rest.slice(0, nextHeader.index) : rest;

  const indentOf = (s: string) => s.length - s.trimStart().length;

  const citations: Citation[] = [];
  let cur: Citation | null = null;
  let collecting = false; // inside a block-scalar fragment body
  let keyIndent = 0; // column of the `quoted_fragment` key that opened the body
  let fragment: string[] = [];

  const flush = () => {
    if (cur && cur.file) {
      cur.quotedFragment = dedent(fragment);
      citations.push(cur);
    }
    cur = null;
    collecting = false;
    fragment = [];
  };

  for (const line of block.split('\n')) {
    if (collecting) {
      // Body lines are blank or indented deeper than the key that opened them.
      if (line.trim() === '' || indentOf(line) > keyIndent) {
        fragment.push(line);
        continue;
      }
      collecting = false; // dedented — reparse this line as a key/entry
    }

    const m = line.match(/^(\s*)(-\s+)?(claim|file|line_range|quoted_fragment):\s*(.*)$/);
    if (!m) continue;
    const [, indent, dash, key, valueRaw] = m;

    // A leading `- ` starts a new entry.
    if (dash !== undefined) {
      flush();
      cur = { claim: '', file: '', lineRange: '', quotedFragment: '' };
    }
    if (!cur) continue;

    if (key === 'claim') cur.claim = stripQuotes(valueRaw);
    else if (key === 'file') cur.file = stripQuotes(valueRaw);
    else if (key === 'line_range') cur.lineRange = stripQuotes(valueRaw);
    else if (key === 'quoted_fragment') {
      if (isBlockScalar(valueRaw)) {
        collecting = true;
        // Measure to the key itself, not the dash, so sibling keys of a
        // `- quoted_fragment: |` entry are not swallowed into the body.
        keyIndent = indent.length + (dash?.length ?? 0);
        fragment = [];
      } else {
        fragment = [stripQuotes(valueRaw)];
      }
    }
  }
  flush();
  return citations;
}

/** Split into lines, trim each one, and drop the lines that end up empty. */
function normalizeLines(text: string): string[] {
  return text
    .replace(/\r\n/g, '\n')
    .split('\n')
    .map((l) => l.trim())
    .filter((l) => l.length > 0);
}

/** True when `needle` occurs in `haystack` as a contiguous, in-order run. An empty needle never matches. */
function containsRun(haystack: string[], needle: string[]): boolean {
  if (needle.length === 0 || needle.length > haystack.length) return false;
  for (let i = 0; i + needle.length <= haystack.length; i++) {
    if (needle.every((l, j) => haystack[i + j] === l)) return true;
  }
  return false;
}

/**
 * Verify each citation's quoted_fragment appears verbatim in the cited file.
 * Comparison is line-based and whitespace-tolerant (each line trimmed, blank
 * lines dropped) so block-scalar dedenting and indentation differences don't
 * cause false negatives — the cited lines must be present, in order, as a
 * contiguous run of whole lines.
 *
 * @param citations Citations to check (typically from parseCitations).
 * @param readFile  Reader for cited files; null, undefined or a thrown error
 *   all yield `file-unreadable`.
 * @returns One CitationCheck per citation, in input order. `status` is
 *   `malformed` for a missing file path or empty fragment, `file-unreadable`
 *   when the file could not be read, `fragment-not-found` when the file was
 *   read but lacks the fragment, and `ok` otherwise.
 */
export function verifyCitations(citations: Citation[], readFile: FileReader): CitationCheck[] {
  return citations.map((citation): CitationCheck => {
    if (!citation.file) return { citation, ok: false, status: 'malformed', reason: 'citation has no file path' };
    if (!citation.quotedFragment.trim())
      return { citation, ok: false, status: 'malformed', reason: 'citation has no quoted_fragment' };
    let content: string | null;
    try {
      content = readFile(citation.file);
    } catch (err) {
      return {
        citation,
        ok: false,
        status: 'file-unreadable',
        reason: `could not read ${citation.file}: ${err instanceof Error ? err.message : String(err)}`,
      };
    }
    // `== null` also covers a loosely-typed reader that returns undefined.
    if (content == null)
      return { citation, ok: false, status: 'file-unreadable', reason: `cited file not found: ${citation.file}` };
    const needle = normalizeLines(citation.quotedFragment);
    if (needle.length === 0)
      return { citation, ok: false, status: 'malformed', reason: 'quoted_fragment is empty after normalization' };
    const ok = containsRun(normalizeLines(content), needle);
    return ok
      ? { citation, ok: true, status: 'ok' }
      : {
          citation,
          ok: false,
          status: 'fragment-not-found',
          reason: `quoted_fragment not found verbatim in ${citation.file}`,
        };
  });
}
/** The fields of a GitHub Contents API response that fetchRepoFile inspects. */
interface GitHubContentFile {
  type?: string;
  encoding?: string;
  content?: string;
}

/**
 * Fetch a single file's UTF-8 text from `owner/repo` at `ref` via the GitHub
 * Contents API. Used by the merge gate to verify CITATIONS fragments against
 * the feature branch when the work tree is not on local disk.
 *
 * `path` originates from agent output, so it is treated as untrusted: empty
 * and `.` segments are dropped, and a path that is empty or contains a `..`
 * segment is answered with null without any request — dot-segments survive
 * percent-encoding and would otherwise be collapsed by URL normalization,
 * steering the authenticated request to a different API endpoint.
 *
 * @param token GitHub token sent as a Bearer credential.
 * @param owner Repository owner.
 * @param repo  Repository name.
 * @param path  Repo-relative file path; each segment is percent-encoded.
 * @param ref   Branch, tag or commit to read from.
 * @returns The decoded file text, or null when the path is not a citable
 *   file: a 404, a non-`file` response (directory listing, symlink, …), or a
 *   rejected path as described above.
 * @throws Error on any other non-2xx status, on network failure or the 10s
 *   timeout (propagated from fetch), and when the response is not
 *   base64-encoded (e.g. the `encoding: "none"` answer for large files, which
 *   would otherwise decode to empty text and read as a false mismatch).
 */
export async function fetchRepoFile(
  token: string,
  owner: string,
  repo: string,
  path: string,
  ref: string,
): Promise<string | null> {
  const segments = path.split('/').filter((seg) => seg.length > 0 && seg !== '.');
  if (segments.length === 0 || segments.includes('..')) return null;
  const encodedPath = segments.map((seg) => encodeURIComponent(seg)).join('/');
  const res = await fetch(
    `${GITHUB_API}/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/contents/${encodedPath}?ref=${encodeURIComponent(ref)}`,
    {
      headers: {
        Authorization: `Bearer ${token}`,
        Accept: 'application/vnd.github+json',
        'X-GitHub-Api-Version': '2022-11-28',
      },
      signal: AbortSignal.timeout(10000),
    },
  );
  if (res.status === 404) return null;
  if (!res.ok) {
    const body = await res.text();
    throw new Error(`GET contents ${path} failed (${res.status}): ${body.slice(0, 200)}`);
  }
  const data = (await res.json()) as GitHubContentFile | null;
  // A directory comes back as an array (no `type`); a JSON null body has none either.
  if (data?.type !== 'file') return null; // a directory or symlink — not a citable file
  if (data.encoding !== 'base64' || typeof data.content !== 'string') {
    throw new Error(`contents ${path}: unsupported encoding "${data.encoding}" (file too large?)`);
  }
  return Buffer.from(data.content, 'base64').toString('utf-8');
}
fetchRepoFile } from './git.js'; import { estimateCost } from './pricing.js'; import { normalizeDelimiters, spotlight } from './guardrails.js'; @@ -693,6 +693,11 @@ export async function executeWorkflow( // sessions where the coordinator invents a repo, pushes to the wrong // place, or fabricates success — the cost of that failure mode is // much higher than the cost of a clear error here. + // + // The resolved repo + branch are captured in `citationSource` so the merge + // gate can read the feature branch and verify CITATIONS fragments (see + // buildCitationReader + runMergeGate). + let citationSource: CitationSource | null = null; if (workflow.gateProfile === 'code') { const intake = parseIntakeJson(userPrompt); const lang = intake?.constraints?.language; @@ -708,8 +713,8 @@ export async function executeWorkflow( await setSourceDirs(intakeDirs); if (intakeDirs.length) console.log(`${DIM}Source dirs: ${intakeDirs.join(', ')}${RESET}`); - const branchContext = await preCreateFeatureBranch(workflow, userPrompt); - if (!branchContext) { + const branchInfo = await preCreateFeatureBranch(workflow, userPrompt); + if (!branchInfo) { console.log( `${RED}${BOLD}Halted: code-producing workflow "${workflow.name}" requires a pre-created feature branch.${RESET}`, ); @@ -719,7 +724,8 @@ export async function executeWorkflow( console.log(`${DIM}If no primary repo is configured: fab repo add --token ${RESET}`); return; } - context = `${branchContext}\n\n${context}`; + context = `${branchInfo.context}\n\n${context}`; + citationSource = branchInfo.source; } for (const batch of batches) { @@ -790,7 +796,7 @@ ${step.instruction}`, // ── Merge Gate (workflow-level, runs after main loop) ───────── if (workflow.gateProfile) { - const gateResult = await runMergeGate(runtime, workflow.name, workflow.gateProfile, context); + const gateResult = await runMergeGate(runtime, workflow.name, workflow.gateProfile, context, citationSource); if (gateResult.decision === 'reject') { 
console.log(`${RED}${BOLD}Merge gate REJECTED: ${workflow.name}${RESET}`); if (gateResult.feedback) console.log(`${DIM}${gateResult.feedback}${RESET}`); @@ -855,7 +861,10 @@ function parseIntakeJson(userPrompt: string): { } } -async function preCreateFeatureBranch(workflow: Workflow, userPrompt: string): Promise { +async function preCreateFeatureBranch( + workflow: Workflow, + userPrompt: string, +): Promise<{ context: string; source: CitationSource } | null> { const intake = parseIntakeJson(userPrompt); if (!intake) { console.log(`${DIM}Branch hook: no JSON intake detected — skipping branch pre-creation.${RESET}`); @@ -895,7 +904,7 @@ async function preCreateFeatureBranch(workflow: Workflow, userPrompt: string): P } // Prepend a clear instruction block for every downstream delegation. - return [ + const context = [ `TARGET REPO: ${primary.owner}/${primary.repo}`, `BRANCH: ${branch} (already created — do NOT create, do NOT search, do NOT fork)`, `PROJECT SLUG: ${slug}`, @@ -903,6 +912,51 @@ async function preCreateFeatureBranch(workflow: Workflow, userPrompt: string): P `PR CREATION: release-manager opens the consolidated PR at workflow end — never open one yourself.`, `Workflow: ${workflow.name}`, ].join('\n'); + return { context, source: { token: primary.token, owner: primary.owner, repo: primary.repo, branch } }; +} + +interface CitationSource { + token: string; + owner: string; + repo: string; + branch: string; +} + +/** + * Build a synchronous FileReader for a verdict by prefetching every file it + * cites from the feature branch via the GitHub Contents API. This is what + * lets CITATIONS verification run in the default managed-agents transport, + * where the work tree lives in the cloud sandbox, not on fab's disk. + * + * Fail-open: if the prefetch hits an auth/network/rate-limit error (we cannot + * read the repo at all), returns undefined so parseGateVerdict skips + * verification rather than mistake an infra failure for fabrication. 
/**
 * Build a synchronous FileReader for one verdict by prefetching every file it
 * cites from the feature branch via fetchRepoFile, so citation verification
 * can run without a local work tree.
 *
 * Failures are isolated per file: a cited path whose fetch throws (auth,
 * rate-limit, network, unsupported encoding) is cached as null, exactly like
 * a clean 404, so the reader reports it as unreadable while the remaining
 * citations are still verified. This keeps one unreadable file from switching
 * verification off for the whole verdict.
 *
 * Fail-open: when there is nothing to fetch, or every fetch threw (the repo
 * cannot be read at all), returns undefined so the caller skips verification
 * instead of mistaking an infra failure for fabrication.
 *
 * @param source Repo coordinates, branch and token to read from.
 * @param output Raw gate-role output containing the CITATIONS block.
 * @returns A reader backed by the prefetched cache (null for any path that
 *   was not fetched or could not be read), or undefined to skip verification.
 */
async function buildCitationReader(source: CitationSource, output: string): Promise<FileReader | undefined> {
  const files = [
    ...new Set(
      parseCitations(output)
        .map((c) => c.file)
        .filter((f) => f.length > 0),
    ),
  ];
  if (files.length === 0) return undefined;

  const failures: string[] = [];
  // Each task handles its own error, so Promise.all itself never rejects.
  const entries = await Promise.all(
    files.map(async (file): Promise<[string, string | null]> => {
      try {
        return [file, await fetchRepoFile(source.token, source.owner, source.repo, file, source.branch)];
      } catch (err) {
        failures.push(err instanceof Error ? err.message : String(err));
        return [file, null];
      }
    }),
  );

  if (failures.length === files.length) {
    console.log(
      `${DIM}Citation verification skipped (could not read ${source.owner}/${source.repo}): ${failures[0]}${RESET}`,
    );
    return undefined;
  }
  if (failures.length > 0) {
    console.log(
      `${DIM}Citation verification: ${failures.length} of ${files.length} cited file(s) could not be read and are treated as unreadable: ${failures[0]}${RESET}`,
    );
  }

  const cache = new Map<string, string | null>(entries);
  return (file) => cache.get(file) ?? null;
}
End your response with the full block: GATE_VERDICT, GATE_FEEDBACK, TRANSCRIPTS, CITATIONS, QUALITY_GRADES — EVIDENCE_CONTRACT auto-downgrades APPROVE/REQUEST_CHANGES without transcripts + citations to REJECT.`, workflowName, ); - verdicts.push(parseGateVerdict(role, roleOutput)); + const readFile = citationSource ? await buildCitationReader(citationSource, roleOutput) : undefined; + verdicts.push(parseGateVerdict(role, roleOutput, readFile ? { readFile } : undefined)); } lastResult = mergeGateVerdicts(verdicts);