diff --git a/.github/workflows/commit-review.yml b/.github/workflows/commit-review.yml index 12b2edb43d..727f920114 100644 --- a/.github/workflows/commit-review.yml +++ b/.github/workflows/commit-review.yml @@ -52,6 +52,7 @@ jobs: name: Skip disabled target commit review if: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.target_repo == 'openclaw/clawhub' && vars.CLAWSWEEPER_ENABLE_CLAWHUB != '1' }} runs-on: ubuntu-latest + timeout-minutes: 5 steps: - name: Explain skipped commit review run: | diff --git a/.github/workflows/repair-comment-router.yml b/.github/workflows/repair-comment-router.yml index 965fbd9c29..33c6893c64 100644 --- a/.github/workflows/repair-comment-router.yml +++ b/.github/workflows/repair-comment-router.yml @@ -68,6 +68,10 @@ env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" CLAWSWEEPER_APP_CLIENT_ID: Iv23liOECG0slfuhz093 +concurrency: + group: repair-comment-router-${{ github.event.inputs.target_repo || github.event.client_payload.target_repo || 'openclaw/openclaw' }} + cancel-in-progress: false + jobs: route-comments: runs-on: ubuntu-latest diff --git a/.github/workflows/repair-commit-finding-intake.yml b/.github/workflows/repair-commit-finding-intake.yml index 658bc7f936..6da66733ec 100644 --- a/.github/workflows/repair-commit-finding-intake.yml +++ b/.github/workflows/repair-commit-finding-intake.yml @@ -74,6 +74,7 @@ jobs: name: Skip disabled target commit finding if: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.target_repo == 'openclaw/clawhub' && vars.CLAWSWEEPER_ENABLE_CLAWHUB != '1' }} runs-on: ubuntu-latest + timeout-minutes: 5 steps: - name: Explain skipped commit finding run: | diff --git a/.github/workflows/repair-finalize-open-prs.yml b/.github/workflows/repair-finalize-open-prs.yml index 5e84e3d80d..8e2e8b8ec3 100644 --- a/.github/workflows/repair-finalize-open-prs.yml +++ b/.github/workflows/repair-finalize-open-prs.yml @@ -33,6 +33,10 @@ env: 
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" CLAWSWEEPER_APP_CLIENT_ID: Iv23liOECG0slfuhz093 +concurrency: + group: clawsweeper-repair-finalize-open-prs + cancel-in-progress: false + jobs: finalize: runs-on: ${{ inputs.runner }} diff --git a/.github/workflows/repair-self-heal.yml b/.github/workflows/repair-self-heal.yml index 180861839f..73a759d7f4 100644 --- a/.github/workflows/repair-self-heal.yml +++ b/.github/workflows/repair-self-heal.yml @@ -39,6 +39,10 @@ env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" CLAWSWEEPER_APP_CLIENT_ID: Iv23liOECG0slfuhz093 +concurrency: + group: clawsweeper-repair-self-heal + cancel-in-progress: false + jobs: self-heal: runs-on: ${{ github.event_name == 'workflow_dispatch' && inputs.runner || vars.CLAWSWEEPER_WORKER_RUNNER || 'blacksmith-4vcpu-ubuntu-2404' }} diff --git a/.github/workflows/sweep.yml b/.github/workflows/sweep.yml index 843648ece0..027d054aed 100644 --- a/.github/workflows/sweep.yml +++ b/.github/workflows/sweep.yml @@ -141,6 +141,7 @@ jobs: name: Skip disabled target event if: ${{ github.event_name == 'repository_dispatch' && github.event.client_payload.target_repo == 'openclaw/clawhub' && vars.CLAWSWEEPER_ENABLE_CLAWHUB != '1' }} runs-on: ubuntu-latest + timeout-minutes: 5 steps: - name: Explain skipped event run: | diff --git a/.github/workflows/sync-from-upstream.yml b/.github/workflows/sync-from-upstream.yml new file mode 100644 index 0000000000..710189c460 --- /dev/null +++ b/.github/workflows/sync-from-upstream.yml @@ -0,0 +1,77 @@ +name: Sync fork from upstream + +on: + schedule: + - cron: "17 * * * *" + workflow_dispatch: + +permissions: + contents: write + +concurrency: + group: sync-from-upstream-${{ github.repository }} + cancel-in-progress: false + +jobs: + replay: + name: Replay fork patches on upstream main + if: github.repository == 'ds4psb-ai/clawsweeper' + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: 
./.github/actions/setup-pnpm + + - name: Replay fork-only commits on latest upstream/main + shell: bash + run: | + set -euo pipefail + git config user.name "ds4psb-ai fork sync" + git config user.email "ted.taeeun.kim@gmail.com" + + git remote add upstream https://github.com/openclaw/clawsweeper.git 2>/dev/null || true + git fetch --no-tags origin main + git fetch --no-tags upstream main + + old_fork="$(git rev-parse origin/main)" + upstream_head="$(git rev-parse upstream/main)" + mapfile -t patch_stack < <(git rev-list --reverse --no-merges "${upstream_head}..${old_fork}") + + echo "upstream/main: ${upstream_head}" + echo "fork main: ${old_fork}" + echo "fork-only commits to replay: ${#patch_stack[@]}" + + git checkout -B fork-sync "${upstream_head}" + for commit in "${patch_stack[@]}"; do + subject="$(git log -1 --format=%s "${commit}")" + echo "::group::replay ${commit} ${subject}" + if git cherry-pick -x "${commit}"; then + echo "replayed ${commit}" + else + if git diff --quiet && git diff --cached --quiet; then + echo "${commit} is already represented on upstream/main; skipping empty cherry-pick" + git cherry-pick --skip + else + echo "Conflict while replaying ${commit}: ${subject}" >&2 + git status --short >&2 + exit 1 + fi + fi + echo "::endgroup::" + done + + - name: Verify canonical fork state + run: pnpm run check + + - name: Force-update fork main after successful verification + shell: bash + run: | + set -euo pipefail + if [ "$(git rev-parse HEAD^{tree})" = "$(git rev-parse origin/main^{tree})" ]; then + echo "Fork main already matches the verified replay tree." + exit 0 + fi + git push --force-with-lease=main:$(git rev-parse origin/main) origin HEAD:main diff --git a/AGENTS.md b/AGENTS.md index 6b460b34b2..909279297e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -67,6 +67,11 @@ pnpm run check Use `pnpm run check` before handoff for code/test/workflow changes. +`engines.node` is `>=24`. 
Node 22 will install (no `engine-strict`) but the +notifier tests' 5-second retry paths surface as `cancelledByParent` under the +old `node:test` runner. Run on Node 24 or newer before reporting test +failures. + ## GitHub Checks Useful live probes: diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e53136585..a7f728f59b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ checkpoint, and status-only commits are intentionally omitted. ### Added +- Added the proposal-only Policy RFC Engine for mining durable ClawSweeper + records into maintainer-reviewable draft policy proposals. Thanks @Freeak88 + (#39). +- Added a fork-only `sync-from-upstream` verifier that replays the launch-ready + fork patch stack on the latest upstream `main`, runs `pnpm run check`, and + updates `ds4psb-ai/clawsweeper@main` only after the replayed tree is green. - Added a light privacy reminder and stronger screenshot-or-video nudge to real behavior proof review guidance. - Added agent-led real behavior proof judgement so ClawSweeper can inspect linked screenshots, videos, logs, and terminal output with a read-only GitHub token, explain the proof verdict in the review comment, tell contributors how to trigger a fresh review after adding proof, and sync `proof: sufficient` when the evidence is convincing. - Added a real behavior proof assessment to PR reviews so missing, mock-only, or insufficient contributor proof blocks pass/automerge markers and asks for screenshots, terminal output, redacted logs, recordings, linked artifacts, or copied live output instead. @@ -24,6 +30,23 @@ checkpoint, and status-only commits are intentionally omitted. ### Fixed +- Guarded likely-owner commit links so public review comments no longer render + PR numbers, PR URLs, or other non-SHA provenance as broken `/commit/...` + links. Thanks @samzong (#52). 
+- Kept Policy RFC output deterministic by deriving default timestamps from + evidence instead of wall-clock time and preserving item numbers for archived + `closed/` records. Thanks @Freeak88 (#39). +- Preserved Policy RFC recency scoring by sharing one deterministic evidence + reference date across scored patterns instead of resetting each pattern's age + to zero. Thanks @Freeak88 (#39). +- Cleared stale generated Policy RFC proposal files before each run writes the + current proposal set, while preserving non-generated operator notes. Thanks + @Freeak88 (#39). +- Documented that Node 22 notifier-test `cancelledByParent` failures are + environmental because the repository requires Node 24+. Thanks @ds4psb-ai + (#54). +- Hardened CI by adding explicit timeouts to disabled-target echo jobs and + concurrency groups to write-side repair workflows. Thanks @ds4psb-ai (#55). - Gave manual exact-item review dispatches their own concurrency group so targeted maintainer reviews no longer wait behind broad normal backfill runs. - Downgraded screenshot-only browser runtime proof so ClawSweeper no longer accepts "no visible console/CSP violation" screenshots as sufficient real behavior proof. Thanks @BunsDev. @@ -52,6 +75,8 @@ checkpoint, and status-only commits are intentionally omitted. immediate duplicate capacity probe in the dispatch loop. - Cached comment-router open-label issue lookups per run so repair-loop comment discovery and command synthesis do not repeat identical GitHub searches. +- Cached comment-router issue comment lookups per run so targeted command routing + and replay/status checks do not repeat identical comment pagination. - Retried Codex edit workers after TPM/rate-limit exits and collapsed JSONL failure transcripts into concise repair status reasons. - Added deterministic merged closing-PR provenance to issue close reports and public close comments when GitHub exposes a high-confidence closing PR. 
@@ -120,6 +145,14 @@ checkpoint, and status-only commits are intentionally omitted. tokens exist, keeping untargeted fix prompts cheaper to build. - Requested 100-item REST pages for paginated GitHub list calls, reducing review and repair API page fan-out on large issues and pull requests. +- Bounded repair cluster PR file and commit hydration to the context carried + into generated plans, avoiding full pagination for very large pull requests. +- Validated repair cluster hydration limit environment overrides so malformed + values fall back to safe defaults and multi-page bounded hydration stays + covered. Thanks @stainlu (#49). +- Clamped review-context tail hydration to GitHub's retrievable PR endpoint + windows so very large PRs keep the newest available file and commit context + instead of requesting pages beyond the API caps. Thanks @stainlu (#53). - Compacted review prompt context lazily so large comment, timeline, file, and commit lists no longer process entries that are omitted from Codex input. - Scoped every sweep workflow status write to the active target repository so diff --git a/docs/policy-rfc-engine.md b/docs/policy-rfc-engine.md new file mode 100644 index 0000000000..8da1bbd04d --- /dev/null +++ b/docs/policy-rfc-engine.md @@ -0,0 +1,67 @@ +# Policy RFC Engine + +The Policy RFC Engine turns repeated ClawSweeper review and repair patterns into structured, reviewable policy proposals. It is an additive, manual tool: it reads durable local records and writes generated documentation/state under `results/policy-rfc/`. It does not mutate GitHub, dispatch repairs, close issues, change labels, or alter scheduler behavior. + +## Usage + +Build the project, then run: + +```sh +pnpm run policy-rfc -- --target-repo openclaw/openclaw --min-occurrences 5 +``` + +Useful options: + +- `--target-repo`: repository profile to scan, such as `openclaw/openclaw`. +- `--records-root`: local durable record root. Defaults to `records`. 
+- `--output-root`: generated proposal root. Defaults to `results/policy-rfc`. +- `--min-occurrences`: minimum repeated observations before an RFC is emitted. Defaults to `5`. + +## What It Reads + +The collector scans existing markdown and JSON records below `records/<repo-slug>/`. It tolerates missing directories, unreadable files, older markdown shapes, and malformed partial records by skipping what it cannot safely parse. + +The first version extracts repeated examples of: + +- file conflict types +- labels +- repair markers +- review verdict markers +- safe-close reasons +- automerge repair causes + +## What It Writes + +For each eligible pattern, the engine writes: + +- `results/policy-rfc/<repo-slug>/<rfc-id>.md` +- `results/policy-rfc/<repo-slug>/<rfc-id>.json` + +Markdown RFCs contain: + +- Title +- Status: Draft +- Summary +- Observed Pattern +- Evidence +- Proposed Policy +- Safety Constraints +- Non-Goals +- Rollout Plan +- Metrics +- Reversion Plan + +JSON proposals include the stable machine-readable fields needed for review automation or later dashboards: `id`, `title`, `status`, `pattern_type`, `evidence_items`, `confidence_score`, `proposed_conditions`, `proposed_action`, `safety_constraints`, `created_at`, and `source_records`. + +## Proposal-Only Boundary + +The engine intentionally stops at documentation/state. A generated RFC is evidence that a pattern may deserve a formal policy; it is not an executable rule. Any accepted proposal must be implemented separately, reviewed as normal code, and routed through ClawSweeper's existing conservative apply paths. 
+ +This keeps the feature out of hot scheduler paths: + +- no GitHub mutation +- no automatic policy execution +- no changes to close/apply/automerge logic +- no extra review shard work +- no live GitHub scans in the scheduler critical path + diff --git a/package.json b/package.json index 2ea2f65bad..9f06e9646c 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "audit": "node dist/clawsweeper.js audit", "reconcile": "node dist/clawsweeper.js reconcile", "status": "node dist/clawsweeper.js status", + "policy-rfc": "node dist/policy-rfc/index.js", "commit-review": "node dist/commit-sweeper.js", "commit-reports": "node dist/commit-sweeper.js reports", "repair:validate": "node dist/repair/validate-all.js", diff --git a/src/clawsweeper.ts b/src/clawsweeper.ts index a3f3d61c51..0d1350dab5 100644 --- a/src/clawsweeper.ts +++ b/src/clawsweeper.ts @@ -306,12 +306,20 @@ interface ItemContext { pullReviewComments?: unknown[]; counts?: { comments: number; + commentsHydrated?: number; + commentsTruncated?: boolean; timeline: number; closingPullRequests?: number; relatedItems?: number; pullFiles?: number; + pullFilesHydrated?: number; + pullFilesTruncated?: boolean; pullCommits?: number; + pullCommitsHydrated?: number; + pullCommitsTruncated?: boolean; pullReviewComments?: number; + pullReviewCommentsHydrated?: number; + pullReviewCommentsTruncated?: boolean; }; } @@ -1511,15 +1519,38 @@ export function compactMappedSlice( items: readonly T[], limit: number, mapper: (item: T) => unknown, +): unknown[] { + return compactMappedWindow(items, items.length, limit, mapper); +} + +export function compactMappedWindow( + items: readonly T[], + total: number, + limit: number, + mapper: (item: T) => unknown, ): unknown[] { const boundedLimit = Math.max(0, Math.floor(limit)); - if (items.length <= boundedLimit) return items.map(mapper); + const boundedTotal = Math.max(0, Math.floor(total)); + if (boundedTotal <= boundedLimit && items.length <= boundedLimit) return 
items.map(mapper); + if (boundedLimit === 0) { + return boundedTotal > 0 + ? [{ omitted: boundedTotal, note: "middle entries omitted from prompt context" }] + : []; + } const keepStart = Math.floor(boundedLimit / 2); const keepEnd = Math.max(0, boundedLimit - keepStart); + const retained = + items.length > boundedLimit && boundedTotal === items.length + ? items + : items.slice(0, boundedLimit); + const retainedStart = retained.slice(0, keepStart); + const retainedEnd = + keepEnd > 0 ? retained.slice(Math.max(keepStart, retained.length - keepEnd)) : []; + const omitted = Math.max(0, boundedTotal - retainedStart.length - retainedEnd.length); return [ - ...items.slice(0, keepStart).map(mapper), - { omitted: items.length - boundedLimit, note: "middle entries omitted from prompt context" }, - ...(keepEnd > 0 ? items.slice(items.length - keepEnd).map(mapper) : []), + ...retainedStart.map(mapper), + ...(omitted > 0 ? [{ omitted, note: "middle entries omitted from prompt context" }] : []), + ...retainedEnd.map(mapper), ]; } @@ -2045,12 +2076,137 @@ export function githubPaginatedPath(path: string): string { return serialized ? `${base}?${serialized}` : base; } +function githubPagePath(path: string, page: number, perPage = 100): string { + const [basePart, query = ""] = path.split("?", 2); + const base = basePart ?? path; + const params = new URLSearchParams(query); + params.set("per_page", String(Math.max(1, Math.floor(perPage)))); + params.set("page", String(Math.max(1, Math.floor(page)))); + const serialized = params.toString(); + return serialized ? `${base}?${serialized}` : base; +} + function ghPaged(path: string): T[] { const pages = ghJson(["api", githubPaginatedPath(path), "--paginate", "--slurp"]); if (!Array.isArray(pages)) return []; return pages.flatMap((page) => (Array.isArray(page) ? 
(page as T[]) : [])); } +interface ContextHydration { + items: T[]; + total: number; + hydrated: number; + truncated: boolean; +} + +interface GithubContextWindowFetchers { + page?: (path: string, page: number) => T[]; + paged?: (path: string) => T[]; +} + +function ghPage(path: string, page: number): T[] { + const items = ghJson(["api", githubPagePath(path, page)]); + return Array.isArray(items) ? (items as T[]) : []; +} + +function githubCount(value: unknown): number | null { + const count = + typeof value === "number" ? value : typeof value === "string" ? Number(value) : Number.NaN; + if (!Number.isFinite(count) || count < 0) return null; + return Math.floor(count); +} + +function githubEndpointResultCap(path: string): number | null { + const [basePath = path] = path.split("?", 1); + if (/^\/?repos\/[^/]+\/[^/]+\/pulls\/\d+\/files$/.test(basePath)) return 3000; + if (/^\/?repos\/[^/]+\/[^/]+\/pulls\/\d+\/commits$/.test(basePath)) return 250; + return null; +} + +function githubRetrievableTotal(path: string, total: number): number { + const cap = githubEndpointResultCap(path); + return cap === null ? 
total : Math.min(total, cap); +} + +interface GithubContextWindowPlan { + keepStart: number; + keepEnd: number; + tailFirstPageNumber: number; + lastPageNumber: number; + tailOffset: number; +} + +export function githubContextWindowPlan( + total: number, + promptLimit: number, + perPage = 100, +): GithubContextWindowPlan { + const boundedTotal = Math.max(0, Math.floor(total)); + const boundedLimit = Math.max(0, Math.floor(promptLimit)); + const boundedPerPage = Math.max(1, Math.floor(perPage)); + const keepStart = Math.floor(boundedLimit / 2); + const keepEnd = Math.max(0, boundedLimit - keepStart); + const tailStartIndex = Math.max(0, boundedTotal - keepEnd); + const tailFirstPageNumber = Math.floor(tailStartIndex / boundedPerPage) + 1; + return { + keepStart, + keepEnd, + tailFirstPageNumber, + lastPageNumber: Math.max(1, Math.ceil(boundedTotal / boundedPerPage)), + tailOffset: tailStartIndex - (tailFirstPageNumber - 1) * boundedPerPage, + }; +} + +export function ghPagedContextWindow( + path: string, + totalCount: unknown, + promptLimit: number, + fetchers: GithubContextWindowFetchers = {}, +): ContextHydration { + const fetchPage = fetchers.page ?? ghPage; + const fetchPaged = fetchers.paged ?? ghPaged; + const total = githubCount(totalCount); + const boundedLimit = Math.max(0, Math.floor(promptLimit)); + if (total === null) { + const items = fetchPaged(path); + return { items, total: items.length, hydrated: items.length, truncated: false }; + } + const retrievableTotal = githubRetrievableTotal(path, total); + if (retrievableTotal === 0 || boundedLimit === 0) { + return { items: [], total, hydrated: 0, truncated: total > 0 }; + } + if (retrievableTotal <= boundedLimit) { + const items = + retrievableTotal <= 100 + ? 
fetchPage(path, 1).slice(0, retrievableTotal) + : fetchPaged(path).slice(0, retrievableTotal); + return { + items, + total: Math.max(total, items.length), + hydrated: items.length, + truncated: total > items.length, + }; + } + + const plan = githubContextWindowPlan(retrievableTotal, boundedLimit); + const firstPage = plan.keepStart > 0 ? fetchPage(path, 1) : []; + const headItems = firstPage.slice(0, plan.keepStart); + const tailPages: T[] = []; + if (plan.keepEnd > 0) { + for (let page = plan.tailFirstPageNumber; page <= plan.lastPageNumber; page += 1) { + tailPages.push(...(page === 1 && plan.keepStart > 0 ? firstPage : fetchPage(path, page))); + } + } + const tailItems = tailPages.slice(plan.tailOffset, plan.tailOffset + plan.keepEnd); + const items = [...headItems, ...tailItems]; + return { + items, + total, + hydrated: items.length, + truncated: total > items.length, + }; +} + function ensureDir(path: string): void { mkdirSync(path, { recursive: true }); } @@ -3161,14 +3317,22 @@ function planCandidates(options: { function collectItemContext(item: Item): ItemContext { const issue = ghJson(["api", `repos/${targetRepo()}/issues/${item.number}`]); - const comments = ghPaged(`repos/${targetRepo()}/issues/${item.number}/comments`); + const issueRecord = asRecord(issue); + const commentsWindow = ghPagedContextWindow( + `repos/${targetRepo()}/issues/${item.number}/comments`, + issueRecord.comments, + 24, + ); + const comments = commentsWindow.items; const timeline = ghPaged(`repos/${targetRepo()}/issues/${item.number}/timeline`); const context: ItemContext = { issue: compactIssue(issue), - comments: compactMappedSlice(comments, 24, compactComment), + comments: compactMappedWindow(comments, commentsWindow.total, 24, compactComment), timeline: compactMappedSlice(timeline, 80, compactTimelineEvent), counts: { - comments: comments.length, + comments: commentsWindow.total, + commentsHydrated: commentsWindow.hydrated, + commentsTruncated: commentsWindow.truncated, 
timeline: timeline.length, }, }; @@ -3180,7 +3344,9 @@ function collectItemContext(item: Item): ItemContext { context.closingPullRequests = compactMappedSlice(closingPullRequests, 12, compactPullRequest); context.counts = { ...context.counts, - comments: comments.length, + comments: commentsWindow.total, + commentsHydrated: commentsWindow.hydrated, + commentsTruncated: commentsWindow.truncated, timeline: timeline.length, closingPullRequests: closingPullRequests.length, }; @@ -3188,20 +3354,54 @@ function collectItemContext(item: Item): ItemContext { } if (item.kind === "pull_request") { pullRequest = ghJson(["api", `repos/${targetRepo()}/pulls/${item.number}`]); - const pullFiles = ghPaged(`repos/${targetRepo()}/pulls/${item.number}/files`); - const pullCommits = ghPaged(`repos/${targetRepo()}/pulls/${item.number}/commits`); - pullReviewComments = ghPaged(`repos/${targetRepo()}/pulls/${item.number}/comments`); + const pullRecord = asRecord(pullRequest); + const pullFilesWindow = ghPagedContextWindow( + `repos/${targetRepo()}/pulls/${item.number}/files`, + pullRecord.changed_files, + 80, + ); + const pullFiles = pullFilesWindow.items; + const pullCommitsWindow = ghPagedContextWindow( + `repos/${targetRepo()}/pulls/${item.number}/commits`, + pullRecord.commits, + 80, + ); + const pullCommits = pullCommitsWindow.items; + const pullReviewCommentsWindow = ghPagedContextWindow( + `repos/${targetRepo()}/pulls/${item.number}/comments`, + pullRecord.review_comments, + 40, + ); + pullReviewComments = pullReviewCommentsWindow.items; context.pullRequest = compactPullRequest(pullRequest); - context.pullFiles = compactMappedSlice(pullFiles, 80, compactPullFile); - context.pullCommits = compactMappedSlice(pullCommits, 80, compactPullCommit); - context.pullReviewComments = compactMappedSlice(pullReviewComments, 40, compactComment); + context.pullFiles = compactMappedWindow(pullFiles, pullFilesWindow.total, 80, compactPullFile); + context.pullCommits = compactMappedWindow( + 
pullCommits, + pullCommitsWindow.total, + 80, + compactPullCommit, + ); + context.pullReviewComments = compactMappedWindow( + pullReviewComments, + pullReviewCommentsWindow.total, + 40, + compactComment, + ); context.counts = { ...context.counts, - comments: comments.length, + comments: commentsWindow.total, + commentsHydrated: commentsWindow.hydrated, + commentsTruncated: commentsWindow.truncated, timeline: timeline.length, - pullFiles: pullFiles.length, - pullCommits: pullCommits.length, - pullReviewComments: pullReviewComments.length, + pullFiles: pullFilesWindow.total, + pullFilesHydrated: pullFilesWindow.hydrated, + pullFilesTruncated: pullFilesWindow.truncated, + pullCommits: pullCommitsWindow.total, + pullCommitsHydrated: pullCommitsWindow.hydrated, + pullCommitsTruncated: pullCommitsWindow.truncated, + pullReviewComments: pullReviewCommentsWindow.total, + pullReviewCommentsHydrated: pullReviewCommentsWindow.hydrated, + pullReviewCommentsTruncated: pullReviewCommentsWindow.truncated, }; } const relatedOptions: Parameters[0] = { @@ -3216,14 +3416,28 @@ function collectItemContext(item: Item): ItemContext { if (relatedItems.length) { context.relatedItems = relatedItems; const counts: NonNullable = { - comments: context.counts?.comments ?? comments.length, + comments: context.counts?.comments ?? commentsWindow.total, + commentsHydrated: context.counts?.commentsHydrated ?? commentsWindow.hydrated, + commentsTruncated: context.counts?.commentsTruncated ?? commentsWindow.truncated, timeline: context.counts?.timeline ?? 
timeline.length, relatedItems: relatedItems.length, }; if (context.counts?.pullFiles !== undefined) counts.pullFiles = context.counts.pullFiles; + if (context.counts?.pullFilesHydrated !== undefined) + counts.pullFilesHydrated = context.counts.pullFilesHydrated; + if (context.counts?.pullFilesTruncated !== undefined) + counts.pullFilesTruncated = context.counts.pullFilesTruncated; if (context.counts?.pullCommits !== undefined) counts.pullCommits = context.counts.pullCommits; + if (context.counts?.pullCommitsHydrated !== undefined) + counts.pullCommitsHydrated = context.counts.pullCommitsHydrated; + if (context.counts?.pullCommitsTruncated !== undefined) + counts.pullCommitsTruncated = context.counts.pullCommitsTruncated; if (context.counts?.pullReviewComments !== undefined) counts.pullReviewComments = context.counts.pullReviewComments; + if (context.counts?.pullReviewCommentsHydrated !== undefined) + counts.pullReviewCommentsHydrated = context.counts.pullReviewCommentsHydrated; + if (context.counts?.pullReviewCommentsTruncated !== undefined) + counts.pullReviewCommentsTruncated = context.counts.pullReviewCommentsTruncated; if (context.counts?.closingPullRequests !== undefined) counts.closingPullRequests = context.counts.closingPullRequests; context.counts = counts; @@ -3606,6 +3820,10 @@ function shortSha(sha: string): string { return sha.slice(0, 12); } +function isCommitSha(value: string): boolean { + return /^[0-9a-f]{7,40}$/i.test(value.trim()); +} + function releaseUrl(tag: string): string { return repoUrl(`/releases/tag/${encodeURIComponent(tag)}`); } @@ -4095,7 +4313,8 @@ function likelyOwnerLine(owner: LikelyOwner): string { const role = owner.role.trim(); const reason = sentence(owner.reason.trim() || "Related by repository history."); const commits = owner.commits - .filter(Boolean) + .map((commit) => commit.trim()) + .filter(isCommitSha) .slice(0, 3) .map((commit) => linkedSha(commit)) .join(", "); @@ -4888,6 +5107,20 @@ function 
reviewTelemetryNumber(value: number | undefined): string { return String(Math.max(0, Math.round(value))); } +function contextCountText( + total: number | undefined, + fallback: number, + hydrated?: number, + truncated?: boolean, +): string { + const displayTotal = + total === undefined || !Number.isFinite(total) ? Math.max(0, fallback) : Math.max(0, total); + if (hydrated === undefined || !Number.isFinite(hydrated)) return String(displayTotal); + const displayHydrated = Math.max(0, Math.round(hydrated)); + if (!truncated && displayHydrated >= displayTotal) return String(displayTotal); + return `${displayTotal} (hydrated ${displayHydrated}${truncated ? ", truncated" : ""})`; +} + function runtimeReviewTextFromReport(markdown: string): string { return runtimeReviewText({ model: frontMatterValue(markdown, "review_model") ?? "", @@ -6075,11 +6308,32 @@ ${options.action.closeComment ? options.action.closeComment : "_No close comment ## GitHub Snapshot -- comments: ${options.context.counts?.comments ?? options.context.comments.length} +- comments: ${contextCountText( + options.context.counts?.comments, + options.context.comments.length, + options.context.counts?.commentsHydrated, + options.context.counts?.commentsTruncated, + )} - timeline events: ${options.context.counts?.timeline ?? options.context.timeline.length} - related items: ${options.context.counts?.relatedItems ?? options.context.relatedItems?.length ?? 0} -- PR files: ${options.context.counts?.pullFiles ?? options.context.pullFiles?.length ?? 0} -- PR commits: ${options.context.counts?.pullCommits ?? options.context.pullCommits?.length ?? 0} +- PR files: ${contextCountText( + options.context.counts?.pullFiles, + options.context.pullFiles?.length ?? 0, + options.context.counts?.pullFilesHydrated, + options.context.counts?.pullFilesTruncated, + )} +- PR commits: ${contextCountText( + options.context.counts?.pullCommits, + options.context.pullCommits?.length ?? 
0, + options.context.counts?.pullCommitsHydrated, + options.context.counts?.pullCommitsTruncated, + )} +- PR review comments: ${contextCountText( + options.context.counts?.pullReviewComments, + options.context.pullReviewComments?.length ?? 0, + options.context.counts?.pullReviewCommentsHydrated, + options.context.counts?.pullReviewCommentsTruncated, + )} ## Review Telemetry diff --git a/src/policy-rfc/collector.ts b/src/policy-rfc/collector.ts new file mode 100644 index 0000000000..18aa30b230 --- /dev/null +++ b/src/policy-rfc/collector.ts @@ -0,0 +1,329 @@ +import { existsSync, readFileSync, readdirSync, statSync } from "node:fs"; +import { join, relative, sep } from "node:path"; + +import { repositoryProfileFor, repositoryProfileForSlug } from "../repository-profiles.js"; +import type { + PatternCollectorOptions, + PolicyPatternObservation, + PolicyPatternType, +} from "./types.js"; + +interface RecordCandidate { + absolutePath: string; + relativePath: string; + repoSlug: string; +} + +export function collectPolicyPatterns( + options: PatternCollectorOptions, +): PolicyPatternObservation[] { + const repoSlugs = targetRepoSlugs(options.targetRepo); + const candidates = recordCandidates(options.recordsRoot, repoSlugs); + const observations: PolicyPatternObservation[] = []; + + for (const candidate of candidates) { + const text = safeRead(candidate.absolutePath); + if (!text) continue; + observations.push(...observationsFromRecord(candidate, text)); + } + + return observations.sort(compareObservation); +} + +function targetRepoSlugs(targetRepo: string | undefined): Set | undefined { + if (!targetRepo) return undefined; + return new Set([repositoryProfileFor(targetRepo).slug]); +} + +function recordCandidates( + recordsRoot: string, + repoSlugs: Set | undefined, +): RecordCandidate[] { + if (!existsSync(recordsRoot)) return []; + const candidates: RecordCandidate[] = []; + for (const repoSlug of safeReadDir(recordsRoot).sort()) { + if (repoSlugs && 
!repoSlugs.has(repoSlug)) continue; + const repoRoot = join(recordsRoot, repoSlug); + if (!safeIsDirectory(repoRoot)) continue; + for (const absolutePath of walkFiles(repoRoot)) { + if (!absolutePath.endsWith(".md") && !absolutePath.endsWith(".json")) continue; + candidates.push({ + absolutePath, + relativePath: normalizePath(relative(recordsRoot, absolutePath)), + repoSlug, + }); + } + } + return candidates.sort((left, right) => left.relativePath.localeCompare(right.relativePath)); +} + +function walkFiles(root: string): string[] { + const files: string[] = []; + for (const name of safeReadDir(root).sort()) { + const fullPath = join(root, name); + if (safeIsDirectory(fullPath)) files.push(...walkFiles(fullPath)); + else files.push(fullPath); + } + return files; +} + +function observationsFromRecord( + candidate: RecordCandidate, + text: string, +): PolicyPatternObservation[] { + const repo = repoForSlug(candidate.repoSlug); + const item = itemFromPath(candidate.relativePath); + const observedAt = firstDate([ + frontMatterValue(text, "reviewed_at"), + frontMatterValue(text, "updated_at"), + frontMatterValue(text, "created_at"), + jsonStringValue(text, "reviewedAt"), + jsonStringValue(text, "updatedAt"), + jsonStringValue(text, "createdAt"), + ]); + const successfulOutcome = hasSuccessfulOutcome(text); + const observations: PolicyPatternObservation[] = []; + + for (const label of frontMatterStringArray(text, "labels")) { + observations.push( + observation(candidate, repo, item, observedAt, successfulOutcome, "label", label), + ); + } + for (const verdict of uniqueMatches(text, /clawsweeper-verdict:([a-z0-9_-]+)/gi)) { + observations.push( + observation(candidate, repo, item, observedAt, successfulOutcome, "review_verdict", verdict), + ); + } + for (const reason of [ + frontMatterValue(text, "close_reason"), + frontMatterValue(text, "closeReason"), + labeledLineValue(text, "close reason"), + labeledLineValue(text, "safe close reason"), + ]) { + if (reason) { + 
observations.push( + observation( + candidate, + repo, + item, + observedAt, + successfulOutcome, + "safe_close_reason", + reason, + ), + ); + } + } + for (const marker of [ + ...uniqueMatches(text, /clawsweeper-repair:([a-z0-9_-]+)/gi), + ...jsonStringValues(text, "repair_marker"), + ...jsonStringValues(text, "repairMarker"), + ]) { + observations.push( + observation(candidate, repo, item, observedAt, successfulOutcome, "repair_marker", marker), + ); + } + for (const cause of [ + ...jsonStringValues(text, "automerge_repair_cause"), + ...jsonStringValues(text, "automergeRepairCause"), + labeledLineValue(text, "automerge repair cause"), + ]) { + if (cause) { + observations.push( + observation( + candidate, + repo, + item, + observedAt, + successfulOutcome, + "automerge_repair_cause", + cause, + ), + ); + } + } + for (const conflictType of [ + ...jsonStringValues(text, "conflict_type"), + ...jsonStringValues(text, "conflictType"), + labeledLineValue(text, "conflict type"), + labeledLineValue(text, "file conflict type"), + ]) { + if (conflictType) { + observations.push( + observation( + candidate, + repo, + item, + observedAt, + successfulOutcome, + "file_conflict_type", + conflictType, + ), + ); + } + } + + return dedupeObservations(observations); +} + +function observation( + candidate: RecordCandidate, + repo: string, + item: string, + observedAt: string | undefined, + successfulOutcome: boolean, + patternType: PolicyPatternType, + rawValue: string, +): PolicyPatternObservation { + const value = normalizeValue(rawValue); + return { + patternType, + value, + repo, + item, + sourceRecord: `records/${candidate.relativePath}`, + observedAt, + successfulOutcome, + }; +} + +function dedupeObservations(observations: PolicyPatternObservation[]): PolicyPatternObservation[] { + const seen = new Set(); + return observations.filter((candidate) => { + if (!candidate.value) return false; + const key = `${candidate.patternType}\0${candidate.value}\0${candidate.sourceRecord}`; + 
if (seen.has(key)) return false; + seen.add(key); + return true; + }); +} + +function repoForSlug(slug: string): string { + return repositoryProfileForSlug(slug)?.targetRepo ?? slug.replace("-", "/"); +} + +function itemFromPath(relativePath: string): string { + const match = relativePath.match(/\/(?:items|closed)\/([^/.]+)\./); + return match?.[1] ? `#${match[1]}` : relativePath; +} + +function safeRead(filePath: string): string | null { + try { + return readFileSync(filePath, "utf8"); + } catch { + return null; + } +} + +function safeReadDir(dirPath: string): string[] { + try { + return readdirSync(dirPath); + } catch { + return []; + } +} + +function safeIsDirectory(path: string): boolean { + try { + return statSync(path).isDirectory(); + } catch { + return false; + } +} + +function normalizePath(path: string): string { + return path.split(sep).join("/"); +} + +function frontMatterStringArray(markdown: string, key: string): string[] { + const raw = frontMatterValue(markdown, key); + if (!raw) return []; + if (raw.startsWith("[")) { + try { + const parsed: unknown = JSON.parse(raw); + if (Array.isArray(parsed)) + return parsed.filter((value): value is string => typeof value === "string"); + } catch { + return []; + } + } + return raw + .split(",") + .map((value) => normalizeValue(value)) + .filter(Boolean); +} + +function frontMatterValue(markdown: string, key: string): string | undefined { + const frontMatter = markdown.match(/^---\r?\n([\s\S]*?)\r?\n---/); + if (!frontMatter?.[1]) return undefined; + const lines = frontMatter[1].split(/\r?\n/); + const direct = lines.find((line) => line.toLowerCase().startsWith(`${key.toLowerCase()}:`)); + if (!direct) return undefined; + return stripQuotes(direct.slice(direct.indexOf(":") + 1).trim()); +} + +function labeledLineValue(text: string, label: string): string | undefined { + const escaped = label.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const match = text.match( + new 
RegExp(`(?:^|\\n)\\s*(?:[-*]\\s*)?(?:\\*\\*)?${escaped}(?:\\*\\*)?\\s*:\\s*([^\\n]+)`, "i"), + ); + return match?.[1] ? normalizeValue(match[1]) : undefined; +} + +function jsonStringValue(text: string, key: string): string | undefined { + return jsonStringValues(text, key)[0]; +} + +function jsonStringValues(text: string, key: string): string[] { + const values: string[] = []; + const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const pattern = new RegExp(`"${escaped}"\\s*:\\s*"([^"]+)"`, "gi"); + for (const match of text.matchAll(pattern)) { + if (match[1]) values.push(normalizeValue(match[1])); + } + return values; +} + +function uniqueMatches(text: string, pattern: RegExp): string[] { + return [ + ...new Set([...text.matchAll(pattern)].map((match) => normalizeValue(match[1] ?? ""))), + ].filter(Boolean); +} + +function firstDate(values: Array<string | undefined>): string | undefined { + for (const value of values) { + if (!value) continue; + const parsed = new Date(value); + if (!Number.isNaN(parsed.valueOf())) return parsed.toISOString(); + } + return undefined; +} + +function hasSuccessfulOutcome(text: string): boolean { + return /\b(applied|merged|closed|success|succeeded|pass)\b/i.test(text); +} + +function normalizeValue(value: string): string { + return stripQuotes(value) + .replace(/<[^>]*>/g, "") + .replaceAll("`", "") + .trim() + .toLowerCase() + .replace(/\s+/g, " ") + .slice(0, 120); +} + +function stripQuotes(value: string): string { + return value.replace(/^["']|["']$/g, ""); +} + +function compareObservation( + left: PolicyPatternObservation, + right: PolicyPatternObservation, +): number { + return ( + left.patternType.localeCompare(right.patternType) || + left.value.localeCompare(right.value) || + left.sourceRecord.localeCompare(right.sourceRecord) + ); +} diff --git a/src/policy-rfc/index.ts b/src/policy-rfc/index.ts new file mode 100644 index 0000000000..446a99b99c --- /dev/null +++ b/src/policy-rfc/index.ts @@ -0,0 +1,82 @@ +#!/usr/bin/env node +import {
mkdirSync, readdirSync, rmSync, writeFileSync } from "node:fs"; +import { join, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { numberArg, parseArgs, stringArg } from "../clawsweeper-args.js"; +import { repositoryProfileFor } from "../repository-profiles.js"; +import { sortStable } from "../stable-json.js"; +import { collectPolicyPatterns } from "./collector.js"; +import { scorePolicyPatterns } from "./scorer.js"; +import { synthesizePolicyProposal } from "./synthesizer.js"; + +export { collectPolicyPatterns } from "./collector.js"; +export { scorePolicyPatterns } from "./scorer.js"; +export { synthesizePolicyProposal } from "./synthesizer.js"; +export type * from "./types.js"; + +interface RunPolicyRfcOptions { + recordsRoot: string; + outputRoot: string; + targetRepo: string; + minOccurrences: number; + createdAt?: string | undefined; +} + +export function runPolicyRfc(options: RunPolicyRfcOptions): { + proposals: number; + outputDir: string; +} { + const profile = repositoryProfileFor(options.targetRepo); + const outputDir = join(options.outputRoot, profile.slug); + const observations = collectPolicyPatterns({ + recordsRoot: options.recordsRoot, + targetRepo: options.targetRepo, + }); + const scored = scorePolicyPatterns(observations, { + minOccurrences: options.minOccurrences, + }); + + mkdirSync(outputDir, { recursive: true }); + removeStalePolicyProposalFiles(outputDir); + for (const pattern of scored) { + const proposal = synthesizePolicyProposal(pattern, { createdAt: options.createdAt }); + writeFileSync(join(outputDir, `${proposal.id}.md`), proposal.markdown); + writeFileSync( + join(outputDir, `${proposal.id}.json`), + `${JSON.stringify(sortStable(proposal.json), null, 2)}\n`, + ); + } + + return { proposals: scored.length, outputDir }; +} + +function removeStalePolicyProposalFiles(outputDir: string): void { + for (const entry of readdirSync(outputDir, { withFileTypes: true })) { + if (!entry.isFile()) continue; + if 
(!/^policy-rfc-.+\.(?:json|md)$/.test(entry.name)) continue; + rmSync(join(outputDir, entry.name), { force: true }); + } +} + +function main(): void { + const args = parseArgs(process.argv.slice(2)); + const targetRepo = stringArg(args.target_repo, "openclaw/openclaw"); + const recordsRoot = resolve(stringArg(args.records_root, "records")); + const outputRoot = resolve(stringArg(args.output_root, "results/policy-rfc")); + const minOccurrences = numberArg(args.min_occurrences, 5); + const createdAt = typeof args.created_at === "string" ? args.created_at : undefined; + const result = runPolicyRfc({ + recordsRoot, + outputRoot, + targetRepo, + minOccurrences, + createdAt, + }); + console.log(`Policy RFC proposals written: ${result.proposals}`); + console.log(`Output directory: ${result.outputDir}`); +} + +if (process.argv[1] && fileURLToPath(import.meta.url) === resolve(process.argv[1])) { + main(); +} diff --git a/src/policy-rfc/scorer.ts b/src/policy-rfc/scorer.ts new file mode 100644 index 0000000000..db1137fce5 --- /dev/null +++ b/src/policy-rfc/scorer.ts @@ -0,0 +1,199 @@ +import { createHash } from "node:crypto"; + +import type { + PatternScorerOptions, + PolicyPatternObservation, + PolicyPatternType, + ScoredPolicyPattern, +} from "./types.js"; + +export function scorePolicyPatterns( + observations: readonly PolicyPatternObservation[], + options: PatternScorerOptions, +): ScoredPolicyPattern[] { + const minDistinctItems = options.minDistinctItems ?? Math.min(2, options.minOccurrences); + const minDistinctRepos = options.minDistinctRepos ?? 1; + const referenceDate = options.now ?? deterministicReferenceDate(observations); + const groups = new Map(); + + for (const observation of observations) { + const key = `${observation.patternType}\0${observation.value}`; + const group = groups.get(key) ?? 
[]; + group.push(observation); + groups.set(key, group); + } + + const scored: ScoredPolicyPattern[] = []; + for (const group of groups.values()) { + const sortedGroup = [...group].sort(compareObservation); + const first = sortedGroup[0]; + if (!first) continue; + const distinctItems = sortedUnique(sortedGroup.map((item) => `${item.repo}${item.item}`)); + const distinctRepos = sortedUnique(sortedGroup.map((item) => item.repo)); + if (sortedGroup.length < options.minOccurrences) continue; + if (distinctItems.length < minDistinctItems) continue; + if (distinctRepos.length < minDistinctRepos) continue; + + const successfulOutcomes = sortedGroup.filter((item) => item.successfulOutcome).length; + const latestObservedAt = latestDate(sortedGroup); + const confidenceScore = confidence({ + occurrenceCount: sortedGroup.length, + distinctItems: distinctItems.length, + distinctRepos: distinctRepos.length, + successfulOutcomes, + latestObservedAt, + now: referenceDate, + }); + + scored.push({ + id: policyPatternId(first.patternType, first.value), + patternType: first.patternType, + value: first.value, + title: policyTitle(first.patternType, first.value), + confidenceScore, + occurrenceCount: sortedGroup.length, + distinctItems, + distinctRepos, + successfulOutcomes, + latestObservedAt, + evidenceItems: sortedGroup.slice(0, 20), + proposedConditions: proposedConditions(first.patternType, first.value), + proposedAction: proposedAction(first.patternType), + safetyConstraints: safetyConstraints(first.patternType), + sourceRecords: sortedUnique(sortedGroup.map((item) => item.sourceRecord)), + }); + } + + return scored.sort( + (left, right) => + right.confidenceScore - left.confidenceScore || + right.occurrenceCount - left.occurrenceCount || + left.id.localeCompare(right.id), + ); +} + +function confidence(options: { + occurrenceCount: number; + distinctItems: number; + distinctRepos: number; + successfulOutcomes: number; + latestObservedAt?: string | undefined; + now: Date; +}): 
number { + const occurrence = Math.min(options.occurrenceCount / 10, 1) * 0.35; + const itemSpread = Math.min(options.distinctItems / 5, 1) * 0.25; + const repoSpread = Math.min(options.distinctRepos / 2, 1) * 0.15; + const success = + Math.min(options.successfulOutcomes / Math.max(options.occurrenceCount, 1), 1) * 0.15; + const recentness = recencyScore(options.latestObservedAt, options.now) * 0.1; + return Number((occurrence + itemSpread + repoSpread + success + recentness).toFixed(3)); +} + +function recencyScore(latestObservedAt: string | undefined, now: Date): number { + if (!latestObservedAt) return 0.3; + const latest = new Date(latestObservedAt); + if (Number.isNaN(latest.valueOf())) return 0.3; + const ageDays = Math.max(0, (now.valueOf() - latest.valueOf()) / 86_400_000); + if (ageDays <= 30) return 1; + if (ageDays <= 90) return 0.7; + if (ageDays <= 180) return 0.4; + return 0.2; +} + +function deterministicReferenceDate(observations: readonly PolicyPatternObservation[]): Date { + const latest = observations + .map((item) => (item.observedAt ? new Date(item.observedAt).valueOf() : Number.NaN)) + .filter(Number.isFinite) + .sort((left, right) => left - right) + .at(-1); + return latest === undefined ? 
new Date(0) : new Date(latest); +} + +function policyPatternId(patternType: PolicyPatternType, value: string): string { + const slug = `${patternType}-${value}` + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-|-$/g, "") + .slice(0, 72); + const hash = createHash("sha256").update(`${patternType}:${value}`).digest("hex").slice(0, 8); + return `policy-rfc-${slug}-${hash}`; +} + +function policyTitle(patternType: PolicyPatternType, value: string): string { + return `Policy RFC: ${labelForPatternType(patternType)} - ${value}`; +} + +function labelForPatternType(patternType: PolicyPatternType): string { + return patternType + .split("_") + .map((part) => part[0]?.toUpperCase() + part.slice(1)) + .join(" "); +} + +function proposedConditions(patternType: PolicyPatternType, value: string): string[] { + return [ + `The observed ${labelForPatternType(patternType).toLowerCase()} is "${value}".`, + "At least the configured minimum number of distinct records show the same pattern.", + "The source record has durable ClawSweeper evidence and is not a malformed or partial record.", + ]; +} + +function proposedAction(patternType: PolicyPatternType): string { + switch (patternType) { + case "label": + return "Document a candidate review heuristic for this repeated label; do not mutate labels automatically."; + case "review_verdict": + return "Document the repeated review verdict as a candidate triage heuristic for maintainer review."; + case "safe_close_reason": + return "Document a candidate close-policy clarification; keep all close actions on existing apply paths."; + case "automerge_repair_cause": + return "Document a candidate automerge repair precondition; do not change automerge behavior automatically."; + case "file_conflict_type": + return "Document a candidate conflict-handling policy for repair planning only."; + case "repair_marker": + return "Document a candidate repair policy based on repeated repair markers."; + } +} + +function safetyConstraints(patternType: 
PolicyPatternType): string[] { + const base = [ + "Proposal-only: this RFC must not execute policy automatically.", + "No GitHub mutation is allowed from the Policy RFC Engine.", + "Existing scheduler review, apply, close, and automerge paths remain unchanged.", + ]; + if (patternType === "safe_close_reason") { + return [ + ...base, + "Any close behavior must continue to require existing repository close rules.", + ]; + } + if (patternType === "automerge_repair_cause") { + return [ + ...base, + "Automerge eligibility must continue to be decided by existing automerge guards.", + ]; + } + return base; +} + +function latestDate(observations: readonly PolicyPatternObservation[]): string | undefined { + const dates = observations + .map((item) => item.observedAt) + .filter((item): item is string => Boolean(item)) + .sort(); + return dates.at(-1); +} + +function sortedUnique(values: readonly string[]): string[] { + return [...new Set(values)].sort(); +} + +function compareObservation( + left: PolicyPatternObservation, + right: PolicyPatternObservation, +): number { + return ( + left.repo.localeCompare(right.repo) || + left.item.localeCompare(right.item) || + left.sourceRecord.localeCompare(right.sourceRecord) + ); +} diff --git a/src/policy-rfc/synthesizer.ts b/src/policy-rfc/synthesizer.ts new file mode 100644 index 0000000000..9399bebea0 --- /dev/null +++ b/src/policy-rfc/synthesizer.ts @@ -0,0 +1,121 @@ +import type { + PolicyProposalJson, + ScoredPolicyPattern, + SynthesizeOptions, + SynthesizedPolicyProposal, +} from "./types.js"; + +export function synthesizePolicyProposal( + pattern: ScoredPolicyPattern, + options: SynthesizeOptions = {}, +): SynthesizedPolicyProposal { + const createdAt = options.createdAt ?? pattern.latestObservedAt ?? 
new Date(0).toISOString(); + const json = policyProposalJson(pattern, createdAt); + return { + id: pattern.id, + markdown: policyProposalMarkdown(pattern, createdAt), + json, + }; +} + +export function policyProposalJson( + pattern: ScoredPolicyPattern, + createdAt: string, +): PolicyProposalJson { + return { + id: pattern.id, + title: pattern.title, + status: "Draft", + pattern_type: pattern.patternType, + evidence_items: pattern.evidenceItems.map((item) => { + const evidence = { + repo: item.repo, + item: item.item, + source_record: item.sourceRecord, + }; + return { + ...evidence, + ...(item.observedAt ? { observed_at: item.observedAt } : {}), + ...(item.detail ? { detail: item.detail } : {}), + }; + }), + confidence_score: pattern.confidenceScore, + proposed_conditions: pattern.proposedConditions, + proposed_action: pattern.proposedAction, + safety_constraints: pattern.safetyConstraints, + created_at: createdAt, + source_records: pattern.sourceRecords, + }; +} + +export function policyProposalMarkdown(pattern: ScoredPolicyPattern, createdAt: string): string { + return [ + `# ${pattern.title}`, + "", + "Status: Draft", + "", + "## Summary", + "", + `ClawSweeper observed the repeated \`${pattern.patternType}\` pattern \`${pattern.value}\` across ${pattern.occurrenceCount} records. This RFC proposes documenting a conservative policy candidate for maintainer review only.`, + "", + "## Observed Pattern", + "", + `- Pattern type: \`${pattern.patternType}\``, + `- Pattern value: \`${pattern.value}\``, + `- Occurrences: ${pattern.occurrenceCount}`, + `- Distinct items: ${pattern.distinctItems.length}`, + `- Distinct repositories: ${pattern.distinctRepos.length}`, + `- Successful repair/apply outcomes observed: ${pattern.successfulOutcomes}`, + `- Latest observation: ${pattern.latestObservedAt ?? 
"unknown"}`, + `- Confidence score: ${pattern.confidenceScore.toFixed(3)}`, + "", + "## Evidence", + "", + ...evidenceLines(pattern), + "", + "## Proposed Policy", + "", + pattern.proposedAction, + "", + "Proposed conditions:", + "", + ...pattern.proposedConditions.map((condition) => `- ${condition}`), + "", + "## Safety Constraints", + "", + ...pattern.safetyConstraints.map((constraint) => `- ${constraint}`), + "", + "## Non-Goals", + "", + "- Do not auto-apply this policy.", + "- Do not mutate GitHub state from this proposal.", + "- Do not alter scheduler cadence, review shards, close behavior, apply behavior, or automerge behavior.", + "", + "## Rollout Plan", + "", + "1. Review this RFC with maintainers.", + "2. If accepted, convert the proposal into an explicit policy change in a separate pull request.", + "3. Ship any executable behavior behind existing conservative apply paths and tests.", + "", + "## Metrics", + "", + "- Number of future records matching the proposed conditions.", + "- False-positive rate found during maintainer review.", + "- Number of accepted, revised, or rejected proposals for this pattern.", + "", + "## Reversion Plan", + "", + "Archive or delete the generated RFC and JSON proposal. Since this engine is proposal-only, no runtime policy behavior needs to be reverted.", + "", + `Generated by ClawSweeper Policy RFC Engine at ${createdAt}.`, + "", + ].join("\n"); +} + +function evidenceLines(pattern: ScoredPolicyPattern): string[] { + if (!pattern.evidenceItems.length) return ["No evidence items were retained."]; + return pattern.evidenceItems.map((item) => { + const observed = item.observedAt ? 
` observed ${item.observedAt}` : ""; + return `- ${item.repo} ${item.item}${observed}: \`${item.sourceRecord}\``; + }); +} diff --git a/src/policy-rfc/types.ts b/src/policy-rfc/types.ts new file mode 100644 index 0000000000..cb1299d596 --- /dev/null +++ b/src/policy-rfc/types.ts @@ -0,0 +1,78 @@ +export type PolicyPatternType = + | "file_conflict_type" + | "label" + | "repair_marker" + | "review_verdict" + | "safe_close_reason" + | "automerge_repair_cause"; + +export interface PolicyPatternObservation { + patternType: PolicyPatternType; + value: string; + repo: string; + item: string; + sourceRecord: string; + observedAt?: string | undefined; + successfulOutcome: boolean; + detail?: string | undefined; +} + +export interface PatternCollectorOptions { + recordsRoot: string; + targetRepo?: string | undefined; +} + +export interface ScoredPolicyPattern { + id: string; + patternType: PolicyPatternType; + value: string; + title: string; + confidenceScore: number; + occurrenceCount: number; + distinctItems: string[]; + distinctRepos: string[]; + successfulOutcomes: number; + latestObservedAt?: string | undefined; + evidenceItems: PolicyPatternObservation[]; + proposedConditions: string[]; + proposedAction: string; + safetyConstraints: string[]; + sourceRecords: string[]; +} + +export interface PatternScorerOptions { + minOccurrences: number; + minDistinctItems?: number | undefined; + minDistinctRepos?: number | undefined; + now?: Date | undefined; +} + +export interface PolicyProposalJson { + id: string; + title: string; + status: "Draft"; + pattern_type: PolicyPatternType; + evidence_items: Array<{ + repo: string; + item: string; + source_record: string; + observed_at?: string | undefined; + detail?: string | undefined; + }>; + confidence_score: number; + proposed_conditions: string[]; + proposed_action: string; + safety_constraints: string[]; + created_at: string; + source_records: string[]; +} + +export interface SynthesizedPolicyProposal { + id: string; + markdown: 
string; + json: PolicyProposalJson; +} + +export interface SynthesizeOptions { + createdAt?: string | undefined; +} diff --git a/src/repair/comment-router-core.ts b/src/repair/comment-router-core.ts index 2fac1ca548..97161ffd63 100644 --- a/src/repair/comment-router-core.ts +++ b/src/repair/comment-router-core.ts @@ -116,6 +116,48 @@ export function createCachedLabelNumberLookup(fetchNumbers: (label: string) => J }; } +export function createCachedIssueCommentsLookup( + fetchComments: (number: number) => T[], + cache = new Map(), +) { + return (number: JsonValue): T[] => { + const key = Number(number); + if (!Number.isInteger(key) || key <= 0) return []; + const cached = cache.get(key); + if (cached) return [...cached]; + const comments = fetchComments(key); + if (!Array.isArray(comments)) return []; + cache.set(key, comments); + return [...comments]; + }; +} + +export function createCachedIssueCommentsLookupAsync( + fetchComments: (number: number) => Promise, + cache = new Map(), +) { + const inFlight = new Map>(); + return async (number: JsonValue): Promise => { + const key = Number(number); + if (!Number.isInteger(key) || key <= 0) return []; + const cached = cache.get(key); + if (cached) return [...cached]; + const pending = inFlight.get(key); + if (pending) return [...(await pending)]; + const next = fetchComments(key) + .then((comments) => { + if (!Array.isArray(comments)) return []; + cache.set(key, comments); + return comments; + }) + .finally(() => { + inFlight.delete(key); + }); + inFlight.set(key, next); + return [...(await next)]; + }; +} + function uniquePositiveIntegers(values: JsonValue): number[] { if (!Array.isArray(values)) return []; return [ diff --git a/src/repair/comment-router.ts b/src/repair/comment-router.ts index 8d964f11cf..50fd77657e 100644 --- a/src/repair/comment-router.ts +++ b/src/repair/comment-router.ts @@ -33,6 +33,8 @@ import { automergeTransientWaitConfig, buildAutomergeMergeArgs, commandHasAction, + 
createCachedIssueCommentsLookup, + createCachedIssueCommentsLookupAsync, createCachedLabelNumberLookup, hasCommandResponseMarker, commandStatusMarker, @@ -133,6 +135,14 @@ const collaboratorPermissionCache = new Map(); const activeRepairRunsByPrefix = new Map(); const liveTargetCache = new Map(); const issueCommentsCache = new Map(); +const cachedIssueComments = createCachedIssueCommentsLookup( + (number) => ghPaged(`repos/${targetRepo}/issues/${number}/comments?per_page=100`), + issueCommentsCache, +); +const cachedIssueCommentsAsync = createCachedIssueCommentsLookupAsync( + (number) => ghPagedAsync(`repos/${targetRepo}/issues/${number}/comments?per_page=100`), + issueCommentsCache, +); const openIssueNumbersByLabel = createCachedLabelNumberLookup((label) => ghPaged( `repos/${targetRepo}/issues?state=open&labels=${encodeURIComponent(label)}&per_page=100`, @@ -306,7 +316,7 @@ async function prehydrateCommandLookups(commands: LooseRecord[]) { liveTargetCache.set(number, await fetchLiveTargetAsync(number)); }), mapLimit(issueNumbers, lookupConcurrency, async (number) => { - issueCommentsCache.set(number, await fetchIssueCommentsAsync(number)); + await cachedIssueCommentsAsync(number); }), ]); } @@ -2317,10 +2327,7 @@ function linesFromMarkdownSection(section: JsonValue): string[] { } function issueCommentsFor(number: JsonValue): JsonValue[] { - return ( - issueCommentsCache.get(Number(number)) ?? - ghPaged(`repos/${targetRepo}/issues/${number}/comments?per_page=100`) - ); + return cachedIssueComments(number); } function listRepairLoopReviewComments() { @@ -2586,9 +2593,7 @@ function hasExistingResponse( intent: JsonValue, headSha: JsonValue, ) { - const comments = - issueCommentsCache.get(Number(number)) ?? - ghPaged(`repos/${targetRepo}/issues/${number}/comments?per_page=100`); + const comments = cachedIssueComments(number); return comments.some((comment: JsonValue) => { const body = String(comment.body ?? 
""); if (!hasCommandResponseMarker(body, { commentId, intent, headSha, matchAnyHead: true })) { @@ -2611,9 +2616,7 @@ function hasExistingResponse( function hasExistingModeStatusResponse(number: JsonValue, intent: JsonValue) { const markerPrefix = commandStatusMarkerPrefix({ issue_number: number, intent }); - const comments = - issueCommentsCache.get(Number(number)) ?? - ghPaged(`repos/${targetRepo}/issues/${number}/comments?per_page=100`); + const comments = cachedIssueComments(number); return comments.some((comment: JsonValue) => { if (!isTrustedStatusComment(comment)) return false; const body = String(comment.body ?? ""); @@ -2621,10 +2624,6 @@ function hasExistingModeStatusResponse(number: JsonValue, intent: JsonValue) { }); } -async function fetchIssueCommentsAsync(number: JsonValue) { - return ghPagedAsync(`repos/${targetRepo}/issues/${number}/comments?per_page=100`); -} - function postComment(command: LooseRecord, body: string) { const existing = findExistingCommandStatusComment(command); const nextBody = usesSharedAutomergeStatus(command) diff --git a/src/repair/github-cli.ts b/src/repair/github-cli.ts index 3fa6370ea5..c7602ccf2b 100644 --- a/src/repair/github-cli.ts +++ b/src/repair/github-cli.ts @@ -48,12 +48,19 @@ export function ghJsonBestEffort( } export function githubPaginatedPath(apiPath: string): string { - const [basePart, query = ""] = apiPath.split("?", 2); - const base = basePart ?? apiPath; - const params = new URLSearchParams(query); - if (!params.has("per_page")) params.set("per_page", "100"); - const serialized = params.toString(); - return serialized ? `${base}?${serialized}` : base; + return githubPathWithQueryDefaults(apiPath, { per_page: "100" }); +} + +export function githubLimitedPagePath(apiPath: string, limit: number, page = 1): string { + const normalizedLimit = Number.isFinite(limit) ? Math.floor(limit) : 1; + const normalizedPage = Number.isFinite(page) ? 
Math.floor(page) : 1; + const pageSize = Math.max(1, Math.min(100, normalizedLimit)); + const pageNumber = Math.max(1, normalizedPage); + return githubPathWithQueryDefaults( + apiPath, + { per_page: String(pageSize), page: String(pageNumber) }, + { override: true }, + ); } export function ghPaged(apiPath: string, options: GhRunOptions = {}): T[] { @@ -89,6 +96,28 @@ export async function ghPagedWithRetryAsync( return pages.flatMap((page: JsonValue) => (Array.isArray(page) ? (page as T[]) : [])); } +export function ghPagedLimit( + apiPath: string, + limit: number, + options: GhRunOptions = {}, +): T[] { + const max = Number.isFinite(limit) ? Math.max(0, Math.floor(limit)) : 0; + if (max <= 0) return []; + + const perPage = Math.min(100, max); + const out: T[] = []; + for (let page = 1; out.length < max; page += 1) { + const entries = ghJson( + ["api", githubLimitedPagePath(apiPath, perPage, page)], + options, + ); + if (!Array.isArray(entries) || entries.length === 0) break; + out.push(...(entries as T[])); + if (entries.length < perPage) break; + } + return out.slice(0, max); +} + export function ghText(ghArgs: string[], options: GhRunOptions = {}): string { const text = execFileSync("gh", ghArgs, { cwd: options.cwd ?? repoRoot(), @@ -234,6 +263,21 @@ function resolveRetryOptions(options: GhRetryOptions | number): GhRetryOptions { return options; } +function githubPathWithQueryDefaults( + apiPath: string, + defaults: Record, + { override = false }: { override?: boolean } = {}, +): string { + const [basePart, query = ""] = apiPath.split("?", 2); + const base = basePart ?? apiPath; + const params = new URLSearchParams(query); + for (const [key, value] of Object.entries(defaults)) { + if (override || !params.has(key)) params.set(key, value); + } + const serialized = params.toString(); + return serialized ? 
`${base}?${serialized}` : base; +} + function bufferLikeToString(value: unknown): string { if (Buffer.isBuffer(value)) return value.toString("utf8"); return String(value ?? ""); diff --git a/src/repair/lib.ts b/src/repair/lib.ts index 4157d2ead4..06826c7423 100755 --- a/src/repair/lib.ts +++ b/src/repair/lib.ts @@ -377,9 +377,14 @@ function compactPlanItem(item: LooseRecord) { branch_writable: pull.branch_writable, branch_write_reason: pull.branch_write_reason, changed_files: pull.changed_files, + files_hydrated: pull.files_hydrated, + files_truncated: pull.files_truncated, additions: pull.additions, deletions: pull.deletions, files: (pull.files ?? []).slice(0, 40), + commits_count: pull.commits_count, + commits_hydrated: pull.commits_hydrated, + commits_truncated: pull.commits_truncated, commits: (pull.commits ?? []).slice(0, 10), reviews: (pull.reviews ?? []).slice(0, 12), review_comments_count: pull.review_comments_count, diff --git a/src/repair/plan-cluster.ts b/src/repair/plan-cluster.ts index bd770c1f30..4370901bba 100644 --- a/src/repair/plan-cluster.ts +++ b/src/repair/plan-cluster.ts @@ -11,13 +11,29 @@ import { repoRoot, validateJob, } from "./lib.js"; -import { ghJson, ghPaged, ghText } from "./github-cli.js"; +import { ghJson, ghPaged, ghPagedLimit, ghText } from "./github-cli.js"; import { hasSecurityRepairOptInLabel } from "./security-boundary.js"; -const MAX_LINKED_REFS = Number(process.env.CLAWSWEEPER_MAX_LINKED_REFS ?? 
0); +function readNonNegativeIntegerEnv(name: string, fallback: number): number { + const raw = process.env[name]; + if (raw === undefined || raw.trim() === "") return fallback; + const value = Number(raw); + if (!Number.isInteger(value) || value < 0) { + console.warn(`${name} must be a non-negative integer; using default ${fallback}`); + return fallback; + } + return Math.floor(value); +} + +const MAX_LINKED_REFS = readNonNegativeIntegerEnv("CLAWSWEEPER_MAX_LINKED_REFS", 0); const HYDRATE_COMMENTS = process.env.CLAWSWEEPER_HYDRATE_COMMENTS === "1"; -const MAX_COMMENTS_PER_ITEM = Number(process.env.CLAWSWEEPER_MAX_COMMENTS_PER_ITEM ?? 30); -const MAX_REVIEW_COMMENTS_PER_PR = Number(process.env.CLAWSWEEPER_MAX_REVIEW_COMMENTS_PER_PR ?? 50); +const MAX_COMMENTS_PER_ITEM = readNonNegativeIntegerEnv("CLAWSWEEPER_MAX_COMMENTS_PER_ITEM", 30); +const MAX_REVIEW_COMMENTS_PER_PR = readNonNegativeIntegerEnv( + "CLAWSWEEPER_MAX_REVIEW_COMMENTS_PER_PR", + 50, +); +const MAX_FILES_PER_PR = readNonNegativeIntegerEnv("CLAWSWEEPER_MAX_FILES_PER_PR", 80); +const MAX_COMMITS_PER_PR = readNonNegativeIntegerEnv("CLAWSWEEPER_MAX_COMMITS_PER_PR", 80); const MAINTAINER_AUTHOR_ASSOCIATIONS = new Set(["OWNER", "MEMBER", "COLLABORATOR"]); const REVIEW_BOT_PATTERN = /\b(greptile|codex|asile|coderabbit|code rabbit|copilot|reviewdog|sonar|deepsource|codecov|github-actions)\b/i; @@ -141,6 +157,8 @@ const plan = { hydrate_comments: HYDRATE_COMMENTS, max_comments_per_item: MAX_COMMENTS_PER_ITEM, max_review_comments_per_pr: MAX_REVIEW_COMMENTS_PER_PR, + max_files_per_pr: MAX_FILES_PER_PR, + max_commits_per_pr: MAX_COMMITS_PER_PR, }, items: itemList.map((item: JsonValue) => summarizeItem(item, job)), canonical_candidates: canonicalCandidates(itemList, job), @@ -184,10 +202,16 @@ function hydrateItem(repo: string, number: JsonValue) { } const comments = HYDRATE_COMMENTS ? ghPaged(`repos/${repo}/issues/${number}/comments`) : []; const pullRequest = issue.pull_request ? 
ghJson(["api", `repos/${repo}/pulls/${number}`]) : null; - const files = pullRequest ? ghPaged(`repos/${repo}/pulls/${number}/files`) : []; - const commits = pullRequest ? ghPaged(`repos/${repo}/pulls/${number}/commits`) : []; + const files = pullRequest + ? ghPagedLimit(`repos/${repo}/pulls/${number}/files`, MAX_FILES_PER_PR) + : []; + const commits = pullRequest + ? ghPagedLimit(`repos/${repo}/pulls/${number}/commits`, MAX_COMMITS_PER_PR) + : []; const reviews = pullRequest ? ghPaged(`repos/${repo}/pulls/${number}/reviews`) : []; const reviewComments = pullRequest ? ghPaged(`repos/${repo}/pulls/${number}/comments`) : []; + const changedFilesCount = countValue(pullRequest?.changed_files, files.length); + const commitsCount = countValue(pullRequest?.commits, commits.length); const checks = pullRequest ? ghPrChecks(repo, number) : []; return { @@ -240,13 +264,18 @@ function hydrateItem(repo: string, number: JsonValue) { .filter(Boolean), additions: pullRequest.additions, deletions: pullRequest.deletions, - changed_files: pullRequest.changed_files, + changed_files: changedFilesCount, + files_hydrated: files.length, + files_truncated: Math.max(0, changedFilesCount - files.length), files: files.map((file: JsonValue) => ({ filename: file.filename, status: file.status, additions: file.additions, deletions: file.deletions, })), + commits_count: commitsCount, + commits_hydrated: commits.length, + commits_truncated: Math.max(0, commitsCount - commits.length), commits: commits.map((commit: JsonValue) => ({ sha: commit.sha, message: firstLine(commit.commit?.message), @@ -302,6 +331,11 @@ function unavailableItem(repo: string, number: JsonValue, error: JsonValue) { }; } +function countValue(value: JsonValue, fallback = 0) { + const number = Number(value); + return Number.isFinite(number) && number >= 0 ? 
Math.floor(number) : fallback; +} + function summarizeItem(item: LooseRecord, job: LooseRecord) { return { repo: item.repo, @@ -356,9 +390,14 @@ function summarizeItem(item: LooseRecord, job: LooseRecord) { requested_reviewers: item.pull_request.requested_reviewers, requested_teams: item.pull_request.requested_teams, changed_files: item.pull_request.changed_files, + files_hydrated: item.pull_request.files_hydrated, + files_truncated: item.pull_request.files_truncated, additions: item.pull_request.additions, deletions: item.pull_request.deletions, files: item.pull_request.files, + commits_count: item.pull_request.commits_count, + commits_hydrated: item.pull_request.commits_hydrated, + commits_truncated: item.pull_request.commits_truncated, commits: item.pull_request.commits, reviews: item.pull_request.reviews, review_comments_count: item.pull_request.review_comments.length, diff --git a/test/clawsweeper.test.ts b/test/clawsweeper.test.ts index 6d3fa821bb..a33691ad77 100644 --- a/test/clawsweeper.test.ts +++ b/test/clawsweeper.test.ts @@ -17,10 +17,13 @@ import { closeReasonsArg, closingPullRequestReferenceTarget, compactMappedSlice, + compactMappedWindow, codexEnv, dashboardClosedAt, fixedPullRequestFromCommitPullsForTest, + ghPagedContextWindow, formatRecentClosedRows, + githubContextWindowPlan, githubPaginatedPath, ghRetryKind, hotIntakeRecencyMs, @@ -222,6 +225,152 @@ test("compactMappedSlice maps every entry when no compaction is needed", () => { assert.deepEqual(mapped, [1, 2, 3]); }); +test("compactMappedWindow marks omitted entries when hydration is already bounded", () => { + const mapped: number[] = []; + const result = compactMappedWindow([1, 2, 5, 6], 6, 4, (value) => { + mapped.push(value); + return value * 10; + }); + assert.deepEqual(result, [ + 10, + 20, + { omitted: 2, note: "middle entries omitted from prompt context" }, + 50, + 60, + ]); + assert.deepEqual(mapped, [1, 2, 5, 6]); +}); + +test("compactMappedWindow keeps bounded hydrated context when 
total is larger than limit", () => { + const mapped: number[] = []; + const result = compactMappedWindow([1, 2, 99, 100], 100, 4, (value) => { + mapped.push(value); + return value; + }); + assert.deepEqual(result, [ + 1, + 2, + { omitted: 96, note: "middle entries omitted from prompt context" }, + 99, + 100, + ]); + assert.deepEqual(mapped, [1, 2, 99, 100]); +}); + +test("githubContextWindowPlan includes prior page when the tail crosses a page boundary", () => { + assert.deepEqual(githubContextWindowPlan(101, 80), { + keepStart: 40, + keepEnd: 40, + tailFirstPageNumber: 1, + lastPageNumber: 2, + tailOffset: 61, + }); +}); + +test("githubContextWindowPlan keeps large tails to the final page when possible", () => { + assert.deepEqual(githubContextWindowPlan(3000, 80), { + keepStart: 40, + keepEnd: 40, + tailFirstPageNumber: 30, + lastPageNumber: 30, + tailOffset: 60, + }); +}); + +test("ghPagedContextWindow caps pull files tail fetches to GitHub's retrievable window", () => { + const fetchedPages: number[] = []; + const result = ghPagedContextWindow("/repos/openclaw/openclaw/pulls/1/files", 4000, 80, { + page: (_path, page) => { + fetchedPages.push(page); + if (page > 30) return []; + const start = (page - 1) * 100 + 1; + return Array.from({ length: 100 }, (_value, index) => start + index); + }, + paged: () => { + throw new Error("full pagination should not be used for truncated pull files"); + }, + }); + + assert.deepEqual(fetchedPages, [1, 30]); + assert.deepEqual(result.items, [ + ...Array.from({ length: 40 }, (_value, index) => index + 1), + ...Array.from({ length: 40 }, (_value, index) => index + 2961), + ]); + assert.equal(result.total, 4000); + assert.equal(result.hydrated, 80); + assert.equal(result.truncated, true); +}); + +test("ghPagedContextWindow caps pull commit tail fetches to GitHub's PR commits window", () => { + const fetchedPages: number[] = []; + const result = ghPagedContextWindow("/repos/openclaw/openclaw/pulls/1/commits", 270, 80, { + page: 
(_path, page) => { + fetchedPages.push(page); + if (page > 3) return []; + const start = (page - 1) * 100 + 1; + const length = page === 3 ? 50 : 100; + return Array.from({ length }, (_value, index) => start + index); + }, + paged: () => { + throw new Error("full pagination should not be used for truncated pull commits"); + }, + }); + + assert.deepEqual(fetchedPages, [1, 3]); + assert.deepEqual(result.items, [ + ...Array.from({ length: 40 }, (_value, index) => index + 1), + ...Array.from({ length: 40 }, (_value, index) => index + 211), + ]); + assert.equal(result.total, 270); + assert.equal(result.hydrated, 80); + assert.equal(result.truncated, true); +}); + +test("ghPagedContextWindow reuses page one when the retained tail overlaps it", () => { + const fetchedPages: number[] = []; + const result = ghPagedContextWindow("/repos/openclaw/openclaw/pulls/1/files", 101, 80, { + page: (_path, page) => { + fetchedPages.push(page); + const start = (page - 1) * 100 + 1; + const length = page === 1 ? 
100 : 1; + return Array.from({ length }, (_value, index) => start + index); + }, + paged: () => { + throw new Error("full pagination should not be used when GitHub reports a total"); + }, + }); + + assert.deepEqual(fetchedPages, [1, 2]); + assert.deepEqual(result.items, [ + ...Array.from({ length: 40 }, (_value, index) => index + 1), + ...Array.from({ length: 40 }, (_value, index) => index + 62), + ]); + assert.equal(result.total, 101); + assert.equal(result.hydrated, 80); + assert.equal(result.truncated, true); +}); + +test("ghPagedContextWindow falls back to full pagination when GitHub omits totals", () => { + const result = ghPagedContextWindow( + "/repos/openclaw/openclaw/issues/1/comments", + null, + 2, + { + page: () => { + throw new Error("single-page fetch should not be used without a total"); + }, + paged: () => [1, 2, 3], + }, + ); + + assert.deepEqual(result, { + items: [1, 2, 3], + total: 3, + hydrated: 3, + truncated: false, + }); +}); + test("review prompt assets match tracked files", () => { assert.equal(reviewPromptTemplate(), readFileSync("prompts/review-item.md", "utf8")); assert.deepEqual( @@ -626,6 +775,32 @@ test("close comments suppress duplicate best solution text", () => { assert.doesNotMatch(action.closeComment, /Best possible solution:/); }); +test("likely owner commit links ignore non-sha values", () => { + const action = reviewActionForDecision({ + item: item(), + decision: closeDecision({ + likelyOwners: [ + { + person: "@alice", + role: "feature contributor", + reason: "The changelog credits a pull request for this feature surface.", + commits: ["https://github.com/openclaw/openclaw/pull/76079", " abcdef1234567890 "], + files: ["CHANGELOG.md"], + confidence: "medium", + }, + ], + }), + git, + }); + + assert.equal(action.actionTaken, "proposed_close"); + assert.doesNotMatch(action.closeComment, /\/commit\/https:/); + assert.match( + action.closeComment, + 
/\[abcdef123456\]\(https:\/\/github\.com\/openclaw\/openclaw\/commit\/abcdef1234567890\)/, + ); +}); + test("skill-only OpenClaw PRs can close through ClawHub with upload guidance", () => { const decision = closeDecision({ closeReason: "clawhub", diff --git a/test/policy-rfc.test.ts b/test/policy-rfc.test.ts new file mode 100644 index 0000000000..4fdb2fd3c9 --- /dev/null +++ b/test/policy-rfc.test.ts @@ -0,0 +1,304 @@ +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, readdirSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import test from "node:test"; + +import { + collectPolicyPatterns, + runPolicyRfc, + scorePolicyPatterns, + synthesizePolicyProposal, +} from "../dist/policy-rfc/index.js"; + +function writeRecord( + root: string, + item: number, + body: string, + section: "items" | "closed" = "items", +): void { + const dir = join(root, "openclaw-openclaw", section); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, `${item}.md`), body); +} + +function withPolicyFixture(run: (recordsRoot: string) => void): void { + const recordsRoot = mkdtempSync(join(tmpdir(), "clawsweeper-policy-rfc-")); + try { + writeRecord( + recordsRoot, + 1, + `--- +labels: ["clawsweeper:autofix", "bug"] +reviewed_at: 2026-05-01T00:00:00.000Z +close_reason: implemented_on_main +--- + +Automerge repair cause: flaky validation +Conflict type: generated lockfile + +Result: applied +`, + ); + writeRecord( + recordsRoot, + 2, + `--- +labels: ["bug"] +reviewed_at: 2026-05-02T00:00:00.000Z +--- + + +Result: applied +`, + ); + writeRecord( + recordsRoot, + 3, + `--- +labels: ["bug"] +reviewed_at: 2026-05-03T00:00:00.000Z +--- + + +Result: applied +`, + ); + writeRecord(recordsRoot, 4, "{ this is malformed but should not crash"); + run(recordsRoot); + } finally { + rmSync(recordsRoot, { recursive: true, force: true }); + } +} + +function collectFixture(recordsRoot: string) { + return 
collectPolicyPatterns({ + recordsRoot, + targetRepo: "openclaw/openclaw", + }); +} + +test("collector extracts repeated patterns from durable records", () => { + withPolicyFixture((recordsRoot) => { + const observations = collectFixture(recordsRoot); + + assert.ok( + observations.some( + (item) => item.patternType === "repair_marker" && item.value === "validation-fix", + ), + ); + assert.ok( + observations.some( + (item) => item.patternType === "review_verdict" && item.value === "needs-changes", + ), + ); + assert.ok( + observations.some( + (item) => item.patternType === "safe_close_reason" && item.value === "implemented_on_main", + ), + ); + }); +}); + +test("collector tolerates missing and malformed records", () => { + withPolicyFixture((recordsRoot) => { + assert.doesNotThrow(() => collectFixture(recordsRoot)); + assert.deepEqual( + collectPolicyPatterns({ + recordsRoot: join(recordsRoot, "does-not-exist"), + targetRepo: "openclaw/openclaw", + }), + [], + ); + }); +}); + +test("collector preserves item numbers for archived closed records", () => { + const recordsRoot = mkdtempSync(join(tmpdir(), "clawsweeper-policy-rfc-")); + try { + writeRecord( + recordsRoot, + 42, + `--- +labels: ["bug"] +reviewed_at: 2026-05-05T00:00:00.000Z +--- + +`, + "closed", + ); + + assert.ok( + collectFixture(recordsRoot).some( + (item) => + item.patternType === "repair_marker" && + item.value === "validation-fix" && + item.item === "#42" && + item.sourceRecord === "records/openclaw-openclaw/closed/42.md", + ), + ); + } finally { + rmSync(recordsRoot, { recursive: true, force: true }); + } +}); + +test("scorer rejects low-frequency patterns", () => { + withPolicyFixture((recordsRoot) => { + const rejected = scorePolicyPatterns(collectFixture(recordsRoot), { + minOccurrences: 4, + now: new Date("2026-05-04T00:00:00.000Z"), + }); + + assert.equal( + rejected.some((item) => item.patternType === "repair_marker"), + false, + ); + }); +}); + +test("scorer accepts patterns above the 
configured threshold", () => { + withPolicyFixture((recordsRoot) => { + const accepted = scorePolicyPatterns(collectFixture(recordsRoot), { + minOccurrences: 3, + now: new Date("2026-05-04T00:00:00.000Z"), + }); + const repairPattern = accepted.find((item) => item.patternType === "repair_marker"); + + assert.ok(repairPattern); + assert.equal(repairPattern.occurrenceCount, 3); + assert.equal(repairPattern.distinctItems.length, 3); + assert.equal(repairPattern.successfulOutcomes, 3); + }); +}); + +test("scorer default reference date is derived from evidence, not wall clock", () => { + const observations = [1, 2, 3].map((item) => ({ + patternType: "repair_marker" as const, + value: "old-pattern", + repo: "openclaw/openclaw", + item: `#${item}`, + sourceRecord: `records/openclaw-openclaw/items/${item}.md`, + observedAt: "2026-01-01T00:00:00.000Z", + successfulOutcome: true, + })); + + assert.deepEqual( + scorePolicyPatterns(observations, { minOccurrences: 3 }), + scorePolicyPatterns(observations, { + minOccurrences: 3, + now: new Date("2026-01-01T00:00:00.000Z"), + }), + ); +}); + +test("scorer default reference date preserves recency differences across patterns", () => { + const observations = [ + ...[1, 2, 3].map((item) => ({ + patternType: "repair_marker" as const, + value: "old-pattern", + repo: "openclaw/openclaw", + item: `#${item}`, + sourceRecord: `records/openclaw-openclaw/items/${item}.md`, + observedAt: "2026-01-01T00:00:00.000Z", + successfulOutcome: true, + })), + ...[4, 5, 6].map((item) => ({ + patternType: "repair_marker" as const, + value: "recent-pattern", + repo: "openclaw/openclaw", + item: `#${item}`, + sourceRecord: `records/openclaw-openclaw/items/${item}.md`, + observedAt: "2026-05-01T00:00:00.000Z", + successfulOutcome: true, + })), + ]; + + const scored = scorePolicyPatterns(observations, { minOccurrences: 3 }); + const oldPattern = scored.find((item) => item.value === "old-pattern"); + const recentPattern = scored.find((item) => item.value === 
"recent-pattern"); + + assert.ok(oldPattern); + assert.ok(recentPattern); + assert.ok(recentPattern.confidenceScore > oldPattern.confidenceScore); + assert.deepEqual( + scored, + scorePolicyPatterns(observations, { + minOccurrences: 3, + now: new Date("2026-05-01T00:00:00.000Z"), + }), + ); +}); + +test("synthesizer produces stable markdown and proposal JSON", () => { + withPolicyFixture((recordsRoot) => { + const accepted = scorePolicyPatterns(collectFixture(recordsRoot), { + minOccurrences: 3, + now: new Date("2026-05-04T00:00:00.000Z"), + }); + const repairPattern = accepted.find((item) => item.patternType === "repair_marker"); + assert.ok(repairPattern); + + const proposal = synthesizePolicyProposal(repairPattern, { + createdAt: "2026-05-04T00:00:00.000Z", + }); + + assert.match(proposal.markdown, /^# Policy RFC: Repair Marker - validation-fix/); + assert.match(proposal.markdown, /Status: Draft/); + assert.match(proposal.markdown, /## Safety Constraints/); + assert.equal(proposal.json.status, "Draft"); + assert.equal(proposal.json.pattern_type, "repair_marker"); + assert.equal(proposal.json.evidence_items.length, 3); + assert.equal(proposal.json.created_at, "2026-05-04T00:00:00.000Z"); + }); +}); + +test("synthesizer defaults created_at to latest evidence timestamp", () => { + withPolicyFixture((recordsRoot) => { + const accepted = scorePolicyPatterns(collectFixture(recordsRoot), { + minOccurrences: 3, + }); + const repairPattern = accepted.find((item) => item.patternType === "repair_marker"); + assert.ok(repairPattern); + + const first = synthesizePolicyProposal(repairPattern); + const second = synthesizePolicyProposal(repairPattern); + + assert.equal(first.json.created_at, "2026-05-03T00:00:00.000Z"); + assert.equal(first.markdown, second.markdown); + assert.deepEqual(first.json, second.json); + }); +}); + +test("runPolicyRfc removes stale generated proposal files before writing current output", () => { + withPolicyFixture((recordsRoot) => { + const 
outputRoot = mkdtempSync(join(tmpdir(), "clawsweeper-policy-rfc-output-")); + try { + const first = runPolicyRfc({ + recordsRoot, + outputRoot, + targetRepo: "openclaw/openclaw", + minOccurrences: 3, + }); + const generatedFiles = readdirSync(first.outputDir).filter( + (name) => name.endsWith(".md") || name.endsWith(".json"), + ); + assert.ok(generatedFiles.length > 0); + + writeFileSync(join(first.outputDir, "policy-rfc-stale-deadbeef.md"), "stale\n"); + writeFileSync(join(first.outputDir, "policy-rfc-stale-deadbeef.json"), "{}\n"); + writeFileSync(join(first.outputDir, "operator-note.txt"), "preserve me\n"); + + const second = runPolicyRfc({ + recordsRoot, + outputRoot, + targetRepo: "openclaw/openclaw", + minOccurrences: 4, + }); + + assert.equal(second.proposals, 0); + assert.deepEqual(readdirSync(second.outputDir).sort(), ["operator-note.txt"]); + } finally { + rmSync(outputRoot, { recursive: true, force: true }); + } + }); +}); diff --git a/test/repair/comment-router-core.test.ts b/test/repair/comment-router-core.test.ts index 325c1218e2..f04d0bb666 100644 --- a/test/repair/comment-router-core.test.ts +++ b/test/repair/comment-router-core.test.ts @@ -18,6 +18,8 @@ import { automergeTransientWaitConfig, buildAutomergeMergeArgs, commandHasAction, + createCachedIssueCommentsLookup, + createCachedIssueCommentsLookupAsync, commandResponseMarker, commandResponseMarkerPrefix, commandStatusMarkerPrefix, @@ -205,6 +207,69 @@ test("cached label number lookup fetches each label once and returns stable copi assert.deepEqual(calls, ["clawsweeper:autofix", "clawsweeper:automerge"]); }); +test("cached issue comments lookup fetches each issue once and returns stable copies", () => { + const calls: number[] = []; + const lookup = createCachedIssueCommentsLookup((number) => { + calls.push(number); + return [{ id: number * 10 }, { id: number * 10 + 1 }]; + }); + + const first = lookup(12); + first.push({ id: 999 }); + + assert.deepEqual(first, [{ id: 120 }, { id: 121 }, { id: 
999 }]); + assert.deepEqual(lookup("12"), [{ id: 120 }, { id: 121 }]); + assert.deepEqual(lookup(13), [{ id: 130 }, { id: 131 }]); + assert.deepEqual(lookup(0), []); + assert.deepEqual(calls, [12, 13]); +}); + +test("cached async issue comments lookup shares cache and in-flight fetches", async () => { + const cache = new Map(); + const calls: number[] = []; + const asyncLookup = createCachedIssueCommentsLookupAsync(async (number) => { + calls.push(number); + await new Promise((resolve) => setTimeout(resolve, 5)); + return [{ id: number * 10 }]; + }, cache); + const syncLookup = createCachedIssueCommentsLookup((number) => { + calls.push(number); + return [{ id: number * 100 }]; + }, cache); + + const [first, second] = await Promise.all([asyncLookup(12), asyncLookup("12")]); + first.push({ id: 999 }); + + assert.deepEqual(first, [{ id: 120 }, { id: 999 }]); + assert.deepEqual(second, [{ id: 120 }]); + assert.deepEqual(syncLookup(12), [{ id: 120 }]); + assert.deepEqual(await asyncLookup(0), []); + assert.deepEqual(calls, [12]); +}); + +test("cached issue comments lookup does not cache malformed fetch results", async () => { + const cache = new Map(); + let syncCalls = 0; + const syncLookup = createCachedIssueCommentsLookup(() => { + syncCalls += 1; + return "bad" as never; + }, cache); + + assert.deepEqual(syncLookup(12), []); + assert.deepEqual(syncLookup(12), []); + assert.equal(syncCalls, 2); + + let asyncCalls = 0; + const asyncLookup = createCachedIssueCommentsLookupAsync(async () => { + asyncCalls += 1; + return "bad" as never; + }, cache); + + assert.deepEqual(await asyncLookup(12), []); + assert.deepEqual(await asyncLookup(12), []); + assert.equal(asyncCalls, 2); +}); + test("autoclose reason parser preserves maintainer wording", () => { assert.equal( autocloseReasonFromCommand("autoclose We don't want this feature"), diff --git a/test/repair/github-cli.test.ts b/test/repair/github-cli.test.ts index 7ec49469b9..e1f4c90b4e 100644 --- 
a/test/repair/github-cli.test.ts +++ b/test/repair/github-cli.test.ts @@ -1,7 +1,7 @@ import assert from "node:assert/strict"; import test from "node:test"; -import { githubPaginatedPath } from "../../dist/repair/github-cli.js"; +import { githubLimitedPagePath, githubPaginatedPath } from "../../dist/repair/github-cli.js"; test("githubPaginatedPath requests maximum REST page size by default", () => { assert.equal( @@ -17,3 +17,22 @@ test("githubPaginatedPath requests maximum REST page size by default", () => { "repos/openclaw/openclaw/issues?per_page=50&state=open", ); }); + +test("githubLimitedPagePath caps one REST page and preserves existing filters", () => { + assert.equal( + githubLimitedPagePath("repos/openclaw/openclaw/pulls/123/files", 80), + "repos/openclaw/openclaw/pulls/123/files?per_page=80&page=1", + ); + assert.equal( + githubLimitedPagePath( + "repos/openclaw/openclaw/pulls/123/files?state=open&per_page=100", + 250, + 3, + ), + "repos/openclaw/openclaw/pulls/123/files?state=open&per_page=100&page=3", + ); + assert.equal( + githubLimitedPagePath("repos/openclaw/openclaw/pulls/123/files", 0, 0), + "repos/openclaw/openclaw/pulls/123/files?per_page=1&page=1", + ); +}); diff --git a/test/repair/plan-cluster.test.ts b/test/repair/plan-cluster.test.ts index 06dc5748f2..37fb0c66ae 100644 --- a/test/repair/plan-cluster.test.ts +++ b/test/repair/plan-cluster.test.ts @@ -222,9 +222,141 @@ test("plan-cluster treats same-repo PR branches as writable despite raw maintain assert.match(pull.branch_write_reason, /same-repo head branch/); }); +test("plan-cluster bounds PR file and commit hydration", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "clawsweeper-plan-bounded-pr-")); + const binDir = path.join(tmp, "bin"); + const jobPath = path.join(tmp, "job.md"); + const runDir = path.join(tmp, "run"); + fs.mkdirSync(binDir); + fs.writeFileSync(path.join(binDir, "gh"), fakeGhScript(), { mode: 0o755 }); + + fs.writeFileSync( + jobPath, + [ + "---", + "repo: 
openclaw/openclaw", + "cluster_id: automerge-openclaw-openclaw-74134", + "mode: autonomous", + "allowed_actions:", + " - comment", + " - fix", + " - raise_pr", + "blocked_actions:", + " - close", + " - merge", + "source: pr_automerge", + "canonical:", + " - #74134", + "candidates:", + " - #74134", + "allow_fix_pr: true", + "allow_merge: false", + "security_policy: central_security_only", + "security_sensitive: false", + "---", + "Maintainer opted #74134 into ClawSweeper automerge.", + "", + ].join("\n"), + ); + + execFileSync(process.execPath, ["dist/repair/plan-cluster.js", jobPath, "--run-dir", runDir], { + cwd: process.cwd(), + env: { + ...process.env, + PATH: `${binDir}${path.delimiter}${process.env.PATH}`, + FAKE_GH_LARGE_PR: "1", + CLAWSWEEPER_MAX_FILES_PER_PR: "eighty", + CLAWSWEEPER_MAX_COMMITS_PER_PR: "many", + }, + stdio: "pipe", + }); + + const clusterPlan = JSON.parse(fs.readFileSync(path.join(runDir, "cluster-plan.json"), "utf8")); + const pull = clusterPlan.items[0].pull_request; + + assert.equal(pull.changed_files, 120); + assert.equal(pull.files_hydrated, 80); + assert.equal(pull.files_truncated, 40); + assert.equal(pull.files.length, 80); + assert.equal(pull.commits_count, 120); + assert.equal(pull.commits_hydrated, 80); + assert.equal(pull.commits_truncated, 40); + assert.equal(pull.commits.length, 80); +}); + +test("plan-cluster bounded PR hydration follows multiple GitHub pages", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "clawsweeper-plan-bounded-pr-pages-")); + const binDir = path.join(tmp, "bin"); + const jobPath = path.join(tmp, "job.md"); + const runDir = path.join(tmp, "run"); + const ghLog = path.join(tmp, "gh.log"); + fs.mkdirSync(binDir); + fs.writeFileSync(path.join(binDir, "gh"), fakeGhScript(), { mode: 0o755 }); + + fs.writeFileSync( + jobPath, + [ + "---", + "repo: openclaw/openclaw", + "cluster_id: automerge-openclaw-openclaw-74134", + "mode: autonomous", + "allowed_actions:", + " - comment", + " - fix", + " - 
raise_pr", + "blocked_actions:", + " - close", + " - merge", + "source: pr_automerge", + "canonical:", + " - #74134", + "candidates:", + " - #74134", + "allow_fix_pr: true", + "allow_merge: false", + "security_policy: central_security_only", + "security_sensitive: false", + "---", + "Maintainer opted #74134 into ClawSweeper automerge.", + "", + ].join("\n"), + ); + + execFileSync(process.execPath, ["dist/repair/plan-cluster.js", jobPath, "--run-dir", runDir], { + cwd: process.cwd(), + env: { + ...process.env, + PATH: `${binDir}${path.delimiter}${process.env.PATH}`, + FAKE_GH_LARGE_PR: "1", + FAKE_GH_LARGE_PR_COUNT: "150", + FAKE_GH_LOG: ghLog, + CLAWSWEEPER_MAX_FILES_PER_PR: "150", + CLAWSWEEPER_MAX_COMMITS_PER_PR: "150", + }, + stdio: "pipe", + }); + + const clusterPlan = JSON.parse(fs.readFileSync(path.join(runDir, "cluster-plan.json"), "utf8")); + const pull = clusterPlan.items[0].pull_request; + const ghCalls = fs.readFileSync(ghLog, "utf8"); + + assert.equal(pull.changed_files, 150); + assert.equal(pull.files_hydrated, 150); + assert.equal(pull.files_truncated, 0); + assert.equal(pull.files.length, 150); + assert.equal(pull.commits_count, 150); + assert.equal(pull.commits_hydrated, 150); + assert.equal(pull.commits_truncated, 0); + assert.equal(pull.commits.length, 150); + assert.equal((ghCalls.match(/pulls\/74134\/files\?per_page=100&page=/g) ?? []).length, 2); + assert.equal((ghCalls.match(/pulls\/74134\/commits\?per_page=100&page=/g) ?? 
[]).length, 2); +}); + function fakeGhScript() { return `#!/usr/bin/env node +const fs = require("node:fs"); const args = process.argv.slice(2); +if (process.env.FAKE_GH_LOG) fs.appendFileSync(process.env.FAKE_GH_LOG, args.join(" ") + "\\n"); function write(value) { process.stdout.write(JSON.stringify(value)); } @@ -248,6 +380,10 @@ if (isPaged()) { write([pagedResponse(endpoint)]); process.exit(0); } +if (/\\?(?:.*&)?per_page=/.test(endpoint)) { + write(pagedResponse(endpoint)); + process.exit(0); +} if (endpoint === "repos/openclaw/openclaw/issues/74134") { write(issue(74134, [], "Replacement PR: https://github.com/openclaw/openclaw/pull/74742")); process.exit(0); @@ -282,6 +418,8 @@ function issue(number, labels, body) { }; } function pull(number, sha) { + const large = process.env.FAKE_GH_LARGE_PR === "1"; + const largeCount = Number(process.env.FAKE_GH_LARGE_PR_COUNT || 120); return { draft: false, merged: false, @@ -300,11 +438,34 @@ function pull(number, sha) { requested_teams: [], additions: 1, deletions: 0, - changed_files: 1, + changed_files: large ? largeCount : 1, + commits: large ? 
largeCount : 1, + review_comments: 0, }; } function pagedResponse(endpoint) { - if (endpoint.endsWith("/commits")) return [{ sha: "commit-sha", commit: { message: "test" }, author: { login: "contributor" } }]; + const [endpointPath, query = ""] = endpoint.split("?"); + const params = new URLSearchParams(query); + const limit = Math.max(1, Number(params.get("per_page") || 1)); + const page = Math.max(1, Number(params.get("page") || 1)); + const total = Number(process.env.FAKE_GH_LARGE_PR_COUNT || 120); + const start = (page - 1) * limit; + const count = Math.max(0, Math.min(limit, total - start)); + if (endpointPath.endsWith("/files")) { + return Array.from({ length: count }, (_, index) => ({ + filename: "src/file-" + (start + index) + ".ts", + status: "modified", + additions: 1, + deletions: 0, + })); + } + if (endpointPath.endsWith("/commits")) { + return Array.from({ length: count }, (_, index) => ({ + sha: "commit-sha-" + (start + index), + commit: { message: "test " + (start + index) }, + author: { login: "contributor" }, + })); + } return []; } `;