From 457daab2e3af7eae24f8c2fc9c3a6f7486873803 Mon Sep 17 00:00:00 2001 From: Malik Salim Date: Sun, 22 Mar 2026 19:18:45 -0400 Subject: [PATCH] fix: let /review satisfy ship readiness --- .agents/skills/gstack-review/SKILL.md | 21 ++++++++++++ .agents/skills/gstack-ship/SKILL.md | 6 ++-- review/SKILL.md | 21 ++++++++++++ review/SKILL.md.tmpl | 21 ++++++++++++ scripts/gen-skill-docs.ts | 46 +++++++++++++++++++++++++++ ship/SKILL.md | 6 ++-- ship/SKILL.md.tmpl | 4 +-- test/gen-skill-docs.test.ts | 8 +++++ test/skill-validation.test.ts | 12 +++++++ 9 files changed, 137 insertions(+), 8 deletions(-) diff --git a/.agents/skills/gstack-review/SKILL.md b/.agents/skills/gstack-review/SKILL.md index 03c1bdba8..77bb984be 100644 --- a/.agents/skills/gstack-review/SKILL.md +++ b/.agents/skills/gstack-review/SKILL.md @@ -695,6 +695,27 @@ If no documentation files exist, skip this step silently. +## Step 5.8: Persist Eng Review result + +After all review passes complete, persist the final `/review` outcome so `/ship` can +recognize that Eng Review was run on this branch. + +Run: + +```bash +~/.codex/skills/gstack/bin/gstack-review-log '{"skill":"review","timestamp":"TIMESTAMP","status":"STATUS","issues_found":N,"critical":N,"informational":N,"commit":"COMMIT"}' +``` + +Substitute: +- `TIMESTAMP` = ISO 8601 datetime +- `STATUS` = `"clean"` if there are no remaining unresolved findings after Fix-First handling and adversarial review, otherwise `"issues_found"` +- `issues_found` = total remaining unresolved findings +- `critical` = remaining unresolved critical findings +- `informational` = remaining unresolved informational findings +- `COMMIT` = output of `git rev-parse --short HEAD` + +If the review exits early before a real review completes (for example, no diff against the base branch), do **not** write this entry. + ## Important Rules - **Read the FULL diff before commenting.** Do not flag issues already addressed in the diff. diff --git a/.agents/skills/gstack-ship/SKILL.md b/.agents/skills/gstack-ship/SKILL.md index e53d02d8c..0ec61b083 100644 --- a/.agents/skills/gstack-ship/SKILL.md +++ b/.agents/skills/gstack-ship/SKILL.md @@ -309,7 +309,7 @@ After completing the review, read the review log and config to display the dashb ~/.codex/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Eng Review row, show whichever is more recent between `review` (diff-scoped pre-landing review) and `plan-eng-review` (plan-stage architecture review). Append "(DIFF)" or "(PLAN)" to the status to distinguish. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -333,7 +333,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. **Verdict logic:** -- **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) +- **CLEARED**: Eng Review has >= 1 entry within 7 days from either \`review\` or \`plan-eng-review\` with status "clean" (or \`skip_eng_review\` is \`true\`) - **NOT CLEARED**: Eng Review missing, stale (>7 days), or has open issues - CEO, Design, and Codex reviews are shown for context but never block shipping - If \`skip_eng_review\` config is \`true\`, Eng Review shows "SKIPPED (global)" and verdict is CLEARED @@ -356,7 +356,7 @@ If the Eng Review is NOT "CLEAR": 2. **If no override exists,** use AskUserQuestion: - Show that Eng Review is missing or has open issues - RECOMMENDATION: Choose C if the change is obviously trivial (< 20 lines, typo fix, config-only); Choose B for larger changes - - Options: A) Ship anyway B) Abort — run /plan-eng-review first C) Change is too small to need eng review + - Options: A) Ship anyway B) Abort — run /review or /plan-eng-review first C) Change is too small to need eng review - If CEO Review is missing, mention as informational ("CEO Review not run — recommended for product changes") but do NOT block - For Design Review: run `source <(~/.codex/skills/gstack/bin/gstack-diff-scope 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 3.5, but consider running /design-review for a full visual audit post-implementation." Still never block. diff --git a/review/SKILL.md b/review/SKILL.md index 5bbf79c3a..f130a3e20 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -840,6 +840,27 @@ High-confidence findings (agreed on by multiple sources) should be prioritized f --- +## Step 5.8: Persist Eng Review result + +After all review passes complete, persist the final `/review` outcome so `/ship` can +recognize that Eng Review was run on this branch. + +Run: + +```bash +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"review","timestamp":"TIMESTAMP","status":"STATUS","issues_found":N,"critical":N,"informational":N,"commit":"COMMIT"}' +``` + +Substitute: +- `TIMESTAMP` = ISO 8601 datetime +- `STATUS` = `"clean"` if there are no remaining unresolved findings after Fix-First handling and adversarial review, otherwise `"issues_found"` +- `issues_found` = total remaining unresolved findings +- `critical` = remaining unresolved critical findings +- `informational` = remaining unresolved informational findings +- `COMMIT` = output of `git rev-parse --short HEAD` + +If the review exits early before a real review completes (for example, no diff against the base branch), do **not** write this entry. + ## Important Rules - **Read the FULL diff before commenting.** Do not flag issues already addressed in the diff. diff --git a/review/SKILL.md.tmpl b/review/SKILL.md.tmpl index a33b0fa8e..8ae9045ae 100644 --- a/review/SKILL.md.tmpl +++ b/review/SKILL.md.tmpl @@ -250,6 +250,27 @@ If no documentation files exist, skip this step silently. {{ADVERSARIAL_STEP}} +## Step 5.8: Persist Eng Review result + +After all review passes complete, persist the final `/review` outcome so `/ship` can +recognize that Eng Review was run on this branch. + +Run: + +```bash +~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"review","timestamp":"TIMESTAMP","status":"STATUS","issues_found":N,"critical":N,"informational":N,"commit":"COMMIT"}' +``` + +Substitute: +- `TIMESTAMP` = ISO 8601 datetime +- `STATUS` = `"clean"` if there are no remaining unresolved findings after Fix-First handling and adversarial review, otherwise `"issues_found"` +- `issues_found` = total remaining unresolved findings +- `critical` = remaining unresolved critical findings +- `informational` = remaining unresolved informational findings +- `COMMIT` = output of `git rev-parse --short HEAD` + +If the review exits early before a real review completes (for example, no diff against the base branch), do **not** write this entry. + ## Important Rules - **Read the FULL diff before commenting.** Do not flag issues already addressed in the diff. diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index 08c388f69..c6e3cda90 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -1246,6 +1246,51 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - If all reviews match the current HEAD, do not display any staleness notes`; } +function generateShipReviewDashboard(_ctx: TemplateContext): string { + return `## Review Readiness Dashboard + +After completing the review, read the review log and config to display the dashboard. + +\`\`\`bash +~/.claude/skills/gstack/bin/gstack-review-read +\`\`\` + +Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Eng Review row, show whichever is more recent between \`review\` (diff-scoped pre-landing review) and \`plan-eng-review\` (plan-stage architecture review). Append "(DIFF)" or "(PLAN)" to the status to distinguish. For the Adversarial row, show whichever is more recent between \`adversarial-review\` (new auto-scaled) and \`codex-review\` (legacy). For Design Review, show whichever is more recent between \`plan-design-review\` (full visual audit) and \`design-review-lite\` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: + +\`\`\` ++====================================================================+ +| REVIEW READINESS DASHBOARD | ++====================================================================+ +| Review | Runs | Last Run | Status | Required | +|-----------------|------|---------------------|-----------|----------| +| Eng Review | 1 | 2026-03-16 15:00 | CLEAR | YES | +| CEO Review | 0 | — | — | no | +| Design Review | 0 | — | — | no | +| Adversarial | 0 | — | — | no | ++--------------------------------------------------------------------+ +| VERDICT: CLEARED — Eng Review passed | ++====================================================================+ +\`\`\` + +**Review tiers:** +- **Eng Review (required by default):** The only review that gates shipping. Covers architecture, code quality, tests, performance. Can be disabled globally with \\\`gstack-config set skip_eng_review true\\\` (the "don't bother me" setting). +- **CEO Review (optional):** Use your judgment. Recommend it for big product/business changes, new user-facing features, or scope decisions. Skip for bug fixes, refactors, infra, and cleanup. +- **Design Review (optional):** Use your judgment. Recommend it for UI/UX changes. Skip for backend-only, infra, or prompt-only changes. +- **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. + +**Verdict logic:** +- **CLEARED**: Eng Review has >= 1 entry within 7 days from either \\\`review\\\` or \\\`plan-eng-review\\\` with status "clean" (or \\\`skip_eng_review\\\` is \\\`true\\\`) +- **NOT CLEARED**: Eng Review missing, stale (>7 days), or has open issues +- CEO, Design, and Codex reviews are shown for context but never block shipping +- If \\\`skip_eng_review\\\` config is \\\`true\\\`, Eng Review shows "SKIPPED (global)" and verdict is CLEARED + +**Staleness detection:** After displaying the dashboard, check if any existing reviews may be stale: +- Parse the \\\`---HEAD---\\\` section from the bash output to get the current HEAD commit hash +- For each review entry that has a \\\`commit\\\` field: compare it against the current HEAD. If different, count elapsed commits: \\\`git rev-list --count STORED_COMMIT..HEAD\\\`. Display: "Note: {skill} review from {date} may be stale — {N} commits since review" +- For entries without a \\\`commit\\\` field (legacy entries): display "Note: {skill} review from {date} has no commit tracking — consider re-running for accurate staleness detection" +- If all reviews match the current HEAD, do not display any staleness notes`; +} + function generatePlanFileReviewReport(_ctx: TemplateContext): string { return `## Plan File Review Report @@ -2177,6 +2222,7 @@ const RESOLVERS: Record string> = { DESIGN_METHODOLOGY: generateDesignMethodology, DESIGN_REVIEW_LITE: generateDesignReviewLite, REVIEW_DASHBOARD: generateReviewDashboard, + REVIEW_DASHBOARD_SHIP: generateShipReviewDashboard, PLAN_FILE_REVIEW_REPORT: generatePlanFileReviewReport, TEST_BOOTSTRAP: generateTestBootstrap, TEST_COVERAGE_AUDIT_PLAN: generateTestCoverageAuditPlan, diff --git a/ship/SKILL.md b/ship/SKILL.md index 23b3ed1e9..79634b142 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -320,7 +320,7 @@ After completing the review, read the review log and config to display the dashb ~/.claude/skills/gstack/bin/gstack-review-read ``` -Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: +Parse the output. Find the most recent entry for each skill (plan-ceo-review, plan-eng-review, review, plan-design-review, design-review-lite, adversarial-review, codex-review). Ignore entries with timestamps older than 7 days. For the Eng Review row, show whichever is more recent between `review` (diff-scoped pre-landing review) and `plan-eng-review` (plan-stage architecture review). Append "(DIFF)" or "(PLAN)" to the status to distinguish. For the Adversarial row, show whichever is more recent between `adversarial-review` (new auto-scaled) and `codex-review` (legacy). For Design Review, show whichever is more recent between `plan-design-review` (full visual audit) and `design-review-lite` (code-level check). Append "(FULL)" or "(LITE)" to the status to distinguish. Display: ``` +====================================================================+ @@ -344,7 +344,7 @@ Parse the output. Find the most recent entry for each skill (plan-ceo-review, pl - **Adversarial Review (automatic):** Auto-scales by diff size. Small diffs (<50 lines) skip adversarial. Medium diffs (50–199) get cross-model adversarial. Large diffs (200+) get all 4 passes: Claude structured, Codex structured, Claude adversarial subagent, Codex adversarial. No configuration needed. **Verdict logic:** -- **CLEARED**: Eng Review has >= 1 entry within 7 days with status "clean" (or \`skip_eng_review\` is \`true\`) +- **CLEARED**: Eng Review has >= 1 entry within 7 days from either \`review\` or \`plan-eng-review\` with status "clean" (or \`skip_eng_review\` is \`true\`) - **NOT CLEARED**: Eng Review missing, stale (>7 days), or has open issues - CEO, Design, and Codex reviews are shown for context but never block shipping - If \`skip_eng_review\` config is \`true\`, Eng Review shows "SKIPPED (global)" and verdict is CLEARED @@ -367,7 +367,7 @@ If the Eng Review is NOT "CLEAR": 2. **If no override exists,** use AskUserQuestion: - Show that Eng Review is missing or has open issues - RECOMMENDATION: Choose C if the change is obviously trivial (< 20 lines, typo fix, config-only); Choose B for larger changes - - Options: A) Ship anyway B) Abort — run /plan-eng-review first C) Change is too small to need eng review + - Options: A) Ship anyway B) Abort — run /review or /plan-eng-review first C) Change is too small to need eng review - If CEO Review is missing, mention as informational ("CEO Review not run — recommended for product changes") but do NOT block - For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 3.5, but consider running /design-review for a full visual audit post-implementation." Still never block. diff --git a/ship/SKILL.md.tmpl b/ship/SKILL.md.tmpl index bd74c197a..0ec66a69e 100644 --- a/ship/SKILL.md.tmpl +++ b/ship/SKILL.md.tmpl @@ -56,7 +56,7 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat 4. Check review readiness: -{{REVIEW_DASHBOARD}} +{{REVIEW_DASHBOARD_SHIP}} If the Eng Review is NOT "CLEAR": @@ -70,7 +70,7 @@ If the Eng Review is NOT "CLEAR": 2. **If no override exists,** use AskUserQuestion: - Show that Eng Review is missing or has open issues - RECOMMENDATION: Choose C if the change is obviously trivial (< 20 lines, typo fix, config-only); Choose B for larger changes - - Options: A) Ship anyway B) Abort — run /plan-eng-review first C) Change is too small to need eng review + - Options: A) Ship anyway B) Abort — run /review or /plan-eng-review first C) Change is too small to need eng review - If CEO Review is missing, mention as informational ("CEO Review not run — recommended for product changes") but do NOT block - For Design Review: run `source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null)`. If `SCOPE_FRONTEND=true` and no design review (plan-design-review or design-review-lite) exists in the dashboard, mention: "Design Review not run — this PR changes frontend code. The lite design check will run automatically in Step 3.5, but consider running /design-review for a full visual audit post-implementation." Still never block. diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index d1d907a01..8e2852e0b 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -358,6 +358,14 @@ describe('REVIEW_DASHBOARD resolver', () => { expect(content).toContain('REVIEW READINESS DASHBOARD'); }); + test('dashboard treats review as a valid Eng Review source', () => { + const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); + expect(content).toContain('plan-eng-review, review, plan-design-review'); + expect(content).toContain('`review` (diff-scoped pre-landing review)'); + expect(content).toContain('`plan-eng-review` (plan-stage architecture review)'); + expect(content).toContain('from either \\`review\\` or \\`plan-eng-review\\`'); + }); + test('resolver output contains key dashboard elements', () => { const content = fs.readFileSync(path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), 'utf-8'); expect(content).toContain('VERDICT'); diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts index dd5a5c3d8..d878b8548 100644 --- a/test/skill-validation.test.ts +++ b/test/skill-validation.test.ts @@ -1351,6 +1351,18 @@ describe('Codex skill', () => { expect(content).toContain('Adversarial'); expect(content).toContain('codex-review'); }); + + test('/review persists a review-log entry for ship readiness', () => { + const content = fs.readFileSync(path.join(ROOT, 'review', 'SKILL.md'), 'utf-8'); + expect(content).toContain('"skill":"review"'); + expect(content).toContain('"issues_found":N'); + expect(content).toContain('Persist Eng Review result'); + }); + + test('/ship gate suggests /review or /plan-eng-review when Eng Review is missing', () => { + const content = fs.readFileSync(path.join(ROOT, 'ship', 'SKILL.md'), 'utf-8'); + expect(content).toContain('Abort — run /review or /plan-eng-review first'); + }); }); // --- Trigger phrase validation ---