auth0 · frederikprijck · Jun 9, 2026 · Jun 9, 2026 · Jun 10, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -137,6 +137,7 @@ When you make a change, update every doc whose described behavior is affected. T
 |---|---|
 | New eval added (`PROMPT.md` + `graders.ts`) | `AGENTS.md` eval list (if maintaining one); `docs/ADDING_EVALS.md` if the change reveals a gap in the guide |
 | `setup_command` behaviour changed (e.g. new syntax supported) | `docs/ADDING_EVALS.md` — frontmatter table and example; `AGENTS.md` checklist if relevant |
+| `compile_command` added to an eval or its context-injection behaviour changed | `docs/ADDING_EVALS.md` — frontmatter table and example; `AGENTS.md` checklist if relevant |
 | New skill added or skill resolution logic changed | `docs/TESTING_SKILLS.md`; `AGENTS.md` if skill tooling or config changed |
 | New CLI flag or runner added | `AGENTS.md` CLI flags table and Agent runners table; `README.md` quick-start if the flag is commonly used |
 | Scoring dimension added, changed, or removed | `docs/SCORING_METHODOLOGY.md` first (per the workflow); then `AGENTS.md` scoring section once merged |
@@ -151,7 +152,7 @@ When you make a change, update every doc whose described behavior is affected. T
 ## Adding an eval — checklist
 
 1. `src/evals/<category>/<eval-dir>/PROMPT.md` + `graders.ts`
-2. Add `id` (required) and optionally `name`/`category` to `PROMPT.md` frontmatter — the framework auto-discovers evals from `evalsDir`
+2. Add `id` (required) and optionally `name`/`category`/`compile_command` to `PROMPT.md` frontmatter — the framework auto-discovers evals from `evalsDir`. Set `compile_command` to the command the agent should run to verify the project compiles (an instruction to run it is appended to the agent's context file, e.g. `CLAUDE.md`); omit it for evals with no CLI compile step (e.g. mobile).
 3. All imports use `.js` extensions; `import type` for type-only
 4. All graders have `GraderLevel`; one final holistic `judge` with no level
 5. `npm run build && npm test` passes

diff --git a/apps/auth0-evals/src/evals/quickstarts/angular/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/angular/PROMPT.md
@@ -3,6 +3,7 @@ id: angular_quickstart
 name: Angular Quickstart
 skills: auth0-angular
 setup_command: npm install
+compile_command: npm run build
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/angular/graders.ts b/apps/auth0-evals/src/evals/quickstarts/angular/graders.ts
@@ -1,4 +1,4 @@
-import { contains, notContains, matches, judge, GraderLevel } from '@a0/eval-graders';
+import { contains, notContains, matches, judge, ranCommandOneOf, GraderLevel } from '@a0/eval-graders';
 
 export function defineGraders() {
   return [
@@ -20,13 +20,13 @@ export function defineGraders() {
     notContains('sessionStorage.setItem', 'No tokens stored in sessionStorage', GraderLevel.L3),
 
     // ── L4: Structural / behavioral correctness ───────────────────────────────
-    // Event-based install/build verification temporarily disabled — see PR scoping discussion.
+    // Install verification left disabled — a valid solution may edit package.json then run a bare `npm install`.
     // ranCommand('npm install', '@auth0/auth0-angular', 'Ran npm install for @auth0/auth0-angular', GraderLevel.L4),
-    // ranCommandOneOf(
-    //   ['npm run build', 'ng build'],
-    //   'Ran build to verify compilation (npm run build, ng build, or npx ng build)',
-    //   GraderLevel.L4,
-    // ),
+    ranCommandOneOf(
+      ['npm run build', 'ng build'],
+      'Ran build to verify compilation (npm run build, ng build, or npx ng build)',
+      GraderLevel.L4,
+    ),
     matches(String.raw`provideAuth0\s*\(`, 'Auth0 configured via provideAuth0()', GraderLevel.L4),
     matches(
       String.raw`canActivate\s*:\s*\[?\s*(AuthGuard|authGuardFn)`,

diff --git a/apps/auth0-evals/src/evals/quickstarts/express/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/express/PROMPT.md
@@ -3,6 +3,7 @@ id: express_quickstart
 name: Express Quickstart
 skills: auth0-express
 setup_command: npm install
+compile_command: node --check server.js
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/fastapi/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/fastapi/PROMPT.md
@@ -3,6 +3,7 @@ id: fastapi_quickstart
 name: FastAPI Quickstart
 skills: auth0-fastapi-api
 setup_command: python3 -m venv .venv && .venv/bin/pip install -r requirements.txt
+compile_command: .venv/bin/python -m py_compile main.py
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/fastify-api/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/fastify-api/PROMPT.md
@@ -3,6 +3,7 @@ id: fastify_api_quickstart
 name: Fastify API Quickstart
 skills: auth0-fastify-api
 setup_command: npm install
+compile_command: node --check server.js
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/flask/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/flask/PROMPT.md
@@ -3,6 +3,7 @@ id: flask_quickstart
 name: Flask Quickstart
 skills: auth0-flask
 setup_command: python3 -m venv .venv && .venv/bin/pip install -r requirements.txt
+compile_command: .venv/bin/python -m py_compile app.py
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/nextjs/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/nextjs/PROMPT.md
@@ -3,6 +3,7 @@ id: nextjs_quickstart
 name: Next.js App Router Quickstart
 skills: auth0-nextjs
 setup_command: npm install
+compile_command: npm run build
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/nextjs/graders.ts b/apps/auth0-evals/src/evals/quickstarts/nextjs/graders.ts
@@ -1,4 +1,13 @@
-import { contains, notContains, notContainsInSource, matches, judge, wroteFile, GraderLevel } from '@a0/eval-graders';
+import {
+  contains,
+  notContains,
+  notContainsInSource,
+  matches,
+  judge,
+  wroteFile,
+  ranCommandOneOf,
+  GraderLevel,
+} from '@a0/eval-graders';
 
 export function defineGraders() {
   return [
@@ -35,9 +44,9 @@ export function defineGraders() {
     ),
 
     // ── L4: Structural / behavioral correctness ───────────────────────────────
-    // Event-based install/build verification temporarily disabled — see PR scoping discussion.
+    // Install verification left disabled — a valid solution may edit package.json then run a bare `npm install`.
     // ranCommand('npm install', '@auth0/nextjs-auth0', 'Ran npm install for @auth0/nextjs-auth0', GraderLevel.L4),
-    // ranCommandOneOf(['npm run build', 'next build'], 'Ran build to verify compilation', GraderLevel.L4),
+    ranCommandOneOf(['npm run build', 'next build'], 'Ran build to verify compilation', GraderLevel.L4),
     wroteFile('.env', 'Wrote Auth0 credentials to .env file', GraderLevel.L4, [
       'dev-barkbook.us.auth0.com',
       'barkbook_client_abc123xyz',

diff --git a/apps/auth0-evals/src/evals/quickstarts/nuxt/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/nuxt/PROMPT.md
@@ -3,6 +3,7 @@ id: nuxt_quickstart
 name: Nuxt Quickstart
 skills: auth0-nuxt
 setup_command: npm install
+compile_command: npm run build
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/nuxt/graders.ts b/apps/auth0-evals/src/evals/quickstarts/nuxt/graders.ts
@@ -1,4 +1,13 @@
-import { contains, notContains, notContainsInSource, matches, judge, wroteFile, GraderLevel } from '@a0/eval-graders';
+import {
+  contains,
+  notContains,
+  notContainsInSource,
+  matches,
+  judge,
+  wroteFile,
+  ranCommandOneOf,
+  GraderLevel,
+} from '@a0/eval-graders';
 
 export function defineGraders() {
   return [
@@ -44,9 +53,9 @@ export function defineGraders() {
     notContains('sessionStorage.setItem', 'No tokens stored in sessionStorage', GraderLevel.L3),
 
     // ── L4: Structural / behavioral correctness ───────────────────────────────
-    // Event-based install/build verification temporarily disabled — see PR scoping discussion.
+    // Install verification left disabled — a valid solution may edit package.json then run a bare `npm install`.
     // ranCommand('npm install', '@auth0/auth0-nuxt', 'Ran npm install for @auth0/auth0-nuxt', GraderLevel.L4),
-    // ranCommandOneOf(['npm run build', 'nuxt build'], 'Ran build to verify compilation', GraderLevel.L4),
+    ranCommandOneOf(['npm run build', 'nuxt build'], 'Ran build to verify compilation', GraderLevel.L4),
     wroteFile('.env', 'Wrote Auth0 credentials to .env file', GraderLevel.L4, [
       'dev-playground.us.auth0.com',
       'playground_client_abc123xyz',

diff --git a/apps/auth0-evals/src/evals/quickstarts/react/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/react/PROMPT.md
@@ -4,6 +4,7 @@ name: React Quickstart
 scaffold: src/evals/scaffolds/react/basic
 skills: auth0-react
 setup_command: npm install
+compile_command: npm run build
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/react/graders.ts b/apps/auth0-evals/src/evals/quickstarts/react/graders.ts
@@ -1,4 +1,4 @@
-import { contains, notContains, matches, judge, GraderLevel } from '@a0/eval-graders';
+import { contains, notContains, matches, judge, ranCommand, GraderLevel } from '@a0/eval-graders';
 
 export function defineGraders() {
   return [
@@ -21,9 +21,9 @@ export function defineGraders() {
     notContains('sessionStorage.setItem', 'No tokens stored in sessionStorage', GraderLevel.L3),
 
     // ── L4: Structural / behavioral correctness ───────────────────────────────
-    // Event-based install/build verification temporarily disabled — see PR scoping discussion.
+    // Install verification left disabled — a valid solution may edit package.json then run a bare `npm install`.
     // ranCommand('npm install', '@auth0/auth0-react', 'Ran npm install for @auth0/auth0-react', GraderLevel.L4),
-    // ranCommand('npm run', 'build', 'Ran build to verify compilation', GraderLevel.L4),
+    ranCommand('npm run', 'build', 'Ran build to verify compilation', GraderLevel.L4),
     matches(String.raw`<Auth0Provider[\s\S]*?domain`, 'Auth0Provider configured with domain prop', GraderLevel.L4),
     contains('getAccessTokenSilently', 'Uses getAccessTokenSilently to retrieve access token', GraderLevel.L4),
     judge(

diff --git a/apps/auth0-evals/src/evals/quickstarts/spa-js/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/spa-js/PROMPT.md
@@ -3,6 +3,7 @@ id: spa_js_quickstart
 name: SPA JS Quickstart
 skills: auth0-spa-js
 setup_command: npm install
+compile_command: npm run build
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/spa-js/graders.ts b/apps/auth0-evals/src/evals/quickstarts/spa-js/graders.ts
@@ -1,4 +1,4 @@
-import { contains, notContains, matches, judge, GraderLevel } from '@a0/eval-graders';
+import { contains, notContains, matches, judge, ranCommand, GraderLevel } from '@a0/eval-graders';
 
 export function defineGraders() {
   return [
@@ -22,9 +22,9 @@ export function defineGraders() {
     notContains('sessionStorage.setItem', 'No tokens manually stored in sessionStorage', GraderLevel.L3),
 
     // ── L4: Structural / behavioral correctness ───────────────────────────────
-    // Event-based install/build verification temporarily disabled — see PR scoping discussion.
+    // Install verification left disabled — a valid solution may edit package.json then run a bare `npm install`.
     // ranCommand('npm install', '@auth0/auth0-spa-js', 'Ran npm install for @auth0/auth0-spa-js', GraderLevel.L4),
-    // ranCommand('npm run', 'build', 'Ran build to verify compilation', GraderLevel.L4),
+    ranCommand('npm run', 'build', 'Ran build to verify compilation', GraderLevel.L4),
     matches(
       String.raw`createAuth0Client\s*\(\s*\{[\s\S]*?domain`,
       'Auth0Client configured with domain',

diff --git a/apps/auth0-evals/src/evals/quickstarts/vue/PROMPT.md b/apps/auth0-evals/src/evals/quickstarts/vue/PROMPT.md
@@ -3,6 +3,7 @@ id: vue_quickstart
 name: Vue Quickstart
 skills: auth0-vue
 setup_command: npm install
+compile_command: npm run build
 ---
 
 ## Task

diff --git a/apps/auth0-evals/src/evals/quickstarts/vue/graders.ts b/apps/auth0-evals/src/evals/quickstarts/vue/graders.ts
@@ -1,4 +1,4 @@
-import { contains, notContains, matches, judge, GraderLevel } from '@a0/eval-graders';
+import { contains, notContains, matches, judge, ranCommand, GraderLevel } from '@a0/eval-graders';
 
 export function defineGraders() {
   return [
@@ -21,9 +21,9 @@ export function defineGraders() {
     notContains('sessionStorage.setItem', 'No tokens stored in sessionStorage', GraderLevel.L3),
 
     // ── L4: Structural / behavioral correctness ───────────────────────────────
-    // Event-based install/build verification temporarily disabled — see PR scoping discussion.
+    // Install verification left disabled — a valid solution may edit package.json then run a bare `npm install`.
     // ranCommand('npm install', '@auth0/auth0-vue', 'Ran npm install for @auth0/auth0-vue', GraderLevel.L4),
-    // ranCommand('npm run', 'build', 'Ran build to verify compilation', GraderLevel.L4),
+    ranCommand('npm run', 'build', 'Ran build to verify compilation', GraderLevel.L4),
     matches(String.raw`app\.use\s*\(\s*createAuth0`, 'Plugin installed via app.use(createAuth0(...))', GraderLevel.L4),
     contains('getAccessTokenSilently', 'Uses getAccessTokenSilently to retrieve access token', GraderLevel.L4),
     judge(

diff --git a/docs/ADDING_EVALS.md b/docs/ADDING_EVALS.md
@@ -31,6 +31,7 @@ id: my_new_eval
 name: My New Eval
 skills: auth0-react
 setup_command: npm install
+compile_command: npm run build
 ---
 ```
 
@@ -41,6 +42,7 @@ setup_command: npm install
 | `category` | no | Defaults to the parent directory name (e.g. `quickstarts`) |
 | `skills` | no | Comma-separated skill names from [auth0/agent-skills](https://github.com/auth0/agent-skills). Injected into agent context when running with `--tools skills` |
 | `setup_command` | no | Command run before the agent starts (e.g. `npm install`). Split on whitespace and executed directly via `spawnSync` — no shell, no operators (`&&`, `\|`, etc.), no quoting. One command only. |
+| `compile_command` | no | Compile/build command (e.g. `npm run build`, `node --check server.js`, `.venv/bin/python -m py_compile main.py`). When set, an instruction pointing the agent at this command is appended to the agent's native context file (`CLAUDE.md` / `GEMINI.md` / `AGENTS.md` / `.github/copilot-instructions.md`) alongside the "no docs files" guidance, so the agent verifies the project compiles and the command shows up in the tool trace. Agent modes only — baseline ignores it. Omit for evals with no CLI compile step (e.g. mobile). |
 
 To test a skill before it is pushed to the remote repo, see [TESTING_SKILLS.md](TESTING_SKILLS.md).
 
@@ -61,6 +63,7 @@ id: react_quickstart
 name: React Quickstart
 skills: auth0-react
 setup_command: npm install
+compile_command: npm run build
 ---
 
 ## Task

diff --git a/packages/eval-core/src/index.ts b/packages/eval-core/src/index.ts
@@ -82,6 +82,7 @@ export {
   writeAgentGuidance,
   AGENT_GUIDANCE,
   AGENT_CONTEXT_FILENAMES,
+  compileGuidance,
   collectFiles,
   readWorkspaceFile,
   isPathInside,

diff --git a/packages/eval-core/src/loader.ts b/packages/eval-core/src/loader.ts
@@ -63,6 +63,7 @@ export async function loadEval(
     .filter(Boolean);
 
   const setupCommand = meta.setup_command || undefined;
+  const compileCommand = meta.compile_command || undefined;
 
   return {
     id: evalConfig.id,
@@ -74,6 +75,7 @@ export async function loadEval(
     graders,
     scaffold,
     setupCommand,
+    compileCommand,
     skills,
     metadata: {
       provider_name: meta.provider_name ?? 'Auth0',

diff --git a/packages/eval-core/src/types/eval.ts b/packages/eval-core/src/types/eval.ts
@@ -19,6 +19,7 @@ export interface EvalDefinition {
   graders: GraderDef[];
   scaffold: Record<string, string>;
   setupCommand?: string;
+  compileCommand?: string;
   skills: string[];
   metadata: Record<string, string>;
 }
diff --git a/packages/eval-core/src/workspace/index.ts b/packages/eval-core/src/workspace/index.ts
@@ -5,6 +5,7 @@ export {
   writeAgentGuidance,
   AGENT_GUIDANCE,
   AGENT_CONTEXT_FILENAMES,
+  compileGuidance,
 } from './workspace.js';
 export type { SetupWorkspaceOptions, RunSetupCommandOptions } from './workspace.js';
 export { collectFiles, readWorkspaceFile } from './file-utils.js';

diff --git a/packages/eval-core/src/workspace/workspace.ts b/packages/eval-core/src/workspace/workspace.ts
@@ -32,6 +32,15 @@ import { resolveInside } from './path-utils.js';
 export const AGENT_GUIDANCE = `Do not create any documentation files (README.md, SETUP.md, QUICKSTART.md, IMPLEMENTATION_SUMMARY.md, or any other .md files). Do not create any .txt summary or verification files. Do not create standalone summary or status files of any kind (e.g. AUTH0_SETUP.ts, IMPLEMENTATION_COMPLETE.ts, QUICK_START.ts, FILES_CREATED.txt) — these are not application source code. Only create and modify source code files that are part of the application.
 `;
 
+/**
+ * Renders the compile-check instruction for `compileCommand`: sentence, blank line, backticked command.
+ */
+export function compileGuidance(compileCommand: string): string {
+  const instruction =
+    'After making your changes, you MUST run this command to verify your integration compiles, and fix any errors it reports before finishing:';
+  return `${instruction}\n\n\`${compileCommand}\`\n`;
+}
+
 /**
  * The context/memory file each runner reads, relative to the workspace root.
  * Writing guidance to the wrong file means the agent silently ignores it:
@@ -51,12 +60,15 @@ export const AGENT_CONTEXT_FILENAMES: Record<AgentType, string> = {
 /**
  * Writes {@link AGENT_GUIDANCE} into the context file the given runner reads.
  * Appends (preserving any scaffold-provided content) when the file already
- * exists; creates it otherwise.
+ * exists; creates it otherwise. When `compileCommand` is provided, the
+ * compile-verification guidance (see {@link compileGuidance}) is appended too.
  */
-export function writeAgentGuidance(workspace: string, agentType: AgentType): void {
+export function writeAgentGuidance(workspace: string, agentType: AgentType, compileCommand?: string): void {
   const filename = AGENT_CONTEXT_FILENAMES[agentType];
   const dest = join(workspace, filename);
 
+  const guidance = compileCommand ? `${AGENT_GUIDANCE}\n${compileGuidance(compileCommand)}` : AGENT_GUIDANCE;
+
   // If the scaffold shipped AGENTS.md but the active runner reads a different
   // file, rename it so the guidance reaches the right runner.
   const scaffoldAgentsMd = join(workspace, 'AGENTS.md');
@@ -66,10 +78,10 @@ export function writeAgentGuidance(workspace: string, agentType: AgentType): voi
   }
 
   if (existsSync(dest)) {
-    appendFileSync(dest, `\n${AGENT_GUIDANCE}`, 'utf-8');
+    appendFileSync(dest, `\n${guidance}`, 'utf-8');
   } else {
     mkdirSync(join(dest, '..'), { recursive: true });
-    writeFileSync(dest, AGENT_GUIDANCE, 'utf-8');
+    writeFileSync(dest, guidance, 'utf-8');
   }
 }
 

diff --git a/packages/eval-core/tests/loader.test.ts b/packages/eval-core/tests/loader.test.ts
@@ -151,6 +151,26 @@ describe('loadEval - setupCommand', () => {
   });
 });
 
+// ── compile_command frontmatter tests ────────────────────────────────────────
+
+describe('loadEval - compileCommand', () => {
+  // A compile_command key in the frontmatter is exposed as compileCommand.
+  it('parses compile_command from frontmatter', async () => {
+    const prompt = '---\nskills: auth0-react\ncompile_command: npm run build\n---\n\n## Task\nDo the task.\n';
+    makeEvalDir(tmpBase, prompt);
+    const { compileCommand } = await loadEval(EVAL_CONFIG, tmpBase);
+    expect(compileCommand).toBe('npm run build');
+  });
+
+  // With no compile_command key, compileCommand is left undefined.
+  it('returns undefined when compile_command is absent', async () => {
+    const prompt = '---\nskills: auth0-react\n---\n\n## Task\nDo the task.\n';
+    makeEvalDir(tmpBase, prompt);
+    const { compileCommand } = await loadEval(EVAL_CONFIG, tmpBase);
+    expect(compileCommand).toBeUndefined();
+  });
+});
+
 // ── System prompt tests ───────────────────────────────────────────────────────
 
 describe('loadEval - system prompt', () => {
@@ -239,10 +259,8 @@ describe('loadEval - scaffold loading', () => {
     expect(result.scaffold['readable.txt']).toBe('this is fine');
     expect(result.scaffold['unreadable.txt']).toBeUndefined();
   });
-
 });
 
-
 // ── frontmatter scaffold field tests ─────────────────────────────────────────
 
 function makePromptWithScaffold(scaffoldPath: string): string {