diff --git a/package-lock.json b/package-lock.json
index 8874107..c246ef3 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -31,6 +31,7 @@
                 "globals": "^16.0.0",
                 "husky": "^9.1.7",
                 "lint-staged": "^15.5.1",
+                "nock": "^14.0.15",
                 "pino-pretty": "^13.0.0",
                 "prettier": "^3.5.3",
                 "prettier-plugin-organize-imports": "^4.1.0",
@@ -879,6 +880,49 @@
                 }
             }
         },
+        "node_modules/@mswjs/interceptors": {
+            "version": "0.41.8",
+            "resolved": "https://registry.npmjs.org/@mswjs/interceptors/-/interceptors-0.41.8.tgz",
+            "integrity": "sha512-pRLMNKTSGRoLq+KnEB/7OY5vijw1XmcheAAOiv6pj7W1FG32kAGqj1C/RK/cqxRGr1Fh+zBi8sDur8kj3EQv6A==",
+            "dev": true,
+            "license": "MIT",
+            "dependencies": {
+                "@open-draft/deferred-promise": "^2.2.0",
+                "@open-draft/logger": "^0.3.0",
+                "@open-draft/until": "^2.0.0",
+                "is-node-process": "^1.2.0",
+                "outvariant": "^1.4.3",
+                "strict-event-emitter": "^0.5.1"
+            },
+            "engines": {
+                "node": ">=18"
+            }
+        },
+        "node_modules/@open-draft/deferred-promise": {
+            "version": "2.2.0",
+            "resolved": "https://registry.npmjs.org/@open-draft/deferred-promise/-/deferred-promise-2.2.0.tgz",
+            "integrity": "sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA==",
+            "dev": true,
+            "license": "MIT"
+        },
+        "node_modules/@open-draft/logger": {
+            "version": "0.3.0",
+            "resolved": "https://registry.npmjs.org/@open-draft/logger/-/logger-0.3.0.tgz",
+            "integrity": "sha512-X2g45fzhxH238HKO4xbSr7+wBS8Fvw6ixhTDuvLd5mqh6bJJCFAPwU9mPDxbcrRtfxv4u5IHCEH77BmxvXmmxQ==",
+            "dev": true,
+            "license": "MIT",
+            "dependencies": {
+                "is-node-process": "^1.2.0",
+                "outvariant": "^1.4.0"
+            }
+        },
+        "node_modules/@open-draft/until": {
+            "version": "2.1.0",
+            "resolved": "https://registry.npmjs.org/@open-draft/until/-/until-2.1.0.tgz",
+            "integrity": "sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg==",
+            "dev": true,
+            "license": "MIT"
+        },
         "node_modules/@pinojs/redact": {
             "version": "0.4.0",
             "resolved": "https://registry.npmjs.org/@pinojs/redact/-/redact-0.4.0.tgz",
@@ -3384,6 +3428,13 @@
                 "node": ">=0.10.0"
             }
         },
+        "node_modules/is-node-process": {
+            "version": "1.2.0",
+            "resolved": "https://registry.npmjs.org/is-node-process/-/is-node-process-1.2.0.tgz",
+            "integrity": "sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw==",
+            "dev": true,
+            "license": "MIT"
+        },
         "node_modules/is-number": {
             "version": "7.0.0",
             "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz",
@@ -3523,6 +3574,13 @@
             "dev": true,
             "license": "MIT"
         },
+        "node_modules/json-stringify-safe": {
+            "version": "5.0.1",
+            "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
+            "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
+            "dev": true,
+            "license": "ISC"
+        },
         "node_modules/keyv": {
             "version": "4.5.4",
             "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz",
@@ -3929,6 +3987,21 @@
                 "node": ">= 0.6"
             }
         },
+        "node_modules/nock": {
+            "version": "14.0.15",
+            "resolved": "https://registry.npmjs.org/nock/-/nock-14.0.15.tgz",
+            "integrity": "sha512-S0a47C9pLvcYx/Ugf0H30BVBEcUgMMBDk9VJIDlJ8XGrfH2QDUD4Tgdp45qDIiHttokBG+IbsOtsvIjGR/j3bg==",
+            "dev": true,
+            "license": "MIT",
+            "dependencies": {
+                "@mswjs/interceptors": "^0.41.0",
+                "json-stringify-safe": "^5.0.1",
+                "propagate": "^2.0.0"
+            },
+            "engines": {
+                "node": ">=18.20.0 <20 || >=20.12.1"
+            }
+        },
         "node_modules/node-cleanup": {
             "version": "2.1.2",
             "resolved": "https://registry.npmjs.org/node-cleanup/-/node-cleanup-2.1.2.tgz",
@@ -4081,6 +4154,13 @@
                 "node": ">= 0.8.0"
             }
         },
+        "node_modules/outvariant": {
+            "version": "1.4.3",
+            "resolved": "https://registry.npmjs.org/outvariant/-/outvariant-1.4.3.tgz",
+            "integrity": "sha512-+Sl2UErvtsoajRDKCE5/dBz4DIvHXQQnAxtQTF04OJxY0+DyZXSo5P5Bb7XYWOh81syohlYL24hbDwxedPUJCA==",
+            "dev": true,
+            "license": "MIT"
+        },
         "node_modules/p-limit": {
             "version": "3.1.0",
             "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
@@ -4402,6 +4482,16 @@
             ],
             "license": "MIT"
         },
+        "node_modules/propagate": {
+            "version": "2.0.1",
+            "resolved": "https://registry.npmjs.org/propagate/-/propagate-2.0.1.tgz",
+            "integrity": "sha512-vGrhOavPSTz4QVNuBNdcNXePNdNMaO1xj9yBeH1ScQPjk/rhg9sSlCXPhMkFuaNNW/syTvYqsnbIJxMBfRbbag==",
+            "dev": true,
+            "license": "MIT",
+            "engines": {
+                "node": ">= 8"
+            }
+        },
         "node_modules/proxy-addr": {
             "version": "2.0.7",
             "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
@@ -4958,6 +5048,13 @@
                 "duplexer": "~0.1.1"
             }
         },
+        "node_modules/strict-event-emitter": {
+            "version": "0.5.1",
+            "resolved": "https://registry.npmjs.org/strict-event-emitter/-/strict-event-emitter-0.5.1.tgz",
+            "integrity": "sha512-vMgjE/GGEPEFnhFub6pa4FmJBRBVOLpIII2hvCZ8Kzb7K0hlHo7mQv6xYrBvCL2LtAIBwFUK8wvuJgTVSQ5MFQ==",
+            "dev": true,
+            "license": "MIT"
+        },
         "node_modules/string-argv": {
             "version": "0.3.2",
             "resolved": "https://registry.npmjs.org/string-argv/-/string-argv-0.3.2.tgz",
diff --git a/package.json b/package.json
index 1341330..fb69597 100644
--- a/package.json
+++ b/package.json
@@ -18,12 +18,12 @@
         "practice"
     ],
     "scripts": {
-        "test": "vitest run | pino-pretty",
+        "test": "bash -o pipefail -c 'vitest run --exclude \"tests/e2e/**\" | pino-pretty'",
         "test:unit": "vitest run tests/mcp tests/utils tests/services tests/tools",
         "test:integration": "vitest run tests/integration",
-        "test:e2e": "vitest run tests/e2e",
-        "test:all": "vitest run",
-        "test:coverage": "vitest run --coverage",
+        "test:e2e": "vitest run --config vitest.e2e.config.ts",
+        "test:all": "npm run test && npm run test:e2e",
+        "test:coverage": "vitest run --exclude 'tests/e2e/**' --coverage",
         "test:watch": "vitest watch",
         "test:types": "tsc -p tsconfig.test.json",
         "build": "tsc && chmod u+x build/index.js",
@@ -83,6 +83,7 @@
         "globals": "^16.0.0",
         "husky": "^9.1.7",
         "lint-staged": "^15.5.1",
+        "nock": "^14.0.15",
         "pino-pretty": "^13.0.0",
         "prettier": "^3.5.3",
         "prettier-plugin-organize-imports": "^4.1.0",
diff --git a/scripts/sync-version.cjs b/scripts/sync-version.cjs
index 8e50a8f..d1d87a4 100644
--- a/scripts/sync-version.cjs
+++ b/scripts/sync-version.cjs
@@ -49,11 +49,10 @@ if (fs.existsSync(marketplacePath)) {
 const skillPaths = [
     "skills/interactive-leetcode-mcp/SKILL.md",
     ".claude/skills/using-interactive-leetcode-mcp/SKILL.md",
-    "clawhub-skill/interactive-leetcode-mcp/SKILL.md",
+    "clawhub-skill/interactive-leetcode-mcp/SKILL.md"
 ];
 
-const versionPattern =
-    /@sperekrestova\/interactive-leetcode-mcp@[\w.-]+/g;
+const versionPattern = /@sperekrestova\/interactive-leetcode-mcp@[\w.-]+/g;
 const versionReplacement = `@sperekrestova/interactive-leetcode-mcp@${version}`;
 
 for (const rel of skillPaths) {
diff --git a/src/auth/auth-flow.ts b/src/auth/auth-flow.ts
new file mode 100644
index 0000000..8ef6499
--- /dev/null
+++ b/src/auth/auth-flow.ts
@@ -0,0 +1,102 @@
+/**
+ * Authentication helpers that bridge the on-disk credentials store and the
+ * in-memory `LeetcodeServiceInterface`.
+ *
+ * Closes the silent-logout-on-restart gap where saved credentials existed in
+ * `~/.leetcode-mcp/credentials.json` but the running server never re-hydrated
+ * them, so every authenticated tool failed with "Authentication required" until
+ * the user pasted their cookies again.
+ */
+import { LeetcodeServiceInterface } from "../leetcode/leetcode-service-interface.js";
+import { CredentialsStorage } from "../types/credentials.js";
+import { credentialsStorage as defaultStorage } from "../utils/credentials.js";
+import logger from "../utils/logger.js";
+
+/** Outcome of a `restoreCredentials` call — useful in tests and logs. */
+export type RestoreOutcome =
+    | { status: "no_credentials" }
+    | { status: "invalid"; reason: "load_failed" | "expired" }
+    | { status: "restored"; username: string };
+
+/**
+ * Loads saved credentials from disk, validates them against LeetCode, and
+ * pushes them into the running service if they're still good.
+ *
+ * Safe to call at server startup; never throws — failures are logged and the
+ * outcome (validation throws map to "expired") is returned to the caller.
+ */
+export async function restoreCredentials(
+    service: LeetcodeServiceInterface,
+    storage: CredentialsStorage = defaultStorage
+): Promise<RestoreOutcome> {
+    let credentials: Awaited<ReturnType<CredentialsStorage["load"]>>;
+    try {
+        if (!(await storage.exists())) {
+            return { status: "no_credentials" };
+        }
+        credentials = await storage.load();
+    } catch (error) {
+        logger.warn(
+            "Saved credentials could not be loaded; ignoring: %s",
+            error instanceof Error ? error.message : String(error)
+        );
+        return { status: "invalid", reason: "load_failed" };
+    }
+
+    if (!credentials) {
+        logger.warn(
+            "Saved credentials file exists but could not be parsed; ignoring."
+        );
+        return { status: "invalid", reason: "load_failed" };
+    }
+
+    let username: string | null;
+    try {
+        username = await applyValidatedCredentials(
+            service,
+            credentials.csrftoken,
+            credentials.LEETCODE_SESSION
+        );
+    } catch (error) {
+        logger.warn(
+            "Saved credentials validation failed; user will need to re-authenticate: %s",
+            error instanceof Error ? error.message : String(error)
+        );
+        return { status: "invalid", reason: "expired" };
+    }
+
+    if (!username) {
+        logger.info(
+            "Saved credentials are no longer valid; user will need to re-authenticate."
+        );
+        return { status: "invalid", reason: "expired" };
+    }
+
+    logger.info(
+        "Restored LeetCode session for %s from saved credentials.",
+        username
+    );
+    return { status: "restored", username };
+}
+
+/**
+ * Validates `csrf` / `session` against LeetCode and, on success, pushes them
+ * into the running service so the very next authenticated tool call works
+ * without forcing a server restart.
+ *
+ * Returns the validated username, or `null` if LeetCode rejected the cookies.
+ * Trusts the `validateCredentials` interface contract (`Promise<string | null>`)
+ * and does not catch — any exception thrown by the service propagates.
+ */
+export async function applyValidatedCredentials(
+    service: LeetcodeServiceInterface,
+    csrf: string,
+    session: string
+): Promise<string | null> {
+    const username = await service.validateCredentials(csrf, session);
+    if (!username) {
+        return null;
+    }
+    service.updateCredentials(csrf, session);
+    return username;
+}
diff --git a/src/domain/hint-state-machine.ts b/src/domain/hint-state-machine.ts
new file mode 100644
index 0000000..9cd47ad
--- /dev/null
+++ b/src/domain/hint-state-machine.ts
@@ -0,0 +1,83 @@
+/**
+ * Pure-logic hint state machine.
+ *
+ * The "tutor, not solution oracle" contract is enforced **here** — not in
+ * prompts, not in tool descriptions, not in the agent's instruction-following.
+ * Every transition that affects hint progression flows through these
+ * functions, and gated tools call {@link assertSolutionUnlocked} before
+ * returning content. If a code path bypasses this module it is a bug.
+ *
+ * Intentionally has no IO: takes a {@link SessionState}, returns a new one
+ * (or throws). The session store handles persistence.
+ */
+import {
+    ErrorCode,
+    LeetCodeError,
+    MAX_HINT_LEVEL,
+    type HintLevel,
+    type SessionState
+} from "../types/index.js";
+
+/** Hint level at which the canonical solution becomes callable. */
+export const SOLUTION_HINT_LEVEL: HintLevel = MAX_HINT_LEVEL;
+
+/**
+ * Bumps `session.hintLevel` by one (clamped at {@link MAX_HINT_LEVEL}) and
+ * stamps `updatedAt`. Returns a new object — the input is not mutated.
+ *
+ * Bumping at the maximum level is a no-op rather than an error: callers
+ * that want a different behaviour should check `session.hintLevel` before
+ * calling.
+ */
+export function advanceHint(session: SessionState): SessionState {
+    const next: HintLevel =
+        session.hintLevel >= MAX_HINT_LEVEL
+            ? MAX_HINT_LEVEL
+            : ((session.hintLevel + 1) as HintLevel);
+    return {
+        ...session,
+        hintLevel: next,
+        updatedAt: new Date().toISOString()
+    };
+}
+
+/**
+ * Resets the session back to its level-0 initial state, preserving the
+ * slug / language / workspace so the user can re-attempt from scratch.
+ *
+ * `attempts`, the last run result/snapshot and `status` are reset too:
+ * resetting the hint level without resetting effort would mislead future
+ * hint generation about how much the user has already tried.
+ */
+export function resetSession(session: SessionState): SessionState {
+    return {
+        ...session,
+        hintLevel: 0,
+        attempts: 0,
+        lastLocalRunPassed: null,
+        lastLocalRunSnapshot: null,
+        status: "started",
+        updatedAt: new Date().toISOString()
+    };
+}
+
+/**
+ * Throws `LeetCodeError(HINT_LEVEL_TOO_LOW)` unless the session has
+ * reached the level required to unlock the canonical solution.
+ *
+ * `list_problem_solutions` and `get_problem_solution` MUST call this
+ * before returning content. A missing session (`start_problem` never
+ * called) is the caller's `SESSION_NOT_FOUND` to throw, not this one's.
+ * Fails closed: a missing or NaN `hintLevel` (e.g. from a hand-edited
+ * session file) compares false against the threshold and stays locked.
+ */
+export function assertSolutionUnlocked(session: SessionState): void {
+    if (!(session.hintLevel >= SOLUTION_HINT_LEVEL)) {
+        throw new LeetCodeError(
+            ErrorCode.HINT_LEVEL_TOO_LOW,
+            `Solution is gated behind hint level ${SOLUTION_HINT_LEVEL}; ` +
+                `session for "${session.slug}" is at level ${session.hintLevel}. ` +
+                `Drive the user through \`request_hint\` until they have engaged with each level.`
+        );
+    }
+}
diff --git a/src/domain/local-run-snapshot.ts b/src/domain/local-run-snapshot.ts
new file mode 100644
index 0000000..097ee67
--- /dev/null
+++ b/src/domain/local-run-snapshot.ts
@@ -0,0 +1,12 @@
+import { createHash } from "node:crypto";
+
+export interface LocalRunSnapshotInput {
+    code: string;
+    language: string;
+}
+
+export function createLocalRunSnapshot(input: LocalRunSnapshotInput): string {
+    return createHash("sha256")
+        .update(JSON.stringify([input.language, input.code]))
+        .digest("hex");
+}
diff --git a/src/domain/pedagogy.ts b/src/domain/pedagogy.ts
new file mode 100644
index 0000000..5cfdff6
--- /dev/null
+++ b/src/domain/pedagogy.ts
@@ -0,0 +1,72 @@
+/**
+ * Generates per-level hint text for a given problem.
+ *
+ * Phase 3 ships generic hints derived from the problem's existing
+ * `hints` and `topicTags`. Phase 5 (workspace awareness) extends this by
+ * accepting the user's actual code so the level-2/3 messages can
+ * critique what they wrote rather than describing the problem in the
+ * abstract. The `userCode` parameter is already in the signature to keep
+ * the contract stable across phases.
+ */
+import type { HintLevel, SimplifiedProblem } from "../types/index.js";
+
+/**
+ * Pure projection from problem + level → hint text. No IO.
+ *
+ * The contract per level matches `HintLevel`'s docstring:
+ *   1 — clarification (restate, edge cases)
+ *   2 — approach (paradigm / data structure)
+ *   3 — implementation sketch (pseudocode-level)
+ *   4 — optimal (full solution; the agent should call
+ *       `get_problem_solution` once this level is reached, not paraphrase)
+ *
+ * Level 0 is "no hint requested yet" and is never produced by this
+ * function — callers should never ask for it.
+ *
+ * `userCode` is reserved for Phase 5; if provided, future levels will
+ * incorporate it. The Phase 3 implementation ignores it.
+ */
+export function generateHint(
+    problem: SimplifiedProblem,
+    level: Exclude<HintLevel, 0>,
+    _userCode?: string
+): string {
+    switch (level) {
+        case 1:
+            return level1(problem);
+        case 2:
+            return level2(problem);
+        case 3:
+            return level3(problem);
+        case 4:
+            return level4(problem);
+    }
+}
+
+function level1(problem: SimplifiedProblem): string {
+    const examples = problem.exampleTestcases?.trim();
+    const examplePart = examples
+        ? `\n\nWalk through the example inputs and the expected outputs in your own words:\n\n\`\`\`\n${examples}\n\`\`\`\n\nWhat invariants must hold? What edge cases worry you?`
+        : "\n\nWhat invariants must hold? What edge cases worry you?";
+    return `Level 1 — Clarification.\n\nRestate **${problem.title}** in your own words. What are the inputs and outputs? What constraints does the problem impose on size, value range, or duplicates?${examplePart}`;
+}
+
+function level2(problem: SimplifiedProblem): string {
+    const tags = problem.topicTags?.join(", ");
+    const tagPart = tags
+        ? ` The problem is tagged: \`${tags}\`. Which of those is the most natural fit?`
+        : "";
+    return `Level 2 — Approach.\n\nWhat data structure or algorithmic paradigm does this map onto?${tagPart}\n\nThink about the asymptotic cost of the obvious O(n²) brute force and what structure would let you get to O(n) or O(n log n) — without writing any code yet.`;
+}
+
+function level3(problem: SimplifiedProblem): string {
+    const upstream = problem.hints?.[0]?.trim();
+    const upstreamPart = upstream
+        ? `\n\nLeetCode's own first hint:\n\n> ${upstream}`
+        : "";
+    return `Level 3 — Implementation sketch.\n\nNow draft the algorithm at pseudocode level. Walk through the data structures you'll allocate, the loop boundaries, what each iteration updates, and how you produce the final answer. Don't write language syntax yet — just the steps.${upstreamPart}`;
+}
+
+function level4(problem: SimplifiedProblem): string {
+    return `Level 4 — Solution unlocked.\n\nThe session for **${problem.title}** has reached the maximum hint level. \`get_problem_solution\` and \`list_problem_solutions\` are now callable — prefer fetching the canonical solution over paraphrasing it.`;
+}
diff --git a/src/domain/session-service.ts b/src/domain/session-service.ts
new file mode 100644
index 0000000..e907523
--- /dev/null
+++ b/src/domain/session-service.ts
@@ -0,0 +1,213 @@
+/**
+ * Application-layer wrapper around the session store + state machine +
+ * hint generator. Tools should depend on this, not on the lower-level
+ * pieces directly — it's the seam that makes the gate uniform.
+ */
+import {
+    ErrorCode,
+    LeetCodeError,
+    type HintLevel,
+    type SessionState,
+    type SimplifiedProblem
+} from "../types/index.js";
+import {
+    advanceHint,
+    assertSolutionUnlocked,
+    resetSession
+} from "./hint-state-machine.js";
+import { generateHint } from "./pedagogy.js";
+import { FileSessionStore, type SessionStore } from "./session-store.js";
+
+export interface StartProblemInput {
+    slug: string;
+    language?: string;
+}
+
+export class SessionService {
+    private readonly mutationQueues = new Map<string, Promise<unknown>>();
+
+    constructor(
+        private readonly store: SessionStore = new FileSessionStore()
+    ) {}
+
+    private async withSessionMutation<T>(
+        slug: string,
+        operation: () => Promise<T>
+    ): Promise<T> {
+        const previous = this.mutationQueues.get(slug);
+        const current = (async () => {
+            try {
+                await previous;
+            } catch {
+                // Prior mutation failure should not permanently poison the queue.
+            }
+            return operation();
+        })();
+        this.mutationQueues.set(slug, current);
+        try {
+            return await current;
+        } finally {
+            if (this.mutationQueues.get(slug) === current) {
+                this.mutationQueues.delete(slug);
+            }
+        }
+    }
+
+    /**
+     * Returns the existing session for a slug, or creates a fresh
+     * level-0 session if none exists. Idempotent: starting a problem the
+     * user already started just returns the in-progress session
+     * unchanged (so you don't lose hint progress by re-running
+     * `start_problem`).
+     */
+    async startOrResume(input: StartProblemInput): Promise<SessionState> {
+        return this.withSessionMutation(input.slug, async () => {
+            const existing = await this.store.load(input.slug);
+            if (existing) {
+                // Update language only if the caller specified one and we
+                // didn't have one before — never silently overwrite.
+                if (input.language && !existing.language) {
+                    const updated: SessionState = {
+                        ...existing,
+                        language: input.language,
+                        updatedAt: new Date().toISOString()
+                    };
+                    await this.store.save(updated);
+                    return updated;
+                }
+                return existing;
+            }
+            const now = new Date().toISOString();
+            const fresh: SessionState = {
+                slug: input.slug,
+                language: input.language,
+                hintLevel: 0,
+                attempts: 0,
+                lastLocalRunPassed: null,
+                lastLocalRunSnapshot: null,
+                status: "started",
+                createdAt: now,
+                updatedAt: now
+            };
+            await this.store.save(fresh);
+            return fresh;
+        });
+    }
+
+    /** Returns the session, or `null` if `start_problem` was never called. */
+    async get(slug: string): Promise<SessionState | null> {
+        return this.store.load(slug);
+    }
+
+    /**
+     * Advances the hint level by one and returns the new session +
+     * generated hint text. The text is produced from the supplied
+     * problem so callers don't need to load it twice.
+     *
+     * Throws `SESSION_NOT_FOUND` if the user never opened the problem.
+     */
+    async advance(
+        slug: string,
+        problem: SimplifiedProblem
+    ): Promise<{ session: SessionState; hint: string; level: HintLevel }> {
+        return this.withSessionMutation(slug, async () => {
+            const session = await this.requireSession(slug);
+            const next = advanceHint(session);
+            await this.store.save(next);
+            const level = next.hintLevel;
+            if (level === 0) {
+                // Unreachable — advanceHint never returns 0 — but the type
+                // narrows from HintLevel to 1..4 only with this guard.
+                throw new LeetCodeError(
+                    ErrorCode.UPSTREAM_ERROR,
+                    "Hint level transition produced level 0"
+                );
+            }
+            return {
+                session: next,
+                level,
+                hint: generateHint(problem, level)
+            };
+        });
+    }
+
+    /** Resets the session back to the level-0 initial state. */
+    async reset(slug: string): Promise<SessionState> {
+        return this.withSessionMutation(slug, async () => {
+            const session = await this.requireSession(slug);
+            const next = resetSession(session);
+            await this.store.save(next);
+            return next;
+        });
+    }
+
+    /**
+     * Throws if the canonical solution is not yet unlocked for `slug`:
+     * `SESSION_NOT_FOUND` when no session exists, `HINT_LEVEL_TOO_LOW`
+     * when the session is below the unlock level. If `slug` is omitted
+     * (or empty) this always throws `HINT_LEVEL_TOO_LOW` — the store has
+     * no way to enumerate sessions, so callers must pass the slug.
+     */
+    async assertSolutionUnlocked(slug?: string): Promise<void> {
+        if (slug) {
+            const session = await this.requireSession(slug);
+            assertSolutionUnlocked(session);
+            return;
+        }
+        // No slug provided. We can't enumerate sessions without a
+        // discovery API on the store; defer to the caller to provide
+        // slug context. This branch is reserved for future expansion.
+        throw new LeetCodeError(
+            ErrorCode.HINT_LEVEL_TOO_LOW,
+            "Cannot determine session context without titleSlug. " +
+                "Provide titleSlug to verify the session has reached the required hint level."
+        );
+    }
+
+    /**
+     * Public variant of the session lookup — throws `SESSION_NOT_FOUND`
+     * when the user never opened the slug. Used by the runner-tools
+     * layer to keep `run_local_tests` aligned with the pedagogy state
+     * machine (no orphaned runs).
+     */
+    async requireSession(slug: string): Promise<SessionState> {
+        const session = await this.store.load(slug);
+        if (!session) {
+            throw new LeetCodeError(
+                ErrorCode.SESSION_NOT_FOUND,
+                `No active session for "${slug}". Call start_problem first.`
+            );
+        }
+        return session;
+    }
+
+    /**
+     * Updates the session after a `run_local_tests` invocation. Increments
+     * `attempts`, records pass/fail, and keeps the source snapshot only for
+     * passing runs (cleared to `null` on failure). `status` moves to
+     * "attempting" from "started" on any run, or from "solved" on a failure.
+     */
+    async recordLocalRun(
+        slug: string,
+        passed: boolean,
+        snapshot?: string
+    ): Promise<SessionState> {
+        return this.withSessionMutation(slug, async () => {
+            const session = await this.requireSession(slug);
+            const next: SessionState = {
+                ...session,
+                attempts: session.attempts + 1,
+                lastLocalRunPassed: passed,
+                lastLocalRunSnapshot: passed ? (snapshot ?? null) : null,
+                status:
+                    session.status === "started" ||
+                    (session.status === "solved" && !passed)
+                        ? "attempting"
+                        : session.status,
+                updatedAt: new Date().toISOString()
+            };
+            await this.store.save(next);
+            return next;
+        });
+    }
+}
diff --git a/src/domain/session-store.ts b/src/domain/session-store.ts
new file mode 100644
index 0000000..507cae9
--- /dev/null
+++ b/src/domain/session-store.ts
@@ -0,0 +1,100 @@
+/**
+ * Per-problem session persistence: one JSON file per slug under
+ * `~/.leetcode-mcp/sessions/<slug>.json`.
+ *
+ * The store is intentionally minimal — no migrations, no schemas — because
+ * the data is local and rebuildable. If a file is unreadable or malformed
+ * we treat it as "no session" and let the caller create a fresh one.
+ */
+import { mkdir, readFile, rm, stat, writeFile } from "node:fs/promises";
+import { homedir } from "node:os";
+import { join, resolve } from "node:path";
+import type { SessionState } from "../types/index.js";
+import logger from "../utils/logger.js";
+
+/**
+ * Slugs come from the LeetCode URL and are already filesystem-safe in
+ * practice, but defend against a malicious / typo'd input that could
+ * traverse the sessions directory.
+ */
+function assertSafeSlug(slug: string): void {
+    if (!/^[a-z0-9-]+$/.test(slug)) {
+        throw new Error(
+            `Invalid session slug: ${JSON.stringify(slug)}. ` +
+                `Expected lowercase alphanumeric and hyphens only.`
+        );
+    }
+}
+
+export interface SessionStore {
+    /** Resolves to the saved session, or `null` if none exists / is unreadable. */
+    load(slug: string): Promise<SessionState | null>;
+    /** Persists the session, creating the sessions directory if needed. */
+    save(session: SessionState): Promise<void>;
+    /** Removes the file. Idempotent — missing file is not an error. */
+    delete(slug: string): Promise<void>;
+    /** Absolute path of the file backing a given slug. */
+    pathFor(slug: string): string;
+}
+
+export interface FileSessionStoreOptions {
+    /**
+     * Override the directory the store writes to. Defaults to
+     * `${homedir()}/.leetcode-mcp/sessions`. Tests pass a temp directory.
+     */
+    dir?: string;
+}
+
+/**
+ * Default filesystem-backed implementation. Writes are NOT atomic (plain
+ * `writeFile`, no temp file + rename) — same pattern as `credentialsStorage`:
+ * write JSON, mode 0o600 (sessions are not secrets, but not other-readable).
+ */
+export class FileSessionStore implements SessionStore {
+    private readonly dir: string;
+
+    constructor(options: FileSessionStoreOptions = {}) {
+        this.dir = options.dir ?? join(homedir(), ".leetcode-mcp", "sessions");
+    }
+
+    pathFor(slug: string): string {
+        assertSafeSlug(slug);
+        return resolve(this.dir, `${slug}.json`);
+    }
+
+    async load(slug: string): Promise<SessionState | null> {
+        const path = this.pathFor(slug);
+        try {
+            await stat(path);
+        } catch {
+            return null;
+        }
+        try {
+            const raw = await readFile(path, "utf-8");
+            return JSON.parse(raw) as SessionState;
+        } catch (err) {
+            // Corrupt session file is recoverable — log and return null so
+            // the caller can rebuild from `start_problem`.
+            logger.warn(
+                "Discarding malformed session file %s: %s",
+                path,
+                err instanceof Error ? err.message : String(err)
+            );
+            return null;
+        }
+    }
+
+    async save(session: SessionState): Promise<void> {
+        const path = this.pathFor(session.slug);
+        await mkdir(this.dir, { recursive: true, mode: 0o700 });
+        await writeFile(path, JSON.stringify(session, null, 2), {
+            encoding: "utf-8",
+            mode: 0o600
+        });
+    }
+
+    async delete(slug: string): Promise<void> {
+        const path = this.pathFor(slug);
+        await rm(path, { force: true });
+    }
+}
diff --git a/src/index.ts b/src/index.ts
index ecc098e..4ba550b 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -6,19 +6,25 @@ import minimist from "minimist";
 import { readFileSync } from "node:fs";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
+import { restoreCredentials } from "./auth/auth-flow.js";
+import { SessionService } from "./domain/session-service.js";
 import { LeetCodeGlobalService } from "./leetcode/leetcode-global-service.js";
 import { LeetcodeServiceInterface } from "./leetcode/leetcode-service-interface.js";
 import { registerAuthPrompts } from "./mcp/prompts/auth-prompts.js";
 import { registerLearningPrompts } from "./mcp/prompts/learning-prompts.js";
 import { registerProblemResources } from "./mcp/resources/problem-resources.js";
 import { registerSolutionResources } from "./mcp/resources/solution-resources.js";
+import { SERVER_INSTRUCTIONS } from "./mcp/server-instructions.js";
 import { registerAuthTools } from "./mcp/tools/auth-tools.js";
 import { registerContestTools } from "./mcp/tools/contest-tools.js";
 import { registerOnboardingTools } from "./mcp/tools/onboarding-tools.js";
 import { registerProblemTools } from "./mcp/tools/problem-tools.js";
+import { registerRunnerTools } from "./mcp/tools/runner-tools.js";
+import { registerSessionTools } from "./mcp/tools/session-tools.js";
 import { registerSolutionTools } from "./mcp/tools/solution-tools.js";
 import { registerSubmissionTools } from "./mcp/tools/submission-tools.js";
 import { registerUserTools } from "./mcp/tools/user-tools.js";
+import { SubprocessRunner } from "./runner/subprocess-runner.js";
 import logger from "./utils/logger.js";
 
 /**
@@ -110,10 +116,18 @@ async function main() {
 
     const packageJSON = getPackageJson();
 
-    const server = new McpServer({
-        name: "LeetCode MCP Server",
-        version: packageJSON.version
-    });
+    const server = new McpServer(
+        {
+            name: "LeetCode MCP Server",
+            version: packageJSON.version
+        },
+        {
+            // Delivered to clients at handshake. Replaces the prompt-based
+            // "remember to invoke X first" dance with rules the agent
+            // receives once and keeps for the session.
+            instructions: SERVER_INSTRUCTIONS
+        }
+    );
 
     const credential: Credential = new Credential();
     const leetcodeService: LeetcodeServiceInterface = new LeetCodeGlobalService(
@@ -121,6 +135,23 @@ async function main() {
         credential
     );
 
+    // Re-hydrate saved credentials from disk so authenticated tools work
+    // immediately after a server restart without forcing the user to paste
+    // their cookies again.
+    await restoreCredentials(leetcodeService);
+
+    // Pedagogy state machine: per-problem session JSON under
+    // ~/.leetcode-mcp/sessions/<slug>.json. The session service is the
+    // single owner of hint progression and the gate that
+    // list_problem_solutions / get_problem_solution check before
+    // returning content.
+    const sessions = new SessionService();
+
+    // Local subprocess runner: probes python3 / go / java on first use,
+    // wraps with bwrap / firejail / sandbox-exec where available, and
+    // backs the `run_local_tests` tool. Phase 4a ships python3 only.
+    const runner = new SubprocessRunner();
+
     // Register MCP prompts for learning mode and workspace guidance
     registerLearningPrompts(server, leetcodeService);
 
@@ -132,9 +163,11 @@ async function main() {
     registerProblemTools(server, leetcodeService);
     registerUserTools(server, leetcodeService);
     registerContestTools(server, leetcodeService);
-    registerSolutionTools(server, leetcodeService);
+    registerSessionTools(server, leetcodeService, sessions);
+    registerSolutionTools(server, leetcodeService, sessions);
+    registerRunnerTools(server, leetcodeService, sessions, runner);
     registerAuthTools(server, leetcodeService);
-    registerSubmissionTools(server, leetcodeService);
+    registerSubmissionTools(server, leetcodeService, sessions);
 
     registerProblemResources(server, leetcodeService);
     registerSolutionResources(server, leetcodeService);
diff --git a/src/leetcode/leetcode-global-service.ts b/src/leetcode/leetcode-global-service.ts
index c20d2e8..c5493b3 100644
--- a/src/leetcode/leetcode-global-service.ts
+++ b/src/leetcode/leetcode-global-service.ts
@@ -1,15 +1,44 @@
 import axios, { AxiosError } from "axios";
 import { Credential, LeetCode } from "leetcode-query";
+import { ErrorCode, LeetCodeError } from "../types/errors.js";
 import {
-    LeetCodeCheckResponse,
-    LeetCodeSubmitResponse,
-    SubmissionResult
-} from "../types/submission.js";
+    DailyChallenge,
+    Problem,
+    ProblemSearchResult,
+    SimilarQuestion,
+    SimplifiedProblem,
+    TopicTag
+} from "../types/problem.js";
+import {
+    SolutionArticleDetail,
+    SolutionArticleList,
+    SolutionArticleSummary
+} from "../types/solution.js";
+import { SubmissionResult } from "../types/submission.js";
+import {
+    UserAllSubmissions,
+    UserContestInfo,
+    UserProfile,
+    UserProgressQuestionList,
+    UserRecentACSubmissions,
+    UserRecentSubmissions,
+    UserStatus,
+    UserSubmissionDetail
+} from "../types/user.js";
 import logger from "../utils/logger.js";
 import { SEARCH_PROBLEMS_QUERY } from "./graphql/search-problems.js";
 import { SOLUTION_ARTICLE_DETAIL_QUERY } from "./graphql/solution-article-detail.js";
 import { SOLUTION_ARTICLES_QUERY } from "./graphql/solution-articles.js";
-import { LeetcodeServiceInterface } from "./leetcode-service-interface.js";
+import {
+    LeetcodeServiceInterface,
+    SolutionArticlesQueryOptions
+} from "./leetcode-service-interface.js";
+import {
+    CheckResponseSchema,
+    QuestionIdResponseSchema,
+    SubmitResponseSchema,
+    ValidateCredentialsResponseSchema
+} from "./schemas.js";
 
 const LANGUAGE_MAP: Record<string, string> = {
     java: "java",
@@ -50,39 +79,42 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
         this.credential = credential;
     }
 
-    async fetchUserSubmissionDetail(id: number): Promise<any> {
+    async fetchUserSubmissionDetail(id: number): Promise<UserSubmissionDetail> {
         if (!this.isAuthenticated()) {
-            throw new Error(
+            throw new LeetCodeError(
+                ErrorCode.AUTH_REQUIRED,
                 "Authentication required to fetch user submission detail"
             );
         }
-        return await this.leetCodeApi.submission(id);
+        // Upstream's declared type is swapped for the project's own shape.
+        const detail = await this.leetCodeApi.submission(id);
+        return detail as unknown as UserSubmissionDetail;
     }
 
-    async fetchUserStatus(): Promise<any> {
+    async fetchUserStatus(): Promise<UserStatus> {
         if (!this.isAuthenticated()) {
-            throw new Error("Authentication required to fetch user status");
+            throw new LeetCodeError(
+                ErrorCode.AUTH_REQUIRED,
+                "Authentication required to fetch user status"
+            );
         }
-        return await this.leetCodeApi.whoami().then((res) => {
-            return {
-                isSignedIn: res?.isSignedIn ?? false,
-                username: res?.username ?? "",
-                avatar: res?.avatar ?? "",
-                isAdmin: res?.isAdmin ?? false
-            };
-        });
+        // Any field the upstream response lacks gets an explicit fallback.
+        const me = await this.leetCodeApi.whoami();
+        const isSignedIn = me?.isSignedIn ?? false;
+        const username = me?.username ?? null;
+        const avatar = me?.avatar ?? null;
+        const isAdmin = me?.isAdmin ?? false;
+        return { isSignedIn, username, avatar, isAdmin };
     }
 
     async fetchUserAllSubmissions(options: {
         offset: number;
         limit: number;
         questionSlug?: string;
-        lastKey?: string;
-        lang?: string;
-        status?: string;
-    }): Promise<any> {
+    }): Promise<UserAllSubmissions> {
         if (!this.isAuthenticated()) {
-            throw new Error(
+            throw new LeetCodeError(
+                ErrorCode.AUTH_REQUIRED,
                 "Authentication required to fetch user submissions"
             );
         }
@@ -91,21 +123,26 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
             limit: options.limit ?? 20,
             slug: options.questionSlug
         });
-        return { submissions };
+        return {
+            submissions
+        } as unknown as UserAllSubmissions;
     }
 
     async fetchUserRecentSubmissions(
         username: string,
         limit?: number
-    ): Promise<any> {
-        return await this.leetCodeApi.recent_submissions(username, limit);
+    ): Promise<UserRecentSubmissions> {
+        // Upstream's declared type is swapped for the project's own shape.
+        const api = this.leetCodeApi;
+        const recent = await api.recent_submissions(username, limit);
+        return recent as unknown as UserRecentSubmissions;
     }
 
     async fetchUserRecentACSubmissions(
         username: string,
         limit?: number
-    ): Promise<any> {
-        return await this.leetCodeApi.graphql({
+    ): Promise<UserRecentACSubmissions> {
+        return (await this.leetCodeApi.graphql({
             query: `
                     query ($username: String!, $limit: Int) {
                         recentAcSubmissionList(username: $username, limit: $limit) {
@@ -124,10 +161,10 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
                 username,
                 limit
             }
-        });
+        })) as unknown as UserRecentACSubmissions;
     }
 
-    async fetchUserProfile(username: string): Promise<any> {
+    async fetchUserProfile(username: string): Promise<UserProfile> {
         const profile = await this.leetCodeApi.user(username);
         if (profile && profile.matchedUser) {
             const { matchedUser } = profile;
@@ -137,29 +174,32 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
                 realName: matchedUser.profile.realName,
                 userAvatar: matchedUser.profile.userAvatar,
                 countryName: matchedUser.profile.countryName,
-                githubUrl: matchedUser.githubUrl,
+                githubUrl: matchedUser.githubUrl ?? undefined,
                 company: matchedUser.profile.company,
                 school: matchedUser.profile.school,
                 ranking: matchedUser.profile.ranking,
                 totalSubmissionNum: matchedUser.submitStats?.totalSubmissionNum
             };
         }
-        return profile;
+        throw new LeetCodeError(
+            ErrorCode.UPSTREAM_PAYLOAD_INVALID,
+            `LeetCode profile for "${username}" did not include a matched user`
+        );
     }
 
     async fetchUserContestRanking(
         username: string,
         attended: boolean = true
-    ): Promise<any> {
-        const contestInfo = await this.leetCodeApi.user_contest_info(username);
+    ): Promise<UserContestInfo> {
+        const contestInfo = (await this.leetCodeApi.user_contest_info(
+            username
+        )) as unknown as UserContestInfo;
         if (contestInfo.userContestRankingHistory) {
             if (attended) {
                 contestInfo.userContestRankingHistory =
-                    contestInfo.userContestRankingHistory.filter(
-                        (contest: any) => {
-                            return contest && contest.attended;
-                        }
-                    );
+                    contestInfo.userContestRankingHistory.filter((contest) => {
+                        return contest && contest.attended;
+                    });
             }
         } else {
             contestInfo.userContestRankingHistory = [];
@@ -167,32 +207,42 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
         return contestInfo;
     }
 
-    async fetchDailyChallenge(): Promise<any> {
-        return await this.leetCodeApi.daily();
+    async fetchDailyChallenge(): Promise<DailyChallenge> {
+        return (await this.leetCodeApi.daily()) as unknown as DailyChallenge;
     }
 
-    async fetchProblem(titleSlug: string): Promise<any> {
-        return await this.leetCodeApi.problem(titleSlug);
+    async fetchProblem(titleSlug: string): Promise<Problem> {
+        // Upstream result is re-typed; null/undefined means "not found".
+        const upstream = await this.leetCodeApi.problem(titleSlug);
+        const found = upstream as unknown as Problem | null | undefined;
+        if (found) {
+            return found;
+        }
+        throw new LeetCodeError(
+            ErrorCode.PROBLEM_NOT_FOUND,
+            `Problem ${titleSlug} not found`
+        );
     }
 
-    async fetchProblemSimplified(titleSlug: string): Promise<any> {
+    async fetchProblemSimplified(
+        titleSlug: string
+    ): Promise<SimplifiedProblem> {
         const problem = await this.fetchProblem(titleSlug);
-        if (!problem) {
-            throw new Error(`Problem ${titleSlug} not found`);
-        }
 
-        const filteredTopicTags =
-            problem.topicTags?.map((tag: any) => tag.slug) || [];
+        const filteredTopicTags: string[] =
+            problem.topicTags?.map((tag: TopicTag) => tag.slug) ?? [];
 
-        const filteredCodeSnippets = problem.codeSnippets || [];
+        const filteredCodeSnippets = problem.codeSnippets ?? [];
 
-        let parsedSimilarQuestions: any[] = [];
+        let parsedSimilarQuestions: SimilarQuestion[] = [];
         if (problem.similarQuestions) {
             try {
-                const allQuestions = JSON.parse(problem.similarQuestions);
+                const allQuestions: SimilarQuestion[] = JSON.parse(
+                    problem.similarQuestions
+                );
                 parsedSimilarQuestions = allQuestions
                     .slice(0, 3)
-                    .map((q: any) => ({
+                    .map((q: SimilarQuestion) => ({
                         titleSlug: q.titleSlug,
                         difficulty: q.difficulty
                     }));
@@ -222,8 +272,8 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
         limit: number = 10,
         offset: number = 0,
         searchKeywords?: string
-    ): Promise<any> {
-        const filters: any = {};
+    ): Promise<ProblemSearchResult> {
+        const filters: Record<string, unknown> = {};
         if (difficulty) {
             filters.difficulty = difficulty.toUpperCase();
         }
@@ -253,36 +303,50 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
         }
         return {
             total: questionList.total,
-            questions: questionList.questions.map((question: any) => ({
-                title: question.title,
-                titleSlug: question.titleSlug,
-                difficulty: question.difficulty,
-                acRate: question.acRate,
-                topicTags: question.topicTags.map((tag: any) => tag.slug)
-            }))
+            questions: questionList.questions.map(
+                (question: {
+                    title: string;
+                    titleSlug: string;
+                    difficulty: string;
+                    acRate: number;
+                    topicTags: TopicTag[];
+                }) => ({
+                    title: question.title,
+                    titleSlug: question.titleSlug,
+                    difficulty: question.difficulty,
+                    acRate: question.acRate,
+                    topicTags: question.topicTags.map((tag) => tag.slug)
+                })
+            )
         };
     }
 
-    async fetchUserProgressQuestionList(options?: {
-        offset?: number;
-        limit?: number;
+    async fetchUserProgressQuestionList(filters: {
+        offset: number;
+        limit: number;
         questionStatus?: string;
         difficulty?: string[];
-    }): Promise<any> {
+    }): Promise<UserProgressQuestionList> {
         if (!this.isAuthenticated()) {
-            throw new Error(
+            throw new LeetCodeError(
+                ErrorCode.AUTH_REQUIRED,
                 "Authentication required to fetch user progress question list"
             );
         }
 
-        const filters = {
-            skip: options?.offset || 0,
-            limit: options?.limit || 20,
-            questionStatus: options?.questionStatus as any,
-            difficulty: options?.difficulty as any[]
+        // leetcode-query declares these as enums (LeetCodeQuestionStatus /
+        // LeetCodeDifficulty) but accepts the raw strings from MCP tool inputs.
+        const { offset: skip, limit, questionStatus, difficulty } = filters;
+        const query = {
+            skip,
+            limit,
+            questionStatus: questionStatus as unknown as undefined,
+            difficulty: difficulty as unknown as undefined
         };
 
-        return await this.leetCodeApi.user_progress_questions(filters);
+        // Upstream's declared result type is replaced with the project's own.
+        const progress = await this.leetCodeApi.user_progress_questions(query);
+        return progress as unknown as UserProgressQuestionList;
     }
 
     /**
@@ -294,53 +358,50 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
      */
     async fetchQuestionSolutionArticles(
         questionSlug: string,
-        options?: any
-    ): Promise<any> {
-        const variables: any = {
+        options?: SolutionArticlesQueryOptions
+    ): Promise<SolutionArticleList> {
+        const variables = {
             questionSlug,
-            first: options?.limit || 5,
-            skip: options?.skip || 0,
-            orderBy: options?.orderBy || "HOT",
+            first: options?.limit ?? 5,
+            skip: options?.skip ?? 0,
+            orderBy: options?.orderBy ?? "HOT",
             userInput: options?.userInput,
             tagSlugs: options?.tagSlugs ?? []
         };
 
-        return await this.leetCodeApi
-            .graphql({
-                query: SOLUTION_ARTICLES_QUERY,
-                variables
-            })
-            .then((res) => {
-                const ugcArticleSolutionArticles =
-                    res.data?.ugcArticleSolutionArticles;
-                if (!ugcArticleSolutionArticles) {
-                    return {
-                        totalNum: 0,
-                        hasNextPage: false,
-                        articles: []
-                    };
-                }
+        const res = await this.leetCodeApi.graphql({
+            query: SOLUTION_ARTICLES_QUERY,
+            variables
+        });
+        const listing = res.data?.ugcArticleSolutionArticles;
+        if (!listing) {
+            // No listing in the payload: report an empty page, not an error.
+            return {
+                totalNum: 0,
+                hasNextPage: false,
+                articles: []
+            };
+        }
 
-                return {
-                    totalNum: ugcArticleSolutionArticles?.totalNum || 0,
-                    hasNextPage:
-                        ugcArticleSolutionArticles?.pageInfo?.hasNextPage ||
-                        false,
-                    articles:
-                        ugcArticleSolutionArticles?.edges
-                            ?.map((edge: any) => {
-                                if (
-                                    edge?.node &&
-                                    edge.node.topicId &&
-                                    edge.node.slug
-                                ) {
-                                    edge.node.articleUrl = `https://leetcode.com/problems/${questionSlug}/solutions/${edge.node.topicId}/${edge.node.slug}`;
-                                }
-                                return edge.node;
-                            })
-                            .filter((node: any) => node && node.canSee) || []
-                };
-            });
+        // `listing` is known to be present here, so only its own optional
+        // fields need a fallback.
+        const edges: Array<{ node?: SolutionArticleSummary | null }> =
+            listing.edges ?? [];
+        const articles = edges
+            .map((edge) => edge?.node)
+            .filter((node): node is SolutionArticleSummary => !!node?.canSee);
+        // Attach a browsable URL to each visible article that has the ids
+        // needed to build one.
+        for (const article of articles) {
+            if (article.topicId && article.slug) {
+                article.articleUrl = `https://leetcode.com/problems/${questionSlug}/solutions/${article.topicId}/${article.slug}`;
+            }
+        }
+        return {
+            totalNum: listing.totalNum ?? 0,
+            hasNextPage: listing.pageInfo?.hasNextPage ?? false,
+            articles
+        };
     }
 
     /**
@@ -349,17 +410,17 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
      * @param topicId - The topic ID of the solution
      * @returns Promise resolving to the solution detail data
      */
-    async fetchSolutionArticleDetail(topicId: string): Promise<any> {
-        return await this.leetCodeApi
-            .graphql({
-                query: SOLUTION_ARTICLE_DETAIL_QUERY,
-                variables: {
-                    topicId
-                }
-            })
-            .then((response) => {
-                return response.data?.ugcArticleSolutionArticle;
-            });
+    async fetchSolutionArticleDetail(
+        topicId: string
+    ): Promise<SolutionArticleDetail | null> {
+        const { data } = await this.leetCodeApi.graphql({
+            query: SOLUTION_ARTICLE_DETAIL_QUERY,
+            variables: { topicId }
+        });
+        // A payload without the article field is normalised to null so
+        // callers never see undefined.
+        const article = data?.ugcArticleSolutionArticle ?? null;
+        return article as SolutionArticleDetail | null;
     }
 
     async submitSolution(
@@ -394,7 +455,7 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
             // Submit solution
             const submitUrl = `${baseUrl}/problems/${problemSlug}/submit/`;
 
-            const submitResponse = await axios.post<LeetCodeSubmitResponse>(
+            const submitResponse = await axios.post(
                 submitUrl,
                 {
                     lang: leetcodeLang,
@@ -411,7 +472,17 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
                 }
             );
 
-            const submissionId = submitResponse.data.submission_id;
+            const parsedSubmit = SubmitResponseSchema.safeParse(
+                submitResponse.data
+            );
+            if (!parsedSubmit.success) {
+                throw new LeetCodeError(
+                    ErrorCode.UPSTREAM_PAYLOAD_INVALID,
+                    `Submit response did not match expected schema: ${parsedSubmit.error.message}`,
+                    parsedSubmit.error
+                );
+            }
+            const submissionId = parsedSubmit.data.submission_id;
 
             // Poll for results
             const checkUrl = `${baseUrl}/submissions/detail/${submissionId}/check/`;
@@ -422,16 +493,23 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
                 // Wait 1 second between polls
                 await new Promise((resolve) => setTimeout(resolve, 1000));
 
-                const checkResponse = await axios.get<LeetCodeCheckResponse>(
-                    checkUrl,
-                    {
-                        headers: {
-                            Cookie: `csrftoken=${this.credential.csrf}; LEETCODE_SESSION=${this.credential.session}`
-                        }
+                const checkResponse = await axios.get(checkUrl, {
+                    headers: {
+                        Cookie: `csrftoken=${this.credential.csrf}; LEETCODE_SESSION=${this.credential.session}`
                     }
-                );
+                });
 
-                const result = checkResponse.data;
+                const parsedCheck = CheckResponseSchema.safeParse(
+                    checkResponse.data
+                );
+                if (!parsedCheck.success) {
+                    throw new LeetCodeError(
+                        ErrorCode.UPSTREAM_PAYLOAD_INVALID,
+                        `Check response did not match expected schema: ${parsedCheck.error.message}`,
+                        parsedCheck.error
+                    );
+                }
+                const result = parsedCheck.data;
 
                 if (
                     result.state !== "SUCCESS" &&
@@ -454,10 +532,12 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
                             accepted: true,
                             runtime: result.runtime,
                             memory: result.memory,
-                            runtimePercentile: result.runtime_percentile,
-                            memoryPercentile: result.memory_percentile,
-                            totalCorrect: result.total_correct,
-                            totalTestcases: result.total_testcases,
+                            runtimePercentile:
+                                result.runtime_percentile ?? undefined,
+                            memoryPercentile:
+                                result.memory_percentile ?? undefined,
+                            totalCorrect: result.total_correct ?? undefined,
+                            totalTestcases: result.total_testcases ?? undefined,
                             statusMessage: "Accepted"
                         };
                     } else {
@@ -482,11 +562,11 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
 
                         return {
                             accepted: false,
-                            statusMessage: result.status_msg,
+                            statusMessage: result.status_msg ?? "Unknown",
                             failedTestCase,
                             errorMessage,
-                            totalCorrect: result.total_correct,
-                            totalTestcases: result.total_testcases
+                            totalCorrect: result.total_correct ?? undefined,
+                            totalTestcases: result.total_testcases ?? undefined
                         };
                     }
                 }
@@ -553,9 +633,18 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
             }
         });
 
-        const question = response.data.data?.question;
+        const parsed = QuestionIdResponseSchema.safeParse(response.data);
+        if (!parsed.success) {
+            throw new LeetCodeError(
+                ErrorCode.UPSTREAM_PAYLOAD_INVALID,
+                `Question-id response did not match expected schema: ${parsed.error.message}`,
+                parsed.error
+            );
+        }
+        const question = parsed.data.data?.question;
         if (!question) {
-            throw new Error(
+            throw new LeetCodeError(
+                ErrorCode.PROBLEM_NOT_FOUND,
                 `Problem slug "${problemSlug}" not found or invalid.`
             );
         }
@@ -599,9 +688,18 @@ export class LeetCodeGlobalService implements LeetcodeServiceInterface {
                 }
             );
 
-            // Check if user is signed in and return username
-            const userStatus = response.data?.data?.userStatus;
-            if (userStatus?.isSignedIn === true && userStatus?.username) {
+            const parsed = ValidateCredentialsResponseSchema.safeParse(
+                response.data
+            );
+            if (!parsed.success) {
+                logger.warn(
+                    "validateCredentials: upstream payload did not match schema: %s",
+                    parsed.error.message
+                );
+                return null;
+            }
+            const userStatus = parsed.data.data?.userStatus;
+            if (userStatus?.isSignedIn === true && userStatus.username) {
                 return userStatus.username;
             }
             return null;
diff --git a/src/leetcode/leetcode-service-interface.ts b/src/leetcode/leetcode-service-interface.ts
index afcedf2..aa01cd6 100644
--- a/src/leetcode/leetcode-service-interface.ts
+++ b/src/leetcode/leetcode-service-interface.ts
@@ -1,4 +1,33 @@
+import {
+    DailyChallenge,
+    Problem,
+    ProblemSearchResult,
+    SimplifiedProblem
+} from "../types/problem.js";
+import {
+    SolutionArticleDetail,
+    SolutionArticleList
+} from "../types/solution.js";
 import { SubmissionResult } from "../types/submission.js";
+import {
+    UserAllSubmissions,
+    UserContestInfo,
+    UserProfile,
+    UserProgressQuestionList,
+    UserRecentACSubmissions,
+    UserRecentSubmissions,
+    UserStatus,
+    UserSubmissionDetail
+} from "../types/user.js";
+
+/** Optional sort/pagination knobs for `fetchQuestionSolutionArticles`. */
+export interface SolutionArticlesQueryOptions {
+    limit?: number; // sent as `first`; LeetCodeGlobalService defaults to 5
+    skip?: number; // LeetCodeGlobalService defaults to 0
+    orderBy?: string; // LeetCodeGlobalService defaults to "HOT"
+    userInput?: string; // forwarded unchanged, no default
+    tagSlugs?: string[]; // LeetCodeGlobalService defaults to []
+}
 
 /**
  * Base interface for LeetCode API service implementations.
@@ -11,16 +40,16 @@ export interface LeetcodeServiceInterface {
      * @param username - The LeetCode username to fetch profile data for
      * @returns Promise resolving to the user's profile data
      */
-    fetchUserProfile(username: string): Promise<any>;
+    fetchUserProfile(username: string): Promise<UserProfile>;
 
     /**
      * Retrieves the authenticated user's status information.
      * Includes login status, subscription details, and user identification information.
      *
      * @returns Promise resolving to the user's status information
-     * @throws Error if not authenticated
+     * @throws LeetCodeError(AUTH_REQUIRED) if not authenticated
      */
-    fetchUserStatus(): Promise<any>;
+    fetchUserStatus(): Promise<UserStatus>;
 
     /**
      * Retrieves the authenticated user's submission history with various filtering options.
@@ -29,20 +58,14 @@ export interface LeetcodeServiceInterface {
      * @param options.offset - Number of submissions to skip (required for pagination)
      * @param options.limit - Maximum number of submissions to return (required)
      * @param options.questionSlug - Optional filter for problem slug/identifier
-     * @param options.lastKey - Optional pagination token for subsequent requests
-     * @param options.lang - Optional filter for programming language
-     * @param options.status - Optional filter for submission status
      * @returns Promise resolving to the filtered submission data
-     * @throws Error if not authenticated
+     * @throws LeetCodeError(AUTH_REQUIRED) if not authenticated
      */
     fetchUserAllSubmissions(options: {
         offset: number;
         limit: number;
         questionSlug?: string;
-        lastKey?: string;
-        lang?: string | null;
-        status?: string | null;
-    }): Promise<any>;
+    }): Promise<UserAllSubmissions>;
 
     /**
      * Retrieves the authenticated user's progress on problems with filtering options.
@@ -53,14 +76,14 @@ export interface LeetcodeServiceInterface {
      * @param filters.questionStatus - Optional filter for problem status (e.g., "ATTEMPTED", "SOLVED")
      * @param filters.difficulty - Optional array of difficulty levels to filter by
      * @returns Promise resolving to the user's progress data
-     * @throws Error if not authenticated
+     * @throws LeetCodeError(AUTH_REQUIRED) if not authenticated
      */
     fetchUserProgressQuestionList(filters: {
         offset: number;
         limit: number;
         questionStatus?: string;
         difficulty?: string[];
-    }): Promise<any>;
+    }): Promise<UserProgressQuestionList>;
 
     /**
      * Retrieves a user's recent submissions (both accepted and failed).
@@ -69,7 +92,10 @@ export interface LeetcodeServiceInterface {
      * @param limit - Optional maximum number of submissions to return
      * @returns Promise resolving to the recent submissions data
      */
-    fetchUserRecentSubmissions(username: string, limit?: number): Promise<any>;
+    fetchUserRecentSubmissions(
+        username: string,
+        limit?: number
+    ): Promise<UserRecentSubmissions>;
 
     /**
      * Retrieves a user's recent accepted (AC) submissions only.
@@ -81,7 +107,7 @@ export interface LeetcodeServiceInterface {
     fetchUserRecentACSubmissions(
         username: string,
         limit?: number
-    ): Promise<any>;
+    ): Promise<UserRecentACSubmissions>;
 
     /**
      * Retrieves detailed information about a specific submission.
@@ -89,9 +115,9 @@ export interface LeetcodeServiceInterface {
      *
      * @param id - Numeric submission ID
      * @returns Promise resolving to the submission details
-     * @throws Error if not authenticated or submission not found
+     * @throws LeetCodeError(AUTH_REQUIRED) if not authenticated
      */
-    fetchUserSubmissionDetail(id: number): Promise<any>;
+    fetchUserSubmissionDetail(id: number): Promise<UserSubmissionDetail>;
 
     /**
      * Retrieves a user's contest ranking information and participation history.
@@ -100,14 +126,17 @@ export interface LeetcodeServiceInterface {
      * @param attended - Whether to include only contests the user participated in
      * @returns Promise resolving to the contest ranking data
      */
-    fetchUserContestRanking(username: string, attended: boolean): Promise<any>;
+    fetchUserContestRanking(
+        username: string,
+        attended: boolean
+    ): Promise<UserContestInfo>;
 
     /**
      * Retrieves today's LeetCode Daily Challenge problem.
      *
      * @returns Promise resolving to the daily challenge problem data
      */
-    fetchDailyChallenge(): Promise<any>;
+    fetchDailyChallenge(): Promise<DailyChallenge>;
 
     /**
      * Retrieves simplified information about a specific problem.
@@ -115,16 +144,18 @@ export interface LeetcodeServiceInterface {
      *
      * @param titleSlug - Problem identifier/slug as used in the LeetCode URL
      * @returns Promise resolving to the simplified problem details
+     * @throws LeetCodeError(PROBLEM_NOT_FOUND) if the slug doesn't exist
      */
-    fetchProblemSimplified(titleSlug: string): Promise<any>;
+    fetchProblemSimplified(titleSlug: string): Promise<SimplifiedProblem>;
 
     /**
      * Retrieves detailed information about a specific problem.
      *
      * @param titleSlug - Problem identifier/slug as used in the LeetCode URL
      * @returns Promise resolving to the problem details
+     * @throws LeetCodeError(PROBLEM_NOT_FOUND) if the slug doesn't exist
      */
-    fetchProblem(titleSlug: string): Promise<any>;
+    fetchProblem(titleSlug: string): Promise<Problem>;
 
     /**
      * Searches for problems matching specified criteria.
@@ -144,7 +175,7 @@ export interface LeetcodeServiceInterface {
         limit?: number,
         offset?: number,
         searchKeywords?: string
-    ): Promise<any>;
+    ): Promise<ProblemSearchResult>;
 
     /**
      * Determines if the current service has valid authentication credentials.
@@ -171,8 +202,8 @@ export interface LeetcodeServiceInterface {
      */
     fetchQuestionSolutionArticles(
         questionSlug: string,
-        options?: any
-    ): Promise<any>;
+        options?: SolutionArticlesQueryOptions
+    ): Promise<SolutionArticleList>;
 
     /**
      * Retrieves detailed information about a specific solution.
@@ -180,7 +211,9 @@ export interface LeetcodeServiceInterface {
      * @param identifier - The identifier of the solution (topicId or slug)
      * @returns Promise resolving to the solution detail data
      */
-    fetchSolutionArticleDetail(identifier: string): Promise<any>;
+    fetchSolutionArticleDetail(
+        identifier: string
+    ): Promise<SolutionArticleDetail | null>;
 
     /**
      * Submits a solution to a problem and polls for the result.
diff --git a/src/leetcode/schemas.ts b/src/leetcode/schemas.ts
new file mode 100644
index 0000000..3a62f9c
--- /dev/null
+++ b/src/leetcode/schemas.ts
@@ -0,0 +1,81 @@
+/**
+ * Runtime validators for payloads coming back from LeetCode.
+ *
+ * These are the *minimum* schemas needed to tell whether upstream still speaks
+ * the contract our types describe. Objects (except the nested `question`) use
+ * `passthrough()` to keep unknown fields (we re-emit some payloads verbatim to
+ * MCP clients); fields LeetCode has been observed to omit are optional.
+ *
+ * Use `parse` (throws `ZodError`) at the service boundary when we want to fail
+ * loudly, and `safeParse` when we want to log-and-fall-back. Translate any
+ * `ZodError` into `new LeetCodeError(ErrorCode.UPSTREAM_PAYLOAD_INVALID, ...)`
+ * so the MCP layer can render a structured error.
+ */
+import { z } from "zod";
+
+export const SubmitResponseSchema = z
+    .object({
+        submission_id: z.number()
+    })
+    .passthrough();
+
+export const CheckResponseSchema = z
+    .object({
+        state: z.string(),
+        // LeetCode omits status_msg on PENDING/STARTED responses; only
+        // populated once `state === "SUCCESS"`.
+        status_msg: z.string().optional(),
+        status_code: z.number().optional(),
+        runtime: z.string().optional(),
+        memory: z.string().optional(),
+        runtime_percentile: z.number().nullable().optional(),
+        memory_percentile: z.number().nullable().optional(),
+        // LeetCode has been observed to return both an array of strings (one
+        // per test case) and a single JSON-encoded string here; accept both.
+        code_answer: z.union([z.array(z.string()), z.string()]).optional(),
+        expected_answer: z.union([z.array(z.string()), z.string()]).optional(),
+        input: z.string().optional(),
+        std_output: z.string().optional(),
+        compile_error: z.string().optional(),
+        full_compile_error: z.string().optional(),
+        runtime_error: z.string().optional(),
+        full_runtime_error: z.string().optional(),
+        total_correct: z.number().nullable().optional(),
+        total_testcases: z.number().nullable().optional()
+    })
+    .passthrough();
+
+export const QuestionIdResponseSchema = z
+    .object({
+        data: z
+            .object({
+                question: z
+                    .object({
+                        questionId: z.string(),
+                        questionFrontendId: z.string().optional()
+                    })
+                    .nullable()
+                    .optional()
+            })
+            .passthrough()
+    })
+    .passthrough();
+
+export const ValidateCredentialsResponseSchema = z
+    .object({
+        data: z
+            .object({
+                userStatus: z
+                    .object({
+                        username: z.string().nullable().optional(),
+                        isSignedIn: z.boolean()
+                    })
+                    .passthrough()
+                    .optional()
+            })
+            .passthrough()
+    })
+    .passthrough();
+
+export type SubmitResponse = z.infer<typeof SubmitResponseSchema>;
+export type CheckResponse = z.infer<typeof CheckResponseSchema>;
diff --git a/src/mcp/server-instructions.ts b/src/mcp/server-instructions.ts
new file mode 100644
index 0000000..82dbf61
--- /dev/null
+++ b/src/mcp/server-instructions.ts
@@ -0,0 +1,28 @@
+/**
+ * The MCP `instructions` field — a single block delivered to clients at
+ * handshake. Replaces the SKILL-style "remember to invoke prompt X first"
+ * dance with rules the agent receives once and keeps for the session.
+ *
+ * Kept as a small constant so it can be unit-tested independently and
+ * is easy to evolve as the rest of the redesign lands.
+ */
+export const SERVER_INSTRUCTIONS: string = `
+You are connected to the LeetCode MCP server, an AI tutor — not a solution oracle.
+
+# Pedagogy contract (server-enforced)
+
+- Every problem the user works on lives in a session. Open one with **start_problem({ titleSlug, language? })** before any other problem-specific call.
+- Hints are progressive and gated. Use **request_hint({ titleSlug })** to advance the user from clarification → approach → implementation sketch → optimal solution. Do not paraphrase later levels before they are unlocked.
+- The community-solutions tools (\`list_problem_solutions\`, \`get_problem_solution\`) are gated by the server. They will reject with \`HINT_LEVEL_TOO_LOW\` until the session has reached the maximum hint level. Drive the user there through hints rather than trying to bypass the gate.
+- Inspect progress with **get_session_state({ titleSlug })**; restart a problem with **reset_session({ titleSlug })**.
+
+# Authoring style
+
+- Match the user's language. The session remembers it; honour it.
+- When you produce hints yourself (vs. paraphrasing the server's hint payload), reference what the user has actually written when possible — generic hints are worse than no hint.
+- Submissions cost the user a real LeetCode submission. Prefer reasoning + \`run_local_tests\` before calling \`submit_solution\`.
+
+# Authentication
+
+- Credentials are auto-restored at startup if the user has saved them. If \`check_auth_status\` reports unauthenticated, point the user at \`start_leetcode_auth\` and the **leetcode_authentication_guide** prompt.
+`.trim();
diff --git a/src/mcp/tools/auth-tools.ts b/src/mcp/tools/auth-tools.ts
index 1b51c86..be96de1 100644
--- a/src/mcp/tools/auth-tools.ts
+++ b/src/mcp/tools/auth-tools.ts
@@ -1,5 +1,6 @@
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
+import { applyValidatedCredentials } from "../../auth/auth-flow.js";
 import { LeetcodeServiceInterface } from "../../leetcode/leetcode-service-interface.js";
 import { openDefaultBrowser } from "../../utils/browser-launcher.js";
 import { credentialsStorage } from "../../utils/credentials.js";
@@ -191,12 +192,14 @@ export class AuthToolRegistry extends ToolRegistry {
                         };
                     }
 
-                    // Validate credentials are still valid
-                    const username =
-                        await this.leetcodeService.validateCredentials(
-                            credentials.csrftoken,
-                            credentials.LEETCODE_SESSION
-                        );
+                    // Validate credentials and, on success, push them into
+                    // the running service so the very next authenticated
+                    // tool call works without a server restart.
+                    const username = await applyValidatedCredentials(
+                        this.leetcodeService,
+                        credentials.csrftoken,
+                        credentials.LEETCODE_SESSION
+                    );
 
                     if (!username) {
                         return {
diff --git a/src/mcp/tools/runner-tools.ts b/src/mcp/tools/runner-tools.ts
new file mode 100644
index 0000000..2764568
--- /dev/null
+++ b/src/mcp/tools/runner-tools.ts
@@ -0,0 +1,190 @@
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { z } from "zod";
+import { createLocalRunSnapshot } from "../../domain/local-run-snapshot.js";
+import type { SessionService } from "../../domain/session-service.js";
+import { LeetcodeServiceInterface } from "../../leetcode/leetcode-service-interface.js";
+import {
+    IMPLEMENTED_LANGUAGES,
+    SUPPORTED_LANGUAGES,
+    type LocalRunner
+} from "../../runner/runner.js";
+import type { RunnerLanguage } from "../../types/index.js";
+import { ErrorCode, LeetCodeError } from "../../types/index.js";
+import { errorEnvelope } from "./session-tools.js";
+import { ToolRegistry } from "./tool-registry.js";
+
+/**
+ * Local-runner tools introduced in Phase 4.
+ *
+ * `run_local_tests` is the inner-loop primitive: agent passes code,
+ * runner spawns a sandboxed subprocess, captures stdout/stderr/exit
+ * code, and reports back. The session's `lastLocalRunPassed` flag is
+ * updated as a side effect so `submit_solution`'s strict-mode gate
+ * (Phase 6) and any future analytics have a stable hook.
+ *
+ * v1 deliberately does *not* parse `exampleTestcases` server-side or
+ * synthesize a per-problem harness. The agent — which already has the
+ * problem in context after `start_problem` — is responsible for adding
+ * test invocations to the code it submits to the runner. That keeps
+ * the wire surface tiny, language-agnostic, and free of LeetCode-
+ * specific signature parsing.
+ */
+export class RunnerToolRegistry extends ToolRegistry {
+    constructor(
+        server: McpServer,
+        leetcodeService: LeetcodeServiceInterface,
+        private readonly sessions: SessionService,
+        private readonly runner: LocalRunner
+    ) {
+        super(server, leetcodeService);
+    }
+
+    protected registerPublic(): void {
+        this.registerRunLocalTests();
+        this.registerDoctor();
+    }
+
+    private registerRunLocalTests(): void {
+        const supportedLiteral = z.enum(
+            SUPPORTED_LANGUAGES as unknown as [string, ...string[]]
+        );
+        this.server.registerTool(
+            "run_local_tests",
+            {
+                description:
+                    "Runs the user's code locally in an isolated subprocess, captures stdout / stderr / exit code, and updates the session's lastLocalRunPassed flag. Use this in the inner loop instead of submit_solution — it costs no LeetCode submission and turns around in seconds. The agent is responsible for including test invocations (e.g. `print(Solution().twoSum([2,7,11,15], 9))`) in the code passed in. Phase 4a ships python3; go and java land in Phase 4b/4c.",
+                inputSchema: {
+                    titleSlug: z
+                        .string()
+                        .min(1)
+                        .describe(
+                            "The URL slug of the problem (must match an active session opened with start_problem)."
+                        ),
+                    language: supportedLiteral.describe(
+                        `Language to execute as. Currently runnable: ${IMPLEMENTED_LANGUAGES.join(
+                            ", "
+                        )}. Other LeetCode languages remain valid for submit_solution.`
+                    ),
+                    code: z
+                        .string()
+                        .min(1)
+                        .describe(
+                            "Complete source code to execute. Should include test invocations that print results / raise on failure."
+                        ),
+                    timeoutMs: z
+                        .number()
+                        .int()
+                        .min(100)
+                        .max(60_000)
+                        .optional()
+                        .describe(
+                            "Optional wall-clock budget in milliseconds. Defaults to 5000."
+                        )
+                }
+            },
+            async ({ titleSlug, language, code, timeoutMs }) => {
+                try {
+                    // Require a session — keeps the runner aligned with
+                    // the pedagogy state machine (and gives us a sane
+                    // place to record `attempts` / `lastLocalRunPassed`).
+                    await this.sessions.requireSession(titleSlug);
+
+                    const result = await this.runner.run({
+                        titleSlug,
+                        language: language as RunnerLanguage,
+                        code,
+                        timeoutMs
+                    });
+
+                    await this.sessions.recordLocalRun(
+                        titleSlug,
+                        result.passed,
+                        createLocalRunSnapshot({ code, language })
+                    );
+
+                    return {
+                        content: [
+                            {
+                                type: "text" as const,
+                                text: JSON.stringify({
+                                    titleSlug,
+                                    language,
+                                    result
+                                })
+                            }
+                        ]
+                    };
+                } catch (error) {
+                    return errorEnvelope(
+                        "Failed to run local tests",
+                        wrapTimeout(error)
+                    );
+                }
+            }
+        );
+    }
+
+    private registerDoctor(): void {
+        this.server.registerTool(
+            "runner_doctor",
+            {
+                description:
+                    "Reports which language runtimes (python3, go, java) and OS sandbox tools (bwrap, firejail, sandbox-exec) are detected on this host. Useful for diagnosing 'LANGUAGE_RUNTIME_NOT_FOUND' errors and confirming whether run_local_tests will be sandboxed.",
+                inputSchema: {}
+            },
+            async () => {
+                try {
+                    const capabilities = await this.runner.capabilities();
+                    return {
+                        content: [
+                            {
+                                type: "text" as const,
+                                text: JSON.stringify(capabilities)
+                            }
+                        ]
+                    };
+                } catch (error) {
+                    return errorEnvelope(
+                        "Failed to inspect runner capabilities",
+                        error
+                    );
+                }
+            }
+        );
+    }
+}
+
+/**
+ * Translates a failure thrown while running local tests before it is handed
+ * to the shared error envelope. Timeouts are expected to come back as a
+ * `RunResult` with `timedOut: true` rather than a throw, so this only covers
+ * the thrown case.
+ *
+ * @returns `error` itself, or a `RUNNER_TIMEOUT` `LeetCodeError` wrapping it
+ */
+function wrapTimeout(error: unknown): unknown {
+    // Structured errors and non-Error values have nothing to translate.
+    if (error instanceof LeetCodeError || !(error instanceof Error)) {
+        return error;
+    }
+    // Only a message that mentions a timeout is promoted to RUNNER_TIMEOUT.
+    if (!/timed out/i.test(error.message)) {
+        return error;
+    }
+    return new LeetCodeError(ErrorCode.RUNNER_TIMEOUT, error.message, error);
+}
+
+/** Builds a `RunnerToolRegistry` from its dependencies and registers it. */
+export function registerRunnerTools(
+    server: McpServer,
+    leetcodeService: LeetcodeServiceInterface,
+    sessions: SessionService,
+    runner: LocalRunner
+): void {
+    new RunnerToolRegistry(
+        server,
+        leetcodeService,
+        sessions,
+        runner
+    ).register();
+}
diff --git a/src/mcp/tools/session-tools.ts b/src/mcp/tools/session-tools.ts
new file mode 100644
index 0000000..0726e71
--- /dev/null
+++ b/src/mcp/tools/session-tools.ts
@@ -0,0 +1,246 @@
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { z } from "zod";
+import type { SessionService } from "../../domain/session-service.js";
+import { LeetcodeServiceInterface } from "../../leetcode/leetcode-service-interface.js";
+import { ErrorCode, isLeetCodeError } from "../../types/index.js";
+import { ToolRegistry } from "./tool-registry.js";
+
+/**
+ * Pedagogy-flow tools: session lifecycle + hint progression.
+ *
+ * These four tools replace the prompt-based "remember to invoke X" flow
+ * with explicit, server-tracked state. The agent calls `start_problem`
+ * once, then drives `request_hint` until the user has engaged with each
+ * level, and only then are the solution-returning tools callable.
+ */
+export class SessionToolRegistry extends ToolRegistry {
+    constructor(
+        server: McpServer,
+        leetcodeService: LeetcodeServiceInterface,
+        private readonly sessions: SessionService
+    ) {
+        super(server, leetcodeService);
+    }
+
+    protected registerPublic(): void {
+        this.registerStartProblem();
+        this.registerRequestHint();
+        this.registerGetSessionState();
+        this.registerResetSession();
+    }
+
+    private registerStartProblem(): void {
+        this.server.registerTool(
+            "start_problem",
+            {
+                description:
+                    "Opens (or resumes) a tutoring session for a LeetCode problem. Must be called before request_hint, list_problem_solutions, or get_problem_solution. Idempotent: re-running on a slug the user is already mid-way through preserves their hint progress.",
+                inputSchema: {
+                    titleSlug: z
+                        .string()
+                        .min(1)
+                        .describe(
+                            "The URL slug of the problem (e.g., 'two-sum')."
+                        ),
+                    language: z
+                        .string()
+                        .optional()
+                        .describe(
+                            "Optional: the language the user is solving in. Recorded on the session for future workspace / runner phases."
+                        )
+                }
+            },
+            async ({ titleSlug, language }) => {
+                try {
+                    const problem =
+                        await this.leetcodeService.fetchProblemSimplified(
+                            titleSlug
+                        );
+                    const session = await this.sessions.startOrResume({
+                        slug: titleSlug,
+                        language
+                    });
+                    return {
+                        content: [
+                            {
+                                type: "text",
+                                text: JSON.stringify({
+                                    titleSlug,
+                                    session,
+                                    problem
+                                })
+                            }
+                        ]
+                    };
+                } catch (error) {
+                    return errorEnvelope("Failed to start problem", error);
+                }
+            }
+        );
+    }
+
+    private registerRequestHint(): void {
+        this.server.registerTool(
+            "request_hint",
+            {
+                description:
+                    "Advances the hint level for an active session and returns the next hint. Levels: 1 clarification → 2 approach → 3 implementation sketch → 4 solution unlock. The community-solutions tools become callable only after this has been driven to level 4.",
+                inputSchema: {
+                    titleSlug: z
+                        .string()
+                        .min(1)
+                        .describe(
+                            "The URL slug of the problem the user is working on."
+                        )
+                }
+            },
+            async ({ titleSlug }) => {
+                try {
+                    const problem =
+                        await this.leetcodeService.fetchProblemSimplified(
+                            titleSlug
+                        );
+                    const result = await this.sessions.advance(
+                        titleSlug,
+                        problem
+                    );
+                    return {
+                        content: [
+                            {
+                                type: "text",
+                                text: JSON.stringify({
+                                    titleSlug,
+                                    level: result.level,
+                                    hint: result.hint,
+                                    session: result.session
+                                })
+                            }
+                        ]
+                    };
+                } catch (error) {
+                    return errorEnvelope("Failed to request hint", error);
+                }
+            }
+        );
+    }
+
+    private registerGetSessionState(): void {
+        this.server.registerTool(
+            "get_session_state",
+            {
+                description:
+                    "Returns the persisted session for a problem, or null if the user has not called start_problem for it. Useful for restoring context after a restart.",
+                inputSchema: {
+                    titleSlug: z
+                        .string()
+                        .min(1)
+                        .describe("The URL slug of the problem.")
+                }
+            },
+            async ({ titleSlug }) => {
+                try {
+                    const session = await this.sessions.get(titleSlug);
+                    return {
+                        content: [
+                            {
+                                type: "text",
+                                text: JSON.stringify({
+                                    titleSlug,
+                                    session
+                                })
+                            }
+                        ]
+                    };
+                } catch (error) {
+                    return errorEnvelope("Failed to read session", error);
+                }
+            }
+        );
+    }
+
+    private registerResetSession(): void {
+        this.server.registerTool(
+            "reset_session",
+            {
+                description:
+                    "Resets the tutoring session for a problem back to hint level 0. Use when the user wants to re-attempt the problem from scratch.",
+                inputSchema: {
+                    titleSlug: z
+                        .string()
+                        .min(1)
+                        .describe("The URL slug of the problem to reset.")
+                }
+            },
+            async ({ titleSlug }) => {
+                try {
+                    const session = await this.sessions.reset(titleSlug);
+                    return {
+                        content: [
+                            {
+                                type: "text",
+                                text: JSON.stringify({
+                                    titleSlug,
+                                    session
+                                })
+                            }
+                        ]
+                    };
+                } catch (error) {
+                    return errorEnvelope("Failed to reset session", error);
+                }
+            }
+        );
+    }
+}
+
+/**
+ * Converts a caught failure into the MCP tool-result shape returned from
+ * the tool handlers' `catch` branches.
+ *
+ * The result holds one text item whose body is a JSON object with three
+ * keys: `error` (the caller-supplied `fallbackMessage`), `code`, and
+ * `message`. A `LeetCodeError` supplies its own `code` and `message`; any
+ * other value is reported as `ErrorCode.UPSTREAM_ERROR` with the `Error`
+ * message, or the stringified value ("unknown" for null / undefined).
+ *
+ * The return type is left to inference so handler signatures unify with
+ * the SDK's inferred tool-result type.
+ *
+ * @param fallbackMessage - Short description of the operation that failed
+ * @param error - The value caught by the handler
+ */
+function errorEnvelope(fallbackMessage: string, error: unknown) {
+    const detail = isLeetCodeError(error)
+        ? { code: error.code, message: error.message }
+        : {
+              code: ErrorCode.UPSTREAM_ERROR,
+              message:
+                  error instanceof Error
+                      ? error.message
+                      : String(error ?? "unknown")
+          };
+    return {
+        content: [
+            {
+                type: "text" as const,
+                text: JSON.stringify({
+                    error: fallbackMessage,
+                    code: detail.code,
+                    message: detail.message
+                })
+            }
+        ]
+    };
+}
+// Re-exported so other tool registries can render the same shape when
+// gating on the session service throws.
+export { errorEnvelope };
+
+/** Builds a `SessionToolRegistry` from its dependencies and registers it. */
+export function registerSessionTools(
+    server: McpServer,
+    leetcodeService: LeetcodeServiceInterface,
+    sessions: SessionService
+): void {
+    new SessionToolRegistry(server, leetcodeService, sessions).register();
+}
diff --git a/src/mcp/tools/solution-tools.ts b/src/mcp/tools/solution-tools.ts
index f30272a..ed75860 100644
--- a/src/mcp/tools/solution-tools.ts
+++ b/src/mcp/tools/solution-tools.ts
@@ -1,59 +1,110 @@
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
+import type { SessionService } from "../../domain/session-service.js";
 import { LeetcodeServiceInterface } from "../../leetcode/leetcode-service-interface.js";
+import { ErrorCode, LeetCodeError } from "../../types/index.js";
+import { errorEnvelope } from "./session-tools.js";
 import { ToolRegistry } from "./tool-registry.js";
 
 /**
- * Solution tool registry class that handles registration of LeetCode solution-related tools.
- * This class manages tools for accessing solutions, filtering solutions, and reading solution details.
+ * Solution tool registry — community-solution access.
+ *
+ * Both tools are gated by the pedagogy state machine: they reject with
+ * `HINT_LEVEL_TOO_LOW` until the active session for the slug has reached
+ * the maximum hint level. The agent is expected to drive the user
+ * through `request_hint` first.
  */
 export class SolutionToolRegistry extends ToolRegistry {
+    constructor(
+        server: McpServer,
+        leetcodeService: LeetcodeServiceInterface,
+        private readonly sessions: SessionService
+    ) {
+        super(server, leetcodeService);
+    }
+
+    /**
+     * Confirms `topicId` is listed among the solution articles of `titleSlug`,
+     * scanning pages of 50 until a match, the last page, or `skip` reaches 500.
+     * @throws LeetCodeError(SOLUTION_NOT_FOUND) when no scanned article matches
+     */
+    private async assertTopicBelongsToSlug(
+        topicId: string,
+        titleSlug: string
+    ): Promise<void> {
+        const target = String(topicId);
+        const limit = 50;
+
+        for (let skip = 0; skip < 500; skip += limit) {
+            const { articles, hasNextPage } =
+                await this.leetcodeService.fetchQuestionSolutionArticles(
+                    titleSlug,
+                    { limit, skip }
+                );
+            if (articles.some((entry) => String(entry.topicId) === target)) {
+                return;
+            }
+            if (!hasNextPage) {
+                break;
+            }
+        }
+
+        throw new LeetCodeError(
+            ErrorCode.SOLUTION_NOT_FOUND,
+            `Solution topicId ${topicId} was not found for problem ${titleSlug}`
+        );
+    }
+
     protected registerPublic(): void {
-        // Problem solutions listing tool (Global-specific)
         this.server.registerTool(
             "list_problem_solutions",
             {
                 description:
-                    "Retrieves a list of community solutions for a specific LeetCode problem, including only metadata like topicId. To view the full content of a solution, use the 'get_problem_solution' tool with the topicId returned by this tool.",
-
+                    "Retrieves community solution metadata (topicIds) for a problem. GATED: rejects with HINT_LEVEL_TOO_LOW unless the active session for the slug has reached the maximum hint level. Drive the user through request_hint until that level is reached.",
                 inputSchema: {
                     questionSlug: z
                         .string()
                         .describe(
-                            "The URL slug/identifier of the problem to retrieve solutions for (e.g., 'two-sum', 'add-two-numbers'). This is the same string that appears in the LeetCode problem URL after '/problems/'"
+                            "The URL slug of the problem (e.g., 'two-sum')."
                         ),
                     limit: z
                         .number()
+                        .int()
+                        .min(1)
+                        .max(50)
                         .optional()
                         .default(10)
                         .describe(
-                            "Maximum number of solutions to return per request. Used for pagination and controlling response size. Default is 10 if not specified. Must be a positive integer."
+                            "Maximum number of solutions to return per request. Default 10. Must be a positive integer."
                         ),
                     skip: z
                         .number()
+                        .int()
+                        .min(0)
                         .optional()
+                        .default(0)
                         .describe(
-                            "Number of solutions to skip before starting to collect results. Used in conjunction with 'limit' for implementing pagination. Default is 0 if not specified. Must be a non-negative integer."
+                            "Number of solutions to skip before collecting results. Used with `limit` for pagination."
                         ),
                     orderBy: z
                         .enum(["HOT", "MOST_RECENT", "MOST_VOTES"])
                         .default("HOT")
                         .optional()
                         .describe(
-                            "Sorting criteria for the returned solutions. 'DEFAULT' sorts by LeetCode's default algorithm (typically a combination of recency and popularity), 'MOST_VOTES' sorts by the number of upvotes (highest first), and 'MOST_RECENT' sorts by publication date (newest first)."
+                            "Sorting criteria. 'HOT' is LeetCode's default (recency × popularity), 'MOST_VOTES' = upvotes, 'MOST_RECENT' = newest."
                         ),
                     userInput: z
                         .string()
                         .optional()
                         .describe(
-                            "Search term to filter solutions by title, content, or author name. Case insensitive. Useful for finding specific approaches or algorithms mentioned in solutions."
+                            "Search term to filter solutions by title, content, or author name. Case-insensitive."
                         ),
                     tagSlugs: z
                         .array(z.string())
                         .optional()
                         .default([])
                         .describe(
-                            "Array of tag identifiers to filter solutions by programming languages (e.g., 'python', 'java') or problem algorithm/data-structure tags (e.g., 'dynamic-programming', 'recursion'). Only solutions tagged with at least one of the specified tags will be returned."
+                            "Tag slugs to filter by (languages or algorithm tags). Solutions must match at least one tag."
                         )
                 }
             },
@@ -66,20 +117,12 @@ export class SolutionToolRegistry extends ToolRegistry {
                 tagSlugs
             }) => {
                 try {
-                    const options = {
-                        limit,
-                        skip,
-                        orderBy,
-                        userInput,
-                        tagSlugs
-                    };
-
+                    await this.sessions.assertSolutionUnlocked(questionSlug);
                     const data =
                         await this.leetcodeService.fetchQuestionSolutionArticles(
                             questionSlug,
-                            options
+                            { limit, skip, orderBy, userInput, tagSlugs }
                         );
-
                     return {
                         content: [
                             {
@@ -91,83 +134,70 @@ export class SolutionToolRegistry extends ToolRegistry {
                             }
                         ]
                     };
-                } catch (error: any) {
-                    return {
-                        content: [
-                            {
-                                type: "text",
-                                text: JSON.stringify({
-                                    error: "Failed to fetch solutions",
-                                    message: error.message
-                                })
-                            }
-                        ]
-                    };
+                } catch (error) {
+                    return errorEnvelope("Failed to fetch solutions", error);
                 }
             }
         );
 
-        // Solution article detail tool (Global-specific)
         this.server.registerTool(
             "get_problem_solution",
             {
                 description:
-                    "Retrieves the complete content and metadata of a specific solution, including the full article text, author information, and related navigation links. This returns a FULL community solution — only call this after the user has exhausted progressive hints or has explicitly requested the solution after receiving earlier hints.",
-
+                    "Retrieves the full content of a specific community solution. GATED: rejects with HINT_LEVEL_TOO_LOW unless the session for `titleSlug` has reached the maximum hint level. Pass the topicId returned by `list_problem_solutions`.",
                 inputSchema: {
                     topicId: z
                         .string()
                         .describe(
-                            "The unique topic ID of the solution to retrieve. This ID can be obtained from the 'topicId' field in the response of the 'list_problem_solutions' tool. Format is typically a string of numbers and letters that uniquely identifies the solution in LeetCode's database."
+                            "The unique topic ID of the solution, returned by list_problem_solutions."
+                        ),
+                    titleSlug: z
+                        .string()
+                        .describe(
+                            "The URL slug of the problem the solution belongs to. Required to verify the session has reached the unlock level."
                         )
                 }
             },
-            async ({ topicId }) => {
+            async ({ topicId, titleSlug }) => {
                 try {
+                    await this.sessions.assertSolutionUnlocked(titleSlug);
+                    await this.assertTopicBelongsToSlug(topicId, titleSlug);
                     const data =
                         await this.leetcodeService.fetchSolutionArticleDetail(
                             topicId
                         );
-
                     return {
                         content: [
                             {
                                 type: "text",
                                 text: JSON.stringify({
                                     topicId,
+                                    titleSlug,
                                     solution: data
                                 })
                             }
                         ]
                     };
-                } catch (error: any) {
-                    return {
-                        content: [
-                            {
-                                type: "text",
-                                text: JSON.stringify({
-                                    error: "Failed to fetch solution detail",
-                                    message: error.message
-                                })
-                            }
-                        ]
-                    };
+                } catch (error) {
+                    return errorEnvelope(
+                        "Failed to fetch solution detail",
+                        error
+                    );
                 }
             }
         );
     }
 }
 
-/**
- * Registers all solution-related tools with the MCP server.
- *
- * @param server - The MCP server instance to register tools with
- * @param leetcodeService - The LeetCode service implementation to use for API calls
- */
 export function registerSolutionTools(
     server: McpServer,
-    leetcodeService: LeetcodeServiceInterface
+    leetcodeService: LeetcodeServiceInterface,
+    sessions: SessionService
 ): void {
-    const registry = new SolutionToolRegistry(server, leetcodeService);
+    const registry = new SolutionToolRegistry(
+        server,
+        leetcodeService,
+        sessions
+    );
     registry.register();
 }
diff --git a/src/mcp/tools/submission-tools.ts b/src/mcp/tools/submission-tools.ts
index 04ea8cf..dfea998 100644
--- a/src/mcp/tools/submission-tools.ts
+++ b/src/mcp/tools/submission-tools.ts
@@ -1,19 +1,42 @@
 import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
+import { createLocalRunSnapshot } from "../../domain/local-run-snapshot.js";
+import type { SessionService } from "../../domain/session-service.js";
 import { LeetcodeServiceInterface } from "../../leetcode/leetcode-service-interface.js";
+import { ErrorCode, LeetCodeError } from "../../types/index.js";
+import { errorEnvelope } from "./session-tools.js";
 import { ToolRegistry } from "./tool-registry.js";
 
 /**
  * Submission tool registry class that handles registration of LeetCode submission tools.
+ *
+ * Phase 4 wires the strict-mode gate (`LEETCODE_MCP_STRICT_MODE=1`):
+ * when enabled, `submit_solution` refuses to spend a real LeetCode
+ * submission unless the active session's `lastLocalRunPassed === true`.
+ * Default is *off* (preserves current behaviour); session is optional
+ * so existing flows without `start_problem` aren't broken.
  */
 export class SubmissionToolRegistry extends ToolRegistry {
+    constructor(
+        server: McpServer,
+        leetcodeService: LeetcodeServiceInterface,
+        private readonly sessions?: SessionService
+    ) {
+        super(server, leetcodeService);
+    }
+
+    private isStrictMode(): boolean {
+        const flag = process.env.LEETCODE_MCP_STRICT_MODE ?? "";
+        return ["1", "true"].includes(flag);
+    }
+
     protected registerPublic(): void {
         // Submission tool
         this.server.registerTool(
             "submit_solution",
             {
                 description:
-                    "Submit a solution to a LeetCode problem and get results. Returns acceptance status, runtime/memory stats, or failed test case details.",
+                    "Submit a solution to a LeetCode problem and get results. Returns acceptance status, runtime/memory stats, or failed test case details. When LEETCODE_MCP_STRICT_MODE=1 is set, requires `run_local_tests` to have last passed for the problem first — saves real LeetCode submissions for solutions that pass examples locally.",
                 inputSchema: {
                     problemSlug: z
                         .string()
@@ -51,6 +74,29 @@ export class SubmissionToolRegistry extends ToolRegistry {
             },
             async ({ problemSlug, code, language }) => {
                 try {
+                    if (this.isStrictMode()) {
+                        if (!this.sessions) {
+                            throw new LeetCodeError(
+                                ErrorCode.LOCAL_TESTS_NOT_PASSED,
+                                "Strict mode is enabled but no session service is available to verify local test results."
+                            );
+                        }
+                        const session =
+                            await this.sessions.requireSession(problemSlug);
+                        const snapshot = createLocalRunSnapshot({
+                            code,
+                            language
+                        });
+                        if (
+                            session.lastLocalRunPassed !== true ||
+                            session.lastLocalRunSnapshot !== snapshot
+                        ) {
+                            throw new LeetCodeError(
+                                ErrorCode.LOCAL_TESTS_NOT_PASSED,
+                                "Strict mode is enabled and submit_solution requires run_local_tests to pass for the exact code and language being submitted."
+                            );
+                        }
+                    }
                     const result = await this.leetcodeService.submitSolution(
                         problemSlug,
                         code,
@@ -59,23 +105,13 @@ export class SubmissionToolRegistry extends ToolRegistry {
                     return {
                         content: [
                             {
-                                type: "text",
+                                type: "text" as const,
                                 text: JSON.stringify(result, null, 2)
                             }
                         ]
                     };
-                } catch (error: any) {
-                    return {
-                        content: [
-                            {
-                                type: "text",
-                                text: JSON.stringify({
-                                    error: "Failed to submit solution",
-                                    message: error.message
-                                })
-                            }
-                        ]
-                    };
+                } catch (error) {
+                    return errorEnvelope("Failed to submit solution", error);
                 }
             }
         );
@@ -87,11 +123,17 @@ export class SubmissionToolRegistry extends ToolRegistry {
  *
  * @param server - The MCP server instance to register tools with
  * @param leetcodeService - The LeetCode service implementation to use for API calls
+ * @param sessions - Optional session service used for the strict-mode gate
  */
 export function registerSubmissionTools(
     server: McpServer,
-    leetcodeService: LeetcodeServiceInterface
+    leetcodeService: LeetcodeServiceInterface,
+    sessions?: SessionService
 ): void {
-    const registry = new SubmissionToolRegistry(server, leetcodeService);
+    const registry = new SubmissionToolRegistry(
+        server,
+        leetcodeService,
+        sessions
+    );
     registry.register();
 }
diff --git a/src/runner/runner.ts b/src/runner/runner.ts
new file mode 100644
index 0000000..addb32d
--- /dev/null
+++ b/src/runner/runner.ts
@@ -0,0 +1,45 @@
+/**
+ * The local runner contract — implemented by `SubprocessRunner` for
+ * production and easily faked in tests.
+ *
+ * Tools should depend on this interface, never on the concrete
+ * implementation. Phase 4d will add an alternative implementation that
+ * delegates to a stronger sandbox; Phase 5 will compose this with the
+ * workspace abstraction.
+ */
+import type {
+    RunInput,
+    RunResult,
+    RunnerCapabilities,
+    RunnerLanguage
+} from "../types/index.js";
+
+export interface LocalRunner {
+    /** Runs the user's code; returns the result envelope (never throws on user-code failures). */
+    run(input: RunInput): Promise<RunResult>;
+    /** Snapshot of what the runner detected on this host — drives the `doctor` command. */
+    capabilities(): Promise<RunnerCapabilities>;
+}
+
+/**
+ * Languages the runner currently knows about. Used by the tool layer
+ * for early validation before spawning anything.
+ */
+export const SUPPORTED_LANGUAGES: readonly RunnerLanguage[] = [
+    "python3",
+    "go",
+    "java"
+] as const;
+
+/**
+ * The languages this build of the runner has *implemented*. Phase 4a
+ * ships `python3` only. Phase 4b/4c grow this list.
+ *
+ * Kept distinct from `SUPPORTED_LANGUAGES` so the wire-level
+ * `RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE` error has a single source of
+ * truth: anything in `SUPPORTED_LANGUAGES` but not in this list is a
+ * "coming soon" language.
+ */
+export const IMPLEMENTED_LANGUAGES: readonly RunnerLanguage[] = [
+    "python3"
+] as const;
diff --git a/src/runner/sandbox.ts b/src/runner/sandbox.ts
new file mode 100644
index 0000000..34ed322
--- /dev/null
+++ b/src/runner/sandbox.ts
@@ -0,0 +1,197 @@
+/**
+ * Detect the strongest OS-level sandbox available on this host and turn
+ * a plain command into a sandbox-wrapped command.
+ *
+ * We deliberately ship no JS-level sandbox; the threat model is
+ * "user-running-their-own-code", not "untrusted multi-tenant input". The
+ * sandbox reduces blast radius of accidental rm-rf or runaway loops, not
+ * malicious code escapes.
+ *
+ * Priority:
+ *   - Linux: bwrap > firejail > none
+ *   - macOS: sandbox-exec > none
+ *   - Windows: none (native AppContainer wrappers are too platform-
+ *              specific to ship in v1)
+ *
+ * If nothing is detected the runner falls back to a plain subprocess and
+ * surfaces a `warning` in the `RunResult`. Users who want to refuse to
+ * run without a sandbox can set `LEETCODE_MCP_REQUIRE_SANDBOX=1`; the
+ * tool layer enforces this — the runner only reports.
+ */
+import { execFile as execFileCb } from "node:child_process";
+import { access, constants as fsConstants } from "node:fs/promises";
+import { promisify } from "node:util";
+
+import type { SandboxKind } from "../types/index.js";
+
+const execFile = promisify(execFileCb);
+
+interface DetectedSandbox {
+    kind: SandboxKind;
+    /** Absolute path of the wrapper binary. `detectSandbox` fills it in
+     *  only for `sandbox-exec`; other kinds leave it undefined. */
+    path?: string;
+}
+
+let cached: DetectedSandbox | undefined;
+
+function escapeSandboxProfileString(value: string): string {
+    if (value.includes("\r") || value.includes("\n")) {
+        throw new Error("sandbox-exec profile strings cannot contain newlines");
+    }
+    return value.replace(/[\\"]/g, (ch) => `\\${ch}`); // escape every \ and "
+}
+
+/**
+ * Reports whether `bin args…` (default `--version`) runs to a zero exit
+ * within 1.5 s. Uses the promisified no-shell `execFile`, so `bin` and
+ * `args` are never re-interpreted by `/bin/sh -c`.
+ */
+async function probe(
+    bin: string,
+    args: string[] = ["--version"]
+): Promise<boolean> {
+    let succeeded = false;
+    try {
+        await execFile(bin, args, { timeout: 1500 });
+        succeeded = true;
+    } catch {
+        // Any failure (spawn error, non-zero exit, timeout) means "absent".
+    }
+    return succeeded;
+}
+
+/**
+ * Determines which sandbox wrapper this host offers and memoises the
+ * answer in the module-level `cached` slot, so the probing below runs
+ * again only after `__resetSandboxCacheForTest` has cleared it.
+ */
+export async function detectSandbox(): Promise<DetectedSandbox> {
+    if (cached !== undefined) {
+        return cached;
+    }
+
+    let found: DetectedSandbox = { kind: "none" };
+    switch (process.platform) {
+        case "darwin": {
+            // Check that /usr/bin/sandbox-exec exists and is executable
+            // instead of spawning it: its `-help` flag is undocumented
+            // and exits non-zero on some macOS versions, which would
+            // wrongly report that no sandbox is available.
+            const sandboxExec = "/usr/bin/sandbox-exec";
+            try {
+                await access(sandboxExec, fsConstants.X_OK);
+                found = { kind: "sandbox-exec", path: sandboxExec };
+            } catch {
+                /* missing or not executable — stay on "none" */
+            }
+            break;
+        }
+        case "linux":
+            // Preference order: bwrap first, firejail second.
+            if (await probe("bwrap")) {
+                found = { kind: "bwrap" };
+            } else if (await probe("firejail")) {
+                found = { kind: "firejail" };
+            }
+            break;
+    }
+
+    cached = found;
+    return found;
+}
+
+/**
+ * Wraps `cmd args…` in the sandbox reported by `detectSandbox` and
+ * returns the command to spawn plus the `kind` that was applied. With
+ * no sandbox the input pair comes back untouched as `kind: "none"`.
+ *
+ * `cwdAllowed` is the directory made writable for the wrapped command.
+ */
+export async function wrapWithSandbox(
+    cmd: string,
+    args: string[],
+    cwdAllowed: string
+): Promise<{ cmd: string; args: string[]; kind: SandboxKind }> {
+    const detected = await detectSandbox();
+    if (detected.kind === "bwrap") {
+        return {
+            cmd: "bwrap",
+            args: [
+                "--ro-bind", // host filesystem mounted read-only at /
+                "/",
+                "/",
+                "--tmpfs", // fresh tmpfs over /tmp
+                "/tmp",
+                "--bind", // read-write bind of the run directory
+                cwdAllowed,
+                cwdAllowed,
+                "--proc",
+                "/proc",
+                "--dev",
+                "/dev",
+                "--unshare-all",
+                "--die-with-parent",
+                "--", // end of bwrap options; the real command follows
+                cmd,
+                ...args
+            ],
+            kind: "bwrap"
+        };
+    }
+    if (detected.kind === "firejail") {
+        return {
+            cmd: "firejail",
+            args: [
+                "--quiet",
+                "--noprofile",
+                "--net=none",
+                "--private-tmp",
+                `--whitelist=${cwdAllowed}`,
+                "--", // end of firejail options; the real command follows
+                cmd,
+                ...args
+            ],
+            kind: "firejail"
+        };
+    }
+    if (detected.kind === "sandbox-exec") {
+        // Deny by default; allow reads everywhere, writes under cwdAllowed.
+        // Seatbelt matches resolved paths (/var → /private/var): realpath it.
+        const { realpath } = await import("node:fs/promises");
+        const resolved = await realpath(cwdAllowed).catch(() => cwdAllowed);
+        const writableSubpath = escapeSandboxProfileString(resolved);
+        const profile = `(version 1)
+(deny default)
+(allow process-fork)
+(allow process-exec)
+(allow file-read*)
+(allow file-write* (subpath "${writableSubpath}"))
+(allow file-write* (regex #"^/dev/null$"))
+(allow file-write* (regex #"^/dev/dtracehelper$"))
+(allow sysctl-read)
+(allow mach-lookup)
+(allow signal (target self))
+(allow ipc-posix-shm)
+(deny network*)`;
+        return {
+            cmd: "/usr/bin/sandbox-exec",
+            args: ["-p", profile, cmd, ...args],
+            kind: "sandbox-exec"
+        };
+    }
+    return { cmd, args, kind: "none" };
+}
+
+/** Test helper — drops the memoised result so the next `detectSandbox()` probes again. */
+export function __resetSandboxCacheForTest(): void {
+    cached = undefined;
+}
+
+/**
+ * Test helper — pre-populates the memoised detection result so that
+ * `detectSandbox()` returns `next` without probing the host.
+ */
+export function __setSandboxCacheForTest(next: DetectedSandbox): void {
+    cached = next;
+}
diff --git a/src/runner/subprocess-runner.ts b/src/runner/subprocess-runner.ts
new file mode 100644
index 0000000..c816206
--- /dev/null
+++ b/src/runner/subprocess-runner.ts
@@ -0,0 +1,377 @@
+/**
+ * Plain-subprocess `LocalRunner` implementation.
+ *
+ * Per-language registry (currently `python3`) describes how to:
+ *   - probe whether the runtime is available on PATH
+ *   - spawn the runtime against a source file written to the run's
+ *     temp dir
+ *
+ * Probes run lazily on the first `run()` for the language and the
+ * results are cached for the lifetime of the process.
+ *
+ * Safety nets every run gets, even with no OS sandbox:
+ *   - per-process wall-clock timeout (default 5_000 ms; configurable
+ *     per `RunInput`)
+ *   - clean env (just PATH / HOME / LANG forwarded — secrets in the
+ *     user's shell never leak in)
+ *   - cwd is a freshly-mkdtemp'd directory under the OS tmp; it is
+ *     removed after the run regardless of outcome
+ *   - stdout/stderr captured with a 1 MB ceiling; runaway output gets
+ *     truncated with a marker rather than blowing memory
+ */
+import {
+    execFile as execFileCb,
+    spawn,
+    type ChildProcess
+} from "node:child_process";
+import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { performance } from "node:perf_hooks";
+import { promisify } from "node:util";
+
+import type {
+    RunInput,
+    RunResult,
+    RunnerCapabilities,
+    RunnerLanguage,
+    SandboxKind
+} from "../types/index.js";
+import { ErrorCode, LeetCodeError } from "../types/index.js";
+import logger from "../utils/logger.js";
+import type { LocalRunner } from "./runner.js";
+import { IMPLEMENTED_LANGUAGES, SUPPORTED_LANGUAGES } from "./runner.js";
+import { wrapWithSandbox } from "./sandbox.js";
+
+// `execFile` (no shell) — never `promisify(exec)`, which routes through
+// `/bin/sh -c` and is a shell-expansion foot-gun if anyone interpolates
+// a dynamic value into a probe in the future.
+const execFile = promisify(execFileCb);
+
+const MAX_OUTPUT_BYTES = 1_000_000; // 1 MB per stream
+const DEFAULT_TIMEOUT_MS = 5_000;
+const TRUNCATION_MARKER = "\n[...output truncated at 1 MB...]";
+
+interface LanguageSpec {
+    /** File extension (without dot) used for the temp source file. */
+    extension: string;
+    /** `[binary, args]` to probe — exit code 0 means available. */
+    probe: { cmd: string; args: string[] };
+    /**
+     * Build the spawn args given the path of the source file we wrote
+     * for this run. Compiled languages (Go, Java) will hook in extra
+     * compile steps via subclassing later.
+     */
+    buildArgs(sourcePath: string): { cmd: string; args: string[] };
+}
+
+const LANGUAGES: Record<RunnerLanguage, LanguageSpec> = {
+    python3: {
+        extension: "py",
+        probe: { cmd: "python3", args: ["--version"] },
+        buildArgs: (sourcePath) => ({
+            cmd: "python3",
+            args: [sourcePath]
+        })
+    },
+    // Phase 4b/4c stubs — present in the registry so the type system
+    // requires they stay in sync with `RunnerLanguage`. The runner
+    // refuses to use these until we actually wire harnesses.
+    go: {
+        extension: "go",
+        probe: { cmd: "go", args: ["version"] },
+        buildArgs: () => {
+            throw new LeetCodeError(
+                ErrorCode.RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE,
+                "Go runner ships in Phase 4b"
+            );
+        }
+    },
+    java: {
+        extension: "java",
+        probe: { cmd: "java", args: ["-version"] },
+        buildArgs: () => {
+            throw new LeetCodeError(
+                ErrorCode.RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE,
+                "Java runner ships in Phase 4c"
+            );
+        }
+    }
+};
+
+interface ProbeResult {
+    available: boolean;
+    version?: string;
+    path?: string;
+}
+
+const probeCache = new Map<RunnerLanguage, ProbeResult>();
+
+/**
+ * Probes the runtime for `language` by running its `probe` command and
+ * memoises the outcome — success or failure — in `probeCache`.
+ */
+async function probeLanguage(language: RunnerLanguage): Promise<ProbeResult> {
+    const known = probeCache.get(language);
+    if (known !== undefined) {
+        return known;
+    }
+
+    const { cmd, args } = LANGUAGES[language].probe;
+    let banner: string;
+    try {
+        const probed = await execFile(cmd, args, { timeout: 2000 });
+        // `python3 --version` and `go version` print to stdout, while
+        // `java -version` historically prints to stderr — take either.
+        banner = probed.stdout || probed.stderr || "";
+    } catch (error) {
+        const missing: ProbeResult = { available: false };
+        probeCache.set(language, missing);
+        logger.debug(
+            { language, error: (error as Error)?.message },
+            "Language probe failed"
+        );
+        return missing;
+    }
+
+    const found: ProbeResult = {
+        available: true,
+        version: banner.split("\n")[0]?.trim() || undefined
+    };
+    try {
+        const located = await execFile("which", [cmd], { timeout: 1000 });
+        found.path = located.stdout.trim() || undefined;
+    } catch {
+        /* `which` may not exist (Windows); leave `path` undefined */
+    }
+
+    probeCache.set(language, found);
+    return found;
+}
+
+/** Test helper — empties `probeCache` so the next `probeLanguage` call probes afresh. */
+export function __resetProbeCacheForTest(): void {
+    probeCache.clear();
+}
+
+/** Decodes `buf` as UTF-8, keeping at most `MAX_OUTPUT_BYTES` bytes. */
+function clampOutput(buf: Buffer): string {
+    const overflowed = buf.length > MAX_OUTPUT_BYTES;
+    const kept = overflowed ? buf.subarray(0, MAX_OUTPUT_BYTES) : buf;
+    const text = kept.toString("utf-8");
+    // Only a shortened buffer gets the marker appended.
+    return overflowed ? text + TRUNCATION_MARKER : text;
+}
+
+/** Sends `signal` to the child's process group (POSIX) or to the child itself (Windows). */
+function killProcessTree(child: ChildProcess, signal: NodeJS.Signals): void {
+    const { pid } = child;
+    if (pid === undefined) return; // no pid — nothing to signal
+    let signalChildOnly = process.platform === "win32";
+    if (!signalChildOnly) {
+        try {
+            process.kill(-pid, signal); // negative pid targets the group
+        } catch (error) {
+            // ESRCH: the group is already gone. Otherwise fall back.
+            signalChildOnly =
+                (error as NodeJS.ErrnoException).code !== "ESRCH";
+        }
+    }
+    if (signalChildOnly) child.kill(signal);
+}
+
+export class SubprocessRunner implements LocalRunner {
+    /** Probes every supported language and reports the detected host sandbox. */
+    async capabilities(): Promise<RunnerCapabilities> {
+        const languages = await Promise.all(
+            SUPPORTED_LANGUAGES.map(async (language) => {
+                const probe = await probeLanguage(language);
+                return {
+                    language,
+                    available: probe.available,
+                    version: probe.version,
+                    path: probe.path
+                };
+            })
+        );
+        // `./sandbox.js` is already a static import of this module (for
+        // `wrapWithSandbox`); the inline import resolves that same module.
+        const { detectSandbox } = await import("./sandbox.js");
+        const detected = await detectSandbox();
+        return {
+            languages,
+            sandbox: {
+                kind: detected.kind,
+                available: detected.kind !== "none"
+            }
+        };
+    }
+
+    /** Runs `input.code` in a fresh temp directory that is removed afterwards, whatever the outcome. */
+    async run(input: RunInput): Promise<RunResult> {
+        if (!IMPLEMENTED_LANGUAGES.includes(input.language)) {
+            throw new LeetCodeError(
+                ErrorCode.RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE,
+                `Local runner has no harness for ${input.language} yet`
+            );
+        }
+
+        const probe = await probeLanguage(input.language);
+        if (!probe.available) {
+            throw new LeetCodeError(
+                ErrorCode.LANGUAGE_RUNTIME_NOT_FOUND,
+                `Required runtime for ${input.language} not found on PATH`
+            );
+        }
+
+        const spec = LANGUAGES[input.language];
+        const timeoutMs = input.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+        const workDir = await mkdtemp(join(tmpdir(), "leetcode-mcp-run-"));
+        const sourcePath = join(workDir, `solution.${spec.extension}`);
+
+        try {
+            await writeFile(sourcePath, input.code, "utf-8");
+            const baseArgs = spec.buildArgs(sourcePath);
+            const wrapped = await wrapWithSandbox(
+                baseArgs.cmd,
+                baseArgs.args,
+                workDir
+            );
+
+            return await this.spawnAndCapture({
+                cmd: wrapped.cmd,
+                args: wrapped.args,
+                cwd: workDir,
+                timeoutMs,
+                sandbox: wrapped.kind
+            });
+        } finally {
+            await rm(workDir, { recursive: true, force: true }).catch(
+                (error) => {
+                    logger.debug(
+                        { error: (error as Error)?.message, workDir },
+                        "Failed to clean up runner workdir"
+                    );
+                }
+            );
+        }
+    }
+
+    /** Spawns the command and resolves (never rejects) with the captured result. */
+    private spawnAndCapture(options: {
+        cmd: string;
+        args: string[];
+        cwd: string;
+        timeoutMs: number;
+        sandbox: SandboxKind;
+    }): Promise<RunResult> {
+        return new Promise((resolve) => {
+            const start = performance.now();
+            const child = spawn(options.cmd, options.args, {
+                cwd: options.cwd,
+                env: {
+                    PATH: process.env.PATH ?? "",
+                    HOME: options.cwd,
+                    LANG: process.env.LANG ?? "C.UTF-8"
+                },
+                detached: process.platform !== "win32",
+                stdio: ["ignore", "pipe", "pipe"]
+            });
+
+            const stdout: Buffer[] = [];
+            const stderr: Buffer[] = [];
+            let stdoutBytes = 0;
+            let stderrBytes = 0;
+            let timedOut = false;
+            let killTimer: NodeJS.Timeout | undefined;
+
+            // Buffer at most `MAX_OUTPUT_BYTES` per stream: the chunk that
+            // crosses the limit is sliced to the remaining headroom and
+            // later chunks are dropped. The returned count is the total
+            // bytes *seen*, so a value above the limit marks the stream
+            // as truncated for `finalize`.
+            const captureChunk = (
+                buffers: Buffer[],
+                seen: number,
+                chunk: Buffer
+            ): number => {
+                const remaining = MAX_OUTPUT_BYTES - seen;
+                if (remaining > 0) {
+                    buffers.push(
+                        chunk.length <= remaining
+                            ? chunk
+                            : chunk.subarray(0, remaining)
+                    );
+                }
+                return seen + chunk.length;
+            };
+
+            child.stdout?.on("data", (chunk: Buffer) => {
+                stdoutBytes = captureChunk(stdout, stdoutBytes, chunk);
+            });
+            child.stderr?.on("data", (chunk: Buffer) => {
+                stderrBytes = captureChunk(stderr, stderrBytes, chunk);
+            });
+
+            const timer = setTimeout(() => {
+                timedOut = true;
+                // SIGTERM first; if the child ignores it, hard SIGKILL
+                // 500 ms later. Belt + braces for runaway loops.
+                killProcessTree(child, "SIGTERM");
+                killTimer = setTimeout(
+                    () => killProcessTree(child, "SIGKILL"),
+                    500
+                );
+            }, options.timeoutMs);
+
+            let settled = false;
+
+            const finalize = (
+                exitCode: number | null,
+                runnerError?: Error
+            ): void => {
+                if (settled) {
+                    return;
+                }
+                settled = true;
+                clearTimeout(timer);
+                clearTimeout(killTimer);
+                const durationMs = Math.round(performance.now() - start);
+                const passed = !timedOut && !runnerError && exitCode === 0;
+                // The buffers never exceed the cap, so `clampOutput` alone
+                // cannot tell that output was dropped; use the seen counts.
+                const render = (chunks: Buffer[], seen: number): string =>
+                    clampOutput(Buffer.concat(chunks)) +
+                    (seen > MAX_OUTPUT_BYTES ? TRUNCATION_MARKER : "");
+                const stderrText =
+                    render(stderr, stderrBytes) +
+                    (runnerError
+                        ? `\n[runner error: ${runnerError.message}]`
+                        : "");
+                resolve({
+                    passed,
+                    exitCode,
+                    stdout: render(stdout, stdoutBytes),
+                    stderr: stderrText,
+                    timedOut: runnerError ? false : timedOut,
+                    durationMs,
+                    sandbox: options.sandbox,
+                    warning:
+                        !runnerError && options.sandbox === "none"
+                            ? "No OS sandbox available on this host; ran without isolation."
+                            : undefined
+                });
+            };
+
+            // A signal-terminated child reports `code === null`; pass it on.
+            child.on("close", (code) => finalize(code));
+            child.on("error", (error) => {
+                logger.warn(
+                    { error: error.message, cmd: options.cmd },
+                    "Runner subprocess errored before exit"
+                );
+                finalize(null, error);
+            });
+        });
+    }
+}
diff --git a/src/types/errors.ts b/src/types/errors.ts
new file mode 100644
index 0000000..b3ffad1
--- /dev/null
+++ b/src/types/errors.ts
@@ -0,0 +1,98 @@
+/**
+ * Structured error codes for the LeetCode MCP server.
+ *
+ * Tools and the service layer should throw `LeetCodeError` with one of these
+ * codes instead of stringly-typed `Error`s, so the MCP layer can map them to
+ * predictable, machine-readable error envelopes.
+ */
+export const ErrorCode = {
+    /** Caller is not authenticated — credentials missing or invalid. */
+    AUTH_REQUIRED: "AUTH_REQUIRED",
+    /** Stored credentials were rejected by LeetCode (expired / revoked). */
+    AUTH_INVALID: "AUTH_INVALID",
+    /** Requested LeetCode problem slug doesn't exist. */
+    PROBLEM_NOT_FOUND: "PROBLEM_NOT_FOUND",
+    /** Requested solution article doesn't exist. */
+    SOLUTION_NOT_FOUND: "SOLUTION_NOT_FOUND",
+    /** Submission language isn't supported. */
+    LANGUAGE_UNSUPPORTED: "LANGUAGE_UNSUPPORTED",
+    /** LeetCode rejected the request as rate-limited. */
+    RATE_LIMITED: "RATE_LIMITED",
+    /** Submission polling timed out before LeetCode produced a verdict. */
+    SUBMISSION_TIMEOUT: "SUBMISSION_TIMEOUT",
+    /** Network failure talking to LeetCode (DNS, connection refused, etc). */
+    NETWORK_ERROR: "NETWORK_ERROR",
+    /** LeetCode returned a payload that didn't match the expected schema. */
+    UPSTREAM_PAYLOAD_INVALID: "UPSTREAM_PAYLOAD_INVALID",
+    /** Catch-all for unexpected upstream errors. */
+    UPSTREAM_ERROR: "UPSTREAM_ERROR",
+    /**
+     * Tutoring gate: the caller asked for content (typically a full
+     * solution) that is gated behind a higher hint level than the active
+     * session has reached. The pedagogy state machine refuses; the agent
+     * is expected to drive the user through `request_hint` first.
+     */
+    HINT_LEVEL_TOO_LOW: "HINT_LEVEL_TOO_LOW",
+    /**
+     * Tutoring gate: the operation requires an active session for a
+     * particular problem slug, but no `start_problem` has been called for
+     * it (or the session was reset).
+     */
+    SESSION_NOT_FOUND: "SESSION_NOT_FOUND",
+    /**
+     * `run_local_tests` was asked for a language the local runner has no
+     * harness for. `submit_solution` keeps working for these languages —
+     * the runner is purely additive.
+     */
+    RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE: "RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE",
+    /**
+     * The language is supported in principle but the required runtime
+     * binary (e.g. `python3`, `go`, `java`) was not found on PATH. The
+     * `doctor` subcommand reports which runtimes are detected.
+     */
+    LANGUAGE_RUNTIME_NOT_FOUND: "LANGUAGE_RUNTIME_NOT_FOUND",
+    /**
+     * The user's code exceeded the per-run wall-clock budget. The runner
+     * killed the process; partial output (if any) is included in the
+     * result envelope.
+     */
+    RUNNER_TIMEOUT: "RUNNER_TIMEOUT",
+    /**
+     * `LEETCODE_MCP_REQUIRE_SANDBOX=1` is set but no OS sandbox tool was
+     * found on this host. The runner refuses to fall back to the unsandboxed
+     * subprocess path.
+     */
+    SANDBOX_REQUIRED: "SANDBOX_REQUIRED",
+    /**
+     * Strict mode is enabled (`LEETCODE_MCP_STRICT_MODE=1`) and
+     * `submit_solution` was called before `run_local_tests` last passed.
+     * Drives the recommended local-first practice loop.
+     */
+    LOCAL_TESTS_NOT_PASSED: "LOCAL_TESTS_NOT_PASSED"
+} as const;
+
+export type ErrorCodeValue = (typeof ErrorCode)[keyof typeof ErrorCode];
+
+/**
+ * Service-layer error tagged with a structured, machine-readable `code`.
+ *
+ * Handlers can branch on `error.code` when choosing a user-facing message
+ * rather than pattern-matching the free-form `error.message` text.
+ */
+export class LeetCodeError extends Error {
+    public readonly code: ErrorCodeValue;
+
+    constructor(code: ErrorCodeValue, message: string, cause?: unknown) {
+        // Hand `cause` to the built-in ES2022 `Error` options instead of
+        // redeclaring it as a class field, so the standard cause chain
+        // stays visible to loggers and stack-walkers.
+        super(message, cause !== undefined ? { cause } : undefined);
+        this.code = code;
+        this.name = "LeetCodeError";
+    }
+}
+
+/** Type guard that narrows `value` to `LeetCodeError` via `instanceof`. */
+export function isLeetCodeError(value: unknown): value is LeetCodeError {
+    return value instanceof LeetCodeError;
+}
diff --git a/src/types/index.ts b/src/types/index.ts
new file mode 100644
index 0000000..e9f0d85
--- /dev/null
+++ b/src/types/index.ts
@@ -0,0 +1,14 @@
+/**
+ * Re-export hub for the type contracts used across the codebase.
+ *
+ * Prefer `import { Problem } from "./types/index.js"` over digging into the
+ * individual files — keeps imports stable as we reorganize.
+ */
+export * from "./credentials.js";
+export * from "./errors.js";
+export * from "./problem.js";
+export * from "./runner.js";
+export * from "./session.js";
+export * from "./solution.js";
+export * from "./submission.js";
+export * from "./user.js";
diff --git a/src/types/problem.ts b/src/types/problem.ts
new file mode 100644
index 0000000..690a767
--- /dev/null
+++ b/src/types/problem.ts
@@ -0,0 +1,94 @@
+/**
+ * Problem-related type contracts.
+ *
+ * These describe the shapes returned by the LeetcodeServiceInterface methods
+ * (`fetchProblem`, `fetchProblemSimplified`, `searchProblems`, etc.) — not the
+ * raw GraphQL payloads from `leetcode-query`. The service layer is responsible
+ * for projecting the upstream data into these shapes.
+ */
+
+/** A `langSlug -> starter code` snippet attached to a problem. */
+export interface CodeSnippet {
+    lang: string;
+    langSlug: string;
+    code: string;
+}
+
+/** A topic tag on a LeetCode problem (e.g. `array`, `hash-table`). */
+export interface TopicTag {
+    name?: string;
+    slug: string;
+}
+
+/** A neighbour problem reference returned by `similarQuestions`. */
+export interface SimilarQuestion {
+    titleSlug: string;
+    difficulty: string;
+}
+
+/**
+ * Full problem payload as returned by the upstream leetcode-query library.
+ *
+ * Many fields are optional because LeetCode populates different subsets
+ * depending on whether the caller is authenticated and whether the problem is
+ * paid-only.
+ */
+export interface Problem {
+    questionId: string;
+    questionFrontendId?: string;
+    title: string;
+    titleSlug: string;
+    difficulty: string;
+    content?: string | null;
+    isPaidOnly?: boolean;
+    topicTags?: TopicTag[];
+    codeSnippets?: CodeSnippet[];
+    hints?: string[];
+    sampleTestCase?: string;
+    exampleTestcases?: string;
+    /** JSON-encoded array of similar-question metadata. */
+    similarQuestions?: string;
+    stats?: string;
+    metaData?: string;
+    [key: string]: unknown;
+}
+
+/**
+ * Trimmed-down problem payload returned by `fetchProblemSimplified` —
+ * the fields most useful to the AI agent without the upstream noise.
+ */
+export interface SimplifiedProblem {
+    titleSlug: string;
+    questionId: string;
+    title: string;
+    content?: string | null;
+    difficulty: string;
+    topicTags: string[];
+    codeSnippets: CodeSnippet[];
+    exampleTestcases?: string;
+    hints?: string[];
+    similarQuestions: SimilarQuestion[];
+}
+
+/** A row in the search-problems result list. */
+export interface ProblemSummary {
+    title: string;
+    titleSlug: string;
+    difficulty: string;
+    acRate: number;
+    topicTags: string[];
+}
+
+/** Result envelope for `searchProblems`. */
+export interface ProblemSearchResult {
+    total: number;
+    questions: ProblemSummary[];
+}
+
+/** The daily-challenge envelope returned by `fetchDailyChallenge`. */
+export interface DailyChallenge {
+    date?: string;
+    link?: string;
+    question?: Problem;
+    [key: string]: unknown;
+}
diff --git a/src/types/runner.ts b/src/types/runner.ts
new file mode 100644
index 0000000..8f3078c
--- /dev/null
+++ b/src/types/runner.ts
@@ -0,0 +1,94 @@
+/**
+ * Wire types for the local code runner introduced in Phase 4.
+ *
+ * The runner is intentionally simple: callers hand it a string of code
+ * plus a language tag, and get back a result envelope describing what the
+ * subprocess did. There is no per-problem harness logic at this layer —
+ * harnesses live one floor up, in `src/runner/harnesses/*`, and inject
+ * test scaffolding into the source before it reaches the runner.
+ */
+
+/**
+ * Languages the local runner knows how to execute.
+ *
+ * Phase 4a ships `python3` only; Phase 4b/4c add `go` and `java`. Other
+ * LeetCode languages remain valid for `submit_solution` but
+ * `run_local_tests` will reject them with
+ * `RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE`.
+ */
+export type RunnerLanguage = "python3" | "go" | "java";
+
+/**
+ * What the runner detected when it tried to spawn an isolated subprocess.
+ *
+ * - `none`     — plain subprocess, no OS-level sandbox (always available)
+ * - `bwrap`    — Linux: bubblewrap with read-only fs + writable tmp + no net
+ * - `firejail` — Linux fallback when bwrap isn't installed
+ * - `sandbox-exec` — macOS: built-in `sandbox-exec` profile
+ *
+ * Reported alongside every `RunResult` so callers can show "ran in
+ * bwrap sandbox" without parsing logs.
+ */
+export type SandboxKind = "none" | "bwrap" | "firejail" | "sandbox-exec";
+
+export interface RunInput {
+    /**
+     * LeetCode problem slug. Used by the tool layer to look up the
+     * active session and update `lastLocalRunPassed`. Not consumed by
+     * the runner itself.
+     */
+    titleSlug: string;
+    /** Language to run as. */
+    language: RunnerLanguage;
+    /**
+     * Source code to execute, exactly as the runner should receive it.
+     * The harness layer is responsible for any wrapping, scaffolding, or
+     * test-driver injection before this string is built.
+     */
+    code: string;
+    /**
+     * Wall-clock budget in milliseconds. Defaults to 5_000 if omitted.
+     * The runner kills the subprocess when this elapses and returns
+     * `timedOut: true` with whatever partial output was captured.
+     */
+    timeoutMs?: number;
+}
+
+export interface RunResult {
+    /** Convenience flag: `exitCode === 0 && !timedOut` (no runner error). */
+    passed: boolean;
+    /** Exit code; `null` if killed by a signal or the runner errored. */
+    exitCode: number | null;
+    /** Captured stdout, truncated to ~1 MB. */
+    stdout: string;
+    /** Captured stderr, truncated to ~1 MB; runner errors are appended. */
+    stderr: string;
+    /** Whether the wall-clock budget was hit; `false` on a runner error. */
+    timedOut: boolean;
+    /** Wall-clock time the subprocess ran for, in milliseconds. */
+    durationMs: number;
+    /** Which sandbox (if any) was used. See {@link SandboxKind}. */
+    sandbox: SandboxKind;
+    /**
+     * Human-readable note when something interesting happened that the
+     * caller should know about — e.g. "no OS sandbox available on this
+     * host; ran without isolation". Omitted on the happy path.
+     */
+    warning?: string;
+}
+
+/** Capability snapshot the `doctor` subcommand renders to the user. */
+export interface RunnerCapabilities {
+    /** Per-language runtime detection results (see `available`). */
+    languages: Array<{
+        language: RunnerLanguage;
+        available: boolean;
+        version?: string;
+        path?: string;
+    }>;
+    /** A single sandbox kind and whether it is available on this host. */
+    sandbox: {
+        kind: SandboxKind;
+        available: boolean;
+    };
+}
diff --git a/src/types/session.ts b/src/types/session.ts
new file mode 100644
index 0000000..3b4e9ab
--- /dev/null
+++ b/src/types/session.ts
@@ -0,0 +1,65 @@
+/**
+ * Per-problem session state — the durable record the pedagogy state machine
+ * reads and writes.
+ *
+ * One file per problem slug under `~/.leetcode-mcp/sessions/<slug>.json`.
+ * Persisted across restarts so the user can step away from a problem and
+ * resume at the same hint level / attempt count later.
+ */
+
+/**
+ * Discrete hint progression. Higher values strictly subsume lower ones —
+ * a session at level 4 has access to everything level 1 unlocked.
+ *
+ * - **0** Initial state after `start_problem`. No hints, no solutions.
+ * - **1** Clarification — restate the problem in the user's own words,
+ *   surface invariants and edge cases. No algorithmic direction yet.
+ * - **2** Approach — high-level paradigm or data structure to consider
+ *   ("what lookup is O(1)?"). No code, no pseudocode.
+ * - **3** Implementation sketch — pseudocode-level structure of a working
+ *   solution. Still does not unlock the canonical full solution.
+ * - **4** Optimal — the canonical full solution and the community
+ *   solutions tools (`get_problem_solution`, `list_problem_solutions`)
+ *   become callable.
+ */
+export type HintLevel = 0 | 1 | 2 | 3 | 4;
+
+export const MAX_HINT_LEVEL = 4 as const;
+
+/**
+ * Lifecycle of a per-problem session. The state machine moves through
+ * these labels as the user (or agent) drives `start_problem` →
+ * `request_hint` ↔ `submit_solution` → `solved`.
+ */
+export type SessionStatus = "started" | "attempting" | "solved" | "abandoned";
+
+export interface SessionState {
+    /** LeetCode problem slug (matches `Problem.titleSlug`). */
+    slug: string;
+    /** Language the user is solving in, when `start_problem` is given one. */
+    language?: string;
+    /** Current hint level. Bumped by `request_hint`. */
+    hintLevel: HintLevel;
+    /** Total submission attempts the session has driven so far. */
+    attempts: number;
+    /**
+     * Outcome of the most recent local-runner invocation. `null` until the
+     * user runs locally for the first time. Wired by Phase 4 (local
+     * runner); kept here so Phase 3 sets the contract.
+     */
+    lastLocalRunPassed: boolean | null;
+    /** Snapshot hash of the code+language pair that last passed locally. */
+    lastLocalRunSnapshot?: string | null;
+    /** Lifecycle label — see {@link SessionStatus}. */
+    status: SessionStatus;
+    /**
+     * Absolute path of the workspace file `start_problem` created for the
+     * user, if any. Workspace awareness lands in Phase 5; this field is
+     * already part of the contract so the file shape is stable.
+     */
+    workspacePath?: string;
+    /** ISO-8601 of session creation. */
+    createdAt: string;
+    /** ISO-8601 of the most recent state transition. */
+    updatedAt: string;
+}
diff --git a/src/types/solution.ts b/src/types/solution.ts
new file mode 100644
index 0000000..5a3f91e
--- /dev/null
+++ b/src/types/solution.ts
@@ -0,0 +1,40 @@
+/**
+ * Solution-article type contracts.
+ *
+ * Solutions are community-written walkthroughs ("Solution articles") that live
+ * under `https://leetcode.com/problems/<slug>/solutions/`. The service layer
+ * fetches them via GraphQL and projects the results into these shapes.
+ */
+
+/** A single solution article in a list. */
+export interface SolutionArticleSummary {
+    topicId?: number | string;
+    slug?: string;
+    title?: string;
+    summary?: string;
+    articleUrl?: string;
+    canSee?: boolean;
+    author?: {
+        username?: string;
+        userSlug?: string;
+        [key: string]: unknown;
+    };
+    [key: string]: unknown;
+}
+
+/** Result envelope for `fetchQuestionSolutionArticles`. */
+export interface SolutionArticleList {
+    totalNum: number;
+    hasNextPage: boolean;
+    articles: SolutionArticleSummary[];
+}
+
+/** Detailed solution article returned by `fetchSolutionArticleDetail`. */
+export interface SolutionArticleDetail {
+    topicId?: number | string;
+    title?: string;
+    slug?: string;
+    summary?: string;
+    content?: string;
+    [key: string]: unknown;
+}
diff --git a/src/types/user.ts b/src/types/user.ts
new file mode 100644
index 0000000..455dab6
--- /dev/null
+++ b/src/types/user.ts
@@ -0,0 +1,113 @@
+/**
+ * User / submission-history / contest type contracts.
+ */
+
+/** Result of `fetchUserStatus()` (called for the authenticated user). */
+export interface UserStatus {
+    isSignedIn: boolean;
+    /** `null` when signed out, or signed in but no display username set. */
+    username: string | null;
+    /** `null` when signed out, or signed in but no avatar set. */
+    avatar: string | null;
+    isAdmin: boolean;
+}
+
+/** Result of `fetchUserProfile(username)`. */
+export interface UserProfile {
+    username: string;
+    realName?: string | null;
+    userAvatar?: string | null;
+    countryName?: string | null;
+    githubUrl?: string | null;
+    company?: string | null;
+    school?: string | null;
+    ranking?: number | null;
+    /**
+     * Per-difficulty submission stats (LeetCode returns rows for `All`,
+     * `Easy`, `Medium`, `Hard`). NOTE(review): totals, not solved — confirm.
+     */
+    totalSubmissionNum?: Array<{
+        difficulty: string;
+        count: number;
+        submissions: number;
+    }>;
+    [key: string]: unknown;
+}
+
+/** A single contest a user attended (or skipped). */
+export interface ContestRankingHistoryEntry {
+    attended: boolean;
+    rating?: number;
+    ranking?: number;
+    trendDirection?: string;
+    problemsSolved?: number;
+    totalProblems?: number;
+    finishTimeInSeconds?: number;
+    contest?: {
+        title?: string;
+        startTime?: number;
+    };
+    [key: string]: unknown;
+}
+
+/** Result of `fetchUserContestRanking(username, attended)`. */
+export interface UserContestInfo {
+    userContestRanking?: {
+        attendedContestsCount?: number;
+        rating?: number;
+        globalRanking?: number;
+        totalParticipants?: number;
+        topPercentage?: number;
+        [key: string]: unknown;
+    } | null;
+    userContestRankingHistory: ContestRankingHistoryEntry[];
+    [key: string]: unknown;
+}
+
+/** A single submission row returned by `fetchUserAllSubmissions`. */
+export interface SubmissionRow {
+    id?: string | number;
+    title?: string;
+    titleSlug?: string;
+    timestamp?: string | number;
+    statusDisplay?: string;
+    lang?: string;
+    runtime?: string;
+    memory?: string;
+    [key: string]: unknown;
+}
+
+/** Result envelope for `fetchUserAllSubmissions`. */
+export interface UserAllSubmissions {
+    submissions: SubmissionRow[] | { [key: string]: unknown };
+    [key: string]: unknown;
+}
+
+/** Result envelope for `fetchUserRecentSubmissions`. */
+export interface UserRecentSubmissions {
+    [key: string]: unknown;
+    recentSubmissionList?: SubmissionRow[];
+}
+
+/** Result of `fetchUserRecentACSubmissions` — raw GraphQL passthrough. */
+export interface UserRecentACSubmissions {
+    [key: string]: unknown;
+}
+
+/** Result of `fetchUserSubmissionDetail`. */
+export interface UserSubmissionDetail {
+    id?: number;
+    code?: string;
+    lang?: string;
+    runtime?: string;
+    memory?: string;
+    statusDisplay?: string;
+    [key: string]: unknown;
+}
+
+/** Result of `fetchUserProgressQuestionList`. */
+export interface UserProgressQuestionList {
+    questions?: Array<{ [key: string]: unknown }>;
+    totalNum?: number;
+    [key: string]: unknown;
+}
diff --git a/tests/auth/auth-flow.test.ts b/tests/auth/auth-flow.test.ts
new file mode 100644
index 0000000..088672e
--- /dev/null
+++ b/tests/auth/auth-flow.test.ts
@@ -0,0 +1,159 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import {
+    applyValidatedCredentials,
+    restoreCredentials
+} from "../../src/auth/auth-flow.js";
+import type { LeetcodeServiceInterface } from "../../src/leetcode/leetcode-service-interface.js";
+import type { CredentialsStorage } from "../../src/types/credentials.js";
+
+function makeStorage(
+    overrides: Partial<CredentialsStorage> = {}
+): CredentialsStorage {
+    const defaults: CredentialsStorage = {
+        exists: vi.fn().mockResolvedValue(false),
+        load: vi.fn().mockResolvedValue(null),
+        save: vi.fn().mockResolvedValue(undefined),
+        clear: vi.fn().mockResolvedValue(undefined)
+    };
+    return { ...defaults, ...overrides }; // caller overrides win
+}
+
+function makeService(
+    overrides: Partial<LeetcodeServiceInterface> = {}
+): LeetcodeServiceInterface {
+    const stubs = {
+        validateCredentials: vi.fn().mockResolvedValue("alice"),
+        updateCredentials: vi.fn(),
+        isAuthenticated: vi.fn().mockReturnValue(false)
+    };
+    return { ...stubs, ...overrides } as unknown as LeetcodeServiceInterface;
+}
+
+describe("restoreCredentials", () => {
+    beforeEach(() => {
+        vi.clearAllMocks();
+    });
+
+    it("returns no_credentials when no creds file exists", async () => {
+        const service = makeService();
+        const storage = makeStorage({
+            exists: vi.fn().mockResolvedValue(false)
+        });
+
+        const outcome = await restoreCredentials(service, storage);
+
+        expect(outcome).toEqual({ status: "no_credentials" });
+        expect(service.validateCredentials).not.toHaveBeenCalled();
+        expect(service.updateCredentials).not.toHaveBeenCalled();
+    });
+
+    it("returns invalid/load_failed when file exists but cannot be parsed", async () => {
+        const service = makeService();
+        const storage = makeStorage({
+            exists: vi.fn().mockResolvedValue(true),
+            load: vi.fn().mockResolvedValue(null)
+        });
+
+        const outcome = await restoreCredentials(service, storage);
+
+        expect(outcome).toEqual({ status: "invalid", reason: "load_failed" });
+        expect(service.updateCredentials).not.toHaveBeenCalled();
+    });
+
+    it("returns invalid/expired when LeetCode rejects the saved cookies", async () => {
+        const service = makeService({
+            validateCredentials: vi.fn().mockResolvedValue(null)
+        });
+        const storage = makeStorage({
+            exists: vi.fn().mockResolvedValue(true),
+            load: vi.fn().mockResolvedValue({
+                csrftoken: "csrf",
+                LEETCODE_SESSION: "session"
+            })
+        });
+
+        const outcome = await restoreCredentials(service, storage);
+
+        expect(outcome).toEqual({ status: "invalid", reason: "expired" });
+        expect(service.updateCredentials).not.toHaveBeenCalled();
+    });
+
+    it("returns invalid/expired when validateCredentials throws", async () => {
+        const service = makeService({
+            validateCredentials: vi.fn().mockRejectedValue(new Error("boom"))
+        });
+        const storage = makeStorage({
+            exists: vi.fn().mockResolvedValue(true),
+            load: vi.fn().mockResolvedValue({
+                csrftoken: "csrf",
+                LEETCODE_SESSION: "session"
+            })
+        });
+
+        await expect(restoreCredentials(service, storage)).resolves.toEqual({
+            status: "invalid",
+            reason: "expired"
+        });
+        expect(service.updateCredentials).not.toHaveBeenCalled();
+    });
+
+    it("returns restored and pushes creds into the service when validation succeeds", async () => {
+        const service = makeService({
+            validateCredentials: vi.fn().mockResolvedValue("alice")
+        });
+        const storage = makeStorage({
+            exists: vi.fn().mockResolvedValue(true),
+            load: vi.fn().mockResolvedValue({
+                csrftoken: "csrf-token",
+                LEETCODE_SESSION: "session-token"
+            })
+        });
+
+        const outcome = await restoreCredentials(service, storage);
+
+        expect(outcome).toEqual({ status: "restored", username: "alice" });
+        expect(service.updateCredentials).toHaveBeenCalledWith(
+            "csrf-token",
+            "session-token"
+        );
+    });
+});
+
+describe("applyValidatedCredentials", () => {
+    beforeEach(() => {
+        vi.clearAllMocks();
+    });
+
+    it("returns null and does not update when LeetCode rejects the cookies", async () => {
+        const service = makeService({
+            validateCredentials: vi.fn().mockResolvedValue(null)
+        });
+
+        const username = await applyValidatedCredentials(
+            service,
+            "csrf",
+            "session"
+        );
+
+        expect(username).toBeNull();
+        expect(service.updateCredentials).not.toHaveBeenCalled();
+    });
+
+    it("returns the username and pushes creds into the service when valid", async () => {
+        const service = makeService({
+            validateCredentials: vi.fn().mockResolvedValue("alice")
+        });
+
+        const username = await applyValidatedCredentials(
+            service,
+            "csrf",
+            "session"
+        );
+
+        expect(username).toBe("alice");
+        expect(service.updateCredentials).toHaveBeenCalledWith(
+            "csrf",
+            "session"
+        );
+    });
+});
diff --git a/tests/domain/hint-state-machine.test.ts b/tests/domain/hint-state-machine.test.ts
new file mode 100644
index 0000000..8c27196
--- /dev/null
+++ b/tests/domain/hint-state-machine.test.ts
@@ -0,0 +1,107 @@
+import { describe, expect, it } from "vitest";
+import {
+    SOLUTION_HINT_LEVEL,
+    advanceHint,
+    assertSolutionUnlocked,
+    resetSession
+} from "../../src/domain/hint-state-machine.js";
+import {
+    ErrorCode,
+    LeetCodeError,
+    type HintLevel,
+    type SessionState
+} from "../../src/types/index.js";
+
+function makeSession(overrides: Partial<SessionState> = {}): SessionState {
+    const base: SessionState = {
+        slug: "two-sum",
+        hintLevel: 0,
+        attempts: 0,
+        lastLocalRunPassed: null,
+        status: "started",
+        createdAt: "2025-01-01T00:00:00.000Z",
+        updatedAt: "2025-01-01T00:00:00.000Z"
+    };
+    return { ...base, ...overrides };
+}
+
+describe("advanceHint", () => {
+    it("bumps the hint level by one", () => {
+        const next = advanceHint(makeSession({ hintLevel: 1 }));
+        expect(next.hintLevel).toBe(2);
+    });
+
+    it("stamps updatedAt", () => {
+        const before = makeSession();
+        const next = advanceHint(before);
+        expect(next.updatedAt).not.toBe(before.updatedAt);
+    });
+
+    it("does not mutate the input", () => {
+        const before = makeSession({ hintLevel: 1 });
+        advanceHint(before);
+        expect(before.hintLevel).toBe(1);
+    });
+
+    it("clamps at the maximum level rather than overflowing", () => {
+        const next = advanceHint(
+            makeSession({ hintLevel: SOLUTION_HINT_LEVEL })
+        );
+        expect(next.hintLevel).toBe(SOLUTION_HINT_LEVEL);
+    });
+});
+
+describe("resetSession", () => {
+    it("returns to a level-0, started state", () => {
+        const before = makeSession({
+            hintLevel: 3,
+            attempts: 5,
+            lastLocalRunPassed: true,
+            lastLocalRunSnapshot: "snapshot-1",
+            status: "attempting"
+        });
+        const next = resetSession(before);
+        expect(next.hintLevel).toBe(0);
+        expect(next.attempts).toBe(0);
+        expect(next.lastLocalRunPassed).toBeNull();
+        expect(next.lastLocalRunSnapshot).toBeNull();
+        expect(next.status).toBe("started");
+    });
+
+    it("preserves slug / language / workspacePath", () => {
+        const before = makeSession({
+            language: "python3",
+            workspacePath: "/tmp/two-sum.py",
+            hintLevel: 4
+        });
+        const next = resetSession(before);
+        expect(next.slug).toBe("two-sum");
+        expect(next.language).toBe("python3");
+        expect(next.workspacePath).toBe("/tmp/two-sum.py");
+    });
+});
+
+describe("assertSolutionUnlocked", () => {
+    it("does not throw when the session is at the maximum hint level", () => {
+        const unlocked = makeSession({ hintLevel: SOLUTION_HINT_LEVEL });
+        expect(() => assertSolutionUnlocked(unlocked)).not.toThrow();
+    });
+
+    it.each([0, 1, 2, 3] satisfies HintLevel[] as HintLevel[])(
+        "throws HINT_LEVEL_TOO_LOW when the session is at level %d",
+        (level) => {
+            // Capture whatever is thrown; `undefined` (nothing thrown)
+            // fails the `instanceof` assertion below.
+            let caught: unknown;
+            try {
+                assertSolutionUnlocked(makeSession({ hintLevel: level }));
+            } catch (err) {
+                caught = err;
+            }
+            expect(caught).toBeInstanceOf(LeetCodeError);
+            expect((caught as LeetCodeError).code).toBe(
+                ErrorCode.HINT_LEVEL_TOO_LOW
+            );
+        }
+    );
+});
diff --git a/tests/domain/pedagogy.test.ts b/tests/domain/pedagogy.test.ts
new file mode 100644
index 0000000..012ff98
--- /dev/null
+++ b/tests/domain/pedagogy.test.ts
@@ -0,0 +1,58 @@
+import { describe, expect, it } from "vitest";
+import { generateHint } from "../../src/domain/pedagogy.js";
+import type { SimplifiedProblem } from "../../src/types/index.js";
+
+const TWO_SUM: SimplifiedProblem = {
+    titleSlug: "two-sum",
+    questionId: "1",
+    title: "Two Sum",
+    content: "<p>Find two indices that sum to target.</p>",
+    difficulty: "Easy",
+    topicTags: ["array", "hash-table"],
+    codeSnippets: [],
+    exampleTestcases: "[2,7,11,15]\n9",
+    hints: ["A hash map gives O(1) lookup."],
+    similarQuestions: []
+};
+
+describe("generateHint", () => {
+    it("level 1 restates the problem and surfaces example testcases", () => {
+        const hint = generateHint(TWO_SUM, 1);
+        expect(hint).toContain("Level 1");
+        expect(hint).toContain("Two Sum");
+        expect(hint).toContain("[2,7,11,15]");
+    });
+
+    it("level 2 references the topic tags but does not give code", () => {
+        const hint = generateHint(TWO_SUM, 2);
+        expect(hint).toContain("Level 2");
+        expect(hint).toContain("array");
+        expect(hint).toContain("hash-table");
+        // No literal code blocks should appear at level 2.
+        expect(hint).not.toMatch(/```python|```js|```ts/);
+    });
+
+    it("level 3 surfaces the upstream LeetCode hint when available", () => {
+        const hint = generateHint(TWO_SUM, 3);
+        expect(hint).toContain("Level 3");
+        expect(hint).toContain("hash map");
+    });
+
+    it("level 4 announces solution unlock", () => {
+        const hint = generateHint(TWO_SUM, 4);
+        expect(hint).toContain("Level 4");
+        expect(hint).toContain("get_problem_solution");
+    });
+
+    it("does not crash on a problem with no hints / examples / tags", () => {
+        const sparse: SimplifiedProblem = {
+            ...TWO_SUM,
+            topicTags: [],
+            hints: [],
+            exampleTestcases: ""
+        };
+        for (const level of [1, 2, 3, 4] as const) {
+            expect(generateHint(sparse, level).length).toBeGreaterThan(0);
+        }
+    });
+});
diff --git a/tests/domain/session-service.test.ts b/tests/domain/session-service.test.ts
new file mode 100644
index 0000000..e649df8
--- /dev/null
+++ b/tests/domain/session-service.test.ts
@@ -0,0 +1,112 @@
+/**
+ * Unit tests for SessionService methods that don't already have
+ * coverage via the e2e/integration suites — primarily the Phase 4
+ * additions (`requireSession`, `recordLocalRun`).
+ */
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { SessionService } from "../../src/domain/session-service.js";
+import { FileSessionStore } from "../../src/domain/session-store.js";
+import { ErrorCode, isLeetCodeError } from "../../src/types/index.js";
+
+describe("SessionService — Phase 4 additions", () => {
+    let dir: string;
+    let service: SessionService;
+
+    beforeEach(async () => {
+        dir = await mkdtemp(join(tmpdir(), "leetcode-mcp-svc-"));
+        service = new SessionService(new FileSessionStore({ dir }));
+    });
+
+    afterEach(async () => {
+        await rm(dir, { recursive: true, force: true });
+    });
+
+    describe("requireSession", () => {
+        it("returns the session when present", async () => {
+            const session = await service.startOrResume({ slug: "two-sum" });
+            const fetched = await service.requireSession("two-sum");
+            expect(fetched.slug).toBe(session.slug);
+        });
+
+        it("throws SESSION_NOT_FOUND when no session exists", async () => {
+            await expect(async () => {
+                await service.requireSession("never-opened");
+            }).rejects.toSatisfy(
+                (error: unknown) =>
+                    isLeetCodeError(error) &&
+                    error.code === ErrorCode.SESSION_NOT_FOUND
+            );
+        });
+    });
+
+    describe("recordLocalRun", () => {
+        it("increments attempts and stores lastLocalRunPassed", async () => {
+            await service.startOrResume({ slug: "two-sum" });
+
+            const after1 = await service.recordLocalRun("two-sum", false);
+            expect(after1.attempts).toBe(1);
+            expect(after1.lastLocalRunPassed).toBe(false);
+            expect(after1.lastLocalRunSnapshot).toBeNull();
+            expect(after1.status).toBe("attempting");
+
+            const after2 = await service.recordLocalRun(
+                "two-sum",
+                true,
+                "snapshot-1"
+            );
+            expect(after2.attempts).toBe(2);
+            expect(after2.lastLocalRunPassed).toBe(true);
+            expect(after2.lastLocalRunSnapshot).toBe("snapshot-1");
+            // Status should not regress from "attempting".
+            expect(after2.status).toBe("attempting");
+        });
+
+        it("demotes solved sessions after a failing local run", async () => {
+            const store = new FileSessionStore({ dir });
+            const session = await service.startOrResume({ slug: "two-sum" });
+            await store.save({ ...session, status: "solved" });
+
+            const after = await service.recordLocalRun("two-sum", false);
+
+            expect(after.status).toBe("attempting");
+            expect(after.lastLocalRunPassed).toBe(false);
+        });
+
+        it("persists across service instances", async () => {
+            await service.startOrResume({ slug: "two-sum" });
+            await service.recordLocalRun("two-sum", true);
+
+            // Reload from disk via a fresh service.
+            const reloaded = new SessionService(new FileSessionStore({ dir }));
+            const session = await reloaded.requireSession("two-sum");
+            expect(session.attempts).toBe(1);
+            expect(session.lastLocalRunPassed).toBe(true);
+        });
+
+        it("serializes concurrent local-run updates", async () => {
+            await service.startOrResume({ slug: "two-sum" });
+
+            await Promise.all(
+                Array.from({ length: 10 }, (_, index) =>
+                    service.recordLocalRun("two-sum", index % 2 === 0)
+                )
+            );
+
+            const session = await service.requireSession("two-sum");
+            expect(session.attempts).toBe(10);
+        });
+
+        it("throws SESSION_NOT_FOUND when no session exists", async () => {
+            await expect(async () => {
+                await service.recordLocalRun("never-opened", true);
+            }).rejects.toSatisfy(
+                (error: unknown) =>
+                    isLeetCodeError(error) &&
+                    error.code === ErrorCode.SESSION_NOT_FOUND
+            );
+        });
+    });
+});
diff --git a/tests/domain/session-store.test.ts b/tests/domain/session-store.test.ts
new file mode 100644
index 0000000..9283757
--- /dev/null
+++ b/tests/domain/session-store.test.ts
@@ -0,0 +1,75 @@
+import { mkdtemp, rm, stat, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { FileSessionStore } from "../../src/domain/session-store.js";
+import type { SessionState } from "../../src/types/index.js";
+
+function makeSession(overrides: Partial<SessionState> = {}): SessionState {
+    return {
+        slug: "two-sum",
+        hintLevel: 0,
+        attempts: 0,
+        lastLocalRunPassed: null,
+        status: "started",
+        createdAt: "2025-01-01T00:00:00.000Z",
+        updatedAt: "2025-01-01T00:00:00.000Z",
+        ...overrides
+    };
+}
+
+describe("FileSessionStore", () => {
+    let dir: string;
+    let store: FileSessionStore;
+
+    beforeEach(async () => {
+        dir = await mkdtemp(join(tmpdir(), "leetcode-mcp-session-"));
+        store = new FileSessionStore({ dir });
+    });
+
+    afterEach(async () => {
+        await rm(dir, { recursive: true, force: true });
+    });
+
+    it("returns null for a slug that has never been saved", async () => {
+        expect(await store.load("two-sum")).toBeNull();
+    });
+
+    it("round-trips a saved session through load()", async () => {
+        const session = makeSession({ hintLevel: 2, attempts: 1 });
+        await store.save(session);
+        expect(await store.load("two-sum")).toEqual(session);
+    });
+
+    it("creates the sessions directory on save", async () => {
+        const subdir = join(dir, "nested", "sessions");
+        const subStore = new FileSessionStore({ dir: subdir });
+        await subStore.save(makeSession());
+        const info = await stat(subdir);
+        expect(info.isDirectory()).toBe(true);
+    });
+
+    it("delete is idempotent — removing a missing session does not throw", async () => {
+        await expect(store.delete("never-saved")).resolves.toBeUndefined();
+    });
+
+    it("delete removes a saved session", async () => {
+        await store.save(makeSession());
+        await store.delete("two-sum");
+        expect(await store.load("two-sum")).toBeNull();
+    });
+
+    it("rejects slugs with path-traversal characters", () => {
+        expect(() => store.pathFor("../etc/passwd")).toThrow(
+            /Invalid session slug/
+        );
+        expect(() => store.pathFor("two_sum")).toThrow(/Invalid session slug/);
+        expect(() => store.pathFor("Two-Sum")).toThrow(/Invalid session slug/);
+    });
+
+    it("returns null when the JSON file is malformed", async () => {
+        // Write garbage to where load() will look.
+        await writeFile(store.pathFor("two-sum"), "not json {", "utf-8");
+        expect(await store.load("two-sum")).toBeNull();
+    });
+});
diff --git a/tests/e2e/README.md b/tests/e2e/README.md
index 169ac6c..1931bcc 100644
--- a/tests/e2e/README.md
+++ b/tests/e2e/README.md
@@ -1,12 +1,70 @@
 # End-to-End Tests
 
-This directory will host real end-to-end tests that exercise the MCP server as
-a black box: the suite spawns the built binary (`build/index.js`), attaches the
-MCP SDK's `StdioClientTransport`, and mocks LeetCode HTTP via `nock`.
+Real end-to-end tests that exercise the MCP server as a black box: each
+spec spawns the built binary (`build/index.js`) as a child process,
+attaches the MCP SDK's `StdioClientTransport`, and drives the server over
+stdio just like a real MCP client would.
 
-The full e2e harness is defined in §6 of the assessment report and will be
-implemented in a dedicated PR (Phase 2 of the redesign plan). This Phase 0 PR
-only sets up the directory and a placeholder spec so that `npm run test:e2e`
-exits 0 instead of 1 with "No test files found".
+## Running
 
-Once the harness lands, the placeholder will be removed.
+```bash
+npm run test:e2e
+```
+
+This is also wired into `npm run test:all` (which runs the unit,
+integration, and e2e suites) so CI exercises the full stack.
+
+The default `npm test` script **excludes** this directory because spawning
+a node child per spec is significantly slower than the in-memory
+integration suites; keep it that way unless you specifically want the e2e run.
+
+## How HTTP is mocked
+
+The server child process never reaches the real `leetcode.com`. Instead:
+
+1. `harness/preload.mjs` is registered via `NODE_OPTIONS=--import …`
+   when the child is spawned, so it runs before any user code.
+2. The preload script activates [`nock`](https://github.com/nock/nock)
+   with `disableNetConnect()` and reads a JSON fixture from
+   `process.env.E2E_FIXTURE_PATH`.
+3. The fixture (defined by `harness/types.ts`) describes which GraphQL
+   operations and REST endpoints to intercept and what to reply with.
+
+Specs author the fixture in TypeScript and pass it to `spawnServer({ fixture })`;
+the harness writes it to a temp file and points the child at it.
+
+## Isolation
+
+Each `spawnServer()` call gets a fresh `mkdtemp` `HOME`, so
+`~/.leetcode-mcp/credentials.json` is per-test and never touches the
+developer's real home. Specs that need to pre-seed credentials can pass
+`{ home }` to reuse a directory they prepared themselves.
+
+## Authoring a spec
+
+```ts
+import { spawnServer } from "./harness/spawn-server.js";
+
+const spawned = await spawnServer({
+  fixture: {
+    graphql: [
+      {
+        operationContains: "userStatus",
+        response: {
+          data: { userStatus: { isSignedIn: true, username: "alice" } }
+        }
+      }
+    ]
+  }
+});
+
+const result = await spawned.client.callTool({
+  name: "check_auth_status",
+  arguments: {}
+});
+
+await spawned.cleanup();
+```
+
+The vitest global setup (`tests/e2e/harness/global-setup.ts`) rebuilds a missing
+or stale `build/index.js` before any spec runs; you don't need to `npm run build` manually.
diff --git a/tests/e2e/auth-restore.test.ts b/tests/e2e/auth-restore.test.ts
new file mode 100644
index 0000000..bedfd29
--- /dev/null
+++ b/tests/e2e/auth-restore.test.ts
@@ -0,0 +1,103 @@
+/**
+ * E2E regression for the "silent-logout-on-restart" bug fixed in Phase 1.
+ *
+ * Before the fix, a server restart would re-read the credentials file from
+ * `~/.leetcode-mcp/credentials.json` and tell the user they were
+ * authenticated, but never actually push the cookies into the in-memory
+ * `Credential` the LeetCode client reads from. The very next authenticated
+ * tool call then failed with "Authentication required".
+ *
+ * This spec spawns a real server with a pre-seeded credentials file and a
+ * mocked `userStatus` GraphQL response, then calls `check_auth_status` over
+ * stdio. If the fix regresses, the tool will report `authenticated: false`
+ * and this spec fails.
+ */
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { spawnServer, type SpawnedServer } from "./harness/spawn-server.js";
+
+interface ToolTextResult {
+    content: Array<{ type: string; text: string }>;
+}
+
+describe("e2e: auth restore on startup", () => {
+    let spawned: SpawnedServer | undefined;
+    let seededHome: string | undefined;
+
+    beforeEach(() => {
+        spawned = undefined;
+        seededHome = undefined;
+    });
+
+    afterEach(async () => {
+        if (spawned) {
+            await spawned.cleanup();
+        }
+        if (seededHome) {
+            await rm(seededHome, { recursive: true, force: true });
+        }
+    });
+
+    async function makeSeededHome(): Promise<string> {
+        const home = await mkdtemp(join(tmpdir(), "leetcode-mcp-e2e-auth-"));
+        const dir = join(home, ".leetcode-mcp");
+        await mkdir(dir, { recursive: true });
+        await writeFile(
+            join(dir, "credentials.json"),
+            JSON.stringify({
+                csrftoken: "test-csrf",
+                LEETCODE_SESSION: "test-session",
+                createdAt: new Date().toISOString()
+            }),
+            "utf-8"
+        );
+        return home;
+    }
+
+    it("check_auth_status reports authenticated after a fresh restart", async () => {
+        seededHome = await makeSeededHome();
+
+        spawned = await spawnServer({
+            home: seededHome,
+            fixture: {
+                graphql: [
+                    {
+                        operationContains: "userStatus",
+                        response: {
+                            data: {
+                                userStatus: {
+                                    isSignedIn: true,
+                                    username: "alice"
+                                }
+                            }
+                        }
+                    }
+                ]
+            }
+        });
+
+        const result = (await spawned.client.callTool({
+            name: "check_auth_status",
+            arguments: {}
+        })) as ToolTextResult;
+
+        expect(result.content[0]?.type).toBe("text");
+        const payload = JSON.parse(result.content[0].text);
+        expect(payload.authenticated).toBe(true);
+        expect(payload.username).toBe("alice");
+    });
+
+    it("check_auth_status reports unauthenticated when no credentials file exists", async () => {
+        spawned = await spawnServer();
+
+        const result = (await spawned.client.callTool({
+            name: "check_auth_status",
+            arguments: {}
+        })) as ToolTextResult;
+
+        const payload = JSON.parse(result.content[0].text);
+        expect(payload.authenticated).toBe(false);
+    });
+});
diff --git a/tests/e2e/harness/global-setup.ts b/tests/e2e/harness/global-setup.ts
new file mode 100644
index 0000000..4af42c4
--- /dev/null
+++ b/tests/e2e/harness/global-setup.ts
@@ -0,0 +1,62 @@
+/**
+ * Vitest globalSetup hook: ensures `build/index.js` exists before any e2e
+ * spec spawns the server, and that it's at least as fresh as everything
+ * under `src/`.
+ *
+ * Without this, an unsuspecting `npm run test:e2e` after editing source
+ * would silently exercise a stale binary and report green, hiding real
+ * regressions. We'd rather pay the ~1s `tsc` cost up front than ship a
+ * blind-spot.
+ */
+import { execFile } from "node:child_process";
+import { readdir, stat } from "node:fs/promises";
+import { join } from "node:path";
+import { promisify } from "node:util";
+
+const execFileAsync = promisify(execFile);
+
+export default async function setup(): Promise<void> {
+    if (!(await needsRebuild())) {
+        return;
+    }
+    await execFileAsync("npm", ["run", "build"], {
+        // Build in the directory vitest was launched from (assumed to be the
+        // repo root, matching the relative paths used by needsRebuild()).
+        cwd: process.cwd(),
+        // Limit npm's own logging to errors. A failed build still rejects
+        // this promise (non-zero exit), so the run aborts before any spec spawns.
+        env: { ...process.env, npm_config_loglevel: "error" }
+    });
+}
+
+async function needsRebuild(): Promise<boolean> {
+    let binMtime: number;
+    try {
+        binMtime = (await stat("build/index.js")).mtimeMs;
+    } catch {
+        return true;
+    }
+    // Walk every `.ts` file under `src/` — comparing only against
+    // `src/index.ts` would let edits to any other module slip through.
+    const srcMtime = await maxMtimeUnder("src");
+    return binMtime < srcMtime;
+}
+
+/** Recursively returns the largest mtime among `.ts` files under `dir`. */
+async function maxMtimeUnder(dir: string): Promise<number> {
+    let max = 0;
+    const entries = await readdir(dir, { withFileTypes: true });
+    await Promise.all(
+        entries.map(async (entry) => {
+            const path = join(dir, entry.name);
+            if (entry.isDirectory()) {
+                const sub = await maxMtimeUnder(path);
+                if (sub > max) max = sub;
+            } else if (entry.isFile() && entry.name.endsWith(".ts")) {
+                const m = (await stat(path)).mtimeMs;
+                if (m > max) max = m;
+            }
+        })
+    );
+    return max;
+}
diff --git a/tests/e2e/harness/preload.mjs b/tests/e2e/harness/preload.mjs
new file mode 100644
index 0000000..f27f10f
--- /dev/null
+++ b/tests/e2e/harness/preload.mjs
@@ -0,0 +1,78 @@
+/**
+ * Preload script that runs inside the spawned MCP server child process before
+ * any user code.
+ *
+ * Registered via `NODE_OPTIONS="--import .../preload.mjs"`. Its job is to:
+ *
+ *   1. Activate `nock`, blocking the child from making any real network
+ *      requests to leetcode.com (the e2e suite must never depend on the
+ *      live LeetCode service being reachable or behaving consistently).
+ *   2. Read fixture data from a JSON file whose path is provided via the
+ *      `E2E_FIXTURE_PATH` env var, and install nock interceptors that
+ *      replay the canned GraphQL / REST responses back to the server.
+ *
+ * The fixture format is the {@link E2EFixture} type from `./types.ts`. Tests
+ * write a JSON file describing the LeetCode responses they want, point the
+ * child at it via env, and then drive the server through StdioClientTransport.
+ *
+ * If `E2E_FIXTURE_PATH` is not set, this preload is a no-op apart from
+ * disabling network — useful for lifecycle tests that don't touch LeetCode.
+ */
+import nock from "nock";
+import { readFileSync } from "node:fs";
+import process from "node:process";
+
+nock.disableNetConnect();
+
+const fixturePath = process.env.E2E_FIXTURE_PATH;
+if (fixturePath) {
+    /** @type {import("./types.ts").E2EFixture} */
+    let fixture;
+    try {
+        fixture = JSON.parse(readFileSync(fixturePath, "utf-8"));
+    } catch (error) {
+        // If the fixture file is malformed, fail loudly rather than silently
+        // letting the server hit `nock.disableNetConnect` and produce a
+        // confusing "Nock: Disallowed net connect" error mid-test.
+        process.stderr.write(
+            `[e2e preload] Failed to read fixture at ${fixturePath}: ${error}\n`
+        );
+        process.exit(1);
+    }
+
+    for (const entry of fixture.graphql ?? []) {
+        // `.persist()` is a Scope method (must be called before the
+        // interceptor is constructed); `.times()` is an Interceptor method.
+        // Default to persist so a single fixture entry can serve multiple
+        // calls to the same operation without callers tracking counts.
+        const scope = nock("https://leetcode.com");
+        if (entry.times === undefined) {
+            scope.persist();
+        }
+        const interceptor = scope.post(
+            "/graphql",
+            (body) =>
+                typeof body?.query === "string" &&
+                body.query.includes(entry.operationContains)
+        );
+        if (entry.times !== undefined) {
+            interceptor.times(entry.times);
+        }
+        interceptor.reply(entry.status ?? 200, entry.response);
+    }
+
+    for (const entry of fixture.rest ?? []) {
+        const scope = nock("https://leetcode.com");
+        if (entry.times === undefined) {
+            scope.persist();
+        }
+        const interceptor =
+            entry.method === "GET"
+                ? scope.get(entry.path)
+                : scope.post(entry.path);
+        if (entry.times !== undefined) {
+            interceptor.times(entry.times);
+        }
+        interceptor.reply(entry.status ?? 200, entry.response);
+    }
+}
diff --git a/tests/e2e/harness/spawn-server.ts b/tests/e2e/harness/spawn-server.ts
new file mode 100644
index 0000000..e919c65
--- /dev/null
+++ b/tests/e2e/harness/spawn-server.ts
@@ -0,0 +1,143 @@
+/**
+ * Test harness for spawning the LeetCode MCP server as a real child process
+ * over stdio and connecting an MCP client to it.
+ *
+ * Each spawn:
+ *   - Runs the freshly built `build/index.js` binary (via `node`).
+ *   - Gets its own isolated `HOME` (a fresh `mkdtemp`) so the credentials
+ *     store at `~/.leetcode-mcp/credentials.json` is per-test, never leaks
+ *     between specs, and never touches the developer's real home.
+ *   - Uses `NODE_OPTIONS="--import preload.mjs"` to activate `nock` inside
+ *     the child before any application code runs, so all LeetCode HTTP is
+ *     served from a JSON fixture instead of the real internet.
+ *
+ * The harness returns an MCP `Client` already wired to the child plus the
+ * directory acting as `HOME` so tests can pre-seed credentials, and a
+ * `cleanup()` to close the client and remove the temp directory.
+ */
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
+import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join, resolve } from "node:path";
+import { fileURLToPath, pathToFileURL } from "node:url";
+import type { E2EFixture } from "./types.js";
+
+const HARNESS_DIR = fileURLToPath(new URL(".", import.meta.url));
+const REPO_ROOT = resolve(HARNESS_DIR, "..", "..", "..");
+const SERVER_BIN = join(REPO_ROOT, "build", "index.js");
+const PRELOAD = join(HARNESS_DIR, "preload.mjs");
+
+export interface SpawnOptions {
+    /**
+     * LeetCode HTTP responses to serve back to the child via nock. If
+     * omitted, nock is still activated (so the child can't reach the real
+     * leetcode.com), but no interceptors are installed — useful for
+     * lifecycle / negative-path specs that don't drive an authenticated
+     * tool.
+     */
+    fixture?: E2EFixture;
+    /**
+     * Reuse an existing directory as the child's `HOME` instead of letting
+     * the harness mkdtemp a fresh one. The caller is responsible for
+     * cleanup of any home it provides; the harness only removes homes it
+     * created itself.
+     *
+     * Useful for specs that need to pre-seed `~/.leetcode-mcp/...` before
+     * the server boots (e.g., the auth-restore regression).
+     */
+    home?: string;
+    /**
+     * Extra environment variables to pass to the child. The
+     * harness-controlled keys (`HOME`, `NODE_OPTIONS`, `E2E_FIXTURE_PATH`)
+     * always take precedence over entries given here; `PATH` may be overridden.
+     */
+    env?: Record<string, string>;
+}
+
+export interface SpawnedServer {
+    /** Connected MCP `Client` ready to call tools / list / etc. */
+    client: Client;
+    /** Directory acting as the child's `HOME` (caller-provided or a fresh temp dir). */
+    home: string;
+    /** Close the client and remove the temp dirs the harness itself created. */
+    cleanup: () => Promise<void>;
+}
+
+/**
+ * Spawns `build/index.js` as a child process with isolated `HOME` and
+ * preloaded nock, and returns an MCP client connected over stdio.
+ */
+export async function spawnServer(
+    options: SpawnOptions = {}
+): Promise<SpawnedServer> {
+    const homeWasProvided = options.home !== undefined;
+    const home =
+        options.home ?? (await mkdtemp(join(tmpdir(), "leetcode-mcp-e2e-")));
+
+    // Fixtures live in their own temp directory regardless of who owns
+    // `HOME`, so a caller-provided `HOME` never gets a stray
+    // `fixture.json` byproduct. The harness owns the fixture dir and
+    // always cleans it up.
+    let fixtureDir: string | undefined;
+    let fixturePath: string | undefined;
+    if (options.fixture) {
+        fixtureDir = await mkdtemp(join(tmpdir(), "leetcode-mcp-e2e-fix-"));
+        fixturePath = join(fixtureDir, "fixture.json");
+        await writeFile(fixturePath, JSON.stringify(options.fixture), "utf-8");
+    }
+
+    const env: Record<string, string> = {
+        ...(options.env ?? {}),
+        // Pass through the bare minimum from the parent so node can find
+        // node_modules and the test runner's cwd matches the repo root.
+        PATH: options.env?.PATH ?? process.env.PATH ?? "",
+        HOME: home,
+        // `pathToFileURL` percent-encodes path segments so the preload
+        // import works even when the harness path contains spaces or
+        // other URL-reserved characters (common on macOS user dirs).
+        NODE_OPTIONS: `--import ${pathToFileURL(PRELOAD).href}`
+    };
+    if (fixturePath) {
+        env.E2E_FIXTURE_PATH = fixturePath;
+    } else {
+        delete env.E2E_FIXTURE_PATH;
+    }
+
+    const transport = new StdioClientTransport({
+        command: process.execPath,
+        args: [SERVER_BIN],
+        env,
+        cwd: REPO_ROOT,
+        // Forward stderr so the test runner surfaces server logs / nock
+        // errors when things go wrong.
+        stderr: "inherit"
+    });
+
+    const client = new Client({
+        name: "leetcode-mcp-e2e",
+        version: "0.0.0"
+    });
+    const cleanup = async () => {
+        try {
+            await client.close();
+        } catch {
+            // Already closed — ignore.
+        }
+        if (!homeWasProvided) {
+            await rm(home, { recursive: true, force: true });
+        }
+        if (fixtureDir) {
+            await rm(fixtureDir, { recursive: true, force: true });
+        }
+    };
+
+    try {
+        await client.connect(transport);
+    } catch (error) {
+        await cleanup();
+        throw error;
+    }
+
+    return { client, home, cleanup };
+}
diff --git a/tests/e2e/harness/types.ts b/tests/e2e/harness/types.ts
new file mode 100644
index 0000000..40ad091
--- /dev/null
+++ b/tests/e2e/harness/types.ts
@@ -0,0 +1,42 @@
+/**
+ * Shared types for the e2e harness fixtures.
+ *
+ * The fixture file is the contract between the parent test process (which
+ * authors the fixture) and the spawned MCP server child process (which reads
+ * the fixture via the `preload.mjs` script and replays it through nock).
+ *
+ * Keep this module dependency-free so it can be imported by both vitest
+ * specs and the lightweight preload script without dragging in the rest of
+ * the codebase.
+ */
+
+export interface MockGraphqlResponse {
+    /** Match request body where `body.query` includes this substring. */
+    operationContains: string;
+    /** Response payload (will be JSON-stringified into the response body). */
+    response: unknown;
+    /** HTTP status to return. Defaults to 200. */
+    status?: number;
+    /** How many times this interceptor should fire. Defaults to Infinity. */
+    times?: number;
+}
+
+export interface MockRestEndpoint {
+    /** HTTP method, e.g. "POST" or "GET". */
+    method: "GET" | "POST";
+    /** URL path on `https://leetcode.com`, e.g. "/problems/two-sum/submit/". */
+    path: string;
+    /** Response payload. */
+    response: unknown;
+    /** HTTP status to return. Defaults to 200. */
+    status?: number;
+    /** How many times this interceptor should fire. Defaults to Infinity. */
+    times?: number;
+}
+
+export interface E2EFixture {
+    /** GraphQL operations on https://leetcode.com/graphql. */
+    graphql?: MockGraphqlResponse[];
+    /** REST endpoints on https://leetcode.com (submit / check / etc). */
+    rest?: MockRestEndpoint[];
+}
diff --git a/tests/e2e/lifecycle.test.ts b/tests/e2e/lifecycle.test.ts
new file mode 100644
index 0000000..df067de
--- /dev/null
+++ b/tests/e2e/lifecycle.test.ts
@@ -0,0 +1,83 @@
+/**
+ * Lifecycle e2e: spawns the real `build/index.js` over stdio, performs the
+ * MCP handshake via the SDK client, and asserts the server reports the
+ * tools / resources / prompts we expect.
+ *
+ * This locks in the wire-level surface area: any drift in tool names or
+ * server identity is caught before clients do.
+ */
+import { afterAll, beforeAll, describe, expect, it } from "vitest";
+import { spawnServer, type SpawnedServer } from "./harness/spawn-server.js";
+
+describe("e2e: server lifecycle", () => {
+    let spawned: SpawnedServer | undefined;
+
+    beforeAll(async () => {
+        spawned = await spawnServer();
+    });
+
+    afterAll(async () => {
+        await spawned?.cleanup();
+    });
+
+    it("advertises a non-empty server name and version after handshake", () => {
+        const info = spawned?.client.getServerVersion();
+        expect(info?.name).toBeTruthy();
+        expect(info?.version).toBeTruthy();
+    });
+
+    it("registers all expected tools", async () => {
+        const { tools } = await spawned!.client.listTools();
+        const names = tools.map((t) => t.name).sort();
+
+        // The exact set must stay stable — adding a tool is intentional and
+        // should bump this assertion. Keep alphabetised so diffs are easy to
+        // read.
+        const expected = [
+            "check_auth_status",
+            "get_all_submissions",
+            "get_daily_challenge",
+            "get_problem",
+            "get_problem_progress",
+            "get_problem_solution",
+            "get_problem_submission_report",
+            "get_recent_ac_submissions",
+            "get_recent_submissions",
+            "get_session_state",
+            "get_started",
+            "get_user_contest_ranking",
+            "get_user_profile",
+            "get_user_status",
+            "list_problem_solutions",
+            "request_hint",
+            "reset_session",
+            "run_local_tests",
+            "runner_doctor",
+            "save_leetcode_credentials",
+            "search_problems",
+            "start_leetcode_auth",
+            "start_problem",
+            "submit_solution"
+        ];
+
+        // Use toEqual with a sorted expected so any addition / rename
+        // surfaces clearly without a brittle "every name in any order"
+        // assertion.
+        expect(names).toEqual(expected.sort());
+    });
+
+    it("registers MCP prompts", async () => {
+        const { prompts } = await spawned!.client.listPrompts();
+        expect(prompts.length).toBeGreaterThan(0);
+        const names = prompts.map((p) => p.name);
+        expect(names).toContain("leetcode_authentication_guide");
+    });
+
+    it("exposes resource templates for problems and solutions", async () => {
+        const { resourceTemplates } =
+            await spawned!.client.listResourceTemplates();
+        expect(resourceTemplates.length).toBeGreaterThan(0);
+        const uriTemplates = resourceTemplates.map((r) => r.uriTemplate);
+        expect(uriTemplates.some((u) => u.startsWith("problem://"))).toBe(true);
+    });
+});
diff --git a/tests/e2e/pedagogy-gate.test.ts b/tests/e2e/pedagogy-gate.test.ts
new file mode 100644
index 0000000..2d823e4
--- /dev/null
+++ b/tests/e2e/pedagogy-gate.test.ts
@@ -0,0 +1,164 @@
+/**
+ * Pedagogy state machine e2e: spawn the real server, drive a problem
+ * through `start_problem` → `request_hint` × 4, and assert the
+ * solution-returning tools are gated until the maximum hint level.
+ *
+ * Locks in the Phase 3 contract end-to-end: the rules are enforced by
+ * the wire, not by prompts the agent might forget to read.
+ */
+import { afterEach, describe, expect, it } from "vitest";
+import { spawnServer, type SpawnedServer } from "./harness/spawn-server.js";
+
+interface ToolTextResult {
+    content: Array<{ type: string; text: string }>;
+}
+
+const TWO_SUM_PROBLEM = {
+    questionId: "1",
+    questionFrontendId: "1",
+    title: "Two Sum",
+    titleSlug: "two-sum",
+    difficulty: "Easy",
+    isPaidOnly: false,
+    content:
+        "<p>Given an array of integers <code>nums</code> and an integer <code>target</code>...</p>",
+    topicTags: [{ name: "Array", slug: "array" }],
+    codeSnippets: [
+        {
+            lang: "Python3",
+            langSlug: "python3",
+            code: "class Solution:\n    def twoSum(self, nums, target):\n        pass\n"
+        }
+    ],
+    similarQuestions: "[]",
+    exampleTestcases: "[2,7,11,15]\n9",
+    hints: ["Try a hash map for O(n) lookup"],
+    stats: '{"totalAccepted":"10M","totalSubmission":"20M","acRate":"50.0%"}'
+};
+
+const SOLUTION_LIST_PAYLOAD = {
+    data: {
+        ugcArticleSolutionArticles: {
+            edges: [{ node: { topicId: "topic-42", title: "Hash map O(n)" } }],
+            totalNum: 1,
+            pageInfo: { hasNextPage: false }
+        }
+    }
+};
+
+/**
+ * The fixture serves the same canned GraphQL payload for every request
+ * that contains the matching field selector — `start_problem` and each
+ * `request_hint` both refetch the problem, so the question fixture must
+ * be replayable.
+ */
+const FIXTURE = {
+    graphql: [
+        {
+            operationContains: "question(titleSlug:",
+            response: { data: { question: TWO_SUM_PROBLEM } }
+        },
+        {
+            operationContains: "ugcArticleSolutionArticles",
+            response: SOLUTION_LIST_PAYLOAD
+        }
+    ]
+};
+
+describe("e2e: pedagogy gate", () => {
+    let spawned: SpawnedServer | undefined;
+
+    afterEach(async () => {
+        if (spawned) {
+            await spawned.cleanup();
+            spawned = undefined;
+        }
+    });
+
+    it("gates list_problem_solutions until request_hint reaches level 4", async () => {
+        spawned = await spawnServer({ fixture: FIXTURE });
+
+        // 1. No session yet — solutions must reject with SESSION_NOT_FOUND.
+        const noSession = (await spawned.client.callTool({
+            name: "list_problem_solutions",
+            arguments: { questionSlug: "two-sum" }
+        })) as ToolTextResult;
+        const noSessionPayload = JSON.parse(noSession.content[0].text);
+        expect(noSessionPayload.code).toBe("SESSION_NOT_FOUND");
+
+        // 2. Open a session and assert the initial state.
+        const start = (await spawned.client.callTool({
+            name: "start_problem",
+            arguments: { titleSlug: "two-sum", language: "python3" }
+        })) as ToolTextResult;
+        const startPayload = JSON.parse(start.content[0].text);
+        expect(startPayload.session.hintLevel).toBe(0);
+        expect(startPayload.session.status).toBe("started");
+
+        // 3. Walk the hint flow up to (but not at) the unlock level.
+        for (let expectedLevel = 1; expectedLevel < 4; expectedLevel++) {
+            const hint = (await spawned.client.callTool({
+                name: "request_hint",
+                arguments: { titleSlug: "two-sum" }
+            })) as ToolTextResult;
+            const payload = JSON.parse(hint.content[0].text);
+            expect(payload.level).toBe(expectedLevel);
+            expect(typeof payload.hint).toBe("string");
+            expect(payload.hint.length).toBeGreaterThan(0);
+
+            // At each pre-unlock level, list_problem_solutions still rejects.
+            const stillGated = (await spawned.client.callTool({
+                name: "list_problem_solutions",
+                arguments: { questionSlug: "two-sum" }
+            })) as ToolTextResult;
+            const stillGatedPayload = JSON.parse(stillGated.content[0].text);
+            expect(stillGatedPayload.code).toBe("HINT_LEVEL_TOO_LOW");
+        }
+
+        // 4. Final hint bump unlocks the solutions tool.
+        const finalHint = (await spawned.client.callTool({
+            name: "request_hint",
+            arguments: { titleSlug: "two-sum" }
+        })) as ToolTextResult;
+        const finalPayload = JSON.parse(finalHint.content[0].text);
+        expect(finalPayload.level).toBe(4);
+
+        // 5. Now the gate opens.
+        const unlocked = (await spawned.client.callTool({
+            name: "list_problem_solutions",
+            arguments: { questionSlug: "two-sum" }
+        })) as ToolTextResult;
+        const unlockedPayload = JSON.parse(unlocked.content[0].text);
+        expect(unlockedPayload.questionSlug).toBe("two-sum");
+        expect(unlockedPayload.solutionArticles).toBeDefined();
+    });
+
+    it("reset_session clamps hint level back to 0 and re-engages the gate", async () => {
+        spawned = await spawnServer({ fixture: FIXTURE });
+        await spawned.client.callTool({
+            name: "start_problem",
+            arguments: { titleSlug: "two-sum" }
+        });
+        // Assert every bump so the gate is provably open before the reset.
+        for (let expectedLevel = 1; expectedLevel <= 4; expectedLevel++) {
+            const hint = (await spawned.client.callTool({
+                name: "request_hint",
+                arguments: { titleSlug: "two-sum" }
+            })) as ToolTextResult;
+            expect(JSON.parse(hint.content[0].text).level).toBe(expectedLevel);
+        }
+
+        const reset = (await spawned.client.callTool({
+            name: "reset_session",
+            arguments: { titleSlug: "two-sum" }
+        })) as ToolTextResult;
+        expect(JSON.parse(reset.content[0].text).session.hintLevel).toBe(0);
+
+        const gatedAgain = (await spawned.client.callTool({
+            name: "list_problem_solutions",
+            arguments: { questionSlug: "two-sum" }
+        })) as ToolTextResult;
+        const gatedAgainPayload = JSON.parse(gatedAgain.content[0].text);
+        expect(gatedAgainPayload.code).toBe("HINT_LEVEL_TOO_LOW");
+    });
+});
diff --git a/tests/e2e/placeholder.test.ts b/tests/e2e/placeholder.test.ts
deleted file mode 100644
index b5eb08c..0000000
--- a/tests/e2e/placeholder.test.ts
+++ /dev/null
@@ -1,18 +0,0 @@
-/**
- * Placeholder e2e spec.
- *
- * The real end-to-end harness — spawning `build/index.js`, attaching
- * `StdioClientTransport`, and mocking LeetCode over `nock` — lands in a
- * dedicated PR (Phase 2 of the redesign plan). This file exists so that
- * `npm run test:e2e` (which targets `tests/e2e/`) exits 0 instead of 1
- * with "No test files found", giving CI an honest signal until then.
- *
- * Once the real harness lands, this file is removed.
- */
-import { describe, expect, it } from "vitest";
-
-describe("e2e harness placeholder", () => {
-    it("reserves the tests/e2e directory until the real harness lands", () => {
-        expect(true).toBe(true);
-    });
-});
diff --git a/tests/e2e/problem-flow.test.ts b/tests/e2e/problem-flow.test.ts
new file mode 100644
index 0000000..928123f
--- /dev/null
+++ b/tests/e2e/problem-flow.test.ts
@@ -0,0 +1,83 @@
+/**
+ * Happy-path e2e: spawn the server, call `get_problem` with a mocked
+ * GraphQL response, and assert the wire-level `{ titleSlug, problem }`
+ * envelope wraps the simplified projection of that response.
+ *
+ * Locks in the contract that callers see structured JSON (not free-form
+ * text) when a problem is fetched, and that the slug round-trips through
+ * the GraphQL boundary unmodified.
+ */
+import { afterEach, describe, expect, it } from "vitest";
+import { spawnServer, type SpawnedServer } from "./harness/spawn-server.js";
+
+interface ToolTextResult {
+    content: Array<{ type: string; text: string }>;
+}
+
+const TWO_SUM_PROBLEM = {
+    questionId: "1",
+    questionFrontendId: "1",
+    title: "Two Sum",
+    titleSlug: "two-sum",
+    difficulty: "Easy",
+    isPaidOnly: false,
+    content:
+        "<p>Given an array of integers <code>nums</code> and an integer <code>target</code>...</p>",
+    topicTags: [{ name: "Array", slug: "array" }],
+    codeSnippets: [
+        {
+            lang: "Python3",
+            langSlug: "python3",
+            code: "class Solution:\n    def twoSum(self, nums, target):\n        pass\n"
+        }
+    ],
+    similarQuestions: "[]",
+    exampleTestcases: "[2,7,11,15]\n9",
+    hints: ["Try a hash map for O(n) lookup"],
+    stats: '{"totalAccepted":"10M","totalSubmission":"20M","acRate":"50.0%"}'
+};
+
+describe("e2e: problem-flow happy path", () => {
+    let spawned: SpawnedServer | undefined;
+
+    afterEach(async () => {
+        if (spawned) {
+            await spawned.cleanup();
+            spawned = undefined;
+        }
+    });
+
+    it("get_problem returns a structured envelope for a known slug", async () => {
+        spawned = await spawnServer({
+            fixture: {
+                graphql: [
+                    {
+                        // `leetcode-query` issues an anonymous GraphQL
+                        // `question(titleSlug: ...)` query for problem
+                        // fetches. Match on the field-level selector
+                        // rather than an operation name (it doesn't have
+                        // one) to stay robust to formatting changes.
+                        operationContains: "question(titleSlug:",
+                        response: {
+                            data: { question: TWO_SUM_PROBLEM }
+                        }
+                    }
+                ]
+            }
+        });
+
+        const result = (await spawned.client.callTool({
+            name: "get_problem",
+            arguments: { titleSlug: "two-sum" }
+        })) as ToolTextResult;
+
+        expect(result.content[0]?.type).toBe("text");
+        const payload = JSON.parse(result.content[0].text);
+        // The tool wraps the simplified projection in `{ titleSlug, problem }`;
+        // assert the wire-level envelope, not the internal projection.
+        expect(payload.titleSlug).toBe("two-sum");
+        expect(payload.problem.title).toBe("Two Sum");
+        // topicTags are projected down to a string[] of slugs.
+        expect(payload.problem.topicTags).toEqual(["array"]);
+    });
+});
diff --git a/tests/e2e/runner.test.ts b/tests/e2e/runner.test.ts
new file mode 100644
index 0000000..05ac940
--- /dev/null
+++ b/tests/e2e/runner.test.ts
@@ -0,0 +1,260 @@
+/**
+ * Local-runner e2e: spawn the real `build/index.js`, drive
+ * `runner_doctor` and `run_local_tests` over the wire, and assert the
+ * runner actually executes Python on the host.
+ *
+ * Skipped automatically on hosts without `python3` so the suite stays
+ * portable; the project's CI image has it.
+ */
+import { execFileSync } from "node:child_process";
+import { afterEach, describe, expect, it } from "vitest";
+import { spawnServer, type SpawnedServer } from "./harness/spawn-server.js";
+
+interface ToolTextResult {
+    content: Array<{ type: string; text: string }>;
+}
+
+const TWO_SUM_PROBLEM = {
+    questionId: "1",
+    questionFrontendId: "1",
+    title: "Two Sum",
+    titleSlug: "two-sum",
+    difficulty: "Easy",
+    isPaidOnly: false,
+    content: "<p>Two Sum problem</p>",
+    topicTags: [{ name: "Array", slug: "array" }],
+    codeSnippets: [
+        {
+            lang: "Python3",
+            langSlug: "python3",
+            code: "class Solution:\n    def twoSum(self, nums, target):\n        pass\n"
+        }
+    ],
+    similarQuestions: "[]",
+    exampleTestcases: "[2,7,11,15]\n9",
+    hints: [],
+    stats: '{"totalAccepted":"10M","totalSubmission":"20M","acRate":"50.0%"}'
+};
+
+const FIXTURE = {
+    graphql: [
+        {
+            operationContains: "question(titleSlug:",
+            response: { data: { question: TWO_SUM_PROBLEM } }
+        }
+    ]
+};
+
+function pythonAvailable(): boolean {
+    try {
+        execFileSync("python3", ["--version"], { stdio: "ignore" });
+        return true;
+    } catch {
+        return false;
+    }
+}
+
+const PYTHON_PRESENT = pythonAvailable();
+
+describe.skipIf(!PYTHON_PRESENT)("e2e: local runner (python3)", () => {
+    let spawned: SpawnedServer | undefined;
+
+    afterEach(async () => {
+        if (spawned) {
+            await spawned.cleanup();
+            spawned = undefined;
+        }
+    });
+
+    it("runner_doctor reports python3 availability", async () => {
+        spawned = await spawnServer({ fixture: FIXTURE });
+
+        const doctor = (await spawned.client.callTool({
+            name: "runner_doctor",
+            arguments: {}
+        })) as ToolTextResult;
+
+        const payload = JSON.parse(doctor.content[0].text);
+        expect(payload.languages).toBeDefined();
+        const py = payload.languages.find(
+            (l: { language: string }) => l.language === "python3"
+        );
+        expect(py?.available).toBe(true);
+        expect(payload.sandbox).toBeDefined();
+    });
+
+    it("rejects run_local_tests when no session is open", async () => {
+        spawned = await spawnServer({ fixture: FIXTURE });
+
+        const result = (await spawned.client.callTool({
+            name: "run_local_tests",
+            arguments: {
+                titleSlug: "two-sum",
+                language: "python3",
+                code: "print('ok')"
+            }
+        })) as ToolTextResult;
+
+        const payload = JSON.parse(result.content[0].text);
+        expect(payload.code).toBe("SESSION_NOT_FOUND");
+    });
+
+    it("executes a passing python script and updates the session", async () => {
+        spawned = await spawnServer({ fixture: FIXTURE });
+
+        await spawned.client.callTool({
+            name: "start_problem",
+            arguments: { titleSlug: "two-sum", language: "python3" }
+        });
+
+        const run = (await spawned.client.callTool({
+            name: "run_local_tests",
+            arguments: {
+                titleSlug: "two-sum",
+                language: "python3",
+                code: 'print("hi")\nassert 1 + 1 == 2'
+            }
+        })) as ToolTextResult;
+
+        const payload = JSON.parse(run.content[0].text);
+        expect(payload.titleSlug).toBe("two-sum");
+        expect(payload.result.passed).toBe(true);
+        expect(payload.result.exitCode).toBe(0);
+        expect(payload.result.timedOut).toBe(false);
+        expect(payload.result.stdout).toContain("hi");
+
+        // Session state is observable via get_session_state.
+        const state = (await spawned.client.callTool({
+            name: "get_session_state",
+            arguments: { titleSlug: "two-sum" }
+        })) as ToolTextResult;
+        const sessionPayload = JSON.parse(state.content[0].text);
+        expect(sessionPayload.session.lastLocalRunPassed).toBe(true);
+        expect(sessionPayload.session.attempts).toBe(1);
+    });
+
+    it("captures non-zero exit code without throwing", async () => {
+        spawned = await spawnServer({ fixture: FIXTURE });
+
+        await spawned.client.callTool({
+            name: "start_problem",
+            arguments: { titleSlug: "two-sum", language: "python3" }
+        });
+
+        const run = (await spawned.client.callTool({
+            name: "run_local_tests",
+            arguments: {
+                titleSlug: "two-sum",
+                language: "python3",
+                code: "raise SystemExit(2)"
+            }
+        })) as ToolTextResult;
+
+        const payload = JSON.parse(run.content[0].text);
+        expect(payload.result.passed).toBe(false);
+        expect(payload.result.exitCode).toBe(2);
+
+        const state = (await spawned.client.callTool({
+            name: "get_session_state",
+            arguments: { titleSlug: "two-sum" }
+        })) as ToolTextResult;
+        const sessionPayload = JSON.parse(state.content[0].text);
+        expect(sessionPayload.session.lastLocalRunPassed).toBe(false);
+    });
+
+    it("kills runaway processes after the timeout budget", async () => {
+        spawned = await spawnServer({ fixture: FIXTURE });
+
+        await spawned.client.callTool({
+            name: "start_problem",
+            arguments: { titleSlug: "two-sum", language: "python3" }
+        });
+
+        const run = (await spawned.client.callTool({
+            name: "run_local_tests",
+            arguments: {
+                titleSlug: "two-sum",
+                language: "python3",
+                code: "while True: pass",
+                timeoutMs: 500
+            }
+        })) as ToolTextResult;
+
+        const payload = JSON.parse(run.content[0].text);
+        expect(payload.result.timedOut).toBe(true);
+        expect(payload.result.passed).toBe(false);
+    });
+
+    it("rejects unimplemented languages with RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE", async () => {
+        spawned = await spawnServer({ fixture: FIXTURE });
+
+        await spawned.client.callTool({
+            name: "start_problem",
+            arguments: { titleSlug: "two-sum", language: "go" }
+        });
+
+        const run = (await spawned.client.callTool({
+            name: "run_local_tests",
+            arguments: {
+                titleSlug: "two-sum",
+                language: "go",
+                code: "package main\nfunc main() {}"
+            }
+        })) as ToolTextResult;
+
+        const payload = JSON.parse(run.content[0].text);
+        expect(payload.code).toBe("RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE");
+    });
+
+    it("blocks submit_solution under strict mode until run_local_tests passes", async () => {
+        spawned = await spawnServer({
+            fixture: FIXTURE,
+            env: { LEETCODE_MCP_STRICT_MODE: "1" }
+        });
+
+        await spawned.client.callTool({
+            name: "start_problem",
+            arguments: { titleSlug: "two-sum", language: "python3" }
+        });
+
+        const code = 'print("ok")';
+
+        // First submit attempt: no run_local_tests yet → rejected.
+        const blocked = (await spawned.client.callTool({
+            name: "submit_solution",
+            arguments: {
+                problemSlug: "two-sum",
+                code,
+                language: "python3"
+            }
+        })) as ToolTextResult;
+        const blockedPayload = JSON.parse(blocked.content[0].text);
+        expect(blockedPayload.code).toBe("LOCAL_TESTS_NOT_PASSED");
+
+        // Run locals successfully.
+        const run = (await spawned.client.callTool({
+            name: "run_local_tests",
+            arguments: {
+                titleSlug: "two-sum",
+                language: "python3",
+                code
+            }
+        })) as ToolTextResult;
+        const runPayload = JSON.parse(run.content[0].text);
+        expect(runPayload.result.passed).toBe(true);
+
+        // Submit again: strict mode now permits it (the upstream
+        // request itself will fail via nock — we don't care; the gate
+        // is what we're locking down here).
+        const allowed = (await spawned.client.callTool({
+            name: "submit_solution",
+            arguments: {
+                problemSlug: "two-sum",
+                code,
+                language: "python3"
+            }
+        })) as ToolTextResult;
+        const allowedPayload = JSON.parse(allowed.content[0].text);
+        expect(allowedPayload.code).not.toBe("LOCAL_TESTS_NOT_PASSED");
+    });
+});
diff --git a/tests/helpers/mock-leetcode.ts b/tests/helpers/mock-leetcode.ts
index 08fbcdb..a380645 100644
--- a/tests/helpers/mock-leetcode.ts
+++ b/tests/helpers/mock-leetcode.ts
@@ -125,16 +125,20 @@ export function createMockLeetCodeService(): LeetcodeServiceInterface {
         }),
 
         // Solution methods
-        fetchQuestionSolutionArticles: vi.fn().mockResolvedValue([
-            {
-                id: "1",
-                title: "Two Sum - Solution",
-                slug: "two-sum-solution",
-                topicId: 12345,
-                authorUsername: "leetcode",
-                voteCount: 1000
-            }
-        ]),
+        fetchQuestionSolutionArticles: vi.fn().mockResolvedValue({
+            totalNum: 1,
+            hasNextPage: false,
+            articles: [
+                {
+                    id: "1",
+                    title: "Two Sum - Solution",
+                    slug: "two-sum-solution",
+                    topicId: 12345,
+                    authorUsername: "leetcode",
+                    voteCount: 1000
+                }
+            ]
+        }),
 
         fetchSolutionArticleDetail: vi.fn().mockResolvedValue({
             topicId: 12345,
diff --git a/tests/integration/runner-tools-integration.test.ts b/tests/integration/runner-tools-integration.test.ts
new file mode 100644
index 0000000..446eb86
--- /dev/null
+++ b/tests/integration/runner-tools-integration.test.ts
@@ -0,0 +1,302 @@
+/**
+ * Runner Tools Integration Tests
+ *
+ * Drives `run_local_tests` and `runner_doctor` through the MCP wire,
+ * with a fake `LocalRunner` that records what it was called with so we
+ * can assert the tool layer's behaviour without depending on `python3`
+ * being installed where these tests run.
+ */
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { createLocalRunSnapshot } from "../../src/domain/local-run-snapshot.js";
+import { SessionService } from "../../src/domain/session-service.js";
+import { FileSessionStore } from "../../src/domain/session-store.js";
+import { registerRunnerTools } from "../../src/mcp/tools/runner-tools.js";
+import type { LocalRunner } from "../../src/runner/runner.js";
+import {
+    ErrorCode,
+    LeetCodeError,
+    type RunInput,
+    type RunResult,
+    type RunnerCapabilities
+} from "../../src/types/index.js";
+import { createMockLeetCodeService } from "../helpers/mock-leetcode.js";
+import type { TestClientPair } from "../helpers/test-client.js";
+import { createTestClient } from "../helpers/test-client.js";
+import { INTEGRATION_TEST_TIMEOUT, assertions } from "./setup.js";
+
+const HAPPY_RESULT: RunResult = {
+    passed: true,
+    exitCode: 0,
+    stdout: "ok\n",
+    stderr: "",
+    timedOut: false,
+    durationMs: 42,
+    sandbox: "none",
+    warning: "No OS sandbox available on this host; ran without isolation."
+};
+
+const FAKE_CAPS: RunnerCapabilities = {
+    languages: [
+        { language: "python3", available: true, version: "Python 3.12.0" },
+        { language: "go", available: false },
+        { language: "java", available: false }
+    ],
+    sandbox: { kind: "none", available: false }
+};
+
+interface FakeRunnerOptions {
+    nextResult?: RunResult;
+    runError?: unknown;
+}
+
+function createFakeRunner(options: FakeRunnerOptions = {}): LocalRunner & {
+    runs: RunInput[];
+} {
+    const runs: RunInput[] = [];
+    return {
+        runs,
+        async run(input: RunInput): Promise<RunResult> {
+            runs.push(input);
+            if (options.runError) {
+                throw options.runError;
+            }
+            return options.nextResult ?? HAPPY_RESULT;
+        },
+        async capabilities(): Promise<RunnerCapabilities> {
+            return FAKE_CAPS;
+        }
+    };
+}
+
+describe("Runner Tools Integration", () => {
+    let testClient: TestClientPair;
+    let mockService: ReturnType<typeof createMockLeetCodeService>;
+    let sessions: SessionService;
+    let sessionDir: string;
+    let runner: ReturnType<typeof createFakeRunner>;
+
+    beforeEach(async () => {
+        mockService = createMockLeetCodeService();
+        sessionDir = await mkdtemp(join(tmpdir(), "leetcode-mcp-runner-"));
+        sessions = new SessionService(
+            new FileSessionStore({ dir: sessionDir })
+        );
+        runner = createFakeRunner();
+
+        testClient = await createTestClient({}, (server) => {
+            registerRunnerTools(server, mockService as any, sessions, runner);
+        });
+    }, INTEGRATION_TEST_TIMEOUT);
+
+    afterEach(async () => {
+        if (testClient) {
+            await testClient.cleanup();
+        }
+        await rm(sessionDir, { recursive: true, force: true });
+        vi.restoreAllMocks();
+    });
+
+    describe("run_local_tests", () => {
+        it(
+            "rejects with SESSION_NOT_FOUND when no session has been opened",
+            async () => {
+                const result: any = await testClient.client.callTool({
+                    name: "run_local_tests",
+                    arguments: {
+                        titleSlug: "two-sum",
+                        language: "python3",
+                        code: "print('hi')"
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text);
+                expect(payload.code).toBe(ErrorCode.SESSION_NOT_FOUND);
+                expect(runner.runs).toHaveLength(0);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "delegates to the runner and records lastLocalRunPassed",
+            async () => {
+                await sessions.startOrResume({ slug: "two-sum" });
+
+                const result: any = await testClient.client.callTool({
+                    name: "run_local_tests",
+                    arguments: {
+                        titleSlug: "two-sum",
+                        language: "python3",
+                        code: 'print("hi")'
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text);
+                expect(payload.titleSlug).toBe("two-sum");
+                expect(payload.result.passed).toBe(true);
+                expect(runner.runs).toHaveLength(1);
+                expect(runner.runs[0].language).toBe("python3");
+                expect(runner.runs[0].code).toBe('print("hi")');
+
+                const session = await sessions.requireSession("two-sum");
+                expect(session.lastLocalRunPassed).toBe(true);
+                expect(session.lastLocalRunSnapshot).toBe(
+                    createLocalRunSnapshot({
+                        code: 'print("hi")',
+                        language: "python3"
+                    })
+                );
+                expect(session.attempts).toBe(1);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "records lastLocalRunPassed=false on a failing run",
+            async () => {
+                await sessions.startOrResume({ slug: "two-sum" });
+                const failing = createFakeRunner({
+                    nextResult: { ...HAPPY_RESULT, passed: false, exitCode: 1 }
+                });
+                // Re-build the test client with the failing runner.
+                await testClient.cleanup();
+                testClient = await createTestClient({}, (server) => {
+                    registerRunnerTools(
+                        server,
+                        mockService as any,
+                        sessions,
+                        failing
+                    );
+                });
+
+                await testClient.client.callTool({
+                    name: "run_local_tests",
+                    arguments: {
+                        titleSlug: "two-sum",
+                        language: "python3",
+                        code: "raise SystemExit(1)"
+                    }
+                });
+
+                const session = await sessions.requireSession("two-sum");
+                expect(session.lastLocalRunPassed).toBe(false);
+                expect(session.lastLocalRunSnapshot).toBeNull();
+                expect(session.attempts).toBe(1);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "demotes solved sessions after a failing run",
+            async () => {
+                const store = new FileSessionStore({ dir: sessionDir });
+                const session = await sessions.startOrResume({
+                    slug: "two-sum"
+                });
+                await store.save({
+                    ...session,
+                    status: "solved",
+                    lastLocalRunPassed: true
+                });
+                const failing = createFakeRunner({
+                    nextResult: { ...HAPPY_RESULT, passed: false, exitCode: 1 }
+                });
+                await testClient.cleanup();
+                testClient = await createTestClient({}, (server) => {
+                    registerRunnerTools(
+                        server,
+                        mockService as any,
+                        sessions,
+                        failing
+                    );
+                });
+
+                await testClient.client.callTool({
+                    name: "run_local_tests",
+                    arguments: {
+                        titleSlug: "two-sum",
+                        language: "python3",
+                        code: "raise SystemExit(1)"
+                    }
+                });
+
+                const updated = await sessions.requireSession("two-sum");
+                expect(updated.status).toBe("attempting");
+                expect(updated.lastLocalRunPassed).toBe(false);
+                expect(updated.lastLocalRunSnapshot).toBeNull();
+                expect(updated.attempts).toBe(1);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "surfaces RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE thrown from the runner",
+            async () => {
+                await sessions.startOrResume({ slug: "two-sum" });
+                const broken = createFakeRunner({
+                    runError: new LeetCodeError(
+                        ErrorCode.RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE,
+                        "Go runner ships in Phase 4b"
+                    )
+                });
+                await testClient.cleanup();
+                testClient = await createTestClient({}, (server) => {
+                    registerRunnerTools(
+                        server,
+                        mockService as any,
+                        sessions,
+                        broken
+                    );
+                });
+
+                const result: any = await testClient.client.callTool({
+                    name: "run_local_tests",
+                    arguments: {
+                        titleSlug: "two-sum",
+                        language: "go",
+                        code: "package main"
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text);
+                expect(payload.code).toBe(
+                    ErrorCode.RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE
+                );
+
+                // The session attempt counter should NOT bump on a
+                // pre-run rejection.
+                const session = await sessions.requireSession("two-sum");
+                expect(session.attempts).toBe(0);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+    });
+
+    describe("runner_doctor", () => {
+        it(
+            "returns the capabilities snapshot",
+            async () => {
+                const result: any = await testClient.client.callTool({
+                    name: "runner_doctor",
+                    arguments: {}
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text);
+                expect(payload.languages).toBeDefined();
+                expect(payload.sandbox).toBeDefined();
+                expect(
+                    payload.languages.find(
+                        (l: { language: string }) => l.language === "python3"
+                    )?.available
+                ).toBe(true);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+    });
+});
diff --git a/tests/integration/solution-tools-integration.test.ts b/tests/integration/solution-tools-integration.test.ts
index ba5a4bc..8112560 100644
--- a/tests/integration/solution-tools-integration.test.ts
+++ b/tests/integration/solution-tools-integration.test.ts
@@ -1,9 +1,20 @@
 /**
  * Solution Tools Integration Tests
- * Tests all solution-related tools through MCP protocol
+ *
+ * Validates wire-level behaviour of `list_problem_solutions` and
+ * `get_problem_solution` through the MCP protocol — including the
+ * pedagogy gate added in Phase 3 (rejection with `HINT_LEVEL_TOO_LOW`
+ * when the active session has not reached the maximum hint level).
  */
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { SessionService } from "../../src/domain/session-service.js";
+import { FileSessionStore } from "../../src/domain/session-store.js";
 import { registerSolutionTools } from "../../src/mcp/tools/solution-tools.js";
+import type { SessionState } from "../../src/types/index.js";
+import { ErrorCode, MAX_HINT_LEVEL } from "../../src/types/index.js";
 import { createMockLeetCodeService } from "../helpers/mock-leetcode.js";
 import type { TestClientPair } from "../helpers/test-client.js";
 import { createTestClient } from "../helpers/test-client.js";
@@ -12,12 +23,19 @@ import { INTEGRATION_TEST_TIMEOUT, assertions } from "./setup.js";
 describe("Solution Tools Integration", () => {
     let testClient: TestClientPair;
     let mockService: ReturnType<typeof createMockLeetCodeService>;
+    let sessions: SessionService;
+    let sessionDir: string;
 
     beforeEach(async () => {
         mockService = createMockLeetCodeService();
+        // Sessions live in a per-test temp dir so specs don't leak state.
+        sessionDir = await mkdtemp(join(tmpdir(), "leetcode-mcp-itest-"));
+        sessions = new SessionService(
+            new FileSessionStore({ dir: sessionDir })
+        );
 
         testClient = await createTestClient({}, (server) => {
-            registerSolutionTools(server, mockService as any);
+            registerSolutionTools(server, mockService as any, sessions);
         });
     }, INTEGRATION_TEST_TIMEOUT);
 
@@ -25,8 +43,32 @@ describe("Solution Tools Integration", () => {
         if (testClient) {
             await testClient.cleanup();
         }
+        await rm(sessionDir, { recursive: true, force: true });
     });
 
+    /**
+     * Helper — writes a session for `slug` straight into the store at
+     * `hintLevel` (defaults to `MAX_HINT_LEVEL`). Bypasses `start_problem`
+     * so the gate can be tested in isolation.
+     */
+    async function seedSession(
+        slug: string,
+        hintLevel: number = MAX_HINT_LEVEL
+    ): Promise<void> {
+        const now = new Date().toISOString();
+        const session: SessionState = {
+            slug,
+            hintLevel: hintLevel as SessionState["hintLevel"],
+            attempts: 0,
+            lastLocalRunPassed: null,
+            status: "started",
+            createdAt: now,
+            updatedAt: now
+        };
+        const store = new FileSessionStore({ dir: sessionDir });
+        await store.save(session);
+    }
+
     describe("list_problem_solutions", () => {
         it(
             "should list list_problem_solutions tool",
@@ -37,14 +79,54 @@ describe("Solution Tools Integration", () => {
                     (t) => t.name === "list_problem_solutions"
                 );
                 expect(tool).toBeDefined();
-                expect(tool?.description).toContain("solutions");
+                expect(tool?.description).toContain("solution");
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "should reject when no session has reached the unlock level",
+            async () => {
+                const result: any = await testClient.client.callTool({
+                    name: "list_problem_solutions",
+                    arguments: { questionSlug: "two-sum", limit: 5 }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text);
+                expect(payload.code).toBe(ErrorCode.SESSION_NOT_FOUND);
+                expect(
+                    mockService.fetchQuestionSolutionArticles
+                ).not.toHaveBeenCalled();
             },
             INTEGRATION_TEST_TIMEOUT
         );
 
         it(
-            "should execute list_problem_solutions successfully",
+            "should reject when session is below the unlock level",
             async () => {
+                await seedSession("two-sum", 2);
+
+                const result: any = await testClient.client.callTool({
+                    name: "list_problem_solutions",
+                    arguments: { questionSlug: "two-sum", limit: 5 }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text);
+                expect(payload.code).toBe(ErrorCode.HINT_LEVEL_TOO_LOW);
+                expect(
+                    mockService.fetchQuestionSolutionArticles
+                ).not.toHaveBeenCalled();
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "should execute list_problem_solutions when session is at unlock level",
+            async () => {
+                await seedSession("two-sum");
+
                 const result: any = await testClient.client.callTool({
                     name: "list_problem_solutions",
                     arguments: { questionSlug: "two-sum", limit: 5 }
@@ -55,7 +137,7 @@ describe("Solution Tools Integration", () => {
                     mockService.fetchQuestionSolutionArticles
                 ).toHaveBeenCalledWith("two-sum", {
                     limit: 5,
-                    skip: undefined,
+                    skip: 0,
                     orderBy: undefined,
                     userInput: undefined,
                     tagSlugs: []
@@ -67,6 +149,8 @@ describe("Solution Tools Integration", () => {
         it(
             "should handle list_problem_solutions with filters",
             async () => {
+                await seedSession("two-sum");
+
                 const result: any = await testClient.client.callTool({
                     name: "list_problem_solutions",
                     arguments: {
@@ -81,7 +165,7 @@ describe("Solution Tools Integration", () => {
                     mockService.fetchQuestionSolutionArticles
                 ).toHaveBeenCalledWith("two-sum", {
                     limit: 10,
-                    skip: undefined,
+                    skip: 0,
                     orderBy: "MOST_VOTES",
                     userInput: undefined,
                     tagSlugs: ["python", "dynamic-programming"]
@@ -102,17 +186,38 @@ describe("Solution Tools Integration", () => {
                 );
                 expect(tool).toBeDefined();
                 expect(tool?.description).toContain("solution");
-                expect(tool?.description).toContain("hints");
             },
             INTEGRATION_TEST_TIMEOUT
         );
 
         it(
-            "should execute get_problem_solution successfully",
+            "should reject when titleSlug session is below unlock level",
             async () => {
+                await seedSession("two-sum", 3);
+
                 const result: any = await testClient.client.callTool({
                     name: "get_problem_solution",
-                    arguments: { topicId: "12345" }
+                    arguments: { topicId: "12345", titleSlug: "two-sum" }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text);
+                expect(payload.code).toBe(ErrorCode.HINT_LEVEL_TOO_LOW);
+                expect(
+                    mockService.fetchSolutionArticleDetail
+                ).not.toHaveBeenCalled();
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "should execute get_problem_solution when session is unlocked",
+            async () => {
+                await seedSession("two-sum");
+
+                const result: any = await testClient.client.callTool({
+                    name: "get_problem_solution",
+                    arguments: { topicId: "12345", titleSlug: "two-sum" }
                 });
 
                 assertions.hasToolResultStructure(result);
@@ -122,5 +227,25 @@ describe("Solution Tools Integration", () => {
             },
             INTEGRATION_TEST_TIMEOUT
         );
+
+        it(
+            "rejects when topicId is not listed for titleSlug",
+            async () => {
+                await seedSession("two-sum");
+
+                const result: any = await testClient.client.callTool({
+                    name: "get_problem_solution",
+                    arguments: { topicId: "99999", titleSlug: "two-sum" }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text);
+                expect(payload.code).toBe(ErrorCode.SOLUTION_NOT_FOUND);
+                expect(
+                    mockService.fetchSolutionArticleDetail
+                ).not.toHaveBeenCalled();
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
     });
 });
diff --git a/tests/integration/submission-tools-integration.test.ts b/tests/integration/submission-tools-integration.test.ts
index aa60787..1e30f45 100644
--- a/tests/integration/submission-tools-integration.test.ts
+++ b/tests/integration/submission-tools-integration.test.ts
@@ -2,8 +2,15 @@
  * Submission Tools Integration Tests
  * Tests all submission-related tools through MCP protocol
  */
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
 import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { createLocalRunSnapshot } from "../../src/domain/local-run-snapshot.js";
+import { SessionService } from "../../src/domain/session-service.js";
+import { FileSessionStore } from "../../src/domain/session-store.js";
 import { registerSubmissionTools } from "../../src/mcp/tools/submission-tools.js";
+import { ErrorCode } from "../../src/types/index.js";
 import { createMockAuthenticatedService } from "../helpers/mock-leetcode.js";
 import type { TestClientPair } from "../helpers/test-client.js";
 import { createTestClient } from "../helpers/test-client.js";
@@ -12,13 +19,19 @@ import { INTEGRATION_TEST_TIMEOUT, assertions } from "./setup.js";
 describe("Submission Tools Integration", () => {
     let testClient: TestClientPair;
     let mockService: ReturnType<typeof createMockAuthenticatedService>;
+    let sessions: SessionService;
+    let sessionDir: string;
 
     beforeEach(async () => {
         // Use authenticated service since submission requires authentication
         mockService = createMockAuthenticatedService();
+        sessionDir = await mkdtemp(join(tmpdir(), "leetcode-mcp-sub-"));
+        sessions = new SessionService(
+            new FileSessionStore({ dir: sessionDir })
+        );
 
         testClient = await createTestClient({}, (server) => {
-            registerSubmissionTools(server, mockService as any);
+            registerSubmissionTools(server, mockService as any, sessions);
         });
     }, INTEGRATION_TEST_TIMEOUT);
 
@@ -26,6 +39,8 @@ describe("Submission Tools Integration", () => {
         if (testClient) {
             await testClient.cleanup();
         }
+        await rm(sessionDir, { recursive: true, force: true });
+        delete process.env.LEETCODE_MCP_STRICT_MODE;
     });
 
     describe("submit_solution", () => {
@@ -41,6 +56,65 @@ describe("Submission Tools Integration", () => {
             INTEGRATION_TEST_TIMEOUT
         );
 
+        it(
+            "blocks stale local passes for changed code",
+            async () => {
+                process.env.LEETCODE_MCP_STRICT_MODE = "1";
+                const passedCode = "def twoSum(nums, target): return [0, 1]";
+                await sessions.startOrResume({ slug: "two-sum" });
+                await sessions.recordLocalRun(
+                    "two-sum",
+                    true,
+                    createLocalRunSnapshot({
+                        code: passedCode,
+                        language: "python3"
+                    })
+                );
+
+                const result: any = await testClient.client.callTool({
+                    name: "submit_solution",
+                    arguments: {
+                        problemSlug: "two-sum",
+                        code: "def twoSum(nums, target): return []",
+                        language: "python3"
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text as string);
+                expect(payload.code).toBe(ErrorCode.LOCAL_TESTS_NOT_PASSED);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "blocks stale local passes for changed language",
+            async () => {
+                process.env.LEETCODE_MCP_STRICT_MODE = "1";
+                const code = "def twoSum(nums, target): return [0, 1]";
+                await sessions.startOrResume({ slug: "two-sum" });
+                await sessions.recordLocalRun(
+                    "two-sum",
+                    true,
+                    createLocalRunSnapshot({ code, language: "python3" })
+                );
+
+                const result: any = await testClient.client.callTool({
+                    name: "submit_solution",
+                    arguments: {
+                        problemSlug: "two-sum",
+                        code,
+                        language: "python"
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text as string);
+                expect(payload.code).toBe(ErrorCode.LOCAL_TESTS_NOT_PASSED);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
         it(
             "should have required parameters",
             async () => {
@@ -98,4 +172,164 @@ describe("Submission Tools Integration", () => {
             INTEGRATION_TEST_TIMEOUT
         );
     });
+
+    describe("submit_solution — strict mode", () => {
+        it(
+            "blocks submission when LEETCODE_MCP_STRICT_MODE=1 and session has not passed locals",
+            async () => {
+                process.env.LEETCODE_MCP_STRICT_MODE = "1";
+                await sessions.startOrResume({ slug: "two-sum" });
+                // No recordLocalRun call → lastLocalRunPassed is null.
+
+                const result: any = await testClient.client.callTool({
+                    name: "submit_solution",
+                    arguments: {
+                        problemSlug: "two-sum",
+                        code: "def twoSum(nums, target): pass",
+                        language: "python3"
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text as string);
+                expect(payload.code).toBe(ErrorCode.LOCAL_TESTS_NOT_PASSED);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "permits submission when strict mode is on and locals have passed",
+            async () => {
+                process.env.LEETCODE_MCP_STRICT_MODE = "1";
+                await sessions.startOrResume({ slug: "two-sum" });
+                const code = "def twoSum(nums, target): pass";
+                const language = "python3";
+                await sessions.recordLocalRun(
+                    "two-sum",
+                    true,
+                    createLocalRunSnapshot({ code, language })
+                );
+
+                const result: any = await testClient.client.callTool({
+                    name: "submit_solution",
+                    arguments: {
+                        problemSlug: "two-sum",
+                        code,
+                        language
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text as string);
+                // Mock service returns a normal submission envelope —
+                // we just need to confirm we didn't get the error code.
+                expect(payload.code).not.toBe(ErrorCode.LOCAL_TESTS_NOT_PASSED);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "blocks submission when code changed after passing locals",
+            async () => {
+                process.env.LEETCODE_MCP_STRICT_MODE = "1";
+                await sessions.startOrResume({ slug: "two-sum" });
+                const language = "python3";
+                await sessions.recordLocalRun(
+                    "two-sum",
+                    true,
+                    createLocalRunSnapshot({
+                        code: "def twoSum(nums, target): pass",
+                        language
+                    })
+                );
+
+                const result: any = await testClient.client.callTool({
+                    name: "submit_solution",
+                    arguments: {
+                        problemSlug: "two-sum",
+                        code: "def twoSum(nums, target): return []",
+                        language
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text as string);
+                expect(payload.code).toBe(ErrorCode.LOCAL_TESTS_NOT_PASSED);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "blocks submission when language changed after passing locals",
+            async () => {
+                process.env.LEETCODE_MCP_STRICT_MODE = "1";
+                await sessions.startOrResume({ slug: "two-sum" });
+                const code = "def twoSum(nums, target): pass";
+                await sessions.recordLocalRun(
+                    "two-sum",
+                    true,
+                    createLocalRunSnapshot({ code, language: "python3" })
+                );
+
+                const result: any = await testClient.client.callTool({
+                    name: "submit_solution",
+                    arguments: {
+                        problemSlug: "two-sum",
+                        code,
+                        language: "java"
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text as string);
+                expect(payload.code).toBe(ErrorCode.LOCAL_TESTS_NOT_PASSED);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "blocks submission when strict mode is on but no session was opened",
+            async () => {
+                process.env.LEETCODE_MCP_STRICT_MODE = "1";
+                // Deliberately no startOrResume — strict mode requires
+                // a session so it can verify an exact local-test pass.
+
+                const result: any = await testClient.client.callTool({
+                    name: "submit_solution",
+                    arguments: {
+                        problemSlug: "two-sum",
+                        code: "def twoSum(nums, target): pass",
+                        language: "python3"
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text as string);
+                expect(payload.code).toBe(ErrorCode.SESSION_NOT_FOUND);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+
+        it(
+            "does not block by default (LEETCODE_MCP_STRICT_MODE unset)",
+            async () => {
+                // No env var; session exists with lastLocalRunPassed === null.
+                await sessions.startOrResume({ slug: "two-sum" });
+
+                const result: any = await testClient.client.callTool({
+                    name: "submit_solution",
+                    arguments: {
+                        problemSlug: "two-sum",
+                        code: "def twoSum(nums, target): pass",
+                        language: "python3"
+                    }
+                });
+
+                assertions.hasToolResultStructure(result);
+                const payload = JSON.parse(result.content[0].text as string);
+                expect(payload.code).not.toBe(ErrorCode.LOCAL_TESTS_NOT_PASSED);
+            },
+            INTEGRATION_TEST_TIMEOUT
+        );
+    });
 });
diff --git a/tests/mcp/tools/auth-tools.test.ts b/tests/mcp/tools/auth-tools.test.ts
index 73e5548..c88541f 100644
--- a/tests/mcp/tools/auth-tools.test.ts
+++ b/tests/mcp/tools/auth-tools.test.ts
@@ -234,6 +234,12 @@ describe("AuthToolRegistry", () => {
             const response = JSON.parse(result.content[0].text);
             expect(response.authenticated).toBe(true);
             expect(response.username).toBe("testuser");
+            // Validated credentials must be pushed into the running service
+            // so the next authenticated tool call works without a restart.
+            expect(mockLeetCodeService.updateCredentials).toHaveBeenCalledWith(
+                "test-csrf",
+                "test-session"
+            );
         });
 
         it("should return expired status for invalid credentials", async () => {
diff --git a/tests/runner/sandbox.test.ts b/tests/runner/sandbox.test.ts
new file mode 100644
index 0000000..34c9308
--- /dev/null
+++ b/tests/runner/sandbox.test.ts
@@ -0,0 +1,35 @@
+import { afterEach, describe, expect, it } from "vitest";
+import {
+    __resetSandboxCacheForTest,
+    __setSandboxCacheForTest,
+    wrapWithSandbox
+} from "../../src/runner/sandbox.js";
+
+describe("sandbox wrapping", () => {
+    afterEach(() => {
+        __resetSandboxCacheForTest();
+    });
+
+    it("escapes sandbox-exec subpath strings", async () => {
+        __setSandboxCacheForTest({ kind: "sandbox-exec" });
+
+        const wrapped = await wrapWithSandbox(
+            "python3",
+            ["solution.py"],
+            String.raw`/tmp/leetcode-mcp-run-\with-"quotes"`
+        );
+
+        expect(wrapped.cmd).toBe("/usr/bin/sandbox-exec");
+        expect(wrapped.args[1]).toContain(
+            String.raw`(subpath "/tmp/leetcode-mcp-run-\\with-\"quotes\"")`
+        );
+    });
+
+    it("rejects sandbox-exec subpaths containing newlines", async () => {
+        __setSandboxCacheForTest({ kind: "sandbox-exec" });
+
+        await expect(async () => {
+            await wrapWithSandbox("python3", ["solution.py"], "/tmp/bad\npath");
+        }).rejects.toThrow("cannot contain newlines");
+    });
+});
diff --git a/tests/runner/subprocess-runner.test.ts b/tests/runner/subprocess-runner.test.ts
new file mode 100644
index 0000000..27969dc
--- /dev/null
+++ b/tests/runner/subprocess-runner.test.ts
@@ -0,0 +1,161 @@
+/**
+ * Unit tests for the subprocess runner.
+ *
+ * These tests assume `python3` is available on PATH (the project's own
+ * CI image already has it). No spec here skips when it is absent: per the
+ * runner's availability probe, a missing python3 should surface as
+ * `LANGUAGE_RUNTIME_NOT_FOUND`, so the python3 `run` specs fail loudly.
+ */
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { __resetSandboxCacheForTest } from "../../src/runner/sandbox.js";
+import {
+    SubprocessRunner,
+    __resetProbeCacheForTest
+} from "../../src/runner/subprocess-runner.js";
+import {
+    ErrorCode,
+    isLeetCodeError,
+    type RunnerLanguage
+} from "../../src/types/index.js";
+
+describe("SubprocessRunner", () => {
+    let runner: SubprocessRunner;
+
+    beforeEach(() => {
+        // Force re-probing per test so mutations to PATH (none here, but
+        // future tests may) don't leak between cases.
+        __resetProbeCacheForTest();
+        __resetSandboxCacheForTest();
+        runner = new SubprocessRunner();
+    });
+
+    afterEach(() => {
+        __resetProbeCacheForTest();
+        __resetSandboxCacheForTest();
+    });
+
+    describe("capabilities", () => {
+        it("reports python3 as a supported language", async () => {
+            const caps = await runner.capabilities();
+            const py = caps.languages.find((l) => l.language === "python3");
+            expect(py).toBeDefined();
+            // Don't assert availability — environments without python3
+            // should still produce a coherent envelope.
+            expect(typeof py?.available).toBe("boolean");
+        });
+
+        it("reports go and java as supported languages even before they are implemented", async () => {
+            const caps = await runner.capabilities();
+            const langs = caps.languages.map((l) => l.language).sort();
+            expect(langs).toEqual(["go", "java", "python3"]);
+        });
+
+        it("includes a sandbox descriptor", async () => {
+            const caps = await runner.capabilities();
+            expect(caps.sandbox).toBeDefined();
+            expect(["none", "bwrap", "firejail", "sandbox-exec"]).toContain(
+                caps.sandbox.kind
+            );
+        });
+    });
+
+    describe("run", () => {
+        it("executes a happy-path python script", async () => {
+            const result = await runner.run({
+                titleSlug: "two-sum",
+                language: "python3",
+                code: 'print("hello"); assert 1 + 1 == 2'
+            });
+
+            expect(result.passed).toBe(true);
+            expect(result.exitCode).toBe(0);
+            expect(result.timedOut).toBe(false);
+            expect(result.stdout).toContain("hello");
+            expect(result.stderr).toBe("");
+            expect(result.durationMs).toBeGreaterThanOrEqual(0);
+        });
+
+        it("captures non-zero exit code without throwing", async () => {
+            const result = await runner.run({
+                titleSlug: "two-sum",
+                language: "python3",
+                code: "raise SystemExit(7)"
+            });
+
+            expect(result.passed).toBe(false);
+            expect(result.exitCode).toBe(7);
+            expect(result.timedOut).toBe(false);
+        });
+
+        it("captures stderr from raised exceptions", async () => {
+            const result = await runner.run({
+                titleSlug: "two-sum",
+                language: "python3",
+                code: 'raise ValueError("boom")'
+            });
+
+            expect(result.passed).toBe(false);
+            expect(result.exitCode).not.toBe(0);
+            expect(result.stderr).toContain("ValueError");
+            expect(result.stderr).toContain("boom");
+        });
+
+        it("kills runaway processes after the timeout budget", async () => {
+            const start = Date.now();
+            const result = await runner.run({
+                titleSlug: "two-sum",
+                language: "python3",
+                code: "while True: pass",
+                timeoutMs: 400
+            });
+            const elapsed = Date.now() - start;
+
+            expect(result.timedOut).toBe(true);
+            expect(result.passed).toBe(false);
+            // Tolerate slow CI: budget + the 500 ms SIGTERM-then-SIGKILL
+            // grace + scheduler jitter. Should not run for full 5s.
+            expect(elapsed).toBeLessThan(2_500);
+        });
+
+        it("rejects unsupported languages with RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE", async () => {
+            await expect(async () => {
+                await runner.run({
+                    titleSlug: "two-sum",
+                    language: "go" as RunnerLanguage,
+                    code: 'package main\nfunc main() { println("hi") }'
+                });
+            }).rejects.toSatisfy((error: unknown) => {
+                if (!isLeetCodeError(error)) {
+                    return false;
+                }
+                return (
+                    error.code === ErrorCode.RUNNER_NOT_IMPLEMENTED_FOR_LANGUAGE
+                );
+            });
+        });
+
+        it("forwards a clean env (no leaking secrets)", async () => {
+            // Ask the child to print one of its env vars. We never set
+            // SECRET_ON_PARENT in the child env, so it should print the
+            // "MISSING" fallback even if the var is defined on the parent.
+            const before = process.env.SECRET_ON_PARENT;
+            process.env.SECRET_ON_PARENT = "leak-me";
+            try {
+                const result = await runner.run({
+                    titleSlug: "two-sum",
+                    language: "python3",
+                    code: 'import os; print(os.environ.get("SECRET_ON_PARENT", "MISSING"))'
+                });
+
+                expect(result.passed).toBe(true);
+                expect(result.stdout.trim()).toBe("MISSING");
+            } finally {
+                if (before === undefined) {
+                    delete process.env.SECRET_ON_PARENT;
+                } else {
+                    process.env.SECRET_ON_PARENT = before;
+                }
+            }
+        });
+    });
+});
diff --git a/tests/services/problem-services.test.ts b/tests/services/problem-services.test.ts
index ebf6e35..233fa10 100644
--- a/tests/services/problem-services.test.ts
+++ b/tests/services/problem-services.test.ts
@@ -14,8 +14,8 @@ describe("LeetCode Problem Services", () => {
 
                 expect(result).toBeDefined();
                 expect(result.question).toBeDefined();
-                expect(result.question.title).toBeDefined();
-                expect(result.question.questionId).toBeDefined();
+                expect(result.question?.title).toBeDefined();
+                expect(result.question?.questionId).toBeDefined();
             }, 30000);
         });
 
diff --git a/tests/services/solution-services.test.ts b/tests/services/solution-services.test.ts
index 4b10a40..37531b2 100644
--- a/tests/services/solution-services.test.ts
+++ b/tests/services/solution-services.test.ts
@@ -70,15 +70,17 @@ describe("LeetCode Solution Services", () => {
                     return;
                 }
 
-                const topicId = solutionsResult.articles[0].topicId;
+                const topicId = solutionsResult.articles[0]?.topicId;
+                expect(topicId).toBeDefined();
                 logger.info(`Using topicId: ${topicId} for detail fetch`);
 
-                const result =
-                    await service.fetchSolutionArticleDetail(topicId);
+                const result = await service.fetchSolutionArticleDetail(
+                    String(topicId)
+                );
 
                 expect(result).toBeDefined();
-                expect(result.title).toBeDefined();
-                expect(result.content).toBeDefined();
+                expect(result?.title).toBeDefined();
+                expect(result?.content).toBeDefined();
             }, 30000);
 
             it("should handle errors properly for invalid topicIds", async () => {
diff --git a/vitest.e2e.config.ts b/vitest.e2e.config.ts
new file mode 100644
index 0000000..aa8b82f
--- /dev/null
+++ b/vitest.e2e.config.ts
@@ -0,0 +1,22 @@
+import { defineConfig } from "vitest/config";
+
+/**
+ * Dedicated config for the e2e suite.
+ *
+ * - Only includes `tests/e2e/**` so the slow spawn-the-binary specs don't
+ *   run alongside the fast unit / integration suites.
+ * - Wires in `global-setup.ts` so `build/index.js` is guaranteed to exist
+ *   and be at least as fresh as `src/` before any spec spawns the server.
+ * - 30s test and hook timeouts because spawning a Node child process plus
+ *   an MCP handshake can exceed the 5s integration default.
+ */
+export default defineConfig({
+    test: {
+        environment: "node",
+        include: ["tests/e2e/**/*.test.ts"],
+        globals: true,
+        globalSetup: ["tests/e2e/harness/global-setup.ts"],
+        testTimeout: 30_000,
+        hookTimeout: 30_000
+    }
+});