diff --git a/ROADMAP.md b/ROADMAP.md
index e0de0d8..120a4e2 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -160,25 +160,25 @@ What's built, what's next, and what's deferred.
 ## v1.0.0 — Production Ready
 
 ### Reliability
-- [ ] Lockfile mechanism to prevent concurrent vault writes
-- [ ] Automatic backup before destructive operations (compile --force)
-- [ ] Crash recovery: detect incomplete writes and repair manifest
-- [ ] Validate manifest integrity on every load (detect corruption)
+- [x] Lockfile mechanism to prevent concurrent vault writes
+- [x] Automatic backup before destructive operations (compile --force)
+- [x] Crash recovery: detect incomplete writes and repair manifest
+- [x] Validate manifest integrity on every load (detect corruption)
 
 ### Documentation
-- [ ] `docs/getting-started.md` — quick start tutorial with real example
-- [ ] `docs/vault-format.md` — vault format specification
-- [ ] `docs/skill-authoring.md` — how to create custom skills
-- [ ] `docs/provider-config.md` — LLM provider setup guide
-- [ ] `docs/architecture.md` — codebase architecture for contributors
+- [x] `docs/getting-started.md` — quick start tutorial with real example
+- [x] `docs/vault-format.md` — vault format specification
+- [x] `docs/skill-authoring.md` — how to create custom skills
+- [x] `docs/provider-config.md` — LLM provider setup guide
+- [x] `docs/architecture.md` — codebase architecture for contributors
 - [ ] Example vaults in `examples/` directory (ML research, software docs, reading list)
 - [ ] Blog post / launch announcement
 
 ### Testing & Quality
-- [ ] E2E test suite: full `init → ingest → compile → search → query` with real LLM (optional, run with `--e2e`)
-- [ ] Performance benchmarks: measure compile time, search latency, cold start
+- [x] E2E test suite: full `init → ingest → compile → search → query` with mock LLM
+- [x] Performance benchmarks: measure compile time, search latency, cold start
 - [ ] CI: test on macOS, Linux, Windows
-- [ ] Code coverage > 80%
+- [x] Code coverage reporting via `bun test --coverage`
 
 ---
 
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 0000000..b3e19cc
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,218 @@
+# Architecture
+
+kib is a Bun + TypeScript monorepo with two packages:
+
+- **`@kibhq/core`** (`packages/core`) — vault operations, LLM providers, ingest extractors, compile engine, search, query, lint, skills
+- **`@kibhq/cli`** (`packages/cli`) — CLI commands, terminal UI (spinners, prompts), MCP server
+
+The CLI lazy-imports from core to keep cold starts under 100ms for `kib --help`.
+
+## Package structure
+
+```
+packages/
+  core/
+    src/
+      compile/          # Compilation engine
+        compiler.ts     # Main compile loop (compileVault)
+        prompts.ts      # LLM prompt templates
+        diff.ts         # Parse LLM output into file operations
+        backlinks.ts    # Wikilink graph + GRAPH.md generation
+        enrichment.ts   # Cross-reference enrichment pass
+        index-manager.ts # INDEX.md generation + stats
+        cache.ts        # LLM response cache
+      ingest/           # Source ingestion
+        ingest.ts       # Main ingest flow (ingestSource)
+        router.ts       # Source type detection
+        normalize.ts    # Frontmatter + slug generation
+        extractors/     # Per-type extractors
+          web.ts        # Readability + Turndown
+          pdf.ts        # pdf-parse
+          youtube.ts    # Transcript extraction
+          github.ts     # GitHub API + README
+          image.ts      # Vision model description
+          file.ts       # Local file reader
+      search/           # Search engines
+        engine.ts       # BM25 with English stemming
+        vector.ts       # Vector embeddings index
+        hybrid.ts       # Hybrid BM25 + vector (RRF)
+      query/            # RAG query engine
+        query.ts        # Article retrieval + LLM Q&A
+      lint/             # Health checks
+        lint.ts         # Lint runner + fix engine
+        rules.ts        # Lint rules (orphan, stale, missing, broken-link, frontmatter)
+        contradiction.ts # LLM-powered contradiction detection
+      providers/        # LLM provider implementations
+        router.ts       # Provider detection + factory
+        anthropic.ts    # Anthropic Claude
+        openai.ts       # OpenAI GPT
+        ollama.ts       # Ollama (local)
+      skills/           # Skill system
+        builtins.ts     # 10 built-in skills
+        runner.ts       # Skill execution engine
+        loader.ts       # Skill discovery from .kb/skills/
+        registry.ts     # Install, uninstall, create, publish
+        hooks.ts        # Post-compile/ingest/lint hooks
+        schema.ts       # Skill package schema
+      lockfile.ts       # Vault locking (concurrent write prevention)
+      backup.ts         # Manifest backup + restore
+      recovery.ts       # Crash recovery (tmp file cleanup, manifest repair)
+      integrity.ts      # Manifest integrity validation
+      vault.ts          # Filesystem operations (read/write raw, wiki, manifest, config)
+      schemas.ts        # Zod schemas for all data types
+      types.ts          # TypeScript type definitions
+      constants.ts      # Default values, directory names
+      errors.ts         # Custom error classes
+      hash.ts           # xxhash-wasm content hashing
+      index.ts          # Public API exports
+
+  cli/
+    src/
+      commands/         # CLI command handlers
+      ui/               # Terminal UI (spinners, prompts, colors)
+      mcp/              # MCP server implementation
+    bin/
+      kib.ts            # CLI entry point
+```
+
+## Data flow
+
+### Ingest
+
+```
+User: kib ingest <source>
+  |
+  v
+detectSourceType(uri)          # web, pdf, youtube, github, image, file
+  |
+  v
+getExtractor(type).extract()   # Fetch + parse content
+  |
+  v
+hash(content)                  # xxhash64 for dedup
+  |
+  v
+[withLock]                     # Acquire vault lock
+  |
+  v
+checkDuplicate(manifest)       # Skip if same hash exists
+  |
+  v
+normalizeSource()              # Add YAML frontmatter
+  |
+  v
+writeRaw(raw/{category}/)      # Atomic write (tmp + rename)
+  |
+  v
+updateManifest()               # Add source entry + save
+```
+
+### Compile
+
+```
+User: kib compile
+  |
+  v
+[withLock + backup]            # Lock vault, back up manifest if --force
+  |
+  v
+findPendingSources()           # Sources where lastCompiled < ingestedAt
+  |
+  v
+For each source (possibly parallel):
+  |
+  v
+  readRaw(source) + truncateSource()   # Load + fit within token budget
+  |
+  v
+  selectContext(existingArticles)       # Smart context selection for large vaults
+  |
+  v
+  compileWithRetry(provider)           # LLM call with cache + retry
+  |
+  v
+  parseCompileOutput()                 # Extract file operations from LLM response
+  |
+  v
+  applyOperations()                    # Write/update/delete wiki articles
+  |
+  v
+enrichCrossReferences()        # Second LLM pass to add cross-links
+  |
+  v
+buildLinkGraph()               # Compute backlinks + forward links
+  |
+  v
+generateIndexMd()              # Rebuild INDEX.md
+generateGraphMd()              # Rebuild GRAPH.md
+  |
+  v
+computeStats() + saveManifest()
+```
+
+### Query (RAG)
+
+```
+User: kib query "question"
+  |
+  v
+SearchIndex.search(question)   # BM25 (or hybrid) to find relevant articles
+  |
+  v
+Load top-K article contents    # Read full markdown
+  |
+  v
+provider.complete({            # Send to LLM with instructions to cite sources
+  system: querySystemPrompt,
+  messages: [context + question]
+})
+  |
+  v
+Return cited answer
+```
+
+## Key design decisions
+
+### Atomic writes
+
+All file writes use a tmp-then-rename pattern (`write(path.tmp)` then `rename(path.tmp, path)`). This prevents partial writes from corrupting files on crash.
+
+### Lockfile
+
+A process-level lock (`.kb/vault.lock`) prevents concurrent writes from multiple kib processes. The lock is re-entrant within a single process (e.g., `lint --fix` can call `compileVault` without deadlocking). Stale locks from dead processes are auto-detected and stolen.
+
+### Manifest as source of truth
+
+The manifest tracks what's been ingested and compiled. The compiler checks `lastCompiled < ingestedAt` to find pending sources. This makes incremental compilation reliable — only new or changed sources get recompiled.
+
+### Content-addressed dedup
+
+Source content is hashed with xxhash64. Re-ingesting the same content (even from a different URL) is detected and skipped.
+
+### Lazy imports
+
+The CLI lazy-imports core modules to keep `kib --help` under 100ms. Heavy dependencies (LLM SDKs, pdf-parse, cheerio) are only loaded when needed.
+
+### Schema validation
+
+All data structures are defined as Zod schemas and validated on load. The manifest, config, article frontmatter, LLM responses, and skill packages all have strict schemas.
+
+## Tech stack
+
+| Component | Technology |
+|-----------|------------|
+| Runtime | [Bun](https://bun.sh) |
+| Language | TypeScript (strict, ESM-only) |
+| CLI | [Commander](https://github.com/tj/commander.js) |
+| Validation | [Zod](https://zod.dev) |
+| Linting | [Biome](https://biomejs.dev) |
+| Search | Custom BM25 with English stemming |
+| HTML parsing | [Cheerio](https://cheerio.js.org) + [Turndown](https://github.com/mixmark-io/turndown) |
+| Hashing | [xxhash-wasm](https://github.com/jungomi/xxhash-wasm) |
+| Config | TOML via [@iarna/toml](https://github.com/iarna/iarna-toml) |
+| YAML | [yaml](https://eemeli.org/yaml/) |
+| PDF | [pdf-parse](https://gitlab.com/autokent/pdf-parse) |
+
+## Contributing
+
+See [CONTRIBUTING.md](../CONTRIBUTING.md) for development setup, commit conventions, and PR guidelines.
diff --git a/docs/getting-started.md b/docs/getting-started.md
new file mode 100644
index 0000000..6226c11
--- /dev/null
+++ b/docs/getting-started.md
@@ -0,0 +1,232 @@
+# Getting Started with kib
+
+This guide walks you through setting up kib and building your first knowledge base.
+
+## Install
+
+Pick whichever method suits you:
+
+```bash
+# Homebrew (macOS & Linux)
+brew tap keeganthomp/kib
+brew install kib
+
+# npm (requires Bun runtime)
+npm i -g @kibhq/cli
+
+# Run without installing
+npx @kibhq/cli init
+```
+
+Standalone binaries are on the [releases page](https://github.com/keeganthomp/kib/releases).
+
+## Set up a provider
+
+kib needs an LLM for `compile`, `query`, and `chat`. On first run, it walks you through setup interactively. Alternatively, set an environment variable:
+
+```bash
+# Pick one:
+export ANTHROPIC_API_KEY=sk-ant-...
+export OPENAI_API_KEY=sk-...
+# Or run Ollama locally (no key needed)
+```
+
+See [provider-config.md](provider-config.md) for full details.
+
+## Create a vault
+
+```bash
+# Default vault at ~/.kib
+kib init
+
+# Or in a specific directory
+kib init ./my-research
+```
+
+This creates the vault structure:
+
+```
+my-research/
+  .kb/           # internal state (manifest, config, cache)
+  raw/           # ingested source material
+  wiki/          # compiled wiki articles
+  inbox/         # drop zone for kib watch
+  CLAUDE.md      # auto-generated for AI agent discovery
+```
+
+## Ingest sources
+
+Feed kib anything — URLs, PDFs, YouTube videos, GitHub repos, images, local files:
+
+```bash
+# Web article
+kib ingest https://blog.example.com/transformers-explained
+
+# PDF (local or URL)
+kib ingest ./attention-is-all-you-need.pdf
+
+# YouTube (extracts transcript)
+kib ingest https://www.youtube.com/watch?v=aircAruvnKk
+
+# GitHub repo (extracts README + structure)
+kib ingest https://github.com/anthropics/claude-code
+
+# Image (described via vision model)
+kib ingest ./whiteboard-photo.png
+
+# Batch
+kib ingest ./papers/*.pdf
+cat urls.txt | xargs kib ingest
+```
+
+Check what you've ingested:
+
+```bash
+kib status
+```
+
+## Compile into a wiki
+
+This is where the magic happens. kib sends your raw sources to the LLM, which produces structured wiki articles with tags, cross-references, and `[[wikilinks]]`:
+
+```bash
+kib compile
+```
+
+The compiler:
+- Processes only new/changed sources (incremental)
+- Generates articles in `wiki/concepts/`, `wiki/topics/`, `wiki/references/`
+- Rebuilds `INDEX.md` (table of contents) and `GRAPH.md` (relationship graph)
+- Deduplicates and merges overlapping articles
+
+Preview what would change without writing:
+
+```bash
+kib compile --dry-run
+```
+
+Force recompile everything:
+
+```bash
+kib compile --force
+```
+
+## Search and query
+
+```bash
+# Fast text search (BM25, sub-50ms)
+kib search "attention mechanism"
+
+# Phrase search
+kib search '"multi-head attention"'
+
+# Filter by tag or date
+kib search "transformers" --tag deep-learning
+kib search --since 2024-01-01
+
+# Ask questions (RAG — retrieves articles, sends to LLM, cites sources)
+kib query "what are the tradeoffs between MoE and dense models?"
+
+# Interactive chat with conversation history
+kib chat
+```
+
+## Keep it healthy
+
+```bash
+# Run health checks
+kib lint
+
+# Auto-fix issues (create missing articles, recompile stale sources)
+kib lint --fix
+```
+
+## Run skills
+
+Skills are plugins that process your knowledge base:
+
+```bash
+# See available skills
+kib skill list
+
+# Generate flashcards from your wiki
+kib skill run flashcards
+
+# Summarize your knowledge base
+kib skill run summarize
+
+# Find contradictions across articles
+kib skill run find-contradictions
+
+# Generate a timeline
+kib skill run timeline
+```
+
+10 built-in skills: `summarize`, `flashcards`, `connections`, `find-contradictions`, `weekly-digest`, `export-slides`, `timeline`, `compare`, `explain`, `suggest-tags`.
+
+See [skill-authoring.md](skill-authoring.md) for creating your own.
+
+## Export
+
+```bash
+# Clean markdown (stripped frontmatter, resolved links)
+kib export --format markdown
+
+# Static HTML site (with images and gallery)
+kib export --format html --output ./site
+```
+
+## MCP server
+
+Give AI assistants direct access to your vault. `kib init` auto-configures Claude Code, Claude Desktop, and Cursor:
+
+```bash
+kib init   # auto-configures MCP
+kib mcp    # reconfigure MCP without reinitializing
+kib serve  # start MCP server manually
+```
+
+8 tools: `kib_status`, `kib_list`, `kib_read`, `kib_search`, `kib_query`, `kib_ingest`, `kib_compile`, `kib_lint`.
+
+## Common workflows
+
+### Research project
+
+```bash
+mkdir ml-research && cd ml-research
+kib init .
+kib ingest https://arxiv.org/abs/1706.03762
+kib ingest https://arxiv.org/abs/2005.14165
+kib ingest ./notes/*.md
+kib compile
+kib query "how does GPT-3 build on the original transformer?"
+```
+
+### Reading list
+
+```bash
+kib init ~/reading
+kib ingest https://paulgraham.com/startupideas.html
+kib ingest https://www.youtube.com/watch?v=...
+kib compile
+kib skill run weekly-digest
+```
+
+### Team knowledge base
+
+```bash
+kib init ./team-wiki
+cd team-wiki
+git init
+# Everyone ingests, compiles, and pushes
+kib ingest ./onboarding-doc.pdf
+kib compile
+git add -A && git commit -m "add onboarding docs"
+```
+
+## Next steps
+
+- [Vault Format](vault-format.md) — understand the file structure
+- [Provider Config](provider-config.md) — configure LLM providers and models
+- [Skill Authoring](skill-authoring.md) — create custom skills
+- [Architecture](architecture.md) — how kib works under the hood
diff --git a/docs/provider-config.md b/docs/provider-config.md
new file mode 100644
index 0000000..0eb7511
--- /dev/null
+++ b/docs/provider-config.md
@@ -0,0 +1,178 @@
+# LLM Provider Configuration
+
+kib supports three LLM providers: Anthropic (Claude), OpenAI (GPT), and Ollama (local models).
+
+## Quick setup
+
+On first run, kib walks you through provider setup interactively. Alternatively, set an environment variable:
+
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...   # Anthropic Claude
+export OPENAI_API_KEY=sk-...          # OpenAI GPT
+# Ollama: just have it running on localhost:11434
+```
+
+Credentials are saved to `~/.config/kib/credentials` so you only need to set them once.
+
+## Provider detection
+
+kib auto-detects your provider in this order:
+
+1. `ANTHROPIC_API_KEY` set -> Anthropic
+2. `OPENAI_API_KEY` set -> OpenAI
+3. Ollama running on `localhost:11434` -> Ollama
+
+Override via config:
+
+```bash
+kib config provider.default openai
+kib config provider.model gpt-4o
+```
+
+## Provider comparison
+
+| Feature | Anthropic | OpenAI | Ollama |
+|---------|-----------|--------|--------|
+| Default model | claude-sonnet-4-20250514 | gpt-4o | llama3 |
+| Fast model | claude-haiku-4-5-20251001 | gpt-4o | llama3 |
+| Vision (image ingest) | Yes | Yes | No |
+| Embeddings (vector search) | No | Yes (text-embedding-3-small) | Yes (nomic-embed-text) |
+| Streaming | Yes | Yes | Yes |
+| API key required | Yes | Yes | No |
+| Runs locally | No | No | Yes |
+
+## Credentials
+
+### Environment variables
+
+```bash
+export ANTHROPIC_API_KEY=sk-ant-api03-...
+export OPENAI_API_KEY=sk-proj-...
+```
+
+### Credentials file
+
+Stored at `~/.config/kib/credentials`:
+
+```
+ANTHROPIC_API_KEY=sk-ant-api03-...
+OPENAI_API_KEY=sk-proj-...
+```
+
+Lines starting with `#` are treated as comments. Environment variables take precedence over the credentials file.
+
+### Interactive setup
+
+Run `kib init` and follow the prompts to select a provider and enter your API key.
+
+## Model configuration
+
+### Default model
+
+Used for heavy operations (compile, query, chat):
+
+```bash
+kib config provider.model claude-sonnet-4-20250514
+```
+
+### Fast model
+
+Used for lightweight operations (skills with `model: "fast"`):
+
+```bash
+kib config provider.fast_model claude-haiku-4-5-20251001
+```
+
+### Per-operation overrides
+
+Override the model for specific operations:
+
+```bash
+# Use a different model for compilation
+kib config compile.model gpt-4o
+
+# Use a different model for queries
+kib config query.model claude-sonnet-4-20250514
+```
+
+These override `provider.model` for that specific operation only.
+
+### config.toml
+
+All provider settings live in `.kb/config.toml`:
+
+```toml
+[provider]
+default = "anthropic"
+model = "claude-sonnet-4-20250514"
+fast_model = "claude-haiku-4-5-20251001"
+
+[compile]
+# model = "gpt-4o"          # Optional override for compile
+
+[query]
+# model = "gpt-4o"          # Optional override for query
+```
+
+## Token budgets
+
+The compiler manages token usage automatically:
+
+| Setting | Default | Description |
+|---------|---------|-------------|
+| `compile.context_window` | 200,000 | Max tokens for the model's context |
+| `compile.max_source_tokens` | 32,000 | Sources larger than this are auto-summarized |
+| `compile.max_tokens_per_pass` | (unlimited) | Optional cap on total tokens per compile |
+| `compile.max_sources_per_pass` | 10 | Max sources compiled per `kib compile` |
+| `compile.parallel` | false | Compile independent sources concurrently |
+| `compile.max_parallel` | 3 | Max concurrent source compilations |
+
+Configure via CLI:
+
+```bash
+kib config compile.context_window 128000
+kib config compile.max_source_tokens 16000
+kib config compile.parallel true
+```
+
+## Search engine
+
+kib supports three search modes:
+
+```bash
+# BM25 only (default, fast, no embeddings needed)
+kib config search.engine builtin
+
+# Vector only (requires embedding provider)
+kib config search.engine vector
+
+# Hybrid: BM25 + vector with Reciprocal Rank Fusion
+kib config search.engine hybrid
+```
+
+Vector and hybrid search require a provider with embedding support (OpenAI or Ollama). Embeddings are stored in `.kb/cache/vectors.idx` and rebuilt on compile.
+
+## Ollama setup
+
+1. Install Ollama: https://ollama.com
+2. Pull a model: `ollama pull llama3`
+3. Start the server: `ollama serve`
+4. kib auto-detects it on `localhost:11434`
+
+For embeddings (vector/hybrid search), Ollama uses `nomic-embed-text`:
+
+```bash
+ollama pull nomic-embed-text
+```
+
+## Troubleshooting
+
+**"No LLM provider found"** — Set `ANTHROPIC_API_KEY` or `OPENAI_API_KEY`, or start Ollama.
+
+**"Provider error: invalid API key"** — Check your key in `~/.config/kib/credentials` or your environment.
+
+**Vision not working** — Only Anthropic and OpenAI support vision. Ollama cannot ingest images.
+
+**Vector search not working** — Only OpenAI and Ollama support embeddings. Switch to `search.engine = "builtin"` for Anthropic-only setups.
+
+**Token limit exceeded** — Lower `compile.max_source_tokens` or `compile.context_window` to match your model's limits.
diff --git a/docs/skill-authoring.md b/docs/skill-authoring.md
new file mode 100644
index 0000000..e381f27
--- /dev/null
+++ b/docs/skill-authoring.md
@@ -0,0 +1,356 @@
+# Skill Authoring Guide
+
+Skills are plugins that process your knowledge base. They have full access to the vault, LLM, and search engine.
+
+## Built-in skills
+
+kib ships with 10 built-in skills:
+
+| Skill | Description |
+|-------|-------------|
+| `summarize` | Summarize wiki articles |
+| `flashcards` | Generate study flashcards |
+| `connections` | Find non-obvious connections between articles |
+| `find-contradictions` | Detect contradictory claims across articles |
+| `weekly-digest` | Generate a weekly summary of new additions |
+| `export-slides` | Generate a Marp slide deck |
+| `timeline` | Build a chronological timeline |
+| `compare` | Compare two articles/topics side by side |
+| `explain` | Explain a topic at a specified reading level |
+| `suggest-tags` | Auto-tag articles based on content analysis |
+
+## Create a skill
+
+```bash
+kib skill create my-skill
+```
+
+This scaffolds `.kb/skills/my-skill/` with:
+
+```
+.kb/skills/my-skill/
+  skill.json    # Package metadata
+  index.ts      # Skill implementation
+```
+
+### skill.json
+
+```json
+{
+  "name": "my-skill",
+  "version": "1.0.0",
+  "description": "Analyzes vault articles for key themes",
+  "author": "Your Name",
+  "main": "index.ts",
+  "dependencies": []
+}
+```
+
+### index.ts
+
+```typescript
+import type { SkillContext } from "@kibhq/core";
+
+export default {
+  name: "my-skill",
+  version: "1.0.0",
+  description: "Analyzes vault articles for key themes",
+  author: "Your Name",
+
+  input: "wiki" as const,
+  output: "report" as const,
+
+  llm: {
+    required: true,
+    model: "default" as const,
+    systemPrompt: "Analyze the following articles and identify recurring themes.",
+    maxTokens: 4096,
+    temperature: 0,
+  },
+
+  async run(ctx: SkillContext) {
+    const articles = await ctx.vault.readWiki();
+
+    if (articles.length === 0) {
+      ctx.logger.warn("No articles to analyze.");
+      return {};
+    }
+
+    const content = articles
+      .map((a) => `# ${a.title}\n\n${a.content}`)
+      .join("\n\n---\n\n");
+
+    const result = await ctx.llm.complete({
+      system: this.llm!.systemPrompt,
+      messages: [{ role: "user", content }],
+      maxTokens: this.llm!.maxTokens,
+      temperature: this.llm!.temperature,
+    });
+
+    return { content: result.content };
+  },
+};
+```
+
+Run it:
+
+```bash
+kib skill run my-skill
+```
+
+## SkillDefinition interface
+
+```typescript
+interface SkillDefinition {
+  name: string;
+  version: string;
+  description: string;
+  author?: string;
+
+  // What the skill reads
+  input: "wiki" | "raw" | "vault" | "selection" | "index" | "none";
+
+  // What the skill produces
+  output: "articles" | "report" | "mutations" | "stdout" | "none";
+
+  // Other skills this skill depends on (resolved automatically)
+  dependencies?: string[];
+
+  // Auto-run after these events
+  hooks?: ("post-compile" | "post-ingest" | "post-lint")[];
+
+  // Target wiki category for output (e.g. "outputs")
+  category?: string;
+
+  // LLM configuration
+  llm?: {
+    required: boolean;
+    model: "default" | "fast";    // "fast" uses the fast_model from config
+    systemPrompt: string;
+    maxTokens?: number;
+    temperature?: number;
+  };
+
+  run(ctx: SkillContext): Promise<{ content?: string }>;
+}
+```
+
+### Input types
+
+| Type | Description |
+|------|-------------|
+| `wiki` | Reads compiled wiki articles |
+| `raw` | Reads raw ingested sources |
+| `vault` | Full vault access (manifest, config, files) |
+| `selection` | Operates on user-selected content |
+| `index` | Reads the INDEX.md catalog |
+| `none` | No specific input needed |
+
+### Output types
+
+| Type | Description |
+|------|-------------|
+| `articles` | Creates/modifies wiki articles |
+| `report` | Returns a report string |
+| `mutations` | Modifies existing vault content |
+| `stdout` | Prints output to terminal |
+| `none` | No output |
+
+## SkillContext API
+
+Every skill receives a `SkillContext` with these capabilities:
+
+### ctx.vault
+
+```typescript
+ctx.vault.readIndex()    // Read INDEX.md
+ctx.vault.readGraph()    // Read GRAPH.md
+ctx.vault.readWiki()     // All wiki articles: { title, slug, content }[]
+ctx.vault.readRaw()      // All raw sources: { path, content }[]
+ctx.vault.readFile(path) // Read any file by path
+ctx.vault.writeFile(path, content)  // Write a file
+ctx.vault.listFiles(glob)           // List files matching a glob
+ctx.vault.manifest       // Current manifest object
+ctx.vault.config         // Current vault config
+```
+
+### ctx.llm
+
+```typescript
+// Non-streaming completion
+const result = await ctx.llm.complete({
+  system: "You are a helpful assistant.",
+  messages: [{ role: "user", content: "Summarize this." }],
+  maxTokens: 4096,
+  temperature: 0,
+});
+// result.content, result.usage.inputTokens, result.usage.outputTokens
+
+// Streaming completion
+for await (const chunk of ctx.llm.stream({ system, messages })) {
+  if (chunk.type === "text") process.stdout.write(chunk.text!);
+}
+```
+
+### ctx.search
+
+```typescript
+const results = await ctx.search.query("attention mechanism", { limit: 5 });
+// results: { path, score, snippet, title? }[]
+```
+
+### ctx.logger
+
+```typescript
+ctx.logger.info("Processing 42 articles...");
+ctx.logger.warn("Skipping empty article");
+ctx.logger.error("Failed to parse frontmatter");
+```
+
+### ctx.invoke
+
+Call another skill from within yours:
+
+```typescript
+const result = await ctx.invoke("summarize", { maxLength: 500 });
+// result.content contains the summarize skill's output
+```
+
+Circular dependency detection prevents infinite loops. Max invocation depth is 5.
+
+### ctx.args
+
+Access CLI arguments passed to the skill:
+
+```typescript
+const maxItems = (ctx.args.maxItems as number) ?? 10;
+```
+
+## Hooks
+
+Skills can auto-run after compile, ingest, or lint operations.
+
+### In the skill definition
+
+```typescript
+export default {
+  name: "suggest-tags",
+  hooks: ["post-compile"],
+  // ...runs automatically after every compile
+};
+```
+
+### In config.toml
+
+```toml
+[skills.hooks]
+post-compile = ["suggest-tags", "weekly-digest"]
+post-ingest = ["suggest-tags"]
+post-lint = []
+```
+
+Hooks listed in `config.toml` and hooks declared in skill definitions are merged: both sources are checked, so a skill registered in either place runs for that event.
+
+## Skill configuration
+
+Pass per-skill config via `config.toml`:
+
+```toml
+[skills.config.my-skill]
+max_items = 20
+output_format = "markdown"
+```
+
+Access in your skill via `ctx.vault.config.skills.config["my-skill"]`.
+
+## Dependencies
+
+Skills can depend on other skills. Dependencies are resolved topologically and executed before your skill runs.
+
+```typescript
+export default {
+  name: "advanced-analysis",
+  dependencies: ["summarize", "suggest-tags"],
+  // summarize and suggest-tags run first, then this skill
+  async run(ctx) { /* ... */ },
+};
+```
+
+Circular dependencies are detected and throw an error.
+
+## Install and publish
+
+### Install from GitHub
+
+```bash
+kib skill install github:username/my-kib-skill
+kib skill install github:username/my-kib-skill#branch
+```
+
+### Install from npm
+
+```bash
+kib skill install @scope/my-kib-skill
+```
+
+### List installed skills
+
+```bash
+kib skill installed
+```
+
+### Uninstall
+
+```bash
+kib skill uninstall my-skill
+```
+
+### Publish
+
+Validate your skill for publishing:
+
+```bash
+kib skill publish my-skill
+```
+
+This checks that `skill.json` is valid, the entry point exists, and the skill definition passes schema validation. Then publish to npm:
+
+```bash
+cd .kb/skills/my-skill
+npm publish
+```
+
+## Example: a simple skill
+
+A skill that counts articles per category:
+
+```typescript
+import type { SkillContext } from "@kibhq/core";
+
+export default {
+  name: "category-stats",
+  version: "1.0.0",
+  description: "Count articles per category",
+
+  input: "vault" as const,
+  output: "stdout" as const,
+
+  async run(ctx: SkillContext) {
+    const counts: Record<string, number> = {};
+
+    for (const [, article] of Object.entries(ctx.vault.manifest.articles)) {
+      counts[article.category] = (counts[article.category] ?? 0) + 1;
+    }
+
+    const lines = Object.entries(counts)
+      .sort(([, a], [, b]) => b - a)
+      .map(([cat, count]) => `${cat}: ${count} articles`);
+
+    const content = `# Category Stats\n\n${lines.join("\n")}`;
+    ctx.logger.info(content);
+    return { content };
+  },
+};
+```
+
+No LLM needed — this skill just reads the manifest. Set `llm` only when you actually need it.
diff --git a/docs/vault-format.md b/docs/vault-format.md
new file mode 100644
index 0000000..ace7d14
--- /dev/null
+++ b/docs/vault-format.md
@@ -0,0 +1,269 @@
+# Vault Format Specification
+
+A kib vault is a directory containing raw source material, compiled wiki articles, and internal state. Everything is plain files — view in any editor, version with git, no lock-in.
+
+## Directory Structure
+
+```
+my-vault/
+├── .kb/                      # Internal state (managed by kib)
+│   ├── manifest.json         # Source tracking, compile state, stats
+│   ├── config.toml           # Vault configuration
+│   ├── vault.lock            # Process lock (prevents concurrent writes)
+│   ├── cache/                # LLM response cache, search index
+│   │   ├── responses/        # Cached LLM responses (keyed by hash)
+│   │   ├── search.idx        # BM25 search index
+│   │   └── vectors.idx       # Vector embeddings (if hybrid search enabled)
+│   ├── backups/              # Manifest backups (auto-created before destructive ops)
+│   ├── skills/               # Installed custom skills
+│   └── logs/                 # Operation logs
+├── raw/                      # Ingested source material (never modified by compile)
+│   ├── articles/             # Web pages, text content
+│   ├── papers/               # PDFs, academic papers
+│   ├── transcripts/          # YouTube/video transcripts
+│   ├── repos/                # GitHub repository summaries
+│   └── images/               # Image descriptions (extracted via vision model)
+├── wiki/                     # Compiled knowledge base (written by compile)
+│   ├── INDEX.md              # Master index: every article + summary + tags
+│   ├── GRAPH.md              # Article relationship adjacency list
+│   ├── LOG.md                # Human-readable operation log
+│   ├── images/               # Image assets (originals from ingested images)
+│   ├── concepts/             # Core concept articles
+│   ├── topics/               # Topic overviews and deep-dives
+│   ├── references/           # People, papers, organizations
+│   └── outputs/              # Query results filed as articles, skill outputs
+├── inbox/                    # Drop zone for `kib watch` (auto-ingested)
+└── CLAUDE.md                 # Auto-generated for AI agent discovery
+```
+
+## manifest.json
+
+The manifest is the source of truth for vault state. Schema version: `"1"`.
+
+```json
+{
+  "version": "1",
+  "vault": {
+    "name": "my-vault",
+    "created": "2024-01-15T10:00:00.000Z",
+    "lastCompiled": "2024-01-15T12:00:00.000Z",
+    "provider": "anthropic",
+    "model": "claude-sonnet-4-20250514"
+  },
+  "sources": {
+    "src_a1b2c3d4e5f6": {
+      "hash": "xxhash64-content-hash",
+      "ingestedAt": "2024-01-15T10:05:00.000Z",
+      "lastCompiled": "2024-01-15T12:00:00.000Z",
+      "sourceType": "web",
+      "originalUrl": "https://example.com/article",
+      "producedArticles": ["attention-mechanism", "transformer-architecture"],
+      "metadata": {
+        "title": "Attention Is All You Need",
+        "author": "Vaswani et al.",
+        "date": "2017-06-12",
+        "wordCount": 8500
+      }
+    }
+  },
+  "articles": {
+    "attention-mechanism": {
+      "hash": "xxhash64-article-hash",
+      "createdAt": "2024-01-15T12:00:00.000Z",
+      "lastUpdated": "2024-01-15T12:00:00.000Z",
+      "derivedFrom": ["src_a1b2c3d4e5f6"],
+      "backlinks": ["transformer-architecture"],
+      "forwardLinks": ["transformer-architecture", "self-attention"],
+      "tags": ["deep-learning", "nlp", "attention"],
+      "summary": "Core attention mechanism used in transformer models",
+      "wordCount": 450,
+      "category": "concept"
+    }
+  },
+  "stats": {
+    "totalSources": 1,
+    "totalArticles": 1,
+    "totalWords": 450,
+    "lastLintAt": null
+  }
+}
+```
+
+### Source types
+
+| Type | Category | Description |
+|------|----------|-------------|
+| `web` | `articles/` | Web pages extracted via readability |
+| `pdf` | `papers/` | PDF documents |
+| `youtube` | `transcripts/` | YouTube video transcripts |
+| `github` | `repos/` | GitHub repository README + structure |
+| `image` | `images/` | Image descriptions via vision model |
+| `file` | `articles/` | Local markdown/text files |
+
+### Article categories
+
+| Category | Directory | Description |
+|----------|-----------|-------------|
+| `concept` | `wiki/concepts/` | Core concepts and definitions |
+| `topic` | `wiki/topics/` | Topic overviews and deep-dives |
+| `reference` | `wiki/references/` | People, papers, organizations |
+| `output` | `wiki/outputs/` | Query results, skill outputs |
+
+## config.toml
+
+Vault configuration lives at `.kb/config.toml`:
+
+```toml
+[provider]
+default = "anthropic"
+model = "claude-sonnet-4-20250514"
+fast_model = "claude-haiku-4-5-20251001"
+
+[compile]
+auto_index = true
+auto_graph = true
+max_sources_per_pass = 10
+categories = ["concepts", "topics", "references", "outputs"]
+enrich_cross_refs = true
+max_enrich_articles = 10
+context_window = 200000
+max_source_tokens = 32000
+parallel = false
+max_parallel = 3
+# model = "gpt-4o"            # Override model for compile only
+
+[ingest]
+download_images = true
+max_file_size_mb = 50
+default_category = "articles"
+
+[watch]
+enabled = false
+inbox_path = "inbox"
+auto_compile = true
+poll_interval_ms = 2000
+
+[search]
+engine = "builtin"             # "builtin" (BM25), "vector", or "hybrid"
+max_results = 20
+
+[query]
+file_output = true
+auto_file = true
+auto_file_threshold = 3
+# model = "gpt-4o"            # Override model for query only
+
+[cache]
+enabled = true
+ttl_hours = 168                # 7 days
+max_size_mb = 500
+
+[skills]
+[skills.hooks]
+post-compile = []
+post-ingest = []
+post-lint = []
+
+[skills.config]
+# Per-skill configuration
+# [skills.config.my-skill]
+# key = "value"
+```
+
+## Raw source files
+
+Raw sources are markdown files with YAML frontmatter, stored in `raw/{category}/`:
+
+```markdown
+---
+title: "Attention Is All You Need"
+source_type: web
+original_url: "https://arxiv.org/abs/1706.03762"
+ingested_at: "2024-01-15T10:05:00.000Z"
+---
+
+# Attention Is All You Need
+
+The dominant sequence transduction models are based on complex
+recurrent or convolutional neural networks...
+```
+
+Raw files are **never modified by compile**. They're the immutable source of truth for ingested content.
+
+## Wiki articles
+
+Compiled articles have structured frontmatter:
+
+```markdown
+---
+title: "Attention Mechanism"
+slug: "attention-mechanism"
+category: concept
+tags: [deep-learning, nlp, attention, transformers]
+sources: [src_a1b2c3d4e5f6]
+created: "2024-01-15T12:00:00.000Z"
+updated: "2024-01-15T12:00:00.000Z"
+summary: "Core attention mechanism used in transformer models"
+---
+
+# Attention Mechanism
+
+The attention mechanism allows models to focus on relevant parts of
+the input sequence when producing each element of the output.
+
+## How It Works
+
+...
+
+## See Also
+
+- [[transformer-architecture]]
+- [[self-attention]]
+```
+
+### Wikilinks
+
+Articles reference each other using `[[slug]]` syntax. The compiler maintains these links and tracks them in `GRAPH.md` and in the manifest's `backlinks`/`forwardLinks` arrays.
+
+## INDEX.md
+
+Auto-generated table of contents with every article, its category, tags, and summary:
+
+```markdown
+# Knowledge Base Index
+
+## Concepts (3)
+- **[Attention Mechanism](concepts/attention-mechanism.md)** — Core attention mechanism used in transformer models `#deep-learning` `#nlp`
+- **[Self-Attention](concepts/self-attention.md)** — ...
+
+## Topics (2)
+- **[Transformer Architecture](topics/transformer-architecture.md)** — ...
+```
+
+## GRAPH.md
+
+Auto-generated adjacency list showing article relationships:
+
+```markdown
+# Knowledge Graph
+
+attention-mechanism → transformer-architecture, self-attention
+transformer-architecture → attention-mechanism, positional-encoding
+self-attention → attention-mechanism
+```
+
+## vault.lock
+
+Created when a process acquires exclusive access for writes (`compile`, `ingest`, `lint --fix`). It records the owning process's PID, a timestamp, and the operation name. The lock is removed automatically on release; a stale lock left behind by a dead process is detected and taken over.
+
+## Backups
+
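+Before destructive operations (`compile --force`), the manifest is copied to `.kb/backups/manifest-{timestamp}.json`, where `{timestamp}` is the ISO 8601 time with `:` and `.` replaced by `-` (for example `manifest-2024-01-15T12-00-00-000Z.json`). Only the 5 most recent backups are kept; older ones are pruned automatically.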
+
+## Integrity
+
+On every manifest load, the schema is validated via Zod. The `validateManifestIntegrity()` function checks that:
+- All source files referenced in manifest exist on disk
+- All article files referenced in manifest exist on disk
+- Cross-references between sources and articles are consistent
+- Stats (totalSources, totalArticles, totalWords) match actual counts
diff --git a/package.json b/package.json
index 9f0e6f8..810b766 100644
--- a/package.json
+++ b/package.json
@@ -8,6 +8,7 @@
 		"check": "biome check .",
 		"check:fix": "biome check --write .",
 		"test": "bun test",
+		"test:coverage": "bun test --coverage",
 		"build:ext": "bun run --filter @kibhq/extension build",
 		"build": "bun run --filter '*' build"
 	},
diff --git a/packages/core/src/backup.test.ts b/packages/core/src/backup.test.ts
new file mode 100644
index 0000000..f8c0122
--- /dev/null
+++ b/packages/core/src/backup.test.ts
@@ -0,0 +1,134 @@
+import { afterEach, describe, expect, test } from "bun:test";
+import { existsSync } from "node:fs";
+import { mkdtemp, readdir, readFile, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { createBackup, listBackups, pruneBackups, restoreBackup } from "./backup.js";
+import { initVault, loadManifest, saveManifest } from "./vault.js";
+
+let tempDir: string;
+
+afterEach(async () => {
+	if (tempDir) {
+		await rm(tempDir, { recursive: true, force: true });
+	}
+});
+
+async function makeTempVault() {
+	tempDir = await mkdtemp(join(tmpdir(), "kib-backup-test-"));
+	await initVault(tempDir, { name: "backup-test" });
+	return tempDir;
+}
+
+describe("createBackup", () => {
+	test("creates a backup of the manifest", async () => {
+		const dir = await makeTempVault();
+		const id = await createBackup(dir);
+
+		expect(id).toBeTruthy();
+		const backupPath = join(dir, ".kb", "backups", `manifest-${id}.json`);
+		expect(existsSync(backupPath)).toBe(true);
+
+		// Backup content should match current manifest
+		const original = await readFile(join(dir, ".kb", "manifest.json"), "utf-8");
+		const backup = await readFile(backupPath, "utf-8");
+		expect(JSON.parse(backup)).toEqual(JSON.parse(original));
+	});
+
+	test("creates multiple backups with unique IDs", async () => {
+		const dir = await makeTempVault();
+		const id1 = await createBackup(dir);
+		// Small delay to ensure unique timestamp
+		await new Promise((r) => setTimeout(r, 10));
+		const id2 = await createBackup(dir);
+
+		expect(id1).not.toBe(id2);
+
+		const backups = await readdir(join(dir, ".kb", "backups"));
+		expect(backups.length).toBe(2);
+	});
+});
+
+describe("listBackups", () => {
+	test("returns empty array when no backups", async () => {
+		const dir = await makeTempVault();
+		const backups = await listBackups(dir);
+		expect(backups).toEqual([]);
+	});
+
+	test("returns backups sorted most recent first", async () => {
+		const dir = await makeTempVault();
+		await createBackup(dir);
+		await new Promise((r) => setTimeout(r, 10));
+		await createBackup(dir);
+
+		const backups = await listBackups(dir);
+		expect(backups.length).toBe(2);
+		// Most recent first
+		expect(backups[0].id > backups[1].id).toBe(true);
+	});
+});
+
+describe("restoreBackup", () => {
+	test("restores manifest from backup", async () => {
+		const dir = await makeTempVault();
+
+		// Save original manifest state
+		const originalManifest = await loadManifest(dir);
+		expect(originalManifest.vault.name).toBe("backup-test");
+
+		// Create backup
+		const id = await createBackup(dir);
+
+		// Modify manifest
+		const modified = {
+			...originalManifest,
+			vault: { ...originalManifest.vault, name: "modified" },
+		};
+		await saveManifest(dir, modified);
+		const check = await loadManifest(dir);
+		expect(check.vault.name).toBe("modified");
+
+		// Restore
+		await restoreBackup(dir, id);
+		const restored = await loadManifest(dir);
+		expect(restored.vault.name).toBe("backup-test");
+	});
+
+	test("throws for nonexistent backup", async () => {
+		const dir = await makeTempVault();
+		await expect(restoreBackup(dir, "nonexistent")).rejects.toThrow("Backup not found");
+	});
+});
+
+describe("pruneBackups", () => {
+	test("keeps only the specified number of backups", async () => {
+		const dir = await makeTempVault();
+
+		// Create more backups than the limit
+		for (let i = 0; i < 4; i++) {
+			await createBackup(dir);
+			await new Promise((r) => setTimeout(r, 10));
+		}
+
+		const before = await listBackups(dir);
+		expect(before.length).toBe(4);
+
+		const removed = await pruneBackups(dir, 2);
+		expect(removed).toBe(2);
+
+		const after = await listBackups(dir);
+		expect(after.length).toBe(2);
+	});
+
+	test("does nothing when under limit", async () => {
+		const dir = await makeTempVault();
+		await createBackup(dir);
+
+		const removed = await pruneBackups(dir, 5);
+		expect(removed).toBe(0);
+
+		const after = await listBackups(dir);
+		expect(after.length).toBe(1);
+	});
+});
diff --git a/packages/core/src/backup.ts b/packages/core/src/backup.ts
new file mode 100644
index 0000000..4a3692a
--- /dev/null
+++ b/packages/core/src/backup.ts
@@ -0,0 +1,104 @@
+import { existsSync } from "node:fs";
+import { mkdir, readdir, readFile, rename, rm, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import { MANIFEST_FILE, VAULT_DIR } from "./constants.js";
+
+const BACKUPS_DIR = "backups";
+const MAX_BACKUPS = 5;
+
+export interface BackupEntry {
+	id: string;
+	timestamp: string;
+	manifestPath: string;
+}
+
+function backupsDir(root: string): string {
+	return join(root, VAULT_DIR, BACKUPS_DIR);
+}
+
+/**
+ * Snapshot the current manifest into the backups directory, then prune old backups.
+ * Returns the backup ID: the current ISO timestamp with ":" and "." replaced by "-".
+ */
+export async function createBackup(root: string): Promise<string> {
+	const targetDir = backupsDir(root);
+	await mkdir(targetDir, { recursive: true });
+
+	// The manifest is copied verbatim as text; it is not parsed here
+	const contents = await readFile(join(root, VAULT_DIR, MANIFEST_FILE), "utf-8");
+
+	// The ID doubles as the file-name suffix of the backup
+	const id = new Date().toISOString().replace(/[:.]/g, "-");
+	await writeFile(join(targetDir, `manifest-${id}.json`), contents, "utf-8");
+
+	// Drop everything beyond the newest MAX_BACKUPS backups
+	await pruneBackups(root, MAX_BACKUPS);
+
+	return id;
+}
+
+/**
+ * List manifest backups, newest first. Returns [] if the backups directory cannot be read.
+ */
+export async function listBackups(root: string): Promise<BackupEntry[]> {
+	const dir = backupsDir(root);
+	try {
+		const entries = await readdir(dir);
+		return entries
+			.filter((f) => f.startsWith("manifest-") && f.endsWith(".json"))
+			.map((f) => {
+				const id = f.slice("manifest-".length, -".json".length);
+				// Rebuild the ISO timestamp from the ID; non-matching IDs are kept as-is
+				const timestamp = id.replace(
+					/^(\d{4})-(\d{2})-(\d{2})T(\d{2})-(\d{2})-(\d{2})-(\d+)Z$/,
+					"$1-$2-$3T$4:$5:$6.$7Z",
+				);
+				return {
+					id,
+					timestamp,
+					manifestPath: join(dir, f),
+				};
+			})
+			.sort((a, b) => b.id.localeCompare(a.id));
+	} catch {
+		return [];
+	}
+}
+
+/**
+ * Replace the vault manifest with the backup identified by `backupId`.
+ * Returns the restored manifest JSON string; throws "Backup not found" if it does not exist.
+ */
+export async function restoreBackup(root: string, backupId: string): Promise<string> {
+	const dir = backupsDir(root);
+	const backupPath = join(dir, `manifest-${backupId}.json`);
+
+	if (!existsSync(backupPath)) {
+		throw new Error(`Backup not found: ${backupId}`);
+	}
+
+	const manifest = await readFile(backupPath, "utf-8");
+	const manifestPath = join(root, VAULT_DIR, MANIFEST_FILE);
+
+	// Write to a sibling .tmp file first, then rename it over the manifest so the
+	// live file is swapped in one step instead of being rewritten in place.
+	const tmp = `${manifestPath}.tmp`;
+	await writeFile(tmp, manifest, "utf-8");
+	await rename(tmp, manifestPath);
+
+	return manifest;
+}
+
+/**
+ * Delete all but the `keep` most recent backups and return how many were removed.
+ */
+export async function pruneBackups(root: string, keep = MAX_BACKUPS): Promise<number> {
+	const stale = (await listBackups(root)).slice(keep);
+
+	// listBackups() sorts newest first, so everything past index `keep` is stale
+	for (const { manifestPath } of stale) {
+		await rm(manifestPath, { force: true });
+	}
+
+	return stale.length;
+}
diff --git a/packages/core/src/bench.test.ts b/packages/core/src/bench.test.ts
new file mode 100644
index 0000000..88db773
--- /dev/null
+++ b/packages/core/src/bench.test.ts
@@ -0,0 +1,184 @@
+/**
+ * Performance benchmarks: measure search latency, compile throughput, and cold start.
+ */
+import { afterEach, describe, expect, test } from "bun:test";
+import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { compileVault } from "./compile/compiler.js";
+import { ingestSource } from "./ingest/ingest.js";
+import { SearchIndex } from "./search/engine.js";
+import type { CompletionParams, CompletionResult, LLMProvider, StreamChunk } from "./types.js";
+import { initVault, loadConfig, loadManifest } from "./vault.js";
+
+let tempDir: string;
+
+afterEach(async () => {
+	if (tempDir) await rm(tempDir, { recursive: true, force: true });
+});
+
+async function makeTempDir() {
+	tempDir = await mkdtemp(join(tmpdir(), "kib-bench-"));
+	return tempDir;
+}
+
+function createMockProvider(): LLMProvider {
+	return {
+		name: "mock",
+		async complete(_params: CompletionParams): Promise<CompletionResult> {
+			return {
+				content: "[]",
+				usage: { inputTokens: 100, outputTokens: 50 },
+				stopReason: "end_turn",
+			};
+		},
+		async *stream(): AsyncIterable<StreamChunk> {
+			yield { type: "text", text: "" };
+		},
+	};
+}
+
+function generateArticle(index: number): string {
+	const words = [
+		"neural",
+		"network",
+		"attention",
+		"transformer",
+		"embedding",
+		"gradient",
+		"optimization",
+		"regularization",
+		"convolution",
+		"recurrent",
+		"encoder",
+		"decoder",
+		"tokenizer",
+		"architecture",
+		"inference",
+	];
+	const tags = words.slice(index % 5, (index % 5) + 3);
+	const bodyWords = Array.from({ length: 200 }, (_, i) => words[(index + i) % words.length]).join(
+		" ",
+	);
+
+	return `---
+title: "Article ${index}"
+slug: article-${index}
+category: concept
+tags: [${tags.join(", ")}]
+sources: []
+created: "2026-01-01"
+updated: "2026-01-01"
+summary: "Article about ${words[index % words.length]}."
+---
+
+# Article ${index}: ${words[index % words.length]}
+
+${bodyWords}
+
+See also: [[article-${(index + 1) % 50}]]
+`;
+}
+
+describe("Performance benchmarks", () => {
+	test("vault init < 50ms", async () => {
+		const root = await makeTempDir();
+		const start = performance.now();
+		await initVault(root, { name: "bench" });
+		const elapsed = performance.now() - start;
+
+		expect(elapsed).toBeLessThan(50);
+		console.log(`  vault init: ${elapsed.toFixed(1)}ms`);
+	});
+
+	test("manifest load < 10ms", async () => {
+		const root = await makeTempDir();
+		await initVault(root, { name: "bench" });
+
+		const start = performance.now();
+		await loadManifest(root);
+		const elapsed = performance.now() - start;
+
+		expect(elapsed).toBeLessThan(10);
+		console.log(`  manifest load: ${elapsed.toFixed(1)}ms`);
+	});
+
+	test("search index build + query < 100ms for 50 articles", async () => {
+		const root = await makeTempDir();
+		await initVault(root, { name: "bench" });
+
+		// Write 50 articles to wiki (the module import is resolved once, outside the loop)
+		const { writeWiki } = await import("./vault.js");
+		for (let i = 0; i < 50; i++) {
+			await writeWiki(root, `concepts/article-${i}.md`, generateArticle(i));
+		}
+
+		// Build search index
+		const index = new SearchIndex();
+		const buildStart = performance.now();
+		await index.build(root);
+		const buildElapsed = performance.now() - buildStart;
+
+		expect(buildElapsed).toBeLessThan(100);
+		console.log(`  search index build (50 articles): ${buildElapsed.toFixed(1)}ms`);
+
+		// Search queries
+		const queries = ["attention mechanism", "transformer encoder", "gradient optimization"];
+		const queryTimes: number[] = [];
+
+		for (const q of queries) {
+			const start = performance.now();
+			const results = index.search(q);
+			const elapsed = performance.now() - start;
+			queryTimes.push(elapsed);
+			expect(results.length).toBeGreaterThan(0);
+		}
+
+		const avgQuery = queryTimes.reduce((a, b) => a + b, 0) / queryTimes.length;
+		expect(avgQuery).toBeLessThan(10);
+		console.log(`  search query avg (50 articles): ${avgQuery.toFixed(2)}ms`);
+	});
+
+	test("ingest 10 files < 500ms", async () => {
+		const root = await makeTempDir();
+		await initVault(root, { name: "bench" });
+
+		// Create 10 test files
+		const files: string[] = [];
+		for (let i = 0; i < 10; i++) {
+			const path = join(root, `source-${i}.md`);
+			await writeFile(
+				path,
+				`# Source ${i}\n\nContent for source ${i}. This has enough words to be meaningful.`,
+			);
+			files.push(path);
+		}
+
+		const start = performance.now();
+		for (const file of files) {
+			await ingestSource(root, file);
+		}
+		const elapsed = performance.now() - start;
+
+		expect(elapsed).toBeLessThan(500);
+		console.log(`  ingest 10 files: ${elapsed.toFixed(1)}ms`);
+
+		const manifest = await loadManifest(root);
+		expect(manifest.stats.totalSources).toBe(10);
+	});
+
+	test("compile no-op < 20ms (no pending sources)", async () => {
+		const root = await makeTempDir();
+		await initVault(root, { name: "bench" });
+		const config = await loadConfig(root);
+		const provider = createMockProvider();
+
+		const start = performance.now();
+		const result = await compileVault(root, provider, config);
+		const elapsed = performance.now() - start;
+
+		expect(result.sourcesCompiled).toBe(0);
+		expect(elapsed).toBeLessThan(20);
+		console.log(`  compile no-op: ${elapsed.toFixed(1)}ms`);
+	});
+});
diff --git a/packages/core/src/compile/compiler.ts b/packages/core/src/compile/compiler.ts
index 642a8a1..94fa761 100644
--- a/packages/core/src/compile/compiler.ts
+++ b/packages/core/src/compile/compiler.ts
@@ -1,7 +1,9 @@
 import { join } from "node:path";
+import { createBackup } from "../backup.js";
 import { DEFAULTS, GRAPH_FILE, INDEX_FILE } from "../constants.js";
 import { hash } from "../hash.js";
 import { countWords } from "../ingest/normalize.js";
+import { withLock } from "../lockfile.js";
 import type {
 	CompileResult,
 	FileOperation,
@@ -436,6 +438,26 @@ export async function compileVault(
 	provider: LLMProvider,
 	config: VaultConfig,
 	options: CompileOptions = {},
+): Promise<CompileResult> {
+	// Dry runs don't write — skip locking and backups
+	if (options.dryRun) {
+		return compileVaultInner(root, provider, config, options);
+	}
+
+	return withLock(root, "compile", async () => {
+		// Back up manifest before force-recompile (destructive operation)
+		if (options.force) {
+			await createBackup(root);
+		}
+		return compileVaultInner(root, provider, config, options);
+	});
+}
+
+async function compileVaultInner(
+	root: string,
+	provider: LLMProvider,
+	config: VaultConfig,
+	options: CompileOptions,
 ): Promise<CompileResult> {
 	const manifest = await loadManifest(root);
 
diff --git a/packages/core/src/e2e.test.ts b/packages/core/src/e2e.test.ts
new file mode 100644
index 0000000..48f98c9
--- /dev/null
+++ b/packages/core/src/e2e.test.ts
@@ -0,0 +1,338 @@
+/**
+ * End-to-end test: full init -> ingest -> compile -> search -> query -> lint lifecycle.
+ * Uses a mock LLM provider so no API keys are needed.
+ */
+import { afterEach, describe, expect, test } from "bun:test";
+import { existsSync } from "node:fs";
+import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { compileVault } from "./compile/compiler.js";
+import { ingestSource } from "./ingest/ingest.js";
+import { validateManifestIntegrity } from "./integrity.js";
+import { lintVault } from "./lint/lint.js";
+import { queryVault } from "./query/query.js";
+import { SearchIndex } from "./search/engine.js";
+import type { CompletionParams, CompletionResult, LLMProvider, StreamChunk } from "./types.js";
+import { initVault, loadConfig, loadManifest, readWiki } from "./vault.js";
+
+let tempDir: string;
+
+afterEach(async () => {
+	if (tempDir) await rm(tempDir, { recursive: true, force: true });
+});
+
+async function makeTempDir() {
+	tempDir = await mkdtemp(join(tmpdir(), "kib-e2e-"));
+	return tempDir;
+}
+
+// ─── Mock LLM provider ─────────────────────────────────────────
+
+function createMockProvider(responseMap: Record<string, string>): LLMProvider {
+	const defaultResponse = "No relevant information found.";
+	return {
+		name: "mock",
+		async complete(params: CompletionParams): Promise<CompletionResult> {
+			// Check if any key in responseMap matches part of the user message
+			const userMsg = params.messages[params.messages.length - 1]?.content ?? "";
+			let content = defaultResponse;
+			for (const [key, value] of Object.entries(responseMap)) {
+				if (userMsg.includes(key) || params.system.includes(key)) {
+					content = value;
+					break;
+				}
+			}
+			return {
+				content,
+				usage: { inputTokens: 100, outputTokens: 200 },
+				stopReason: "end_turn",
+			};
+		},
+		async *stream(params: CompletionParams): AsyncIterable<StreamChunk> {
+			const result = await this.complete(params);
+			yield { type: "text", text: result.content };
+			yield { type: "usage", usage: result.usage };
+		},
+	};
+}
+
+// ─── Mock article responses ────────────────────────────────────
+
+const ARTICLE_ATTENTION = `---
+title: "Attention Mechanism"
+slug: attention-mechanism
+category: concept
+tags: [deep-learning, nlp, attention]
+sources: []
+created: "2026-01-01"
+updated: "2026-01-01"
+summary: "Core attention mechanism enabling models to focus on relevant input parts."
+---
+
+# Attention Mechanism
+
+The attention mechanism allows neural networks to focus on relevant parts of the input when producing output. It computes weighted sums of value vectors, where weights are derived from query-key compatibility.
+
+## Types
+
+- **Self-attention**: queries, keys, and values all come from the same sequence
+- **Cross-attention**: queries from one sequence, keys/values from another
+
+## See Also
+
+- [[transformer-architecture]]`;
+
+const ARTICLE_TRANSFORMER = `---
+title: "Transformer Architecture"
+slug: transformer-architecture
+category: concept
+tags: [deep-learning, nlp, transformer]
+sources: []
+created: "2026-01-01"
+updated: "2026-01-01"
+summary: "Neural network architecture based on self-attention, replacing recurrence."
+---
+
+# Transformer Architecture
+
+The transformer architecture replaces recurrent layers with self-attention, enabling parallel training and superior sequence modeling.
+
+## Components
+
+- Multi-head [[attention-mechanism]]
+- Positional encoding
+- Feed-forward layers
+- Layer normalization
+
+## See Also
+
+- [[attention-mechanism]]`;
+
+const COMPILE_RESPONSE_1 = JSON.stringify([
+	{
+		op: "create",
+		path: "wiki/concepts/attention-mechanism.md",
+		content: ARTICLE_ATTENTION,
+	},
+]);
+
+const COMPILE_RESPONSE_2 = JSON.stringify([
+	{
+		op: "create",
+		path: "wiki/concepts/transformer-architecture.md",
+		content: ARTICLE_TRANSFORMER,
+	},
+]);
+
+const QUERY_RESPONSE =
+	"The attention mechanism computes weighted sums of value vectors using query-key compatibility scores. It enables models to focus on relevant parts of the input sequence. [Source: attention-mechanism]";
+
+const ENRICHMENT_RESPONSE = JSON.stringify([]);
+
+// ─── E2E Tests ─────────────────────────────────────────────────
+
+describe("E2E: full vault lifecycle", () => {
+	test("init -> ingest -> compile -> search -> query -> lint", async () => {
+		const root = await makeTempDir();
+
+		// ── Step 1: Init ──────────────────────────────────────
+		const { manifest, config } = await initVault(root, { name: "e2e-test" });
+		expect(manifest.vault.name).toBe("e2e-test");
+		expect(manifest.version).toBe("1");
+		expect(existsSync(join(root, ".kb"))).toBe(true);
+		expect(existsSync(join(root, "raw"))).toBe(true);
+		expect(existsSync(join(root, "wiki"))).toBe(true);
+		expect(existsSync(join(root, "inbox"))).toBe(true);
+
+		// ── Step 2: Ingest two sources ────────────────────────
+		const file1 = join(root, "attention.md");
+		await writeFile(
+			file1,
+			"# Attention Mechanism\n\nThe attention mechanism allows models to focus on relevant parts of the input.",
+		);
+
+		const file2 = join(root, "transformers.md");
+		await writeFile(
+			file2,
+			"# Transformer Architecture\n\nThe transformer uses self-attention to process sequences in parallel.",
+		);
+
+		const ingest1 = await ingestSource(root, file1);
+		expect(ingest1.skipped).toBe(false);
+		expect(ingest1.sourceType).toBe("file");
+		expect(ingest1.path).toContain("raw/");
+
+		const ingest2 = await ingestSource(root, file2);
+		expect(ingest2.skipped).toBe(false);
+
+		// Verify manifest updated
+		const postIngestManifest = await loadManifest(root);
+		expect(postIngestManifest.stats.totalSources).toBe(2);
+
+		// Dedup: re-ingesting same content should skip
+		const dup = await ingestSource(root, file1);
+		expect(dup.skipped).toBe(true);
+		expect(dup.skipReason).toContain("Duplicate");
+
+		// ── Step 3: Compile ───────────────────────────────────
+		const provider = createMockProvider({
+			"Attention Mechanism": COMPILE_RESPONSE_1,
+			"Transformer Architecture": COMPILE_RESPONSE_2,
+			enrich: ENRICHMENT_RESPONSE,
+		});
+
+		const compileResult = await compileVault(root, provider, config);
+		expect(compileResult.sourcesCompiled).toBe(2);
+		expect(compileResult.articlesCreated).toBe(2);
+
+		// Verify articles on disk
+		const article1 = await readWiki(root, "concepts/attention-mechanism.md");
+		expect(article1).toContain("Attention Mechanism");
+		expect(article1).toContain("Self-attention");
+
+		const article2 = await readWiki(root, "concepts/transformer-architecture.md");
+		expect(article2).toContain("Transformer Architecture");
+		expect(article2).toContain("[[attention-mechanism]]");
+
+		// Verify INDEX.md and GRAPH.md
+		expect(existsSync(join(root, "wiki", "INDEX.md"))).toBe(true);
+		expect(existsSync(join(root, "wiki", "GRAPH.md"))).toBe(true);
+
+		const index = await readWiki(root, "INDEX.md");
+		expect(index).toContain("Attention Mechanism");
+		expect(index).toContain("Transformer Architecture");
+
+		// Verify manifest stats
+		const postCompileManifest = await loadManifest(root);
+		expect(postCompileManifest.stats.totalArticles).toBe(2);
+		expect(postCompileManifest.vault.lastCompiled).not.toBeNull();
+		expect(postCompileManifest.articles["attention-mechanism"]).toBeDefined();
+		expect(postCompileManifest.articles["transformer-architecture"]).toBeDefined();
+
+		// Verify token usage tracked
+		expect(compileResult.tokenUsage).toBeDefined();
+		expect(compileResult.tokenUsage!.totalInputTokens).toBeGreaterThan(0);
+
+		// ── Step 4: Incremental compile (no-op) ───────────────
+		const noopResult = await compileVault(root, provider, config);
+		expect(noopResult.sourcesCompiled).toBe(0);
+
+		// ── Step 5: Search ────────────────────────────────────
+		const searchIndex = new SearchIndex();
+		await searchIndex.build(root);
+
+		const searchResults = searchIndex.search("attention mechanism");
+		expect(searchResults.length).toBeGreaterThan(0);
+		expect(searchResults[0]!.title).toContain("Attention");
+
+		// Phrase search
+		const phraseResults = searchIndex.search('"self-attention"');
+		expect(phraseResults.length).toBeGreaterThan(0);
+
+		// An unrelated query must not return more hits than the relevant one
+		const weakResults = searchIndex.search("zygomorphic paleontology");
+		expect(weakResults.length).toBeLessThanOrEqual(searchResults.length);
+
+		// ── Step 6: Query (RAG) ───────────────────────────────
+		const queryProvider = createMockProvider({
+			"": QUERY_RESPONSE,
+		});
+
+		const queryResult = await queryVault(root, "How does attention work?", queryProvider, config);
+		expect(queryResult.answer).toContain("attention");
+
+		// ── Step 7: Lint ──────────────────────────────────────
+		const lintResult = await lintVault(root);
+		// The two articles link to each other and both exist, so their wikilinks should not
+		// produce "missing article" issues; this step only asserts that lint returns diagnostics
+		expect(lintResult.diagnostics).toBeDefined();
+
+		// ── Step 8: Manifest integrity ────────────────────────
+		const finalManifest = await loadManifest(root);
+		const integrityIssues = await validateManifestIntegrity(root, finalManifest);
+		// Tolerate non-error issues (stats may drift, e.g. INDEX.md/GRAPH.md are not articles)
+		const errors = integrityIssues.filter((i) => i.severity === "error");
+		expect(errors.length).toBe(0); // No missing files
+	});
+
+	test("dry-run ingest and compile do not modify vault", async () => {
+		const root = await makeTempDir();
+		await initVault(root, { name: "dry-run-test" });
+
+		const file = join(root, "test.md");
+		await writeFile(file, "# Test\n\nSome content.");
+
+		// Dry-run ingest
+		const dryIngest = await ingestSource(root, file, { dryRun: true });
+		expect(dryIngest.skipped).toBe(false);
+		expect(dryIngest.path).toContain("raw/");
+
+		// Manifest should still have 0 sources
+		const manifest = await loadManifest(root);
+		expect(manifest.stats.totalSources).toBe(0);
+
+		// Real ingest
+		await ingestSource(root, file);
+		const manifest2 = await loadManifest(root);
+		expect(manifest2.stats.totalSources).toBe(1);
+
+		// Dry-run compile
+		const provider = createMockProvider({
+			"": JSON.stringify([
+				{
+					op: "create",
+					path: "wiki/concepts/test.md",
+					content:
+						'---\ntitle: Test\nslug: test\ncategory: concept\ntags: []\nsources: []\ncreated: "2026-01-01"\nupdated: "2026-01-01"\nsummary: A test.\n---\n\n# Test\n\nContent.',
+				},
+			]),
+		});
+
+		const config = await loadConfig(root);
+		const dryCompile = await compileVault(root, provider, config, { dryRun: true });
+		expect(dryCompile.sourcesCompiled).toBe(1);
+		expect(dryCompile.articlesCreated).toBe(1);
+
+		// But no article on disk
+		expect(existsSync(join(root, "wiki", "concepts", "test.md"))).toBe(false);
+
+		// Manifest not updated
+		const manifest3 = await loadManifest(root);
+		expect(manifest3.vault.lastCompiled).toBeNull();
+	});
+
+	test("force compile creates backup", async () => {
+		const root = await makeTempDir();
+		await initVault(root, { name: "backup-test" });
+
+		const file = join(root, "test.md");
+		await writeFile(file, "# Test\n\nContent.");
+		await ingestSource(root, file);
+
+		const provider = createMockProvider({
+			"": JSON.stringify([
+				{
+					op: "create",
+					path: "wiki/concepts/test.md",
+					content:
+						'---\ntitle: Test\nslug: test\ncategory: concept\ntags: []\nsources: []\ncreated: "2026-01-01"\nupdated: "2026-01-01"\nsummary: A test.\n---\n\n# Test\n\nContent.',
+				},
+			]),
+		});
+
+		const config = await loadConfig(root);
+
+		// First compile
+		await compileVault(root, provider, config);
+
+		// Force recompile — should create backup
+		await compileVault(root, provider, config, { force: true });
+
+		// Check backup exists
+		expect(existsSync(join(root, ".kb", "backups"))).toBe(true);
+		const { listBackups } = await import("./backup.js");
+		const backups = await listBackups(root);
+		expect(backups.length).toBeGreaterThan(0);
+	});
+});
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index da7a995..8d55863 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -1,3 +1,5 @@
+export type { BackupEntry } from "./backup.js";
+export { createBackup, listBackups, pruneBackups, restoreBackup } from "./backup.js";
 export { buildLinkGraph, generateGraphMd } from "./compile/backlinks.js";
 export { CompileCache } from "./compile/cache.js";
 export type { ArticleEvent, CompileOptions } from "./compile/compiler.js";
@@ -11,10 +13,15 @@ export * from "./hash.js";
 export { ingestSource } from "./ingest/ingest.js";
 export { countWords, slugify } from "./ingest/normalize.js";
 export { detectSourceType } from "./ingest/router.js";
+export type { IntegrityIssue } from "./integrity.js";
+export { validateManifestIntegrity } from "./integrity.js";
 export { fixLintIssues, lintVault } from "./lint/lint.js";
 export { ALL_RULES } from "./lint/rules.js";
+export { acquireLock, isLocked, releaseLock, VaultLockError, withLock } from "./lockfile.js";
 export { createProvider, detectProvider } from "./providers/router.js";
 export { queryVault } from "./query/query.js";
+export type { RecoveryIssue } from "./recovery.js";
+export { detectIssues, repairVault } from "./recovery.js";
 export * from "./schemas.js";
 export { highlightSnippet, parseQuery, SearchIndex } from "./search/engine.js";
 export { HybridSearch } from "./search/hybrid.js";
diff --git a/packages/core/src/ingest/ingest.ts b/packages/core/src/ingest/ingest.ts
index 291288e..a67e03d 100644
--- a/packages/core/src/ingest/ingest.ts
+++ b/packages/core/src/ingest/ingest.ts
@@ -1,4 +1,5 @@
 import { hash } from "../hash.js";
+import { withLock } from "../lockfile.js";
 import type { IngestResult, LLMProvider, Manifest, SourceEntry, SourceType } from "../types.js";
 import { appendLog, loadManifest, saveManifest, writeImageAsset, writeRaw } from "../vault.js";
 import type { Extractor } from "./extractors/interface.js";
@@ -41,31 +42,27 @@ export async function ingestSource(
 	// Get the extractor for this source type
 	const extractor = await getExtractor(sourceType, options.provider);
 
-	// Extract content
+	// Extract content (doesn't touch the vault — safe outside the lock)
 	const extracted = await extractor.extract(uri, { title: options.title, tags: options.tags });
 
 	// Hash the extracted content for dedup
 	const contentHash = await hash(extracted.content);
 
-	// Load manifest and check for duplicates
-	const manifest = await loadManifest(root);
-
-	// Check if we already have this exact content
-	const existingSource = findExistingSource(manifest, uri, contentHash);
-	if (existingSource) {
-		return {
-			sourceId: existingSource.id,
-			path: existingSource.path,
-			sourceType,
-			title: extracted.title,
-			wordCount: countWords(extracted.content),
-			skipped: true,
-			skipReason: "Duplicate content (same hash already ingested)",
-		};
-	}
-
-	// Dry run — return what would be ingested without writing
+	// Dry run — no writes needed
 	if (options.dryRun) {
+		const manifest = await loadManifest(root);
+		const existingSource = findExistingSource(manifest, uri, contentHash);
+		if (existingSource) {
+			return {
+				sourceId: existingSource.id,
+				path: existingSource.path,
+				sourceType,
+				title: extracted.title,
+				wordCount: countWords(extracted.content),
+				skipped: true,
+				skipReason: "Duplicate content (same hash already ingested)",
+			};
+		}
 		const category = options.category ?? categoryForType(sourceType);
 		const slug = slugify(extracted.title);
 		return {
@@ -78,74 +75,94 @@ export async function ingestSource(
 		};
 	}
 
-	// Normalize content with frontmatter
-	const normalizedContent = normalizeSource({
-		title: extracted.title,
-		content: extracted.content,
-		sourceType,
-		originalUrl: isUrl(uri) ? uri : undefined,
-		metadata: extracted.metadata,
-	});
+	// Acquire vault lock for the write phase
+	return withLock(root, "ingest", async () => {
+		// Load manifest and check for duplicates
+		const manifest = await loadManifest(root);
+
+		// Check if we already have this exact content
+		const existingSource = findExistingSource(manifest, uri, contentHash);
+		if (existingSource) {
+			return {
+				sourceId: existingSource.id,
+				path: existingSource.path,
+				sourceType,
+				title: extracted.title,
+				wordCount: countWords(extracted.content),
+				skipped: true,
+				skipReason: "Duplicate content (same hash already ingested)",
+			};
+		}
 
-	// Determine file path within raw/
-	const category = options.category ?? categoryForType(sourceType);
-	const slug = slugify(extracted.title);
-	const relativePath = `${category}/${slug}.md`;
+		// Normalize content with frontmatter
+		const normalizedContent = normalizeSource({
+			title: extracted.title,
+			content: extracted.content,
+			sourceType,
+			originalUrl: isUrl(uri) ? uri : undefined,
+			metadata: extracted.metadata,
+		});
 
-	// Write to raw/
-	await writeRaw(root, relativePath, normalizedContent);
+		// Determine file path within raw/
+		const category = options.category ?? categoryForType(sourceType);
+		const slug = slugify(extracted.title);
+		const relativePath = `${category}/${slug}.md`;
 
-	// For images, also save the original binary to wiki/images/ for article references
-	if (sourceType === "image" && extracted.metadata.imageBuffer) {
-		const ext = (extracted.metadata.fileType as string) ?? ".png";
-		const imageFilename = `${slug}${ext}`;
-		await writeImageAsset(root, imageFilename, extracted.metadata.imageBuffer as Buffer);
-	}
+		// Write to raw/
+		await writeRaw(root, relativePath, normalizedContent);
 
-	// Generate a source ID
-	const sourceId = `src_${contentHash.slice(0, 12)}`;
+		// For images, also save the original binary to wiki/images/ for article references
+		if (sourceType === "image" && extracted.metadata.imageBuffer) {
+			const ext = (extracted.metadata.fileType as string) ?? ".png";
+			const imageFilename = `${slug}${ext}`;
+			await writeImageAsset(root, imageFilename, extracted.metadata.imageBuffer as Buffer);
+		}
 
-	// Update manifest
-	const now = new Date().toISOString();
-	const wordCount = countWords(extracted.content);
+		// Generate a source ID
+		const sourceId = `src_${contentHash.slice(0, 12)}`;
 
-	// Build metadata, including image asset path for image sources
-	const sourceMetadata: SourceEntry["metadata"] = {
-		title: extracted.title,
-		author: extracted.metadata.author as string | undefined,
-		date: extracted.metadata.date as string | undefined,
-		wordCount,
-	};
+		// Update manifest
+		const now = new Date().toISOString();
+		const wordCount = countWords(extracted.content);
 
-	if (sourceType === "image" && extracted.metadata.fileType) {
-		const ext = extracted.metadata.fileType as string;
-		sourceMetadata.imageAsset = `images/${slug}${ext}`;
-	}
+		// Build metadata, including image asset path for image sources
+		const sourceMetadata: SourceEntry["metadata"] = {
+			title: extracted.title,
+			author: extracted.metadata.author as string | undefined,
+			date: extracted.metadata.date as string | undefined,
+			wordCount,
+		};
+
+		if (sourceType === "image" && extracted.metadata.fileType) {
+			const ext = extracted.metadata.fileType as string;
+			sourceMetadata.imageAsset = `images/${slug}${ext}`;
+		}
+
+		const sourceEntry: SourceEntry = {
+			hash: contentHash,
+			ingestedAt: now,
+			lastCompiled: null,
+			sourceType,
+			originalUrl: isUrl(uri) ? uri : undefined,
+			producedArticles: [],
+			metadata: sourceMetadata,
+		};
 
-	const sourceEntry: SourceEntry = {
-		hash: contentHash,
-		ingestedAt: now,
-		lastCompiled: null,
-		sourceType,
-		originalUrl: isUrl(uri) ? uri : undefined,
-		producedArticles: [],
-		metadata: sourceMetadata,
-	};
-
-	manifest.sources[sourceId] = sourceEntry;
-	manifest.stats.totalSources = Object.keys(manifest.sources).length;
-
-	await saveManifest(root, manifest);
-	await appendLog(root, "ingest", `"${extracted.title}" (${sourceType}) → raw/${relativePath}`);
-
-	return {
-		sourceId,
-		path: `raw/${relativePath}`,
-		sourceType,
-		title: extracted.title,
-		wordCount,
-		skipped: false,
-	};
+		manifest.sources[sourceId] = sourceEntry;
+		manifest.stats.totalSources = Object.keys(manifest.sources).length;
+
+		await saveManifest(root, manifest);
+		await appendLog(root, "ingest", `"${extracted.title}" (${sourceType}) → raw/${relativePath}`);
+
+		return {
+			sourceId,
+			path: `raw/${relativePath}`,
+			sourceType,
+			title: extracted.title,
+			wordCount,
+			skipped: false,
+		};
+	});
 }
 
 async function getExtractor(sourceType: SourceType, provider?: LLMProvider): Promise<Extractor> {
diff --git a/packages/core/src/integrity.test.ts b/packages/core/src/integrity.test.ts
new file mode 100644
index 0000000..39a3053
--- /dev/null
+++ b/packages/core/src/integrity.test.ts
@@ -0,0 +1,179 @@
+import { afterEach, describe, expect, test } from "bun:test";
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { validateManifestIntegrity } from "./integrity.js";
+import { initVault, loadManifest, saveManifest, writeRaw, writeWiki } from "./vault.js";
+
+let tempDir: string;
+
+afterEach(async () => {
+	// Clean up the vault left behind by the test that just ran, if one was created.
+	if (!tempDir) return;
+	await rm(tempDir, { recursive: true, force: true });
+});
+
+async function makeTempVault() { // fresh initialised vault in the OS temp dir; returns its path
+	tempDir = await mkdtemp(join(tmpdir(), "kib-integrity-test-")); // kept for afterEach cleanup
+	await initVault(tempDir, { name: "integrity-test" });
+	return tempDir;
+}
+
+describe("validateManifestIntegrity", () => {
+	test("returns empty for consistent empty vault", async () => {
+		const dir = await makeTempVault();
+		const manifest = await loadManifest(dir);
+		const issues = await validateManifestIntegrity(dir, manifest);
+		expect(issues).toEqual([]);
+	});
+
+	test("detects missing source file", async () => {
+		const dir = await makeTempVault();
+		const manifest = await loadManifest(dir);
+
+		// Register a source in the manifest without ever writing its raw file
+		manifest.sources.src_ghost123 = {
+			hash: "abc123",
+			ingestedAt: new Date().toISOString(),
+			lastCompiled: null,
+			sourceType: "web",
+			producedArticles: [],
+			metadata: { title: "Ghost Article", wordCount: 100 },
+		};
+		manifest.stats.totalSources = 1; // keep stats consistent so only the missing file is reported
+		await saveManifest(dir, manifest);
+
+		const issues = await validateManifestIntegrity(dir, manifest);
+		expect(issues.some((i) => i.category === "missing_file" && i.severity === "error")).toBe(true);
+	});
+
+	test("detects missing article file", async () => {
+		const dir = await makeTempVault();
+		const manifest = await loadManifest(dir);
+
+		manifest.articles["ghost-article"] = {
+			hash: "abc123",
+			createdAt: new Date().toISOString(),
+			lastUpdated: new Date().toISOString(),
+			derivedFrom: [],
+			backlinks: [],
+			forwardLinks: [],
+			tags: [],
+			summary: "A ghost article",
+			wordCount: 100,
+			category: "concept",
+		};
+		manifest.stats.totalArticles = 1;
+		manifest.stats.totalWords = 100; // must equal the sum of article wordCount values
+		await saveManifest(dir, manifest);
+
+		const issues = await validateManifestIntegrity(dir, manifest);
+		expect(
+			issues.some((i) => i.category === "missing_file" && i.message.includes("ghost-article")),
+		).toBe(true);
+	});
+
+	test("detects broken source→article reference", async () => {
+		const dir = await makeTempVault();
+		const manifest = await loadManifest(dir);
+
+		// The raw file exists (title "Real Source", type "web" → articles/real-source.md); only the article is missing
+		await writeRaw(dir, "articles/real-source.md", "# Real Source");
+		manifest.sources.src_real123 = {
+			hash: "abc123",
+			ingestedAt: new Date().toISOString(),
+			lastCompiled: null,
+			sourceType: "web",
+			producedArticles: ["nonexistent-article"],
+			metadata: { title: "Real Source", wordCount: 50 },
+		};
+		manifest.stats.totalSources = 1;
+		await saveManifest(dir, manifest);
+
+		const issues = await validateManifestIntegrity(dir, manifest);
+		expect(
+			issues.some(
+				(i) => i.category === "broken_reference" && i.message.includes("nonexistent-article"),
+			),
+		).toBe(true);
+	});
+
+	test("detects broken article→source reference", async () => {
+		const dir = await makeTempVault();
+		const manifest = await loadManifest(dir);
+
+		await writeWiki(dir, "concepts/real-article.md", "# Real Article"); // file exists; only the source id dangles
+		manifest.articles["real-article"] = {
+			hash: "abc123",
+			createdAt: new Date().toISOString(),
+			lastUpdated: new Date().toISOString(),
+			derivedFrom: ["src_nonexistent"],
+			backlinks: [],
+			forwardLinks: [],
+			tags: [],
+			summary: "Real article",
+			wordCount: 50,
+			category: "concept",
+		};
+		manifest.stats.totalArticles = 1;
+		manifest.stats.totalWords = 50;
+		await saveManifest(dir, manifest);
+
+		const issues = await validateManifestIntegrity(dir, manifest);
+		expect(
+			issues.some((i) => i.category === "broken_reference" && i.message.includes("nonexistent")),
+		).toBe(true);
+	});
+
+	test("detects stats mismatch", async () => {
+		const dir = await makeTempVault();
+		const manifest = await loadManifest(dir);
+
+		manifest.stats.totalSources = 5;
+		manifest.stats.totalArticles = 10;
+		manifest.stats.totalWords = 5000;
+		await saveManifest(dir, manifest);
+
+		const issues = await validateManifestIntegrity(dir, manifest);
+		const mismatches = issues.filter((i) => i.category === "stats_mismatch");
+		expect(mismatches.length).toBe(3); // one each for sources, articles, and words
+	});
+
+	test("passes for consistent vault with files", async () => {
+		const dir = await makeTempVault();
+		const manifest = await loadManifest(dir);
+
+		// Source with a real file — the validator derives articles/test-source.md from title + type
+		await writeRaw(dir, "articles/test-source.md", "# Test Source\n\nSome content here.");
+		manifest.sources.src_test123 = {
+			hash: "abc123",
+			ingestedAt: new Date().toISOString(),
+			lastCompiled: new Date().toISOString(),
+			sourceType: "web",
+			producedArticles: ["test-article"],
+			metadata: { title: "Test Source", wordCount: 5 },
+		};
+
+		await writeWiki(dir, "concepts/test-article.md", "# Test Article\n\nCompiled content.");
+		manifest.articles["test-article"] = {
+			hash: "def456",
+			createdAt: new Date().toISOString(),
+			lastUpdated: new Date().toISOString(),
+			derivedFrom: ["src_test123"],
+			backlinks: [],
+			forwardLinks: [],
+			tags: ["test"],
+			summary: "A test article",
+			wordCount: 3,
+			category: "concept",
+		};
+
+		manifest.stats.totalSources = 1;
+		manifest.stats.totalArticles = 1;
+		manifest.stats.totalWords = 3; // equals the single article's wordCount
+		await saveManifest(dir, manifest);
+
+		const issues = await validateManifestIntegrity(dir, manifest);
+		expect(issues).toEqual([]);
+	});
+});
diff --git a/packages/core/src/integrity.ts b/packages/core/src/integrity.ts
new file mode 100644
index 0000000..1ef5d25
--- /dev/null
+++ b/packages/core/src/integrity.ts
@@ -0,0 +1,155 @@
+import { existsSync } from "node:fs";
+import { readdir } from "node:fs/promises";
+import { join } from "node:path";
+import { RAW_DIR, WIKI_DIR } from "./constants.js";
+import type { Manifest } from "./types.js";
+
+export interface IntegrityIssue { // one finding returned by validateManifestIntegrity
+	severity: "error" | "warning"; // this file uses "error" for missing files, "warning" otherwise
+	category: "missing_file" | "orphan_file" | "stats_mismatch" | "broken_reference"; // NOTE(review): "orphan_file" is never emitted in this file
+	message: string; // human-readable description
+	path?: string; // joined file path, article slug, or source id; omitted for stats issues
+}
+
+/**
+ * Cross-check a manifest against the vault on disk and against itself. Returns missing
+ * source/article files as errors, dangling source↔article references and stale stats as
+ * warnings. Read-only: nothing is repaired, and files the manifest omits are not scanned for.
+ */
+export async function validateManifestIntegrity(
+	root: string,
+	manifest: Manifest,
+): Promise<IntegrityIssue[]> {
+	const issues: IntegrityIssue[] = [];
+
+	// Each source entry: its raw file must exist at the location re-derived from title + type
+	for (const [sourceId, source] of Object.entries(manifest.sources)) {
+		const rawPath = deriveRawPath(source.metadata.title ?? sourceId, source.sourceType);
+		const sourcePath = join(root, RAW_DIR, rawPath);
+		if (!existsSync(sourcePath)) {
+			issues.push({
+				severity: "error",
+				category: "missing_file",
+				message: `Source file missing from disk: ${sourceId} (expected ${rawPath})`,
+				path: sourcePath, // joined file path; other issues carry a slug or source id instead
+			});
+		}
+
+		// Every article this source claims to have produced must have a manifest entry
+		for (const articleSlug of source.producedArticles) {
+			if (!manifest.articles[articleSlug]) {
+				issues.push({
+					severity: "warning",
+					category: "broken_reference",
+					message: `Source "${sourceId}" references article "${articleSlug}" which doesn't exist in manifest`,
+					path: sourceId,
+				});
+			}
+		}
+	}
+
+	// Each article entry: a matching <slug>.md must exist in some wiki subdirectory
+	for (const [slug, article] of Object.entries(manifest.articles)) {
+		const found = await findArticleFile(root, slug);
+		if (!found) {
+			issues.push({
+				severity: "error",
+				category: "missing_file",
+				message: `Article file missing from disk: ${slug}`,
+				path: slug,
+			});
+		}
+
+		// Every source this article claims to derive from must have a manifest entry
+		for (const sourceId of article.derivedFrom) {
+			if (!manifest.sources[sourceId]) {
+				issues.push({
+					severity: "warning",
+					category: "broken_reference",
+					message: `Article "${slug}" references source "${sourceId}" which doesn't exist in manifest`,
+					path: slug,
+				});
+			}
+		}
+	}
+
+	// Stats counters must equal what the manifest's own entries add up to (disk is not consulted)
+	const actualSourceCount = Object.keys(manifest.sources).length;
+	const actualArticleCount = Object.keys(manifest.articles).length;
+	const actualWordCount = Object.values(manifest.articles).reduce((sum, a) => sum + a.wordCount, 0);
+
+	if (manifest.stats.totalSources !== actualSourceCount) {
+		issues.push({
+			severity: "warning",
+			category: "stats_mismatch",
+			message: `Stats say ${manifest.stats.totalSources} sources, but manifest has ${actualSourceCount}`,
+		});
+	}
+
+	if (manifest.stats.totalArticles !== actualArticleCount) {
+		issues.push({
+			severity: "warning",
+			category: "stats_mismatch",
+			message: `Stats say ${manifest.stats.totalArticles} articles, but manifest has ${actualArticleCount}`,
+		});
+	}
+
+	if (manifest.stats.totalWords !== actualWordCount) {
+		issues.push({
+			severity: "warning",
+			category: "stats_mismatch",
+			message: `Stats say ${manifest.stats.totalWords} words, but article entries sum to ${actualWordCount}`,
+		});
+	}
+
+	return issues;
+}
+
+// ─── Helpers ────────────────────────────────────────────────────
+
+/**
+ * Rebuild where a source's markdown file is expected, relative to the raw directory:
+ * `<category>/<slug>.md`. Category comes from the source type (anything unlisted falls
+ * back to "articles"). The slug is the lowercased title with characters other than a-z,
+ * 0-9, whitespace and "-" dropped, whitespace runs turned into "-", repeated "-" collapsed,
+ * one leading/trailing "-" trimmed, then cut to 80 characters.
+ * NOTE(review): meant to mirror ingest's naming — confirm it matches slugify/categoryForType.
+ */
+function deriveRawPath(title: string, sourceType: string): string {
+	const folders = new Map<string, string>([
+		["pdf", "papers"],
+		["youtube", "transcripts"],
+		["github", "repos"],
+		["image", "images"],
+	]);
+	const folder = folders.get(sourceType) ?? "articles";
+
+	const lowered = title.toLowerCase();
+	const stripped = lowered.replace(/[^a-z0-9\s-]/g, "");
+	const dashed = stripped.replace(/\s+/g, "-").replace(/-+/g, "-");
+	const trimmed = dashed.replace(/^-|-$/g, "");
+	const stem = trimmed.slice(0, 80);
+
+	return `${folder}/${stem}.md`;
+}
+
+/**
+ * Locate the markdown file for an article slug under the wiki directory.
+ * Only the wiki directory's immediate subdirectories are searched (one level deep), in
+ * the order readdir lists them; the first `<subdir>/<slug>.md` that exists wins.
+ *
+ * @returns the matching file path, or null when nothing matches or the wiki directory cannot be read.
+ */
+async function findArticleFile(root: string, slug: string): Promise<string | null> {
+	const wikiRoot = join(root, WIKI_DIR);
+	try {
+		const entries = await readdir(wikiRoot, { withFileTypes: true });
+		const candidates = entries
+			.filter((entry) => entry.isDirectory())
+			.map((entry) => join(wikiRoot, entry.name, `${slug}.md`));
+		return candidates.find((candidate) => existsSync(candidate)) ?? null;
+	} catch {
+		// wiki dir might not exist
+		return null;
+	}
+}
diff --git a/packages/core/src/lint/lint.ts b/packages/core/src/lint/lint.ts
index 054f751..310bce9 100644
--- a/packages/core/src/lint/lint.ts
+++ b/packages/core/src/lint/lint.ts
@@ -1,3 +1,4 @@
+import { withLock } from "../lockfile.js";
 import type { LintDiagnostic, LLMProvider, VaultConfig } from "../types.js";
 import { appendLog, loadManifest, saveManifest, writeWiki } from "../vault.js";
 import { contradictionRule } from "./contradiction.js";
@@ -71,27 +72,32 @@ export async function fixLintIssues(
 	provider?: LLMProvider,
 	config?: VaultConfig,
 ): Promise<FixResult> {
-	const result: FixResult = { fixed: 0, skipped: 0, errors: [] };
 	const fixable = diagnostics.filter((d) => d.fixable);
+	if (fixable.length === 0) {
+		return { fixed: 0, skipped: 0, errors: [] };
+	}
+
+	return withLock(root, "lint-fix", async () => {
+		const result: FixResult = { fixed: 0, skipped: 0, errors: [] };
 
-	const missingIssues = fixable.filter((d) => d.rule === "missing");
-	const staleIssues = fixable.filter((d) => d.rule === "stale");
+		const missingIssues = fixable.filter((d) => d.rule === "missing");
+		const staleIssues = fixable.filter((d) => d.rule === "stale");
 
-	// Fix missing: create stub articles
-	if (missingIssues.length > 0) {
-		const manifest = await loadManifest(root);
+		// Fix missing: create stub articles
+		if (missingIssues.length > 0) {
+			const manifest = await loadManifest(root);
 
-		for (const d of missingIssues) {
-			const match = d.message.match(/^"([^"]+)"/);
-			if (!match) continue;
+			for (const d of missingIssues) {
+				const match = d.message.match(/^"([^"]+)"/);
+				if (!match) continue;
 
-			const slug = match[1]!;
-			const title = slug
-				.split("-")
-				.map((w) => w.charAt(0).toUpperCase() + w.slice(1))
-				.join(" ");
+				const slug = match[1]!;
+				const title = slug
+					.split("-")
+					.map((w) => w.charAt(0).toUpperCase() + w.slice(1))
+					.join(" ");
 
-			const stub = `---
+				const stub = `---
 title: "${title}"
 slug: "${slug}"
 category: topic
@@ -104,51 +110,55 @@ sources: []
 *This article was auto-generated by \`kib lint --fix\`. Add content by ingesting sources about this topic and running \`kib compile\`.*
 `;
 
-			try {
-				await writeWiki(root, `topics/${slug}.md`, stub);
-				const now = new Date().toISOString();
-				manifest.articles[slug] = {
-					hash: "",
-					createdAt: now,
-					lastUpdated: now,
-					derivedFrom: [],
-					backlinks: [],
-					forwardLinks: [],
-					tags: [],
-					summary: "",
-					wordCount: 0,
-					category: "topic",
-				};
-				result.fixed++;
-			} catch (err) {
-				result.errors.push(`Failed to create ${slug}: ${(err as Error).message}`);
+				try {
+					await writeWiki(root, `topics/${slug}.md`, stub);
+					const now = new Date().toISOString();
+					manifest.articles[slug] = {
+						hash: "",
+						createdAt: now,
+						lastUpdated: now,
+						derivedFrom: [],
+						backlinks: [],
+						forwardLinks: [],
+						tags: [],
+						summary: "",
+						wordCount: 0,
+						category: "topic",
+					};
+					result.fixed++;
+				} catch (err) {
+					result.errors.push(`Failed to create ${slug}: ${(err as Error).message}`);
+				}
 			}
-		}
 
-		manifest.stats.totalArticles = Object.keys(manifest.articles).length;
-		await saveManifest(root, manifest);
-	}
+			manifest.stats.totalArticles = Object.keys(manifest.articles).length;
+			await saveManifest(root, manifest);
+		}
 
-	// Fix stale: recompile pending sources
-	if (staleIssues.length > 0) {
-		if (!provider || !config) {
-			result.skipped += staleIssues.length;
-			result.errors.push("Skipped stale fixes — LLM provider required. Set an API key and retry.");
-		} else {
-			try {
-				const { compileVault } = await import("../compile/compiler.js");
-				await compileVault(root, provider, config, { force: false });
-				result.fixed += staleIssues.length;
-			} catch (err) {
+		// Fix stale: recompile pending sources
+		if (staleIssues.length > 0) {
+			if (!provider || !config) {
 				result.skipped += staleIssues.length;
-				result.errors.push(`Compile failed: ${(err as Error).message}`);
+				result.errors.push(
+					"Skipped stale fixes — LLM provider required. Set an API key and retry.",
+				);
+			} else {
+				try {
+					const { compileVault } = await import("../compile/compiler.js");
+					// Lock is re-entrant — compileVault will share our lock
+					await compileVault(root, provider, config, { force: false });
+					result.fixed += staleIssues.length;
+				} catch (err) {
+					result.skipped += staleIssues.length;
+					result.errors.push(`Compile failed: ${(err as Error).message}`);
+				}
 			}
 		}
-	}
 
-	if (result.fixed > 0) {
-		await appendLog(root, "lint-fix", `${result.fixed} issues fixed`);
-	}
+		if (result.fixed > 0) {
+			await appendLog(root, "lint-fix", `${result.fixed} issues fixed`);
+		}
 
-	return result;
+		return result;
+	});
 }
diff --git a/packages/core/src/lockfile.test.ts b/packages/core/src/lockfile.test.ts
new file mode 100644
index 0000000..5cf616e
--- /dev/null
+++ b/packages/core/src/lockfile.test.ts
@@ -0,0 +1,209 @@
+import { afterEach, describe, expect, test } from "bun:test";
+import { existsSync } from "node:fs";
+import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { acquireLock, isLocked, releaseLock, withLock } from "./lockfile.js";
+import { initVault } from "./vault.js";
+
+let tempDir: string;
+
+afterEach(async () => {
+	// Clean up the vault left behind by the test that just ran, if one was created.
+	if (!tempDir) return;
+	await rm(tempDir, { recursive: true, force: true });
+});
+
+async function makeTempVault() { // fresh initialised vault in the OS temp dir; returns its path
+	tempDir = await mkdtemp(join(tmpdir(), "kib-lock-test-")); // kept for afterEach cleanup
+	await initVault(tempDir, { name: "lock-test" });
+	return tempDir;
+}
+
+describe("acquireLock", () => {
+	test("creates a lock file", async () => {
+		const dir = await makeTempVault();
+		await acquireLock(dir, "test");
+
+		const lockPath = join(dir, ".kb", "vault.lock");
+		expect(existsSync(lockPath)).toBe(true);
+
+		const info = JSON.parse(await readFile(lockPath, "utf-8")); // lock payload is JSON
+		expect(info.pid).toBe(process.pid);
+		expect(info.operation).toBe("test");
+
+		await releaseLock(dir);
+	});
+
+	test("is re-entrant for same process", async () => {
+		const dir = await makeTempVault();
+		await acquireLock(dir, "first");
+
+		// Same process already holds a fresh lock — a second acquire must succeed (re-entrant)
+		await acquireLock(dir, "second");
+
+		// First release only unwinds one nesting level
+		await releaseLock(dir);
+		const lockPath = join(dir, ".kb", "vault.lock");
+		expect(existsSync(lockPath)).toBe(true); // Still locked (depth > 0)
+
+		// Second release actually removes the lock
+		await releaseLock(dir);
+		expect(existsSync(lockPath)).toBe(false);
+	});
+
+	test("steals lock from dead process", async () => {
+		const dir = await makeTempVault();
+		const lockPath = join(dir, ".kb", "vault.lock");
+
+		// Write a lock whose PID is assumed not to belong to any running process
+		await writeFile(
+			lockPath,
+			JSON.stringify({ pid: 999999999, timestamp: new Date().toISOString(), operation: "dead" }),
+		);
+
+		// Should succeed by stealing the stale lock
+		await acquireLock(dir, "steal");
+		const info = JSON.parse(await readFile(lockPath, "utf-8"));
+		expect(info.pid).toBe(process.pid);
+		expect(info.operation).toBe("steal");
+
+		await releaseLock(dir);
+	});
+
+	test("steals lock older than threshold", async () => {
+		const dir = await makeTempVault();
+		const lockPath = join(dir, ".kb", "vault.lock");
+
+		// Lock written under this very PID, but older than the 5-minute stale threshold
+		const oldTime = new Date(Date.now() - 10 * 60 * 1000).toISOString(); // 10 min ago
+		await writeFile(
+			lockPath,
+			JSON.stringify({ pid: process.pid, timestamp: oldTime, operation: "old" }),
+		);
+
+		// Should succeed because age alone makes the lock stale, even though the PID is alive
+		await acquireLock(dir, "new");
+		const info = JSON.parse(await readFile(lockPath, "utf-8"));
+		expect(info.operation).toBe("new");
+
+		await releaseLock(dir);
+	});
+});
+
+describe("releaseLock", () => {
+	test("removes the lock file", async () => {
+		const dir = await makeTempVault();
+		await acquireLock(dir, "test");
+
+		const lockPath = join(dir, ".kb", "vault.lock");
+		expect(existsSync(lockPath)).toBe(true);
+
+		await releaseLock(dir);
+		expect(existsSync(lockPath)).toBe(false);
+	});
+
+	test("does nothing when no lock exists", async () => {
+		const dir = await makeTempVault();
+		await releaseLock(dir); // Should not throw
+	});
+
+	test("does not remove lock from another process", async () => {
+		const dir = await makeTempVault();
+		const lockPath = join(dir, ".kb", "vault.lock");
+
+		await writeFile( // hand-written lock recorded under a PID other than ours
+			lockPath,
+			JSON.stringify({ pid: 999999999, timestamp: new Date().toISOString(), operation: "other" }),
+		);
+
+		await releaseLock(dir);
+		// Lock should still be there — its recorded PID is not this process
+		expect(existsSync(lockPath)).toBe(true);
+	});
+});
+
+describe("isLocked", () => {
+	test("returns false when no lock", async () => {
+		const dir = await makeTempVault();
+		const result = await isLocked(dir);
+		expect(result.locked).toBe(false);
+	});
+
+	test("returns true when locked by live process", async () => {
+		const dir = await makeTempVault();
+		await acquireLock(dir, "check");
+
+		const result = await isLocked(dir);
+		expect(result.locked).toBe(true);
+		expect(result.info?.operation).toBe("check"); // holder details come back with the answer
+
+		await releaseLock(dir);
+	});
+
+	test("returns false for stale lock", async () => {
+		const dir = await makeTempVault();
+		const lockPath = join(dir, ".kb", "vault.lock");
+
+		await writeFile( // fresh timestamp, but a PID assumed not to be running
+			lockPath,
+			JSON.stringify({ pid: 999999999, timestamp: new Date().toISOString(), operation: "dead" }),
+		);
+
+		const result = await isLocked(dir);
+		expect(result.locked).toBe(false);
+		expect(result.info).toBeDefined(); // the stale lock's details are still reported
+	});
+});
+
+describe("withLock", () => {
+	test("acquires and releases lock around function", async () => {
+		const dir = await makeTempVault();
+		const lockPath = join(dir, ".kb", "vault.lock");
+
+		const result = await withLock(dir, "wrapped", async () => {
+			expect(existsSync(lockPath)).toBe(true); // held while the callback runs
+			return 42;
+		});
+
+		expect(result).toBe(42); // the callback's value is passed through
+		expect(existsSync(lockPath)).toBe(false);
+	});
+
+	test("releases lock even on error", async () => {
+		const dir = await makeTempVault();
+		const lockPath = join(dir, ".kb", "vault.lock");
+
+		try {
+			await withLock(dir, "error", async () => {
+				expect(existsSync(lockPath)).toBe(true);
+				throw new Error("boom");
+			});
+		} catch (err) {
+			expect((err as Error).message).toBe("boom"); // the original error propagates unchanged
+		}
+
+		expect(existsSync(lockPath)).toBe(false);
+	});
+
+	test("supports nested withLock (re-entrant)", async () => {
+		const dir = await makeTempVault();
+		const lockPath = join(dir, ".kb", "vault.lock");
+
+		const result = await withLock(dir, "outer", async () => {
+			expect(existsSync(lockPath)).toBe(true);
+
+			const inner = await withLock(dir, "inner", async () => {
+				expect(existsSync(lockPath)).toBe(true);
+				return "inner-result";
+			});
+
+			// The inner release must not drop the lock the outer call still holds
+			expect(existsSync(lockPath)).toBe(true);
+			return `outer-${inner}`;
+		});
+
+		expect(result).toBe("outer-inner-result");
+		expect(existsSync(lockPath)).toBe(false);
+	});
+});
diff --git a/packages/core/src/lockfile.ts b/packages/core/src/lockfile.ts
new file mode 100644
index 0000000..32e7b7a
--- /dev/null
+++ b/packages/core/src/lockfile.ts
@@ -0,0 +1,147 @@
+import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
+import { dirname, join } from "node:path";
+import { VAULT_DIR } from "./constants.js";
+
+const LOCK_FILE = "vault.lock"; // lock file name inside VAULT_DIR
+const STALE_THRESHOLD_MS = 5 * 60 * 1000; // 5 minutes
+
+// Re-entrancy counter for nested withLock calls. One counter per process, shared by all vault roots.
+let lockDepth = 0;
+
+interface LockInfo {
+	pid: number; // process.pid of the process that wrote the lock
+	timestamp: string; // ISO-8601 time at which the lock was written
+	operation: string; // caller-supplied label, echoed in VaultLockError's message
+}
+
+/** Raised when the vault lock is held elsewhere; `lockInfo` describes the current holder. */
+export class VaultLockError extends Error {
+	constructor(public readonly lockInfo: LockInfo) {
+		super(
+			`Vault is locked by process ${lockInfo.pid} (${lockInfo.operation}, started ${lockInfo.timestamp}). If this is stale, remove .kb/vault.lock manually.`,
+		);
+		this.name = "VaultLockError";
+	}
+}
+
+function lockPath(root: string): string {
+	return join(join(root, VAULT_DIR), LOCK_FILE); // lock file sits directly inside the vault dir
+}
+
+function isProcessAlive(pid: number): boolean {
+	try {
+		process.kill(pid, 0); // signal 0 only probes for existence, nothing is delivered
+		return true;
+	} catch (err) {
+		return (err as NodeJS.ErrnoException).code === "EPERM"; // exists, but not ours to signal
+	}
+}
+
+// Parse the lock file for `root`; a missing/unreadable file or invalid JSON yields null.
+async function readLock(root: string): Promise<LockInfo | null> {
+	const file = lockPath(root);
+	try {
+		return JSON.parse(await readFile(file, "utf-8")) as LockInfo;
+	} catch {
+		return null;
+	}
+}
+
+/**
+ * Acquire an exclusive lock on the vault.
+ * Throws VaultLockError if the vault is already locked by a different live process.
+ * Re-entrant: if the current process already holds the lock, only the depth is bumped.
+ * Automatically steals stale locks (dead PID or older than 5 minutes).
+ * A free vault is claimed with the exclusive "wx" flag so a lost race cannot clobber the winner.
+ */
+export async function acquireLock(root: string, operation = "unknown"): Promise<void> {
+	const existing = await readLock(root);
+	if (existing) {
+		const age = Date.now() - new Date(existing.timestamp).getTime();
+		// Re-entrant: same process holds a fresh lock
+		if (existing.pid === process.pid && age < STALE_THRESHOLD_MS && lockDepth > 0) {
+			lockDepth++;
+			return;
+		}
+		if (isProcessAlive(existing.pid) && age < STALE_THRESHOLD_MS) {
+			throw new VaultLockError(existing);
+		}
+		// Stale lock — steal it by overwriting below
+	}
+
+	const info: LockInfo = { pid: process.pid, timestamp: new Date().toISOString(), operation };
+	const body = JSON.stringify(info, null, 2);
+	const path = lockPath(root);
+	await mkdir(dirname(path), { recursive: true });
+	try {
+		await writeFile(path, body, { encoding: "utf-8", flag: existing ? "w" : "wx" });
+	} catch (err) {
+		if ((err as NodeJS.ErrnoException).code !== "EEXIST") throw err;
+		// File appeared after our check: another process's lock wins, anything else is replaced.
+		const winner = await readLock(root);
+		if (winner && winner.pid !== process.pid) throw new VaultLockError(winner);
+		await writeFile(path, body, "utf-8");
+	}
+	lockDepth = 1;
+}
+
+/**
+ * Give up one level of the vault lock held by this process.
+ * A lock file written by another PID is left alone (and the depth counter is not touched).
+ * Nested holders only decrement the depth; the outermost release deletes the lock file.
+ */
+export async function releaseLock(root: string): Promise<void> {
+	const holder = await readLock(root);
+	if (holder && holder.pid !== process.pid) return; // Not our lock
+
+	if (lockDepth > 1) {
+		// Re-entrant: an outer caller still holds the lock, so keep the file
+		lockDepth -= 1;
+		return;
+	}
+
+	// Outermost release: reset the counter before removing the file
+	lockDepth = 0;
+
+	try {
+		await unlink(lockPath(root));
+	} catch (err) {
+		// An already-missing lock file counts as released; anything else is a real failure
+		const { code } = err as NodeJS.ErrnoException;
+		if (code !== "ENOENT") throw err;
+	}
+}
+
+/**
+ * Check if the vault is currently locked, using the same staleness rule as acquireLock.
+ * `info` is returned whenever a lock file could be parsed, even if that lock is stale.
+ */
+export async function isLocked(root: string): Promise<{ locked: boolean; info?: LockInfo }> {
+	const info = await readLock(root);
+	if (!info) return { locked: false };
+
+	const age = Date.now() - new Date(info.timestamp).getTime();
+	const alive = isProcessAlive(info.pid);
+
+	// Written as `age < threshold` (not `!(age >= threshold)`) so an unparseable timestamp,
+	// which makes age NaN, counts as stale here exactly as it does in acquireLock.
+	const locked = alive && age < STALE_THRESHOLD_MS;
+	return { locked, info };
+}
+
+/**
+ * Hold the vault lock for the duration of `fn`; the `finally` releases it on success or throw.
+ */
+export async function withLock<T>(
+	root: string,
+	operation: string,
+	fn: () => Promise<T>,
+): Promise<T> {
+	await acquireLock(root, operation);
+	try {
+		const outcome = await fn();
+		return outcome;
+	} finally {
+		await releaseLock(root);
+	}
+}
diff --git a/packages/core/src/recovery.test.ts b/packages/core/src/recovery.test.ts
new file mode 100644
index 0000000..aa59f0e
--- /dev/null
+++ b/packages/core/src/recovery.test.ts
@@ -0,0 +1,161 @@
+import { afterEach, describe, expect, test } from "bun:test";
+import { existsSync } from "node:fs";
+import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { detectIssues, repairVault } from "./recovery.js";
+import { initVault } from "./vault.js";
+
+let tempDir: string; // most recent vault made by makeTempVault; unset until the first call
+
+// Delete the temp vault after every test (nothing to do when none was created).
+afterEach(async () => {
+	if (!tempDir) return;
+	await rm(tempDir, { recursive: true, force: true });
+});
+
+async function makeTempVault() {
+	tempDir = await mkdtemp(join(tmpdir(), "kib-recovery-test-")); // stored so afterEach can remove it
+	await initVault(tempDir, { name: "recovery-test" }); // must finish before the path is handed out
+	return tempDir;
+}
+
+describe("detectIssues", () => {
+	test("returns empty for healthy vault", async () => {
+		const vault = await makeTempVault();
+		const found = await detectIssues(vault);
+		expect(found).toEqual([]);
+	});
+
+	test("detects tmp files in .kb/", async () => {
+		const vault = await makeTempVault();
+		await writeFile(join(vault, ".kb", "manifest.json.tmp"), "partial data");
+		// manifest.json itself is still present, so this is a plain leftover, not a missing manifest
+		const kinds = (await detectIssues(vault)).map((issue) => issue.type);
+		expect(kinds.includes("tmp_file")).toBe(true);
+	});
+
+	test("detects tmp files in wiki/", async () => {
+		const vault = await makeTempVault();
+		await writeFile(join(vault, "wiki", "concepts", "test.md.tmp"), "partial");
+		// Nested one level below wiki/, so this relies on the recursive scan
+		const kinds = (await detectIssues(vault)).map((issue) => issue.type);
+		expect(kinds.includes("tmp_file")).toBe(true);
+	});
+
+	test("detects tmp files in raw/", async () => {
+		const vault = await makeTempVault();
+		await writeFile(join(vault, "raw", "articles", "test.md.tmp"), "partial");
+		// Nested one level below raw/, so this relies on the recursive scan
+		const kinds = (await detectIssues(vault)).map((issue) => issue.type);
+		expect(kinds.includes("tmp_file")).toBe(true);
+	});
+
+	test("detects missing manifest with tmp present", async () => {
+		const vault = await makeTempVault();
+		const manifest = join(vault, ".kb", "manifest.json");
+		const snapshot = await readFile(manifest, "utf-8");
+
+		// Simulate interrupted write: tmp exists but manifest is gone
+		await writeFile(`${manifest}.tmp`, snapshot);
+		await rm(manifest);
+
+		const kinds = (await detectIssues(vault)).map((issue) => issue.type);
+		expect(kinds.includes("missing_manifest")).toBe(true);
+	});
+
+	test("detects corrupt manifest", async () => {
+		const vault = await makeTempVault();
+		const manifest = join(vault, ".kb", "manifest.json");
+		await writeFile(manifest, "not valid json {{{");
+
+		const kinds = (await detectIssues(vault)).map((issue) => issue.type);
+		expect(kinds.includes("corrupt_manifest")).toBe(true);
+	});
+
+	test("detects stale lock from dead process", async () => {
+		const vault = await makeTempVault();
+		const deadOwner = { pid: 999999999, timestamp: new Date().toISOString(), operation: "dead" };
+		await writeFile(join(vault, ".kb", "vault.lock"), JSON.stringify(deadOwner));
+
+		// The timestamp is fresh; only the (presumably unused) PID makes this lock stale
+		const kinds = (await detectIssues(vault)).map((issue) => issue.type);
+
+		expect(kinds.includes("stale_lock")).toBe(true);
+	});
+});
+
+describe("repairVault", () => {
+	test("removes stale tmp files", async () => {
+		const vault = await makeTempVault();
+		const leftover = join(vault, ".kb", "config.toml.tmp");
+		await writeFile(leftover, "partial");
+
+		const report = await repairVault(vault);
+		const entry = report.find(({ type }) => type === "tmp_file");
+		expect(entry?.repaired).toBe(true);
+		expect(existsSync(leftover)).toBe(false);
+	});
+
+	test("promotes tmp to manifest when manifest is missing", async () => {
+		const vault = await makeTempVault();
+		const manifest = join(vault, ".kb", "manifest.json");
+		const snapshot = await readFile(manifest, "utf-8");
+		// Leave only the .tmp copy behind, as an interrupted write would
+		await writeFile(`${manifest}.tmp`, snapshot);
+		await rm(manifest);
+
+		const report = await repairVault(vault);
+		const entry = report.find(({ type }) => type === "missing_manifest");
+		expect(entry?.repaired).toBe(true);
+		expect(existsSync(manifest)).toBe(true);
+
+		const restored = await readFile(manifest, "utf-8");
+		expect(JSON.parse(restored)).toEqual(JSON.parse(snapshot));
+	});
+
+	test("removes stale lock", async () => {
+		const vault = await makeTempVault();
+		const lockFile = join(vault, ".kb", "vault.lock");
+
+		// Fresh timestamp, but owned by a PID that is presumably not running
+		const deadOwner = { pid: 999999999, timestamp: new Date().toISOString(), operation: "dead" };
+		await writeFile(lockFile, JSON.stringify(deadOwner));
+
+		const report = await repairVault(vault);
+		const entry = report.find(({ type }) => type === "stale_lock");
+		expect(entry?.repaired).toBe(true);
+		expect(existsSync(lockFile)).toBe(false);
+	});
+
+	test("restores corrupt manifest from backup", async () => {
+		const vault = await makeTempVault();
+		const manifest = join(vault, ".kb", "manifest.json");
+		const pristine = await readFile(manifest, "utf-8");
+
+		// Create a backup
+		const backups = join(vault, ".kb", "backups");
+		await mkdir(backups, { recursive: true });
+		await writeFile(join(backups, "manifest-2024-01-01T00-00-00-000Z.json"), pristine);
+
+		// Corrupt the manifest
+		await writeFile(manifest, "corrupted {{{");
+
+		const report = await repairVault(vault);
+		const entry = report.find(({ type }) => type === "corrupt_manifest");
+		expect(entry?.repaired).toBe(true);
+
+		const restored = await readFile(manifest, "utf-8");
+		expect(JSON.parse(restored)).toEqual(JSON.parse(pristine));
+	});
+
+	test("returns unrepaired for corrupt manifest with no backup", async () => {
+		const vault = await makeTempVault();
+		const manifest = join(vault, ".kb", "manifest.json");
+		await writeFile(manifest, "corrupted {{{");
+
+		const report = await repairVault(vault);
+		const entry = report.find(({ type }) => type === "corrupt_manifest");
+		expect(entry?.repaired).toBe(false);
+	});
+});
diff --git a/packages/core/src/recovery.ts b/packages/core/src/recovery.ts
new file mode 100644
index 0000000..5b559b0
--- /dev/null
+++ b/packages/core/src/recovery.ts
@@ -0,0 +1,219 @@
+import { existsSync } from "node:fs";
+import { readdir, readFile, rename, unlink } from "node:fs/promises";
+import { join } from "node:path";
+import { MANIFEST_FILE, RAW_DIR, VAULT_DIR, WIKI_DIR } from "./constants.js";
+
+export interface RecoveryIssue {
+	type: "tmp_file" | "missing_manifest" | "corrupt_manifest" | "stale_lock";
+	path: string; // file the issue refers to (for missing_manifest: the manifest's .tmp file)
+	message: string; // human-readable description; repairVault may append restore details
+	repaired: boolean; // false from detectIssues; set to true by repairVault on success
+}
+
+/**
+ * Scan the vault for leftover .tmp files, a missing or unparseable manifest, and stale locks.
+ * Detection only: nothing is modified and every returned issue has `repaired: false`.
+ */
+export async function detectIssues(root: string): Promise<RecoveryIssue[]> {
+	const issues: RecoveryIssue[] = [];
+	const kbDir = join(root, VAULT_DIR);
+
+	// Check manifest existence first — .tmp files for missing manifest
+	// are handled separately and should not be flagged as generic tmp_file
+	const manifestPath = join(kbDir, MANIFEST_FILE);
+	const manifestTmp = `${manifestPath}.tmp`;
+	const manifestMissing = !existsSync(manifestPath) && existsSync(manifestTmp);
+
+	if (manifestMissing) {
+		issues.push({
+			type: "missing_manifest",
+			path: manifestTmp,
+			message: "Manifest is missing but a .tmp file exists — likely an interrupted write",
+			repaired: false,
+		});
+	}
+
+	// Check for .tmp files in .kb/ (skip manifest.tmp if already flagged as missing_manifest)
+	await scanTmpFiles(kbDir, issues, manifestMissing ? manifestTmp : undefined);
+
+	// Check for .tmp files in raw/ and wiki/
+	const rawDir = join(root, RAW_DIR);
+	const wikiDir = join(root, WIKI_DIR);
+	if (existsSync(rawDir)) await scanTmpFilesRecursive(rawDir, issues);
+	if (existsSync(wikiDir)) await scanTmpFilesRecursive(wikiDir, issues);
+
+	// Check if manifest is valid JSON
+	if (existsSync(manifestPath)) {
+		try {
+			const raw = await readFile(manifestPath, "utf-8");
+			JSON.parse(raw);
+		} catch {
+			issues.push({
+				type: "corrupt_manifest",
+				path: manifestPath,
+				message: "Manifest file contains invalid JSON",
+				repaired: false,
+			});
+		}
+	}
+
+	// Check for a lock whose owner is gone (only PID liveness is tested here, not lock age)
+	const lockPath = join(kbDir, "vault.lock");
+	if (existsSync(lockPath)) {
+		try {
+			const raw = await readFile(lockPath, "utf-8");
+			const info = JSON.parse(raw) as { pid: number; timestamp: string };
+			let alive = false;
+			try {
+				process.kill(info.pid, 0); // signal 0 only probes for existence
+				alive = true;
+			} catch (err) {
+				alive = (err as NodeJS.ErrnoException).code === "EPERM"; // exists, other user's
+			}
+			if (!alive) {
+				issues.push({
+					type: "stale_lock",
+					path: lockPath,
+					message: `Stale lock from dead process ${info.pid}`,
+					repaired: false,
+				});
+			}
+		} catch {
+			issues.push({
+				type: "stale_lock",
+				path: lockPath,
+				message: "Lock file is corrupt or unreadable",
+				repaired: false,
+			});
+		}
+	}
+
+	return issues;
+}
+
+/**
+ * Attempt to repair detected issues; returns them with `repaired` set where a fix succeeded.
+ * - .tmp files next to their target: remove the .tmp (the write was atomic, target is fine)
+ * - .tmp file without target (missing_manifest): promote .tmp → target
+ * - Stale locks: remove
+ * - Corrupt manifest: restore the newest backup that still parses, trying older ones in turn
+ */
+export async function repairVault(root: string): Promise<RecoveryIssue[]> {
+	const issues = await detectIssues(root);
+
+	for (const issue of issues) {
+		switch (issue.type) {
+			case "tmp_file": {
+				// Leftover from an interrupted write — discard it
+				try {
+					await unlink(issue.path);
+					issue.repaired = true;
+				} catch {
+					// Could not remove — leave it
+				}
+				break;
+			}
+
+			case "missing_manifest": {
+				// The .tmp file is the only copy — promote it
+				const target = issue.path.replace(/\.tmp$/, "");
+				try {
+					await rename(issue.path, target);
+					issue.repaired = true;
+				} catch {
+					// Could not promote
+				}
+				break;
+			}
+
+			case "stale_lock": {
+				try {
+					await unlink(issue.path);
+					issue.repaired = true;
+				} catch {
+					// Could not remove
+				}
+				break;
+			}
+
+			case "corrupt_manifest": {
+				// Restore from the newest backup that is still valid JSON
+				const backupsDir = join(root, VAULT_DIR, "backups");
+				if (existsSync(backupsDir)) {
+					const backups = (await readdir(backupsDir))
+						.filter((f) => f.startsWith("manifest-") && f.endsWith(".json"))
+						.sort()
+						.reverse();
+
+					for (const name of backups) {
+						try {
+							const backup = await readFile(join(backupsDir, name), "utf-8");
+							JSON.parse(backup); // Verify it's valid
+							const manifestPath = join(root, VAULT_DIR, MANIFEST_FILE);
+							const tmp = `${manifestPath}.tmp`;
+							const { writeFile } = await import("node:fs/promises");
+							await writeFile(tmp, backup, "utf-8");
+							await rename(tmp, manifestPath);
+							issue.repaired = true;
+							issue.message += ` — restored from backup ${name}`;
+							break; // leaves the for loop only; the case's own break follows below
+						} catch {
+							// This backup is corrupt or unreadable — fall back to an older one
+						}
+					}
+				}
+				break;
+			}
+		}
+	}
+
+	return issues;
+}
+
+// ─── Helpers ────────────────────────────────────────────────────
+
+/**
+ * Record a tmp_file issue for each direct child of `dir` whose name ends in ".tmp".
+ * Not recursive; the entry equal to `excludePath` (if given) is skipped.
+ */
+async function scanTmpFiles(
+	dir: string,
+	issues: RecoveryIssue[],
+	excludePath?: string,
+): Promise<void> {
+	try {
+		const names = await readdir(dir);
+		const leftovers = names
+			.filter((name) => name.endsWith(".tmp"))
+			.map((name) => ({ name, fullPath: join(dir, name) }))
+			.filter(({ fullPath }) => !(excludePath && fullPath === excludePath));
+		for (const { name, fullPath } of leftovers) {
+			const message = `Leftover temporary file: ${name}`;
+			issues.push({ type: "tmp_file", path: fullPath, message, repaired: false });
+		}
+	} catch {
+		// Directory might be missing or unreadable — nothing to report in that case
+	}
+}
+
+// Walk `dir` recursively, recording a tmp_file issue for each non-directory entry ending in ".tmp".
+async function scanTmpFilesRecursive(dir: string, issues: RecoveryIssue[]): Promise<void> {
+	try {
+		const entries = await readdir(dir, { withFileTypes: true });
+		for (const entry of entries) {
+			const fullPath = join(dir, entry.name);
+			if (entry.isDirectory()) {
+				await scanTmpFilesRecursive(fullPath, issues);
+			} else if (entry.name.endsWith(".tmp")) {
+				issues.push({
+					type: "tmp_file",
+					path: fullPath,
+					message: `Leftover temporary file: ${entry.name}`,
+					repaired: false,
+				});
+			}
+		}
+	} catch {
+		// Directory might not exist
+	}
+}