From 2c3fa82f03c809ce92044abd1bd30c78c90ceb8b Mon Sep 17 00:00:00 2001
From: PMarzec <98286080+przemarzec@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:32:44 +0200
Subject: [PATCH] docs: expand and correct the engrava documentation set (#17)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Documentation expansion bringing docs/ up to the shipped 0.3.x behaviour: new pages (Core Concepts, Positioning, Migration, Troubleshooting, FAQ, Performance, Data lifecycle, Deployment, Concurrency, Backup & Recovery, CLI, Glossary) + accuracy fixes across existing pages, all verified against the running package. tests/docs/ compiles, phantom-scans, and executes documentation code. docs:/style: only — no release.
---
 README.md                                     |  32 +-
 docs/api-reference.md                         |  92 +++++-
 docs/audit-trail.md                           | 261 +++++++++++++++++
 docs/backup-and-recovery.md                   | 129 +++++++++
 docs/cli.md                                   | 228 +++++++++++++++
 docs/concepts.md                              | 247 ++++++++++++++++
 docs/concurrency.md                           | 124 ++++++++
 docs/configuration.md                         |  94 +++++-
 docs/data-lifecycle.md                        | 170 +++++++++++
 docs/deployment.md                            | 133 +++++++++
 docs/dreaming.md                              |  82 +++++-
 docs/faq.md                                   | 116 ++++++++
 docs/glossary.md                              | 161 ++++++++++
 docs/guides/agent-memory.md                   | 260 +++++++++++++++++
 docs/guides/embeddings.md                     | 228 +++++++++++++++
 docs/guides/migrating-from-other-memory.md    | 274 ++++++++++++++++++
 docs/observability.md                         | 121 ++++++++
 docs/performance.md                           | 175 +++++++++++
 docs/positioning.md                           |  97 +++++++
 docs/quickstart.md                            |  42 ++-
 docs/recipes/index.md                         | 190 ++++++++++++
 docs/troubleshooting.md                       | 193 ++++++++++++
 docs/tutorial.md                              | 153 ++++++++++
 docs/upgrade.md                               |  68 ++++-
 examples/agent_loop.py                        | 235 +++++++++++++++
 examples/config.yaml                          |   2 +-
 examples/notes_memory.py                      | 117 ++++++++
 src/engrava/config.py                         |   3 +-
 src/engrava/extensions/__init__.py            |   2 +-
 src/engrava/extensions/vector_sqlite_vec.py   |  18 +-
 .../infrastructure/sqlite/engrava_core.py     |   8 +-
 tests/docs/test_docs_examples_execute.py      |   1 +
 tests/examples/test_quickstart_runs.py        |  25 ++
 33 files changed, 4044 insertions(+), 37 deletions(-)
 create mode 100644 docs/audit-trail.md
 create mode 100644 docs/backup-and-recovery.md
 create mode 100644 docs/cli.md
 create mode 100644 docs/concepts.md
 create mode 100644 docs/concurrency.md
 create mode 100644 docs/data-lifecycle.md
 create mode 100644 docs/deployment.md
 create mode 100644 docs/faq.md
 create mode 100644 docs/glossary.md
 create mode 100644 docs/guides/agent-memory.md
 create mode 100644 docs/guides/embeddings.md
 create mode 100644 docs/guides/migrating-from-other-memory.md
 create mode 100644 docs/performance.md
 create mode 100644 docs/positioning.md
 create mode 100644 docs/recipes/index.md
 create mode 100644 docs/troubleshooting.md
 create mode 100644 docs/tutorial.md
 create mode 100644 examples/agent_loop.py
 create mode 100644 examples/notes_memory.py

diff --git a/README.md b/README.md
index 0539b04..0b94764 100644
--- a/README.md
+++ b/README.md
@@ -175,6 +175,17 @@ since 0.3.0.
 → See [`docs/benchmarks.md`](docs/benchmarks.md) for reproducible
 evidence (synthetic benchmark suite runnable in ~5 minutes).
 
+### Tamper-Evident Audit Trail
+
+Opt-in hash-chain **journal** that records every thought/edge mutation as a
+SHA-256-linked, before/after entry — off by default, one config flag to enable.
+Query history with `store.journal.get_entries(...)` and validate the chain with
+`store.journal.verify_integrity()`.
+
+→ See [`docs/audit-trail.md`](docs/audit-trail.md) for enabling, querying,
+verification, and the security model (what "tamper-evident" does and does not
+guarantee).
+
 ### Multi-Service Isolation
 
 Run multiple independent databases under one `EngravaManager`:
@@ -203,6 +214,8 @@ engrava --db mydata.db export -o portable.json
 `engrava info` now renders the same metrics snapshot contract exposed by
 `await store.metrics()`.
 
+See the [CLI reference](docs/cli.md) for every command and option.
+
 ## Architecture
 
 - **SQLite** with WAL mode for concurrent reads
@@ -214,13 +227,30 @@ engrava --db mydata.db export -o portable.json
 
 ## Documentation
 
-- [Upgrade Guide](docs/upgrade.md) — compatibility matrix, backups, and troubleshooting
+- [Core Concepts](docs/concepts.md) — the mental model (thought, edge, reflection, cycle, …) — start here
+- [Positioning](docs/positioning.md) — when Engrava is (and isn't) the right tool, and how it compares
 - [Quick Start](docs/quickstart.md) — 5-minute setup guide
+- [Tutorial](docs/tutorial.md) — build a small notes memory end to end
+- [Recipes](docs/recipes/index.md) — copy-paste snippets for common tasks (store a turn, retrieve context, TTL, dedup, …)
+- [Building a memory-backed agent](docs/guides/agent-memory.md) — the end-to-end agent turn loop (ingest → retrieve → generate → consolidate)
+- [Migrating from another memory system](docs/guides/migrating-from-other-memory.md) — concept mapping, porting calls, bulk import, and scoping/multi-tenancy
+- [Embeddings](docs/guides/embeddings.md) — wiring a real embedding provider (local / OpenAI / Ollama / HuggingFace / custom)
 - [Configuration](docs/configuration.md) — YAML config format and options
+- [Upgrade Guide](docs/upgrade.md) — compatibility matrix, backups, and troubleshooting
 - [Extensions](docs/extensions.md) — Writing custom extensions and hooks
 - [Observability](docs/observability.md) — Metrics snapshot API
+- [Audit Trail](docs/audit-trail.md) — Tamper-evident hash-chain journal (enabling, querying, verifying, security model)
 - [API Reference](docs/api-reference.md) — Full protocol and class reference
+- [CLI Reference](docs/cli.md) — every `engrava` command and option
+- [Glossary](docs/glossary.md) — quick definitions of every Engrava term
 - [MindQL](docs/mindql.md) — Query language syntax and examples
+- [Troubleshooting](docs/troubleshooting.md) — symptom → cause → fix for common errors
+- [FAQ](docs/faq.md) — quick answers (LLM/keys, embeddings-optional, scale, concurrency, backups, …)
+- [Performance & Scaling](docs/performance.md) — the vector-backend switch, bulk-ingest, and dreaming cost at scale
+- [Data Lifecycle & Retention](docs/data-lifecycle.md) — lifecycle states, TTL, archive-vs-delete, GDPR erasure, disk reclamation
+- [Deployment](docs/deployment.md) — process model, database files on disk, containers, graceful shutdown
+- [Concurrency](docs/concurrency.md) — the WAL single-writer model, busy timeout, and per-service isolation
+- [Backup & Recovery](docs/backup-and-recovery.md) — WAL-safe backups, snapshot vs file copy, restore verification
 - [Known Limitations](docs/known-limitations.md) — Platform notes and constraints
 
 ## Development
diff --git a/docs/api-reference.md b/docs/api-reference.md
index d3e5651..3c6cc99 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -65,6 +65,7 @@ keyword arguments and does **not** return a UUID string.
 | `await list_thoughts(...)` | `list[ThoughtRecord]` | List with filters (keyword-only) |
 | `await count_thoughts(...)` | `int` | Count with filters (keyword-only) |
 | `await delete_thought(thought_id)` | `bool` | Hard delete; `True` if a row was removed |
+| `await record_access(thought_id)` | `None` | Mark a thought as accessed — bumps `access_count` and sets `last_accessed_at`; raises `ThoughtNotFoundError` if missing. Drives the access-frequency dreaming signal. |
 
 ```python
 import uuid
@@ -135,6 +136,40 @@ await store.create_edge(
 )
 ```
 
+#### REFLECTION lineage
+
+Helpers for navigating the `CONSOLIDATED_FROM` graph that dreaming builds
+between a REFLECTION and the source thoughts it summarises.
+
+| Method | Returns | Description |
+|--------|---------|-------------|
+| `await consolidated_member_ids(reflection_id)` | `list[str]` | The thought IDs a REFLECTION was consolidated from |
+| `await consolidated_source_statuses(reflection_id)` | `list[str]` | The lifecycle statuses of those source thoughts (e.g. to detect a fully-archived, orphaned cluster) |
+| `await reflections_consolidated_from(source_id)` | `list[str]` | The REFLECTION IDs that consolidated a given source thought (the reverse direction) |
+| `await thought_exists_by_source(*, source, thought_type_value)` | `bool` | Whether any thought exists with the given `source` and type — keyword-only |
+
+```python
+# Walk a REFLECTION down to its sources, and back from a source to its REFLECTIONs.
+member_ids = await store.consolidated_member_ids(reflection_id)
+for thought_id in member_ids:
+    source = await store.get_thought(thought_id)
+    if source is not None:
+        print(source.essence)
+
+# Detect an orphaned cluster — every source archived/gone:
+statuses = await store.consolidated_source_statuses(reflection_id)
+is_orphaned = bool(statuses) and all(s != "ACTIVE" for s in statuses)
+
+# Reverse direction: which REFLECTIONs summarise this source?
+parents = await store.reflections_consolidated_from(member_ids[0])
+
+# Exact-source existence check (e.g. dreaming's idempotency guard — a REFLECTION's
+# source is "dreaming:<cluster_hash>", so match the full value, not a prefix):
+exists = await store.thought_exists_by_source(
+    source="dreaming:abc123def4567890", thought_type_value="REFLECTION"
+)
+```
+
 #### Embedding Operations
 
 | Method | Returns | Description |
@@ -167,11 +202,23 @@ returns a single `HybridSearchResult` container.
 | `await metrics()` | `EngravaMetrics` | Snapshot of thought/edge counts, storage, and search-latency percentiles (see [Observability](observability.md)) |
 | `await cleanup_expired(now=None, *, exclude_id=None)` | `CleanupResult` | Archive or delete thoughts past their `expires_at` |
 | `await verify_embedding_model()` | `None` | Raise `EmbeddingModelMismatchError` if the stored model lock disagrees with the configured provider |
+| `async with store.suspend_auto_commit():` | context manager | Defer per-call commits so a block of writes commits once (rolls back on error) — use for bulk ingest |
 | `await close()` | `None` | Close the owned connection (only when the store opened it via `from_config`) |
 
+```python
+# Bulk ingest: one transaction instead of one commit per write.
+async with store.suspend_auto_commit():
+    for record in many_records:
+        await store.create_thought(record)
+# commit happens once on clean exit; any exception rolls the whole block back
+```
+
 ### `ReadOnlyEngrava`
 
-Wrapper that raises `ReadOnlyViolationError` on any write operation.
+A composition wrapper that delegates reads to the wrapped store and raises
+`ReadOnlyViolationError` on any write. Use it to hand a retrieval-only view of
+shared memory to a component that should never mutate it — e.g. a sub-agent or
+worker whose job is only to look things up.
 
 ```python
 from engrava import ReadOnlyEngrava
@@ -299,14 +346,51 @@ extension is recommended for filtering queries (`json_extract(metadata_json, '$.
 
 ### `ActionRecord`
 
+Records an action the agent took (a tool call, a message, …), linked to the
+thought that prompted it, with execution and verification state.
+
 | Field | Type | Description |
 |-------|------|-------------|
 | `action_id` | `str` | UUID primary key |
-| `source_thought_id` | `str` | Linked thought |
+| `source_thought_id` | `str` | The thought this action originated from |
 | `action_type` | `ActionType` | Action classification |
-| `intent` | `str` | Description of intent |
-| `status` | `ActionStatus` | Current status |
+| `intent` | `str` | Description of intent (min length 1) |
+| `status` | `ActionStatus` | Current execution status |
 | `verification_status` | `VerificationStatus` | Verification state |
+| `raw_metrics_json` | `str \| None` | Optional ground-truth facts for verification |
+
+**Store methods** (on `SqliteEngravaCore`):
+
+| Method | Returns | Description |
+|--------|---------|-------------|
+| `await create_action(action)` | `ActionRecord` | Persist an `ActionRecord` |
+| `await get_actions(thought_id)` | `list[ActionRecord]` | Actions linked to a thought |
+
+`ActionStatus` is a state machine: `PLANNED → EXECUTING → CONFIRMED` / `FAILED`,
+and `PLANNED → BLOCKED → PLANNED`. `can_transition_to(...)` / `evolve(...)`
+enforce valid transitions (an illegal change raises `InvalidTransitionError`).
+
+```python
+import uuid
+from engrava import ActionRecord, ActionType, ActionStatus, VerificationStatus
+
+action = ActionRecord(
+    action_id=str(uuid.uuid4()),
+    source_thought_id=prompting_thought_id,
+    action_type=ActionType.TOOL_CALL,
+    intent="search the web for flight prices",
+    status=ActionStatus.PLANNED,
+    verification_status=VerificationStatus.PENDING,
+)
+await store.create_action(action)
+
+# advance through the lifecycle (frozen model → evolve returns a new instance):
+done = action.evolve(status=ActionStatus.EXECUTING).evolve(
+    status=ActionStatus.CONFIRMED
+)
+
+actions = await store.get_actions(prompting_thought_id)
+```
 
 ### `HybridSearchResult`
 
diff --git a/docs/audit-trail.md b/docs/audit-trail.md
new file mode 100644
index 0000000..40015ee
--- /dev/null
+++ b/docs/audit-trail.md
@@ -0,0 +1,261 @@
+# Audit Trail (hash-chain journal)
+
+Engrava can record every change to your thought-graph in an append-only,
+hash-linked **journal** — a tamper-evident audit trail. Each entry captures one
+mutation (insert / update / delete of a thought or edge) as a before/after
+delta, and is cryptographically chained to the previous entry with SHA-256.
+
+> **Read the [Security model](#security-model--guarantees) before relying on this
+> for compliance.** The chain detects accidental corruption and naive edits, but
+> it is a *keyless* chain stored in the same database file — see the boundary
+> below.
+
+## Enabling the journal
+
+Journaling is **off by default** (zero overhead when disabled — the
+`journal_entry` table exists but is never written to). Turn it on either via
+configuration or the constructor.
+
+In `engrava.yaml`:
+
+```yaml
+database:
+  path: "./engrava.db"
+
+journal:
+  enabled: true
+```
+
+```python
+from engrava import SqliteEngravaCore
+
+async with await SqliteEngravaCore.from_config("engrava.yaml") as store:
+    assert store.journal is not None  # journaling is active
+```
+
+Or when constructing the store directly:
+
+```python
+import aiosqlite
+from engrava import SqliteEngravaCore
+
+async with aiosqlite.connect("engrava.db") as conn:
+    conn.row_factory = aiosqlite.Row
+    store = SqliteEngravaCore(conn, journal_enabled=True)
+    await store.ensure_schema()
+```
+
+`store.journal` returns the `JournalWriter` when journaling is enabled, or
+`None` when it is off — so a quick `if store.journal is not None:` guards any
+journal-specific code.
+
+## What gets recorded
+
+When journaling is enabled, the store records a journal entry **automatically**
+on every mutation of a thought or an edge — you do not call the journal
+yourself. The recorded `mutation_type` values (the `MutationType` enum) are:
+
+| `MutationType` | When |
+|---|---|
+| `INSERT_THOUGHT` | `create_thought()` |
+| `UPDATE_THOUGHT` | `update_thought()` |
+| `DELETE_THOUGHT` | `delete_thought()` (only when a row was actually deleted) |
+| `INSERT_EDGE` | `create_edge()` |
+| `UPDATE_EDGE` | `update_edge()` |
+| `DELETE_EDGE` | `delete_edge()` (only when a row was actually deleted) |
+
+Each entry's `delta` is a `{"before": ..., "after": ...}` dictionary: inserts
+have `before: null`, deletes have `after: null`, and updates carry both sides.
+
+> **Not recorded:** embeddings (`store_embedding`) and action records
+> (`create_action`) are **not** written to the journal — the audit trail covers
+> the thought-and-edge graph, not the embedding or action tables. Snapshots have
+> the opposite gap (no journal) — see [Backup note](#backup--retention-note).
+
+**TTL expiry is recorded.** `cleanup_expired()` (and the auto-cleanup it
+triggers) goes through the same journaled paths, so expiry of a thought is
+captured according to the configured TTL strategy:
+
+- **archive** strategy → an `UPDATE_THOUGHT` entry (the thought's
+  `lifecycle_status` flips to `ARCHIVED` and `expires_at` is cleared; the delta
+  carries the before/after).
+- **delete** strategy → a `DELETE_THOUGHT` entry (`after: null`).
+
+(The separate `engrava gc` CLI command, which physically purges already-archived
+rows, operates at the storage layer and is not journaled.)
+
+## The `JournalEntry` schema
+
+Each entry is an immutable `JournalEntry`:
+
+| Field | Type | Meaning |
+|---|---|---|
+| `entry_id` | `str` | Stable UUID for this entry |
+| `sequence_number` | `int` | Monotonic, gapless position in the chain (starts at 1) |
+| `mutation_type` | `str` | One of the `MutationType` values above |
+| `target_id` | `str \| None` | The affected `thought_id` / `edge_id` |
+| `delta` | `dict` | `{"before": {...}, "after": {...}}` diff |
+| `parent_hash` | `str \| None` | SHA-256 of the previous entry (`None` for the first entry) |
+| `entry_hash` | `str` | SHA-256 of this entry's canonical content |
+| `created_at` | `str` | ISO-8601 UTC timestamp |
+
+The hash is computed over the canonical string
+`"{sequence_number}|{mutation_type}|{target_id}|{json(delta, sort_keys)}|{parent_hash}"`
+via `JournalWriter.compute_hash(...)` (a static method, exposed for callers who
+want to recompute a hash independently).
+
+## Querying history
+
+Use `store.journal.get_entries(...)` to read the trail. All filters are
+optional; results are ordered by `sequence_number` ascending.
+
+```python
+# Everything that ever happened to one thought:
+history = await store.journal.get_entries(target_id="thought-001")
+for entry in history:
+    print(entry.sequence_number, entry.mutation_type, entry.created_at)
+
+# Only deletions, since a timestamp, capped:
+deletions = await store.journal.get_entries(
+    mutation_type="DELETE_THOUGHT",
+    since="2026-01-01T00:00:00+00:00",
+    limit=500,
+)
+```
+
+| Parameter | Default | Meaning |
+|---|---|---|
+| `target_id` | `None` | Filter by the affected entity ID |
+| `mutation_type` | `None` | Filter by mutation type string |
+| `since` | `None` | ISO-8601 lower bound on `created_at` (inclusive) |
+| `limit` | `100` | Maximum entries returned |
+
+## Verifying integrity
+
+`store.journal.verify_integrity()` walks the whole chain in order, recomputes
+every hash, and checks the parent-hash linkage. It returns a
+`JournalIntegrityResult`:
+
+```python
+result = await store.journal.verify_integrity()
+if result.valid:
+    print(f"Chain OK — {result.entries_checked} entries verified.")
+else:
+    print(
+        f"Tampering or corruption detected at sequence "
+        f"{result.first_invalid_sequence}: {result.error_message}"
+    )
+```
+
+| Field | Type | Meaning |
+|---|---|---|
+| `valid` | `bool` | `True` if every hash and link checks out |
+| `entries_checked` | `int` | Number of entries verified |
+| `first_invalid_sequence` | `int \| None` | Sequence of the first broken entry, or `None` |
+| `error_message` | `str \| None` | Description of the first error, or `None` |
+
+An empty journal verifies as `valid=True` with `entries_checked=0`.
+
+**Run verification on a schedule** (e.g. before each backup, during incident
+response, or as a periodic monitoring check) rather than only ad hoc — that is
+what turns the chain from a passive structure into an active control.
+
+## Worked example
+
+```python
+import aiosqlite
+import uuid
+from engrava import (
+    SqliteEngravaCore,
+    ThoughtRecord,
+    ThoughtType,
+    Priority,
+    LifecycleStatus,
+)
+
+async with aiosqlite.connect(":memory:") as conn:
+    conn.row_factory = aiosqlite.Row
+    store = SqliteEngravaCore(conn, journal_enabled=True)
+    await store.ensure_schema()
+
+    note = ThoughtRecord(
+        thought_id=str(uuid.uuid4()),
+        thought_type=ThoughtType.OBSERVATION,
+        essence="User prefers email over phone",
+        content="Stated during onboarding call.",
+        priority=Priority.P2,
+        lifecycle_status=LifecycleStatus.ACTIVE,
+        created_cycle=0,
+        updated_cycle=0,
+        source="human",
+    )
+    await store.create_thought(note)
+    await store.update_thought(note.thought_id, essence="User strongly prefers email")
+
+    # Two entries were recorded automatically (INSERT_THOUGHT, UPDATE_THOUGHT).
+    entries = await store.journal.get_entries(target_id=note.thought_id)
+    assert [e.mutation_type for e in entries] == ["INSERT_THOUGHT", "UPDATE_THOUGHT"]
+
+    # The chain verifies.
+    result = await store.journal.verify_integrity()
+    assert result.valid and result.entries_checked == 2
+```
+
+## Security model & guarantees
+
+The journal is a **keyless** SHA-256 integrity chain stored **in the same
+SQLite file** it protects. `verify_integrity()` recomputes each entry's hash
+from that entry's own stored data — there is no secret key, HMAC, signature, or
+external anchor.
+
+**What it protects against (in scope):**
+
+- **Accidental corruption** — bit-rot, a truncated file, a half-written row: the
+  recomputed hash or the parent linkage will not match, and verification fails.
+- **Naive tampering** — someone who edits, deletes, or reorders a journal row
+  (or an audited record) *without* recomputing the rest of the chain: the break
+  is detected at the first inconsistent entry.
+
+**What it does NOT protect against (out of scope):**
+
+- **A chain-aware actor with write access to the database file.** Because the
+  chain is keyless and self-contained, anyone who can write to the `.db` can
+  edit an entry **and** recompute every subsequent hash, producing a fully
+  self-consistent chain that passes `verify_integrity()` with `valid=True`. The
+  journal is **not** forgery-proof against an adversary (including the agent
+  process itself) who controls the file.
+
+If you need genuine, multi-party tamper-evidence, treat the in-file chain as one
+layer and add at least one of:
+
+- **Restrict write access** — store the `.db` on a volume only the trusted
+  writer process can modify (OS file permissions / ownership).
+- **Anchor the chain externally** — periodically export the latest
+  `entry_hash` (the chain tail) to an append-only / WORM store, a signed log, or
+  another system out of the writer's control. A later `verify_integrity()` plus
+  a match against the externally-anchored tail hash detects a full-file rewrite.
+- **Verify on a schedule** — run `verify_integrity()` from a separate monitored
+  process so a detected mismatch raises an alert.
+
+State this boundary plainly to stakeholders: Engrava's journal gives you
+**integrity detection for accidental damage and unsophisticated edits**, not
+cryptographic non-repudiation against a file-level adversary.
+
+## Backup & retention note
+
+The logical snapshot/restore path (`engrava snapshot` / `engrava restore`)
+covers the thought / edge / embedding / action tables — it does **not** include
+the `journal_entry` table. A snapshot is therefore **not** a backup of the audit
+trail, and restoring from one starts a fresh chain. To preserve the journal,
+back up the database file itself (see [Backup & Recovery](backup-and-recovery.md)), and note
+that hard-deleting an audited thought still leaves its content in the journal's
+`before`/`after` delta — relevant when handling erasure requests.
+
+## See also
+
+- The [Enabling the journal](#enabling-the-journal) section above is the
+  canonical reference for the `journal.enabled` configuration flag; the general
+  [Configuration](configuration.md) guide covers the rest of `engrava.yaml`.
+- [API Reference](api-reference.md) — the broader public API. The journal
+  classes `JournalWriter` / `JournalEntry` / `JournalIntegrityResult` and the
+  `MutationType` enum are documented here, on the Audit Trail page.
diff --git a/docs/backup-and-recovery.md b/docs/backup-and-recovery.md
new file mode 100644
index 0000000..4f99f32
--- /dev/null
+++ b/docs/backup-and-recovery.md
@@ -0,0 +1,129 @@
+# Backup & Recovery
+
+Two ways to back up an Engrava database, what each one covers, and how to restore
+and verify. The most important thing to know up front: a **logical snapshot does
+not include the audit journal**, and a **naive file copy in WAL mode can lose
+data** — both are explained below.
+
+## Two kinds of backup
+
+| Method | What it captures | Portable across versions? |
+|---|---|---|
+| **Logical snapshot** (`engrava snapshot`) | Thoughts, edges, embeddings, and actions as JSONL records | Yes — it's data, not file format |
+| **Physical file backup** | The exact database file(s) — *everything*, including the audit journal | Tied to the SQLite file format (very stable) |
+
+Pick the logical snapshot for portability and selective restore; pick a physical
+backup when you need a byte-exact copy (including the journal) or point-in-time
+file recovery.
+
+## Logical snapshot and restore
+
+```bash
+engrava --db engrava.db snapshot -o backup.jsonl   # export
+engrava --db fresh.db   restore  -i backup.jsonl   # import into a fresh db
+```
+
+The snapshot is JSONL: a metadata header line, then one record per
+thought / edge / embedding / action.
+
+> **A snapshot does NOT include the audit journal.** The `journal_entry` table —
+> the tamper-evident hash chain — is **not** exported by `engrava snapshot`, and
+> therefore is **not** recreated by `restore`. A database restored from a snapshot
+> starts with an **empty journal**: the data is intact, but its prior audit
+> history is gone. If audit continuity matters, use a **physical file backup**
+> (which copies the journal verbatim), not a logical snapshot. See
+> [Audit Trail](audit-trail.md).
+
+`restore` options worth knowing (see the [CLI reference](cli.md#restore) for the
+full list): `--clear` to wipe the target first, `--skip-embeddings` / `--re-embed`
+to control embedding handling, and `--service` for multi-service targets.
+
+## Physical file backup (WAL-safe)
+
+Engrava runs in **WAL mode**, where recently-written data lives in the `-wal`
+file until it is checkpointed into the main `.db`. A plain file copy is only safe
+under specific conditions, so choose the method by whether the database is
+**live** (being written) or **stopped**.
+
+### If the database is live (writers running)
+
+A file copy of a database under active writes is **not reliable** — the `.db` and
+`-wal` change during the copy and can be captured inconsistently. Use a method
+that produces an internally consistent copy *without* stopping writers:
+
+**SQLite Online Backup API** — a hot, consistent backup driven from your own code
+via Python's `sqlite3` backup API (`source.backup(dest)`). This is the recommended
+way to back up a running database; it can copy incrementally in batches (`pages=N`).
+
+**`VACUUM INTO`** — writes a fresh, consistent, compacted copy of the database to
+a new file. SQLite serialises it correctly against ongoing activity:
+
+```bash
+sqlite3 engrava.db "VACUUM INTO 'engrava-backup.db';"
+```
+
+Both produce a single clean `.db` you can store or move; neither requires copying
+the `-wal`/`-shm` files.
+
+### If you can stop or quiesce writers
+
+When you can take the database offline (or guarantee no writes for the duration),
+a file copy is safe — preferably after folding the WAL back into the main file:
+
+**Checkpoint, then copy the single file:**
+
+```bash
+# with no writers active:
+sqlite3 engrava.db "PRAGMA wal_checkpoint(TRUNCATE);"
+cp engrava.db engrava.db.bak
+```
+
+**Or copy the file set** (`engrava.db` + `-wal` + `-shm`) **as one atomic unit** —
+e.g. via a filesystem-level snapshot (LVM, ZFS, a cloud volume snapshot) that
+captures all three at the same instant. A plain `cp` of the three files of a
+*live* database is **not** atomic and can still be inconsistent; only do the
+multi-file copy when writers are stopped or behind a consistent snapshot.
+
+> **Do not** rely on a bare `cp engrava.db backup.db` — or even a non-atomic
+> `cp engrava.db engrava.db-wal engrava.db-shm ...` — while the database is being
+> written. For a live database use the Online Backup API or `VACUUM INTO`.
+
+## Restoring
+
+- **From a snapshot:** `engrava --db <target> restore -i backup.jsonl`. Restore
+  into a **fresh** database (optionally `--clear` an existing one). Remember the
+  journal is not restored.
+- **From a physical backup:** stop the process, put the backed-up file in place,
+  and start again. A backup made with the Online Backup API, `VACUUM INTO`, or a
+  checkpoint-then-copy is a single self-contained `.db`. If instead you captured a
+  multi-file filesystem snapshot, restore `engrava.db`, `engrava.db-wal`, and
+  `engrava.db-shm` together as the unit they were snapshotted in.
+
+### Verify a restore
+
+After restoring, confirm the database is readable and the counts look right:
+
+```bash
+engrava --db restored.db info     # reports counts; confirms the schema is readable
+```
+
+For a snapshot restore you can compare `info` counts against the source. If you
+rely on the audit journal and restored from a **physical** backup, also re-run
+journal verification (see [Audit Trail](audit-trail.md)) to confirm the chain is
+intact.
+
+## Multi-service backups
+
+With [`EngravaManager`](concurrency.md#per-service-isolation), each service is its
+own database file under the shared data directory. Back them up the same way —
+either snapshot each service (`snapshot --service <name>`) or take a WAL-safe
+physical copy of each `<name>.db` (plus its `-wal`/`-shm` if you use an atomic
+file-set snapshot). Because services are independent files, you can back up,
+restore, or delete one without touching the others.
+
+## See also
+
+- [Audit Trail](audit-trail.md) — the journal that snapshots exclude
+- [Concurrency](concurrency.md) — why WAL needs a WAL-safe backup
+- [Data Lifecycle](data-lifecycle.md) — retention, erasure, and VACUUM
+- [Upgrade Guide](upgrade.md) — backing up before an upgrade
diff --git a/docs/cli.md b/docs/cli.md
new file mode 100644
index 0000000..6f0e4b9
--- /dev/null
+++ b/docs/cli.md
@@ -0,0 +1,228 @@
+# CLI reference
+
+Engrava ships an `engrava` command-line tool for inspecting, querying, and
+maintaining a database without writing code. This page documents every command
+and option.
+
+```bash
+engrava [GLOBAL OPTIONS] COMMAND [ARGS]...
+```
+
+## Global options
+
+These apply to every command and go **before** the command name:
+
+| Option | Values / type | Default | Description |
+|---|---|---|---|
+| `--db` | path | `./engrava.db` | Path to the SQLite database. Falls back to the `ENGRAVA_DB` env var, then the default. |
+| `--config` | path | — | Path to `engrava.yaml`. Falls back to the `ENGRAVA_CONFIG` env var. |
+| `--format` | `table` \| `json` \| `csv` | `table` | Output format for commands that print records. |
+| `--verbose` | flag | off | Enable verbose output. |
+| `--help` | flag | — | Show help and exit (works on the root and on every command). |
+
+**Environment variables.** `ENGRAVA_DB` and `ENGRAVA_CONFIG` are CLI fallbacks for
+`--db` and `--config` respectively; the explicit flag always wins
+(`--db` > `ENGRAVA_DB` > `./engrava.db`).
+
+```bash
+export ENGRAVA_DB=/data/engrava.db
+engrava info                       # uses /data/engrava.db
+engrava --db other.db info         # flag overrides the env var
+```
+
+## Commands
+
+| Command | Purpose |
+|---|---|
+| [`info`](#info) | Show a metrics snapshot for the database. |
+| [`query`](#query) | Run a MindQL query. |
+| [`snapshot`](#snapshot) | Export the whole database to a JSONL snapshot. |
+| [`restore`](#restore) | Restore a database from a JSONL snapshot. |
+| [`gc`](#gc) | Garbage-collect archived thoughts (and optionally expired ones). |
+| [`migrate`](#migrate) | Run pending schema migrations. |
+| [`export`](#export) | Export thoughts to a portable JSON file. |
+
+## Service resolution
+
+The `--service` option on `snapshot` and `restore` resolves the same way in both
+commands:
+
+| `--service` | Services config loaded? | Result |
+|---|---|---|
+| `--service NAME` (explicit) | either | Targets service **NAME**. Its database is found/created in the services `data_dir` if a config is loaded, otherwise in the **parent directory of `--db`** (i.e. `<parent-of-db>/NAME.db`). |
+| omitted | yes | Falls back to `services.default_service`. |
+| omitted | no | Operates on the single `--db` database (not service mode). |
+
+In short: an explicit `--service` works even without a services config (using
+`--db`'s directory as the data directory), while omitting it only enters
+multi-service mode when a services config is present.
+
+### `info`
+
+Shows a metrics snapshot (counts, etc.) for the current database. Takes no
+command-specific options.
+
+```bash
+engrava --db engrava.db info
+```
+
+Use this after an upgrade or a restore to confirm the database is readable and
+the counts look right.
+
+### `query`
+
+Executes a [MindQL](mindql.md) query and prints the results in the chosen
+`--format`.
+
+```bash
+engrava query "MQL"
+```
+
+The `MQL` string is a positional argument. It accepts `FIND`, `COUNT`, `SELECT`,
+or registered extension commands:
+
+```bash
+engrava query "FIND thoughts WHERE lifecycle_status = 'ACTIVE'"
+engrava query "COUNT thoughts WHERE priority = 'P1'"
+engrava --format json query "SELECT thought_id, essence FROM thought LIMIT 5"
+```
+
+### `snapshot`
+
+Exports the **entire** database to a JSONL snapshot (one record per line).
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `-o`, `--output` | path | derived (see below) | Output JSONL file path. |
+| `--service` | name | see below | The service to snapshot (multi-service mode only). |
+
+**Default output path** depends on the mode:
+
+- **Single database:** `<db-stem>.snapshot.jsonl` next to the database — e.g.
+  `--db engrava.db` → `engrava.snapshot.jsonl` (the `.db` suffix is replaced).
+- **Multi-service:** `<data_dir>/<service>.snapshot.jsonl`.
+
+**`--service`** resolves in three ways (see [Service resolution](#service-resolution)):
+
+- **Explicit `--service NAME`** targets that service even with no services config
+  — the service database is looked up/created in the data directory, which is the
+  services config's `data_dir` if one is loaded, otherwise the **parent directory
+  of `--db`**.
+- **Omitted, with a services config loaded** → falls back to
+  `services.default_service`.
+- **Omitted, with no services config** → snapshots the single `--db` database.
+
+```bash
+engrava --db engrava.db snapshot -o backup.jsonl
+engrava --db engrava.db snapshot               # -> engrava.snapshot.jsonl
+engrava --db /data/engrava.db snapshot --service tenant_a   # -> /data/tenant_a.snapshot.jsonl
+engrava --config engrava.yaml snapshot --service tenant_a   # data_dir from config
+```
+
+> A snapshot exports `thought`, `edge`, `embedding`, and `action` records — but
+> **not** the audit journal (`journal_entry`). See
+> [Backup & Recovery](backup-and-recovery.md) for what this means and when to use
+> a physical file backup instead.
+
+### `restore`
+
+Restores a database from a JSONL snapshot produced by `snapshot`.
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `-i`, `--input` | path | **required** | JSONL snapshot file to restore. |
+| `--clear` | flag | off | Clear existing data before restoring. |
+| `--skip-embeddings` | flag | off | Import without embedding records. |
+| `--re-embed` | flag | off | Re-embed all thoughts via the target provider, ignoring source embeddings. |
+| `--service` | name | see below | The service to restore into. |
+
+`--service` resolves exactly as for `snapshot` (see
+[Service resolution](#service-resolution)): an explicit `--service NAME` targets
+that service even without a services config (its database resolves in the
+services `data_dir`, or the **parent directory of `--db`** when no config is
+loaded); omitted with a services config, it falls back to `services.default_service`;
+omitted with no services config, it restores into the single `--db` database.
+
+`--skip-embeddings` and `--re-embed` are **mutually exclusive** — passing both
+fails with:
+
+```
+Error: --re-embed and --skip-embeddings are mutually exclusive.
+```
+
+Use `--re-embed` when the target uses a different embedding model than the
+snapshot (the embeddings would otherwise be incompatible — see
+[Troubleshooting → EmbeddingModelMismatchError](troubleshooting.md#embeddingmodelmismatcherror-when-opening-an-existing-database)).
+Use `--skip-embeddings` to import text only.
+
+```bash
+engrava --db fresh.db restore -i backup.jsonl
+engrava --db fresh.db restore -i backup.jsonl --clear --re-embed
+```
+
+> Restore recreates thoughts, edges, embeddings, and actions, **not** the audit
+> journal — a restored database starts with an empty journal.
+
+### `gc`
+
+Garbage-collects `ARCHIVED` thoughts and their orphaned edges. With `--expired`
+it also runs the TTL expiry cleanup first.
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `--dry-run` | flag | off | Show what would be deleted without changing anything. |
+| `--expired` | flag | off | Also run expiry cleanup (archive or delete per `ttl.strategy`) before collecting. |
+
+```bash
+engrava --db engrava.db gc                 # delete ARCHIVED thoughts + orphaned edges
+engrava --db engrava.db gc --expired       # run expiry cleanup first (per strategy)
+engrava --db engrava.db gc --expired --dry-run
+```
+
+The behaviour of `gc --expired` depends on `ttl.strategy`: with `delete` it
+removes expired rows and then collects pre-existing archived rows; with the
+default `archive` it archives the expired rows and stops (it does not collect
+them in the same pass). See
+[Data lifecycle → running cleanup](data-lifecycle.md#running-cleanup).
+
+### `migrate`
+
+Runs pending schema migrations (ensures the core tables exist and are
+up to date). Takes no command-specific options. Safe to run after an upgrade.
+
+```bash
+engrava --db engrava.db migrate
+```
+
+### `export`
+
+Exports thoughts to a portable JSON file (with edges and metadata). Unlike
+`snapshot` (JSONL, whole-database, for backup/restore), `export` writes a single
+indented JSON document and can be filtered by lifecycle status.
+
+| Option | Type | Default | Description |
+|---|---|---|---|
+| `-o`, `--output` | path | `<db>.export.json` (derived) | Output JSON file path. |
+| `--status` | lifecycle status | all | Only export thoughts with this `lifecycle_status` (e.g. `ACTIVE`). |
+
+```bash
+engrava --db engrava.db export -o thoughts.json
+engrava --db engrava.db export --status ACTIVE
+```
+
+## Journal verification
+
+There is **no `engrava verify` command** in this version. To verify the
+[audit journal](audit-trail.md)'s hash chain, use the Python API:
+
+```python
+result = await store.journal.verify_integrity()
+print(result.valid)
+```
+
+## See also
+
+- [MindQL](mindql.md) — the query language `engrava query` runs
+- [Backup & Recovery](backup-and-recovery.md) — snapshot/restore vs physical backup
+- [Data Lifecycle](data-lifecycle.md) — what `gc` and `gc --expired` do
+- [Configuration](configuration.md) — the `engrava.yaml` that `--config` loads
diff --git a/docs/concepts.md b/docs/concepts.md
new file mode 100644
index 0000000..d186f10
--- /dev/null
+++ b/docs/concepts.md
@@ -0,0 +1,247 @@
+# Core Concepts
+
+Engrava models an agent's memory as a **thought-graph**: typed *thoughts*
+connected by typed *edges*, made searchable by *embeddings*, and refined over
+time by *dreaming* into higher-order *reflections*. This page explains those
+pieces as a mental model — what each is, why it exists, and when you'd create
+it — before the how-to guides. Read it once and the rest of the docs will make
+more sense.
+
+> For a one-line definition of any term used here (essence, cycle, signal, gate,
+> provenance, …), see the [Glossary](glossary.md).
+
+```
+                 ┌──────────────────────────────────────────┐
+   OBSERVATION   │  "User prefers email over phone"         │  essence (prompt-facing)
+   (a thought)   │  content: "Stated during onboarding..."  │  content (full text)
+                 │  priority P2 · lifecycle ACTIVE           │
+                 └───────────────┬──────────────────────────┘
+                                 │ ASSOCIATED  (an edge: typed, weighted)
+                 ┌───────────────▼──────────────────────────┐
+   BELIEF        │  "This user is low-touch"                 │
+                 └───────────────┬──────────────────────────┘
+                                 │ CONSOLIDATED_FROM  (created by dreaming)
+                 ┌───────────────▼──────────────────────────┐
+   REFLECTION    │  cluster summary of related thoughts      │  (higher-order, system-made)
+                 └──────────────────────────────────────────┘
+```
+
+## Thought
+
+A **thought** (`ThoughtRecord`) is the unit of memory — one idea, fact,
+observation, or message. Thoughts are *frozen* (immutable) value objects; you
+don't mutate one in place, you `create_thought()` it and later
+`update_thought()` to get a new version.
+
+### `essence` vs `content` (two text fields, on purpose)
+
+Every thought carries **two** texts, and the split is deliberate:
+
+- **`essence`** — the compact, canonical, **prompt-facing** one-liner
+  (1–200 characters, enforced). This is the text you inject into an LLM prompt
+  when this memory is retrieved. Keep it short and self-contained.
+- **`content`** — the **full** source text, retained for full-text search and
+  provenance. It can be as long as you like.
+
+> Why it matters: when you retrieve memories to build a prompt, you want the
+> tight `essence`, not the whole `content`. Putting the same long text in both
+> defeats the purpose. Think *headline* (`essence`) vs *article* (`content`).
+
+### Thought types
+
+`ThoughtType` is a closed set — choose the one that fits what you're storing:
+
+| Type | What it is | Who creates it |
+|---|---|---|
+| `OBSERVATION` | Something learned from the world (a user message, a fact) | you (ingest) |
+| `BELIEF` | A held conclusion or stance derived from observations | you / your agent |
+| `TASK` | Something to be done | you / your agent |
+| `OUTPUT_DRAFT` | The agent's own outgoing content (a reply it produced) | your agent |
+| `NOTE` | A free-form internal note | you / your agent |
+| `REFLECTION` | A cluster summary produced by **dreaming** | the system (don't hand-create) |
+
+There is no `INSIGHT`/`IDEA`/`GOAL` — the set is exactly the six above. Type is
+not cosmetic: dreaming only clusters `OBSERVATION`s by default, and
+`REFLECTION` is reserved for dreaming's output, so mis-typing changes downstream
+behaviour.
+
+### Priority
+
+`Priority` is `P1` (highest) … `P4` (lowest). It is one of the signals that
+hybrid search fuses into a ranking, so higher-priority thoughts surface more
+readily. Set it to reflect how important a memory is to keep at hand.
+
+### Lifecycle
+
+A thought moves through a small state machine:
+
+```
+CREATED → ACTIVE → DONE → ARCHIVED
+```
+
+`LifecycleStatus` transitions are enforced (`evolve()` rejects illegal jumps).
+Most thoughts you create will start `ACTIVE`. `ARCHIVED` is a **soft-retired**
+retention state and a marker for garbage collection — an archived regular thought
+is **not** automatically hidden from `search_hybrid` / `list_thoughts` /
+`count_thoughts`; it stays searchable until you remove it with `engrava gc`. The
+only rows search auto-excludes are **expired** thoughts and **retired
+REFLECTIONs**. See [Data Lifecycle](data-lifecycle.md) for the full
+retention and garbage-collection behavior.
+
+## Edge
+
+An **edge** (`EdgeRecord`) is a typed, weighted, directional link between two
+thoughts — this is what makes Engrava a *graph*, not just a table. The
+`EdgeType` set includes `ASSOCIATED`, `DEPENDS_ON`, `DERIVED_FROM`,
+`MESSAGE_OF`, `BRIDGE`, `CONSOLIDATED_FROM`, and `CONTESTED_BY`. `weight` (0.0–1.0)
+expresses how strong the relation is.
+
+Create edges when a relationship between two memories is itself meaningful —
+e.g. one thought supports, contradicts, or depends on another. Dreaming also
+creates edges automatically (`ASSOCIATED` between consolidated thoughts, and
+`CONSOLIDATED_FROM` from a reflection back to its sources).
+
+## Embedding
+
+An **embedding** is the vector representation of a thought that powers semantic
+(meaning-based) search. Embeddings are optional: with no embedding provider
+configured, search still works using the bundled lexical (FTS5/BM25) index, and
+the vector signal is simply skipped. Configure a provider (and `auto_embed`) to
+get semantic retrieval. See [Configuration](configuration.md) and
+[Hybrid Search](search.md) for the provider options.
+
+## Reflection
+
+A **reflection** is a `ThoughtType.REFLECTION` thought created by **dreaming**:
+Engrava clusters semantically related thoughts and writes a higher-order summary
+node, linked back to its members by `CONSOLIDATED_FROM` edges, with a centroid
+embedding. Reflections are how a pile of individual observations becomes
+fewer, more retrievable, higher-level memories over an agent's lifetime. You do
+not create reflections by hand — dreaming makes them. See
+[Dreaming](dreaming.md).
+
+## Cycle (the agent clock)
+
+A **cycle** is a *logical clock* — a monotonically increasing integer tick that
+**you own and advance**. It is not wall-clock time and not a database row;
+Engrava never increments or stores it for you. Typically one cycle = one agent
+turn / interaction / scheduled pass.
+
+Two record fields and one call argument carry it:
+
+- **`created_cycle`** / **`updated_cycle`** — required on every `ThoughtRecord`
+  (the model enforces `updated_cycle >= created_cycle`). They stamp *when, in
+  your agent's logical time*, a thought appeared and last changed.
+- **`current_cycle`** — the value you pass into `search_hybrid(...)` and
+  `run_consolidation(...)` to tell Engrava "it is now tick N."
+
+Why a cycle exists *alongside* timestamps: it gives recency and dreaming
+deterministic, wall-clock-independent math. Search's recency signal and all of
+dreaming's age/scheduling gates (`min_age_cycles`, `schedule_every_n_cycles`,
+`recency_half_life`) are expressed in cycles, not seconds.
+
+> **The trap to avoid.** Because Engrava does not advance the cycle for you,
+> there are two distinct failure modes — and neither raises an error:
+>
+> - **Omitting it entirely** (`current_cycle=None`, the default in
+>   `search_hybrid`) makes the recency signal **inactive** — it is dropped from
+>   the ranking and its weight is redistributed to the other signals.
+> - **Passing a constant** (e.g. always `current_cycle=0`, and never advancing
+>   `created_cycle`/`updated_cycle`) keeps recency active but **useless**: a
+>   thought's age is `current_cycle - updated_cycle`, so with everything frozen
+>   at the same value every memory looks equally fresh and recency cannot
+>   distinguish old from new. The same staleness also means dreaming's age gate
+>   (`min_age_cycles`) never opens — `created_cycle`/`current_cycle` never grow,
+>   so no thought ever ages enough to be promoted.
+>
+> **Do this instead:** keep a counter in your application, increment it once per
+> turn, pass it as `current_cycle`, and use it for `created_cycle`/`updated_cycle`
+> when building thoughts. On restart, recover it (e.g. from the maximum
+> `updated_cycle` you've stored) so it stays monotonic across process restarts.
+
+## Provenance (where a memory came from)
+
+Two distinct fields record origin, and they are easy to confuse:
+
+- **`source`** — a free-form **string** identifier of the origin (e.g.
+  `"human"`, `"ingest"`, your component name). Required, your choice.
+- **`source_type`** — the **`KnowledgeSource` enum**: how the knowledge was
+  obtained.
+
+| `KnowledgeSource` | Set it when the memory came from… |
+|---|---|
+| `EXPERIENCE` | The agent's own experience / observed reality (the default) |
+| `SEEDED_LLM` | Content seeded by an LLM up front |
+| `DISTILLED_LLM` | Content distilled/derived by an LLM |
+| `DREAMING` | Produced by consolidation — **the system sets this itself** on dream-created edges/reflections |
+
+Provenance is not decoration: dreaming can filter on it (e.g. preferring
+experience-based confirmations), so setting `source_type` honestly lets you tune
+what consolidation trusts.
+
+## Visibility (inner vs outer speech)
+
+`ThoughtVisibility` marks whether a thought may surface in the agent's **outer
+speech** (what it says) or stays **internal** (what it only thinks):
+
+- **`private`** — never disclosed externally; internal memory only.
+- **`selective`** — shared with trusted entities on request (the **default**).
+- **`public`** — may appear in the agent's outer speech / output.
+
+Engrava *stores* the level; **honouring it is your application's
+responsibility** (Engrava won't stop you from reading a `private` thought — it
+records the intent so your agent can respect it). Use it to keep a privacy
+boundary between what the agent knows and what it's allowed to say.
+
+## Reliability: `confidence` vs `confirmation_count`
+
+A thought carries **two different** notions of how much to trust it, and they
+feed dreaming as separate signals:
+
+- **`confidence`** — a static `0.0–1.0` belief-strength **you assign** at
+  creation (nullable; treated as `0.5` when unset). "How sure am I of this?"
+- **`confirmation_count`** — a counter of how many times the thought has been
+  **independently re-encountered / validated** over time. It grows via
+  `deduplicate=True` on `create_thought` (identical content bumps the count) or
+  your own logic. "How many times has reality re-confirmed this?"
+
+Dreaming's `ConfidenceSignal` reads the first and `ConfirmationSignal` reads the
+second, so they tune consolidation in different ways. (Relatedly,
+`DreamingGates.allow_zero_confirmation` exists so single-write batch ingest —
+where `confirmation_count` never grows — can still be consolidated.)
+
+## Putting it together
+
+```python
+import uuid
+from engrava import (
+    ThoughtRecord,
+    ThoughtType,
+    Priority,
+    LifecycleStatus,
+    KnowledgeSource,
+    ThoughtVisibility,
+)
+
+observation = ThoughtRecord(
+    thought_id=str(uuid.uuid4()),
+    thought_type=ThoughtType.OBSERVATION,  # learned from the world
+    essence="User prefers email over phone",  # prompt-facing one-liner
+    content="The user said during onboarding that email is the best way to reach them.",
+    priority=Priority.P2,
+    lifecycle_status=LifecycleStatus.ACTIVE,
+    created_cycle=12,  # your agent's logical clock, this turn
+    updated_cycle=12,
+    source="onboarding-flow",  # free-form origin id
+    source_type=KnowledgeSource.EXPERIENCE,  # how it was obtained
+    confidence=0.9,  # how sure you are
+    visibility=ThoughtVisibility.SELECTIVE,  # inner/outer-speech boundary
+)
+```
+
+## Next
+
+- [Quick Start](quickstart.md) — create, link, and search in five minutes.
+- [Dreaming](dreaming.md) — how consolidation turns observations into reflections.
+- [Hybrid Search](search.md) — how the signals (including recency/cycle and priority) fuse into a ranking.
+- [API Reference](api-reference.md) — the exact fields, enums, and methods.
diff --git a/docs/concurrency.md b/docs/concurrency.md
new file mode 100644
index 0000000..ca2df89
--- /dev/null
+++ b/docs/concurrency.md
@@ -0,0 +1,124 @@
+# Concurrency
+
+Engrava is built on SQLite, so it inherits SQLite's concurrency model: **many
+concurrent readers, one writer at a time.** This page explains what that means in
+practice — within one process and across processes — and the specific behaviours
+to know about (busy timeout, the journal's in-process lock, and per-service
+isolation).
+
+## WAL: many readers, one writer
+
+File databases opened via `from_config` use **WAL** (write-ahead logging) mode.
+Under WAL:
+
+- **Readers don't block the writer and the writer doesn't block readers.** A
+  read sees a consistent snapshot while a write is in progress.
+- **There is still only one writer at a time.** Two writes are serialised; the
+  second waits for the first to finish.
+
+This is ideal for read-heavy agent-memory workloads: retrieval (the hot path) is
+all reads and scales freely; writes are comparatively infrequent.
+
+## Many async tasks, one store
+
+**A single store instance safely serves many concurrent `asyncio` tasks.** You do
+not need a connection pool or multiple stores for in-process concurrency:
+
+- aiosqlite runs the actual SQLite calls on a dedicated background thread and
+  marshals every query to it, so concurrent `await`s against one store are
+  serialised onto that thread rather than racing.
+- The store additionally guards order-sensitive operations (deduplication, the
+  embedding-model check) with internal `asyncio.Lock`s.
+
+What you must **not** do is share one store across **different event loops** — the
+connection is bound to the loop it was created on. One store per loop; within
+that loop, share it freely. (See
+[Known Limitations](known-limitations.md#aiosqlite-proxy-architecture).)
+
+## Busy timeout
+
+When a connection can't immediately get the lock it needs (another writer holds
+it), SQLite waits up to the **busy timeout** before giving up with
+`database is locked`. Engrava inherits Python's `sqlite3` default of **5000 ms
+(5 s)** — it does not override it.
+
+For workloads with more write contention you can raise it — either after
+`from_config` via the store's connection, or on your own connection before
+handing it to the store, as shown here:
+
+```python
+import aiosqlite
+from engrava import SqliteEngravaCore
+
+conn = await aiosqlite.connect("engrava.db")
+conn.row_factory = aiosqlite.Row
+await conn.execute("PRAGMA busy_timeout = 15000")  # wait up to 15s for a lock
+store = SqliteEngravaCore(conn)
+await store.ensure_schema()
+```
+
+A longer busy timeout trades latency-on-contention for fewer `database is locked`
+errors; tune it to your write pattern.
+
+## Multiple processes
+
+WAL allows multiple **processes** to read concurrently, and one to write — but
+heavy multi-process **writing** of the same database file is **out of scope** for
+Engrava, for two reasons:
+
+1. **SQLite is single-writer.** Multiple OS processes writing the same file
+   contend on the database lock; the busy timeout only papers over light
+   contention.
+2. **The audit journal's lock is in-process only.** When journaling is enabled,
+   appends are serialised by an `asyncio.Lock` keyed on the connection — which
+   exists **only within one process**. A second process shares no such lock, so
+   two processes journaling the same database can race the journal's
+   monotonic `sequence_number`. The writer retries on the resulting
+   `UNIQUE` collision up to **5 times**; if contention persists it raises:
+
+   ```
+   RuntimeError: Failed to append journal entry after 5 retries due to sequence contention
+   ```
+
+   This is the signal that you have more than one process writing a journaled
+   database — which is unsupported.
+
+If you need multiple independent writers, don't point them at the same file —
+give each its own database (next section).
+
+## Per-service isolation
+
+`EngravaManager` runs **one database file per named service**, each with its own
+connection and its own lock. This is the supported way to isolate writers (per
+tenant, per worker, per logical partition):
+
+```python
+from engrava import EngravaManager, load_config
+
+config = load_config("engrava.yaml")
+async with EngravaManager.from_config(config.services) as mgr:
+    store_a = await mgr.get_store("tenant_a")  # tenant_a.db
+    store_b = await mgr.get_store("tenant_b")  # tenant_b.db
+```
+
+Because each service is a separate file, writes to `tenant_a` never contend with
+writes to `tenant_b`, and each can be backed up or deleted independently. See the
+[scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy)
+for when to choose per-service isolation over in-store filtering.
+
+## Summary
+
+| Scenario | Supported? | Notes |
+|---|---|---|
+| Many async tasks, one store, one loop | ✅ | The normal case — share the store. |
+| Many readers (WAL) | ✅ | Readers never block the writer. |
+| One writer at a time | ✅ | SQLite serialises writes. |
+| One store across multiple event loops | ❌ | Connection is loop-bound; one store per loop. |
+| Many processes reading the same file | ✅ | WAL supports concurrent readers. |
+| Many processes writing the same file | ❌ | Single-writer; journal lock is in-process — use `EngravaManager`. |
+
+## See also
+
+- [Deployment](deployment.md) — process model, files on disk, graceful shutdown
+- [Known Limitations](known-limitations.md) — the aiosqlite proxy and write-safety notes
+- [Audit Trail](audit-trail.md) — the journal whose lock is discussed above
diff --git a/docs/configuration.md b/docs/configuration.md
index da6e682..5aadc1a 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -167,12 +167,102 @@ is no per-service `db_path` — the file is derived as `<data_dir>/<name>.db`):
 |-----|------|---------|-------------|
 | `embeddings` | `dict` | — | Per-service embedding-provider override (same shape as the top-level `embeddings` section) |
 
+### `journal`
+
+The hash-chain audit trail. Off by default. See [Audit Trail](audit-trail.md).
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| `enabled` | `bool` | `false` | Record every thought/edge mutation as a hash-linked journal entry |
+
+```yaml
+journal:
+  enabled: true
+```
+
+### `ttl`
+
+Time-to-live / auto-expiry of thoughts. See the
+[data-lifecycle recipes](recipes/index.md).
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| `strategy` | `str` | `"archive"` | What `cleanup_expired` does to expired thoughts: `"archive"` (soft, marks `ARCHIVED`) or `"delete"` (hard) |
+| `check_every_n_operations` | `int` | `0` | Run auto-cleanup every *N* store operations (`0` = manual only, via `cleanup_expired()` / `engrava gc --expired`) |
+| `default_ttl_seconds` | `int \| null` | `null` | Default TTL applied to new thoughts with no explicit `expires_at` (`null` = no default) |
+
+```yaml
+ttl:
+  strategy: archive          # or "delete"
+  check_every_n_operations: 100
+  default_ttl_seconds: 2592000   # 30 days
+```
+
+### `ingest`
+
+Ingest-layer behaviour (content-hash deduplication).
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| `deduplication_enabled` | `bool` | `true` | Whether ingest pipelines should pass `deduplicate=True` so identical `content` collapses into one thought (bumping `confirmation_count`) instead of a duplicate row |
+
+> Note: this flag advises ingest-layer callers; the persistence-layer
+> `create_thought` still defaults to `deduplicate=False` — see
+> [Recipes → Deduplicate repeated facts](recipes/index.md).
+
+### `hooks`
+
+Wire a custom `EngravaHooksProtocol` implementation by dotted path. See
+[Extensions](extensions.md).
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| `class` | `str \| null` | `null` | Dotted import path to a hooks class; the last segment is the class name (e.g. `"my_package.hooks.MyHooks"`). The class is instantiated and used by `from_config` |
+
+```yaml
+hooks:
+  class: "my_package.hooks.MyHooks"
+```
+
+The path is split on the final dot (`module.path` + `ClassName`) — this is a
+plain dotted path, **not** the `module.path:ATTRIBUTE` colon form used by
+[`manifests.paths`](#manifests) below.
+
+### `manifests`
+
+Load extension manifests (their hooks + schema migrations). Accepts a plain
+list of `module.path:ATTRIBUTE` references, or a mapping with `discover` /
+`paths`. See [Extensions](extensions.md).
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| `paths` | `list[str]` | `[]` | Dotted `module.path:ATTRIBUTE` references to `ExtensionManifest` objects |
+| `discover` | `bool` | `false` | Also scan the `engrava.extensions` entry-point group for manifests |
+
+```yaml
+# list form
+manifests:
+  - "my_plugin.manifest:MANIFEST"
+
+# or mapping form
+manifests:
+  discover: true
+  paths:
+    - "my_plugin.manifest:MANIFEST"
+```
+
+> The `metrics:` section (latency window size, enable/disable) is documented in
+> [Observability](observability.md).
+
 ## Environment Variables
 
+Both variables below are read by the **`engrava` CLI** only (library callers
+pass paths explicitly to `load_config` / `SqliteEngravaCore`).
+
 | Variable | Description |
 |----------|-------------|
-| `ENGRAVA_CONFIG` | Path to the YAML configuration file |
-| `ENGRAVA_DB` | Override `db_path` from configuration |
+| `ENGRAVA_CONFIG` | Fallback path to the YAML configuration file when `--config` is omitted (`--config` > `ENGRAVA_CONFIG` > none) |
+| `ENGRAVA_DB` | Fallback database-file path when `--db` is omitted (`--db` > `ENGRAVA_DB` > `./engrava.db`) |
 
 ## Multi-Service Usage
 
diff --git a/docs/data-lifecycle.md b/docs/data-lifecycle.md
new file mode 100644
index 0000000..41615c5
--- /dev/null
+++ b/docs/data-lifecycle.md
@@ -0,0 +1,170 @@
+# Data lifecycle, retention & deletion
+
+How a thought moves through its lifecycle, how time-to-live expiry works, and —
+importantly for privacy and compliance — what it takes to **truly** erase data,
+including the residue a naive delete leaves behind.
+
+> **Compliance note.** This page describes the mechanics honestly so you can build
+> a correct retention/erasure process. The default expiry strategy **archives**
+> (does not erase), and a hard delete can still leave content in the audit
+> journal and in backups. Read the [GDPR / hard deletion](#gdpr-and-hard-deletion)
+> section before relying on TTL for "deletion".
+
+## Lifecycle states
+
+Every thought carries a `LifecycleStatus`. There are four states:
+
+| State | Meaning |
+|---|---|
+| `CREATED` | Just created, not yet promoted into active use. |
+| `ACTIVE` | In normal use — the default working state, included in queries. |
+| `DONE` | Completed (e.g. a finished task) but retained. |
+| `ARCHIVED` | Soft-retired and retained until garbage-collected. **Not a global results filter** — see the note below. |
+
+You set the status on the `ThoughtRecord` you create, and update it over the
+thought's life. Archiving is the soft-retire step: an `ARCHIVED` thought still
+exists (and its content is still stored) until you garbage-collect it.
+
+> **`ARCHIVED` does not hide a thought from search or queries.** Marking a
+> regular thought `ARCHIVED` is a *retention* state, not a visibility filter: an
+> archived `OBSERVATION` still appears in `search_hybrid` / `search_fts` and is
+> still counted by `count_thoughts()` / `list_thoughts()`. Only two kinds of rows
+> are auto-excluded: **expired** thoughts (dropped by the TTL expiry checks
+> described below, unless you pass `include_expired=True`), and **retired
+> REFLECTIONs** — a `REFLECTION` whose `lifecycle_status` is no longer `ACTIVE` is
+> filtered out of search by a *freshness floor* so a stale cluster centroid can't
+> resurface. This REFLECTION gate is type-specific; it does **not** apply to
+> ordinary thoughts. To keep archived regular thoughts out of your own results,
+> either filter on `lifecycle_status` yourself or remove them with `engrava gc`.
+
+## Time-to-live (TTL) and expiry
+
+A thought can carry an expiry time. There are three ways to set it:
+
+- **Per-thought, absolute:** set `ThoughtRecord.expires_at` to a timestamp.
+- **Per-thought, relative at create time:** pass `expires_after_seconds=` to
+  `create_thought(...)`, which computes `expires_at` for you.
+- **A default for the whole store:** `ttl.default_ttl_seconds` in config applies a
+  default TTL to new thoughts that don't set their own (see
+  [Configuration → ttl](configuration.md#ttl)).
+
+Expiry is **not** automatic on a timer. Expired thoughts remain until a cleanup
+pass runs (see [running cleanup](#running-cleanup) below). By default, expired
+thoughts are **excluded** from `count_thoughts(...)` and `list_thoughts(...)` —
+pass `include_expired=True` to include them:
+
+```python
+live = await store.count_thoughts()  # excludes expired
+everything = await store.count_thoughts(include_expired=True)
+```
+
+## Archive vs. delete
+
+What a cleanup pass *does* to an expired thought is governed by the store's TTL
+strategy, set via `ttl.strategy` in config (see
+[Configuration → ttl](configuration.md#ttl)):
+
+| Strategy | Effect on an expired thought | Reversible? | Content erased? |
+|---|---|---|---|
+| `"archive"` (default) | Flips `lifecycle_status` to `ARCHIVED`; the row and its `content` stay in the database | Yes | **No** |
+| `"delete"` | Removes the thought row from the `thought` table | No | From the live table, yes — but see [residue](#gdpr-and-hard-deletion) |
+
+The default is **`archive`** — chosen so expiry is non-destructive and
+auditable. This means **expiry alone does not erase anything** under the default
+configuration. To make expiry actually remove rows, set `ttl.strategy: delete`.
+
+## Running cleanup
+
+Expiry is applied by an explicit cleanup pass — nothing happens on a timer.
+
+**From Python:** `cleanup_expired()` returns a `CleanupResult`:
+
+```python
+result = await store.cleanup_expired()
+print(result.expired_count)  # how many thoughts were expired
+print(result.strategy_applied)  # "archive" or "delete" (per config)
+print(result.timestamp)  # ISO-8601 time of the pass
+```
+
+You can also have the store run cleanup automatically every *N* operations via
+`ttl.check_every_n_operations` (default `0` = manual only).
+
+**From the CLI:** `engrava gc --expired` runs the expiry cleanup per your TTL
+strategy. What it does next depends on that strategy:
+
+```bash
+engrava gc --expired            # run expiry cleanup (per ttl.strategy)
+engrava gc --expired --dry-run  # show what would happen, change nothing
+engrava gc                      # delete ARCHIVED thoughts (+ orphaned edges)
+```
+
+- **With `ttl.strategy: delete`:** the expired rows are deleted outright, and the
+  same pass then garbage-collects any pre-existing `ARCHIVED` thoughts.
+- **With `ttl.strategy: archive` (default):** the expired rows are *archived*
+  (marked `ARCHIVED`), and the pass **stops there** — it does **not** also
+  garbage-collect archived rows in the same run. (Collecting the rows it just
+  archived would defeat the soft-retire.) To physically remove archived rows you
+  must either run a **separate** `engrava gc`, or switch to `ttl.strategy:
+  delete`.
+
+Plain `engrava gc` (no `--expired`) removes `ARCHIVED` thoughts and their
+orphaned edges. This is how archived data is finally deleted from the live table.
+
+## GDPR and hard deletion
+
+If you must erase a user's data (e.g. a GDPR erasure request), be aware that
+**neither archiving nor a single delete is sufficient on its own**. Three places
+can retain the content:
+
+1. **Archive does not erase.** Under the default `ttl.strategy: archive`, an
+   "expired" thought is only marked `ARCHIVED` — the row and its `content` remain
+   in the database. Note that `engrava gc --expired` under the `archive` strategy
+   *archives* the rows and stops; it does **not** delete archived rows in the same
+   pass. To remove the row you must run a **separate** `engrava gc` afterwards, or
+   use `ttl.strategy: delete` so the row is deleted outright.
+2. **The audit journal retains a content delta.** If the
+   [audit journal](audit-trail.md) is enabled, deleting a thought does **not**
+   remove its content from the journal. The original `INSERT_THOUGHT` entry holds
+   the content in its `delta`, and the `DELETE_THOUGHT` entry records the deletion
+   delta too — so the data survives in `journal_entry` after the thought row is
+   gone. A true erasure must also purge the relevant journal entries (and doing so
+   breaks the hash chain from that point — re-baseline if you depend on
+   verification).
+3. **Backups.** Any snapshot or file backup taken before the deletion still
+   contains the data. Erasure must extend to your backup retention.
+
+A correct hard-erasure procedure therefore looks like: delete (or
+archive-then-gc) the thought rows → purge the matching `journal_entry` rows if
+journaling is on → roll the deletion through your backup retention. Don't treat
+"the thought no longer appears in search" as "the data is gone."
+
+## Reclaiming disk space
+
+Deleting rows — whether via `ttl.strategy: delete`, `engrava gc`, or a hard
+erasure — **does not shrink the database file**. SQLite returns the freed pages
+to an internal free-list and reuses them for future writes; the file stays the
+same size on disk.
+
+To actually reclaim file size you must run `VACUUM`, which rebuilds the database
+into a compact file. Plan for its cost:
+
+- **Exclusive lock.** `VACUUM` takes an exclusive lock for its whole duration —
+  no concurrent reads or writes. Run it during a maintenance window.
+- **Temporary space.** It writes a fresh copy before swapping, so it needs
+  roughly **2× the database size** in free disk (temp + final) transiently.
+- **Off-peak.** On a large database this can take a while; schedule it off-peak.
+
+```sql
+VACUUM;                 -- rebuild in place (exclusive lock, ~2x temp space)
+VACUUM INTO 'copy.db';  -- write a compacted copy to a new file; the source file is left as-is
+```
+
+Until you `VACUUM`, expect the file size to reflect the high-water mark, not the
+live row count — this is normal SQLite behaviour, not a leak.
+
+## See also
+
+- [Configuration → ttl](configuration.md#ttl) — the strategy and default-TTL knobs
+- [Audit Trail](audit-trail.md) — what the journal records (and its delta residue)
+- [CLI](cli.md#gc) — the full `engrava gc` option reference
+- [Known Limitations](known-limitations.md) — storage and concurrency constraints
diff --git a/docs/deployment.md b/docs/deployment.md
new file mode 100644
index 0000000..bf32e0b
--- /dev/null
+++ b/docs/deployment.md
@@ -0,0 +1,133 @@
+# Deployment
+
+How to run Engrava in production: opening the store, the database files on disk,
+multi-worker setups, and shutting down cleanly. Engrava is an embedded library —
+there is no server to deploy; "deployment" means how your process opens and owns
+the database.
+
+For the concurrency model behind these recommendations, see
+[Concurrency](concurrency.md). For backups, see
+[Backup & Recovery](backup-and-recovery.md).
+
+## One store per process, opened at startup
+
+Open the store **once at process startup** and reuse it for the process's
+lifetime. `from_config` opens and **owns** the connection (it applies the schema
+and the right PRAGMAs), so use it as an async context manager that spans your
+app's life:
+
+```python
+from engrava import SqliteEngravaCore
+
+
+async def main() -> None:
+    async with await SqliteEngravaCore.from_config("engrava.yaml") as store:
+        # Hold this store for the lifetime of the process / app.
+        await run_app(store)
+```
+
+- **Do not open a new store per request.** Opening a store applies schema checks
+  and PRAGMAs; doing it per request is wasteful and multiplies open handles to
+  the same file.
+- **Do not share one store across event loops.** The underlying connection is
+  bound to the loop/thread that aiosqlite created it on — see
+  [Known Limitations](known-limitations.md#aiosqlite-proxy-architecture). One
+  store belongs to one running loop.
+- **A single store safely serves many concurrent async tasks** within that one
+  loop — see [Concurrency](concurrency.md). You do **not** need a pool of stores
+  for in-process concurrency.
+
+## The database files on disk
+
+In WAL mode (the default for file databases opened via `from_config`), SQLite
+keeps **three** files side by side:
+
+| File | Purpose |
+|---|---|
+| `engrava.db` | The main database. |
+| `engrava.db-wal` | The write-ahead log — **uncommitted and recently-committed data lives here** until checkpointed. |
+| `engrava.db-shm` | Shared-memory index for the WAL. |
+
+Operational consequences:
+
+- **Use a WAL-safe backup method** — copying only the `.db` file (or copying the
+  three files non-atomically while writes continue) can capture inconsistent
+  state. See [Backup & Recovery](backup-and-recovery.md) for the live-vs-stopped
+  options.
+- **Put them on a real local filesystem.** SQLite + WAL on networked filesystems
+  (NFS, some container overlay mounts) can corrupt or fail locking. Use a local
+  disk or a properly-configured volume.
+- **Permissions.** The process needs read/write on the directory (SQLite creates
+  and deletes `-wal`/`-shm`), not just the `.db` file. Lock the directory down to
+  the service user.
+
+## Containers
+
+- **Mount a volume for the database directory**, not just the file — SQLite needs
+  to create the `-wal`/`-shm` siblings next to the `.db`.
+- Point `database.path` in your `engrava.yaml` at the mounted volume — that's the
+  setting `from_config` reads. (`ENGRAVA_DB` is a **CLI-only** fallback for the
+  `engrava --db` flag; it does **not** configure `from_config`, so application
+  code should set `database.path`, not rely on `ENGRAVA_DB`.)
+- One container instance = one writer. If you scale to multiple replicas, they
+  must **not** all write the same database file (see
+  [multi-process](concurrency.md#multiple-processes)). Either run a single writer
+  replica, or give each replica its own database via
+  [`EngravaManager`](concurrency.md#per-service-isolation).
+
+## Multiple workers
+
+Engrava follows SQLite's single-writer model. For multi-worker app servers
+(Gunicorn/Uvicorn workers, etc.):
+
+- **Reads scale freely** under WAL — many readers and one writer coexist.
+- **Concentrate writes.** Heavy write fan-out across many OS processes hitting the
+  same file is out of scope; see [Concurrency → Multiple processes](concurrency.md#multiple-processes).
+- **Per-tenant or per-worker isolation:** give each its own database file via
+  [`EngravaManager`](concurrency.md#per-service-isolation) when you need
+  independent writers.
+
+## Graceful shutdown
+
+Who closes the connection depends on how you opened the store — because the store
+only closes a connection it **owns**:
+
+- **`from_config` (owned connection).** `from_config` opens and owns the
+  connection. Leaving the `async with` block closes it for you; equivalently, call
+  `await store.close()`, which **closes and releases the owned connection
+  cleanly**. (It does not issue an explicit WAL checkpoint — that is a
+  backup/maintenance step, `PRAGMA wal_checkpoint(TRUNCATE)`, covered in
+  [Backup & Recovery](backup-and-recovery.md#if-you-can-stop-or-quiesce-writers).)
+
+  ```python
+  async with await SqliteEngravaCore.from_config("engrava.yaml") as store:
+      ...
+  # connection closed here
+
+  # or, if you hold the store yourself:
+  await store.close()
+  ```
+
+- **Manual `SqliteEngravaCore(conn)` (caller-managed connection).** The store does
+  **not** own your connection, so `store.close()` is a **no-op** here — *you* must
+  close the connection you created:
+
+  ```python
+  conn = await aiosqlite.connect("engrava.db")
+  conn.row_factory = aiosqlite.Row
+  store = SqliteEngravaCore(conn)
+  ...
+  await conn.close()  # the caller owns and closes the connection
+  ```
+
+  (Using `async with aiosqlite.connect(...) as conn:` handles this for you.)
+
+Wire whichever applies into your framework's shutdown hook (e.g. FastAPI
+`lifespan`, a signal handler) so an interrupted process still closes cleanly.
+
+## See also
+
+- [Concurrency](concurrency.md) — the single-writer model, busy timeout, isolation
+- [Backup & Recovery](backup-and-recovery.md) — WAL-safe backup and restore
+- [Configuration](configuration.md) — the YAML the deployment loads
+- [Known Limitations](known-limitations.md) — filesystem and locking constraints
diff --git a/docs/dreaming.md b/docs/dreaming.md
index 5c722b6..fdefd18 100644
--- a/docs/dreaming.md
+++ b/docs/dreaming.md
@@ -8,6 +8,75 @@ Dreaming runs **outside** the normal CRUD path — the consumer decides
 when to invoke `run_consolidation()` (after N cycles, in a cron job,
 or manually).
 
+## How memory consolidation works (the dreaming loop)
+
+Think of a single memory's journey through an agent's lifetime. The first two
+steps — ingest and confirmation — happen on the **normal write path** as you use
+the store. The consolidation part is **manual**: when you call
+`run_consolidation()`, that one call runs promotion → edge creation → reflection
+clustering/creation → an orphan sweep, in order.
+
+```
+  ingest        you create an OBSERVATION ("user prefers email")   (write path)
+    │
+    ▼
+  confirm       the same fact is re-encountered over time, so its  (write path)
+    │           confirmation_count grows (e.g. via deduplicate=True)
+    │
+    ▼  run_consolidation(current_cycle=N)   ── manual ──
+    │
+  ┌─┴───────────────────────────────────────────────────────┐
+  │ 1. promote   thoughts that pass the gates and clear       │
+  │              promote_threshold are raised to priority P1  │
+  │ 2. link      a promoted thought *may* gain ASSOCIATED      │
+  │              edges to similar neighbours (when enabled)    │
+  │ 3. reflect   related thoughts *may* be clustered into      │
+  │              REFLECTION meta-thoughts (when enabled)       │
+  │ 4. sweep     stale REFLECTIONs whose sources left the      │
+  │              active set are retired                         │
+  └─┬───────────────────────────────────────────────────────┘
+    │
+    ▼
+  improved      later searches rank the P1 memory higher (priority
+  retrieval     signal), follow any new edges (graph signal), and can
+                surface a REFLECTION instead of many raw thoughts
+```
+
+Walking the journey:
+
+1. **Ingest.** You store memories as thoughts (typically `OBSERVATION`s) on the
+   normal write path. Dreaming does nothing yet.
+2. **Confirm.** As the same knowledge recurs, its `confirmation_count` rises —
+   automatically when you write with `deduplicate=True` (identical content
+   collapses and bumps the count), or via your own logic. This is *evidence the
+   memory matters*, and it feeds dreaming's confirmation signal. (Distinct from
+   `confidence`, the static belief-strength you set — see
+   [Core Concepts](concepts.md#reliability-confidence-vs-confirmation_count).)
+3. **Promote.** When you run consolidation, each candidate must first pass the
+   [gates](#gates) (e.g. old enough, enough confirmations) and then score above
+   `promote_threshold` across the weighted [signals](#signals). Survivors are
+   promoted to **P1**. (Both bars matter: a thought that passes the gates but
+   scores low is *not* promoted — see
+   [Troubleshooting](troubleshooting.md#dreaming-promotes-nothing-consolidation-is-inert).)
+4. **Link.** A promoted thought *may* gain `ASSOCIATED` [edges](#edge-creation)
+   to similar neighbours — when edge creation is enabled, the thought has a stored
+   embedding, and qualifying neighbours (above `min_similarity`) are found. New
+   edges persist the structure in the graph, idempotently (re-runs don't
+   duplicate edges).
+5. **Reflect.** Related thoughts *may* be clustered and summarised into
+   [`REFLECTION`](#reflections-meta-consolidation) meta-thoughts — a centroid
+   embedding plus `CONSOLIDATED_FROM` edges back to the members — when reflections
+   are enabled and eligible clusters pass the clustering/quality gates. This turns
+   a pile of observations into fewer, higher-level memories. (A REFLECTION whose
+   source cluster later leaves the active set is automatically retired so a stale
+   summary can't resurface.)
+6. **Improved retrieval.** All of this changes future
+   [hybrid search](search.md): the P1 memory ranks higher via the priority
+   signal, any new edges feed the opt-in graph signal, and reflections let one
+   high-level memory stand in for many raw ones.
+
+The rest of this page is the knob-by-knob reference for each phase.
+
 ## Quick start
 
 ```python
@@ -75,6 +144,7 @@ class MySignal:
     def __call__(self, thought: ThoughtRecord, ctx: DreamingContext) -> float:
         return 0.42
 
+
 ext = DreamingExtension(
     config=config,
     custom_signals={"my_signal": MySignal()},
@@ -224,6 +294,14 @@ counts from member text, centroid from member vectors). LLM-generated
 prose summaries belong in downstream extension hooks, not in the
 core graph layer.
 
+> **Navigating the lineage.** The `CONSOLIDATED_FROM` edges are queryable
+> through dedicated store helpers — `consolidated_member_ids(reflection_id)`,
+> `consolidated_source_statuses(reflection_id)`, and the reverse
+> `reflections_consolidated_from(source_id)`. Use them to walk from a REFLECTION
+> to its sources and back (e.g. for provenance views or orphan detection)
+> instead of querying the edge table directly. See
+> [REFLECTION lineage](api-reference.md#reflection-lineage) in the API reference.
+
 ### How clustering works
 
 Two algorithms are available via `DreamingGates.cluster_algorithm`:
@@ -267,8 +345,8 @@ extensions:
 
 ```python
 result = await ext.run_consolidation(store, current_cycle=42)
-print(result.promoted_count)       # thoughts promoted to P1
-print(result.edges_created)        # ASSOCIATED edges created
+print(result.promoted_count)  # thoughts promoted to P1
+print(result.edges_created)  # ASSOCIATED edges created
 print(result.reflections_created)  # new REFLECTION thoughts created
 ```
 
diff --git a/docs/faq.md b/docs/faq.md
new file mode 100644
index 0000000..a13f870
--- /dev/null
+++ b/docs/faq.md
@@ -0,0 +1,116 @@
+# FAQ
+
+Short answers to the questions that come up most. For "something is broken" see
+[Troubleshooting](troubleshooting.md); for "is this the right tool" see
+[Positioning](positioning.md).
+
+## Does Engrava call an LLM? Do I need an API key?
+
+No. Engrava never calls a language model and needs no API key to run. It stores
+and retrieves what your agent gives it; deciding *what* to remember (extraction,
+summarisation) is your agent's job, above the storage layer. The one feature
+that synthesises new thoughts — [dreaming](dreaming.md) — is purely structural
+(clustering, centroids, keyword counts), with no LLM involved. See
+[Non-goals](positioning.md#non-goals).
+
+An API key is only relevant if **you** choose a remote embedding provider (e.g.
+an OpenAI-compatible endpoint) — and that's for embeddings, not for any
+Engrava-side reasoning. See the [Embeddings guide](guides/embeddings.md).
+
+## Does it need network access or any running service?
+
+No. Engrava is an embedded library built on SQLite — one `pip install`, runs
+in-process, no server, no network. The only time network is involved is if you
+configure a remote embedding provider yourself.
+
+## Are embeddings required?
+
+No. Without an embedding provider, search runs on FTS5/BM25 (keyword), priority,
+and recency signals — semantic vector matching is simply skipped. Add a provider
+(local or remote) when you want semantic retrieval. See the
+[Embeddings guide](guides/embeddings.md). Note that a thought is embedded on
+write only when you set both `embedding_provider=...` **and** `auto_embed=True`.
+
+## How large a corpus can it handle?
+
+The default vector backend brute-forces cosine similarity in Python, which works
+well up to roughly **100k embeddings**. Beyond that, install the `sqlite-vec`
+backend (`pip install engrava[vec]`, then `extensions.vector.backend:
+sqlite-vec`) for indexed vector search. FTS5 scales well independently. SQLite
+itself has been exercised here into the multi-GB / millions-of-thoughts range.
+See [Known Limitations](known-limitations.md#sqlite-vec-pre-v1-status).
+
+## Can multiple processes or tasks use the same store at once?
+
+A single process can drive **many async tasks** against one store safely —
+aiosqlite serialises them on its background thread, and WAL mode lets readers and
+a single writer coexist. SQLite is **single-writer**, so heavy concurrent writes
+from **multiple processes** are out of scope. For multi-tenant isolation, give
+each tenant its own database file via `EngravaManager` (each has its own lock).
+See [Known Limitations → Concurrent Write Safety](known-limitations.md#concurrent-write-safety)
+and the [migration guide's scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy).
+
+## How do I scope search to one user or session?
+
+The `search_*` methods are **unscoped by default** — they take no `user_id` /
+`session_id` filter and rank across the whole store. Scope it yourself with one
+of three patterns: over-fetch + post-filter, one store per tenant via
+`EngravaManager`, or a raw-SQL pre-filter on `metadata_json` with `json_extract`.
+The tradeoffs are laid out in the
+[scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy).
+
+## When should I enable dreaming?
+
+Enable [dreaming](dreaming.md) when memory **accumulates over time** and you want
+the store to surface and link what matters: it promotes important thoughts to P1,
+builds associative edges, and clusters related thoughts into
+[`REFLECTION`](concepts.md) summaries. It is not useful on a tiny or write-once
+store. Run it periodically (every N cycles, a cron
+job, or manually) — never on the hot CRUD path. For single-write batch ingest,
+keep `allow_zero_confirmation=True` or nothing will ever pass the confirmation
+gate. See the agent loop's
+[consolidation cadence](guides/agent-memory.md) pattern.
+
+## What is a "cycle" and do I have to manage it?
+
+A cycle is a **consumer-owned monotonic logical clock** — your agent's tick.
+Engrava never advances or persists it for you; you pass `current_cycle` into
+search and consolidation. It drives the recency signal and the dreaming age gate.
+On restart, recover it from `max(created_cycle)` in the store.
+
+Two ways to get it wrong have different effects: passing `current_cycle=None`
+(the `search_hybrid` default) makes the recency signal **inactive** — it is
+dropped from the ranking. Passing a **constant** (e.g. always `0`, never
+advancing `created_cycle`/`updated_cycle`) keeps recency *active but useless* —
+every thought's age collapses to the same value, so nothing looks more recent
+than anything else, and the dreaming age gate (`min_age_cycles`) never opens.
+Advance the cycle each turn. See
+[Core Concepts → Cycle](concepts.md) and the related
+[Troubleshooting entry](troubleshooting.md#dreaming-promotes-nothing-consolidation-is-inert).
+
+## How do I back up the database safely?
+
+Because Engrava uses WAL mode, a naive copy of just the `.db` file can miss
+recently committed data still held in the `-wal` file. Use a WAL-safe approach —
+checkpoint then copy, `VACUUM INTO`, or SQLite's backup API. Note that a logical
+snapshot does **not** include the audit journal. See
+[Backup & Recovery](backup-and-recovery.md) and the [Upgrade Guide](upgrade.md).
+
+## Is the audit trail tamper-proof?
+
+It is **tamper-evident**, not tamper-proof. The journal is a keyless in-file
+SHA-256 hash chain: it reliably detects accidental corruption and naive edits or
+truncation, but a write-capable actor who rewrites the whole file and recomputes
+the chain is out of its threat model. Treat it as integrity evidence with OS
+file permissions and periodic off-box verification, not as a cryptographic
+guarantee against a privileged attacker. It is **off by default**
+(`journal.enabled: false`). See [Audit Trail](audit-trail.md).
+
+## Is Engrava production-ready?
+
+Engrava is published on PyPI and maintained to a strict quality bar (typed,
+linted, high test coverage). For production, the things to plan are the same as
+for any embedded SQLite system: pick the right vector backend for your corpus
+size, respect the single-writer model, set up WAL-safe backups, and (if you need
+it) enable and monitor the audit trail. The [Known Limitations](known-limitations.md)
+page is the honest list of constraints to design around.
diff --git a/docs/glossary.md b/docs/glossary.md
new file mode 100644
index 0000000..61c3568
--- /dev/null
+++ b/docs/glossary.md
@@ -0,0 +1,161 @@
+# Glossary
+
+Short definitions of the terms Engrava uses, each linking to the page that
+explains it in depth. New to Engrava? Read [Core Concepts](concepts.md) first —
+this page is a quick reference, not a tutorial.
+
+### Thought
+
+The unit of memory — one idea, fact, observation, or message, stored as a frozen
+(immutable) `ThoughtRecord`. You don't mutate a thought in place; you create it
+with `create_thought()` and call `update_thought()` to get a new version. See
+[Core Concepts → Thought](concepts.md#thought).
+
+### Essence
+
+The compact, canonical, **prompt-facing** one-liner of a thought (1–200
+characters, enforced) — the text you inject into an LLM prompt when the memory is
+retrieved. Think *headline*. See
+[Core Concepts → essence vs content](concepts.md#essence-vs-content-two-text-fields-on-purpose).
+
+### Content
+
+The **full** source text of a thought, retained for full-text search and
+provenance — as long as you like. Think *article* (to the essence's *headline*).
+See [Core Concepts → essence vs content](concepts.md#essence-vs-content-two-text-fields-on-purpose).
+
+### Edge
+
+A typed, weighted, directional link between two thoughts — what makes Engrava a
+*graph* rather than a flat table. The `EdgeType` set is `ASSOCIATED`,
+`DEPENDS_ON`, `DERIVED_FROM`, `MESSAGE_OF`, `BRIDGE`, `CONSOLIDATED_FROM`, and
+`CONTESTED_BY`; `weight` (0.0–1.0) expresses how strong the relation is. See
+[Core Concepts → Edge](concepts.md#edge).
+
+### Embedding
+
+The vector representation of a thought that powers semantic (meaning-based)
+search. Embeddings are optional — without a provider, search falls back to the
+lexical (FTS5) index and the vector signal is skipped. See the
+[Embeddings guide](guides/embeddings.md).
+
+### Reflection
+
+A higher-order summary thought (`ThoughtType.REFLECTION`) created by **dreaming**:
+Engrava clusters semantically related thoughts and writes a centroid-embedded
+summary node, linked back to its members by `CONSOLIDATED_FROM` edges. You don't
+create reflections by hand. See [Core Concepts → Reflection](concepts.md#reflection)
+and [Dreaming](dreaming.md).
+
+### Dreaming
+
+The periodic, off-the-hot-path consolidation process you invoke with
+`run_consolidation()`: it scores stored thoughts, **promotes** the important ones,
+links related ones with edges, and clusters them into reflections. No LLM is
+involved — it is purely structural. See [Dreaming](dreaming.md).
+
+### Consolidation
+
+Another name for what dreaming does in a single pass — evaluating candidates and
+producing promotions, edges, and reflections via `run_consolidation()`. See
+[Dreaming](dreaming.md).
+
+### Promotion
+
+The act, during consolidation, of marking an important thought by setting its
+priority to **P1** so it surfaces more readily in search. Whether a candidate is
+promoted depends on the [gates](#gate) and the `promote_threshold`. See
+[Dreaming](dreaming.md).
+
+### Cycle
+
+A **logical clock** — a monotonically increasing integer tick that *you own and
+advance* (typically one cycle per agent turn). It is not wall-clock time and not
+a stored row; Engrava never increments it for you. It drives the recency signal
+and dreaming's age gates. Leaving it at `None` makes recency inactive; freezing it
+at a constant makes recency useless and stalls dreaming. See
+[Core Concepts → Cycle](concepts.md#cycle-the-agent-clock).
+
+### Signal
+
+One scoring component that [hybrid search](#hybrid-search) computes for a
+candidate and fuses into the final rank. Engrava has five: FTS5 keyword, vector
+similarity, recency, priority, and graph. A signal whose prerequisite is missing
+(e.g. no embeddings) is skipped rather than erroring. See [Search](search.md).
+
+### Gate
+
+A cheap boolean check in dreaming that a candidate must pass *before* it is scored
+for promotion — e.g. `min_age_cycles` (the thought must be old enough) and the
+confirmation gate. Gates filter out clearly ineligible thoughts. See
+[Dreaming → Gates](dreaming.md#gates).
+
+### Priority
+
+A thought's importance level, `P1` (highest) to `P4` (lowest). It is one of the
+hybrid-search signals, so higher-priority thoughts surface more readily; dreaming
+**promotes** thoughts to `P1`. See [Core Concepts → Priority](concepts.md#priority).
+
+### Lifecycle
+
+The small state machine a thought moves through: `CREATED → ACTIVE → DONE →
+ARCHIVED` (`LifecycleStatus`, with transitions enforced). `ARCHIVED` is a
+soft-retired state: an archived thought remains stored (and searchable) until
+garbage-collected — it is a retention marker, not an automatic results filter. See
+[Core Concepts → Lifecycle](concepts.md#lifecycle) and
+[Data Lifecycle](data-lifecycle.md).
+
+### Provenance
+
+Where a memory came from, recorded in two fields: `source` (a free-form string id
+you choose, e.g. `"onboarding-flow"`) and `source_type` (the `KnowledgeSource`
+enum: `EXPERIENCE`, `SEEDED_LLM`, `DISTILLED_LLM`, `DREAMING`). Dreaming can
+filter on provenance, so set it honestly. See
+[Core Concepts → Provenance](concepts.md#provenance-where-a-memory-came-from).
+
+### Confirmation
+
+`confirmation_count` — a counter of how many times a thought has been
+independently re-encountered or validated over time (grows via `deduplicate=True`
+or your own logic). Distinct from `confidence`, the static belief-strength you
+assign at creation. Dreaming reads them as separate signals. See
+[Core Concepts → confidence vs confirmation_count](concepts.md#reliability-confidence-vs-confirmation_count).
+
+### Visibility
+
+`ThoughtVisibility` — whether a thought may surface in the agent's **outer
+speech**: `private` (internal only), `selective` (shared on request — the
+default), or `public` (may appear in output). Engrava stores the level;
+**honouring it is your application's responsibility**. See
+[Core Concepts → Visibility](concepts.md#visibility-inner-vs-outer-speech).
+
+### Hybrid search
+
+`search_hybrid()` — retrieval that fuses up to five [signals](#signal) (FTS5
+keyword, vector, recency, priority, graph) into one ranked result, rather than
+relying on vector similarity alone. See [Search](search.md).
+
+### Graph signal
+
+The fifth, **opt-in** hybrid-search signal: a 1-hop-weighted neighbour boost where
+a candidate gains score if its graph neighbours also match the query. Disabled by
+default (`default_graph_weight = 0.0`), so no graph queries run unless you enable
+it. See [Search](search.md).
+
+### Percept
+
+In the agent loop, an incoming observation (e.g. a user message) stored as an
+`OBSERVATION` thought, typically tagged with the `percept(...)` helper. It is what
+the agent *takes in*. See [Building a memory-backed agent](guides/agent-memory.md).
+
+### Utterance
+
+In the agent loop, the agent's own outgoing reply, stored as an `OUTPUT_DRAFT`
+thought. It is what the agent *produces*. See
+[Building a memory-backed agent](guides/agent-memory.md).
+
+## See also
+
+- [Core Concepts](concepts.md) — the same ideas as a guided mental model
+- [Search](search.md) — the signal model in depth
+- [Dreaming](dreaming.md) — consolidation, gates, promotion, reflections
diff --git a/docs/guides/agent-memory.md b/docs/guides/agent-memory.md
new file mode 100644
index 0000000..becbba8
--- /dev/null
+++ b/docs/guides/agent-memory.md
@@ -0,0 +1,260 @@
+# Building a memory-backed agent
+
+This guide shows the canonical way to wire Engrava into an agent's turn loop:
+give a chatbot or agent long-term memory that persists across sessions and
+surfaces relevant context on every turn. It's the end-to-end pattern behind
+Engrava's one-line pitch — "the memory database for AI agents."
+
+A complete, runnable version of everything here ships as
+[`examples/agent_loop.py`](https://github.com/sovantica/engrava/blob/main/examples/agent_loop.py)
+— no LLM or embedding API required (it uses a canned responder and a
+deterministic embedder). This page walks through the shape of that loop.
+
+> New to the model (thought, edge, reflection, **cycle**)? Read
+> [Core Concepts](../concepts.md) first — this guide assumes those terms.
+
+## The loop, in one picture
+
+Per user turn:
+
+```
+user message
+   │
+   ▼
+1. store it as a percept  ──────────────►  create_thought(OBSERVATION)
+2. retrieve relevant memory  ───────────►  search_hybrid(query, current_cycle)
+3. build prompt from retrieved essences ─►  call your LLM
+4. store the reply as an utterance ─────►  create_thought(OUTPUT_DRAFT)
+5. record the action taken ─────────────►  create_action(ActionRecord)
+6. advance the cycle counter ───────────►  cycle += 1   (you own this clock)
+   │
+   └─ every N turns ────────────────────►  dreaming.run_consolidation(store, current_cycle)
+```
+
+## Setup
+
+Create one store for the lifetime of the agent. Configure an embedding provider
+so retrieval is semantic (the example uses a deterministic stand-in; in
+production pass a real provider such as `SentenceTransformerProvider` or
+`OpenAICompatibleProvider`, configurable via
+[`engrava.yaml`](../configuration.md)):
+
+```python
+import aiosqlite
+from engrava import SqliteEngravaCore, CallbackProvider
+
+provider = CallbackProvider(
+    callback=my_embed_fn,       # swap in a real provider in production
+    dimension=64,
+    model_name="demo",
+)
+conn = await aiosqlite.connect("agent-memory.db")   # a file persists across runs
+conn.row_factory = aiosqlite.Row
+store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True)
+await store.ensure_schema()
+```
+
+`auto_embed=True` means thoughts are embedded on write. At search time you may
+pass an explicit `query_vector`; if you omit it, the store embeds the query
+text for you **when an embedding provider is configured**. Passing it yourself
+is handy when you've already computed the vector or want a different query
+representation.
+
+## Step 1 — store the incoming message (a *percept*)
+
+Each user message becomes an `OBSERVATION` thought, tagged with `percept(...)`
+metadata so its origin is recorded. Extend that metadata with a `session_id`
+(which conversation) and `turn_index` (position within it) so every memory is
+anchored to its conversation — these are the keys you'd later filter on (or
+post-filter on) to scope retrieval to one session or user:
+
+```python
+import uuid
+from engrava import ThoughtRecord, ThoughtType, Priority, LifecycleStatus, percept
+
+async def store_percept(store, text, cycle, user_id, session_id, turn_index):
+    record = ThoughtRecord(
+        thought_id=str(uuid.uuid4()),
+        thought_type=ThoughtType.OBSERVATION,
+        essence=text[:200],          # the prompt-facing one-liner
+        content=text,                # the full message
+        priority=Priority.P2,
+        lifecycle_status=LifecycleStatus.ACTIVE,
+        created_cycle=cycle,         # the agent clock (see step 6)
+        updated_cycle=cycle,
+        source=user_id,
+        metadata={
+            **percept(source_id=user_id, label="user"),
+            "session_id": session_id,
+            "turn_index": turn_index,
+        },
+    )
+    return await store.create_thought(record)
+```
+
+## Step 2 — retrieve relevant memory
+
+Before calling the LLM, pull the most relevant prior memories with
+`search_hybrid`. Pass `current_cycle` so the recency signal works, and turn the
+returned `(thought_id, score)` tuples back into text via `get_thought`:
+
+```python
+async def retrieve_context(store, query, cycle):
+    result = await store.search_hybrid(
+        query,
+        query_vector=my_embed_fn(query),   # optional: omit to let the provider embed `query`
+        top_k=3,
+        current_cycle=cycle,
+    )
+    essences = []
+    for thought_id, _score in result.results:
+        record = await store.get_thought(thought_id)
+        if record is not None:
+            essences.append(record.essence)   # essence = prompt-ready text
+    return essences
+```
+
+`result.results` is a list of `(thought_id, score)` — Engrava returns IDs, not
+records, so you fetch the ones you want. `result.backends_used` tells you which
+signals contributed (e.g. `{"fts5", "vector", "recency"}`).
+
+## Step 3 — build the prompt and call your LLM
+
+This is the only step that touches your model. Engrava is LLM-free; you own the
+call. Here `context` is the list of essences returned by `retrieve_context` in step 2:
+
+```python
+prompt = "Context:\n" + "\n".join(f"- {c}" for c in context)
+prompt += f"\n\nUser: {user_message}\nAssistant:"
+reply = await my_llm(prompt)        # your provider here
+```
+
+## Step 4 — store the agent's reply (an *utterance*)
+
+Persist what the agent said as an `OUTPUT_DRAFT` thought with `utterance(...)`
+metadata, so the agent's own outputs are part of memory too:
+
+```python
+from engrava import utterance
+
+async def store_utterance(store, reply, cycle, session_id, turn_index):
+    record = ThoughtRecord(
+        thought_id=str(uuid.uuid4()),
+        thought_type=ThoughtType.OUTPUT_DRAFT,
+        essence=reply[:200],
+        content=reply,
+        priority=Priority.P3,
+        lifecycle_status=LifecycleStatus.ACTIVE,
+        created_cycle=cycle,
+        updated_cycle=cycle,
+        source="agent",
+        metadata={                       # same session + turn as the percept it answered
+            **utterance(),
+            "session_id": session_id,
+            "turn_index": turn_index,
+        },
+    )
+    return await store.create_thought(record)
+```
+
+## Step 5 — record the action taken (optional)
+
+If your agent *does* things (sends a message, calls a tool), record each as an `ActionRecord`
+linked to the source thought (here `percept_thought`, the percept stored in step 1). This is
+how the audit/action surface tracks what the agent did and whether it succeeded:
+
+```python
+from engrava import ActionRecord, ActionType, ActionStatus, VerificationStatus
+
+await store.create_action(
+    ActionRecord(
+        action_id=str(uuid.uuid4()),
+        source_thought_id=percept_thought.thought_id,
+        action_type=ActionType.MESSAGE,        # or TOOL_CALL / CLI_OUTPUT / STATE_UPDATE
+        intent="answered user",
+        status=ActionStatus.CONFIRMED,
+        verification_status=VerificationStatus.CONFIRMED,
+    )
+)
+```
+
+Read them back with `await store.get_actions(thought_id)`.
+
+## Step 6 — advance the cycle
+
+A **cycle** is the agent's logical clock, and **you own it** — Engrava never
+advances or persists it. Increment it once per turn and use it for
+`created_cycle`/`updated_cycle` and the `current_cycle` you pass to search and
+consolidation:
+
+```python
+cycle = 0
+while running:
+    ...                  # steps 1–5 use `cycle`
+    cycle += 1
+```
+
+If you leave it at a constant, recency can't distinguish old memories from new
+and dreaming's age gate never opens (see
+[Cycle (the agent clock)](../concepts.md#cycle-the-agent-clock)). On restart,
+recover it so it stays monotonic — see [Persistence across restarts](#persistence-across-restarts).
+
+## Step 7 — consolidate periodically
+
+Dreaming turns accumulated observations into higher-order REFLECTIONs. In a
+long-running agent, run it on a cadence — e.g. every N turns — rather than every
+turn:
+
+```python
+from engrava import DreamingExtension, DreamingConfig
+
+dreaming = DreamingExtension(config=DreamingConfig(enabled=True))
+
+# inside the loop, after advancing the cycle:
+if cycle % 20 == 0:
+    result = await dreaming.run_consolidation(store, current_cycle=cycle)
+```
+
+The cadence is yours to choose: every-N-turns (as above), a background asyncio
+task on a timer, or an out-of-process job. Engrava is single-writer, so run
+consolidation on the same writer that handles turns (or coordinate so they don't
+write concurrently). A brand-new store has little to consolidate — REFLECTIONs
+emerge as memories accumulate and repeat. See [Dreaming](../dreaming.md) for the
+knobs.
+
+## Persistence across restarts
+
+- **Embeddings persist.** They are stored in the database; you do **not**
+  re-embed on a normal restart. (You only need `engrava restore --re-embed`
+  when you deliberately change the embedding model.)
+- **The cycle counter does not persist** — Engrava doesn't store it. Recover it
+  on startup so it keeps increasing. `list_thoughts` returns rows ordered by
+  `updated_cycle` descending, so the most recent thought carries the highest
+  cycle you've used; resume one past it:
+
+  ```python
+  recent = await store.list_thoughts(limit=1)   # ordered by updated_cycle desc
+  cycle = (recent[0].updated_cycle + 1) if recent else 0
+  ```
+
+- **Model lock.** If you configured an embedding provider, the store remembers
+  which model produced its vectors; calling `store_embedding` later with a
+  different model raises `EmbeddingModelMismatchError`. Keep the same provider
+  across restarts (or migrate deliberately).
+
+## Full example
+
+The complete, runnable loop — including the deterministic embedder and the
+mock LLM so it runs with zero external dependencies — is in
+[`examples/agent_loop.py`](https://github.com/sovantica/engrava/blob/main/examples/agent_loop.py):
+
+```bash
+python examples/agent_loop.py
+```
+
+## Next
+
+- [Core Concepts](../concepts.md) — thought / edge / reflection / cycle.
+- [Hybrid Search](../search.md) — how the retrieval ranking works.
+- [Dreaming](../dreaming.md) — consolidation in depth.
+- [Configuration](../configuration.md) — wiring an embedding provider via `engrava.yaml`.
diff --git a/docs/guides/embeddings.md b/docs/guides/embeddings.md
new file mode 100644
index 0000000..6899d8b
--- /dev/null
+++ b/docs/guides/embeddings.md
@@ -0,0 +1,228 @@
+# Embeddings
+
+Engrava's semantic (meaning-based) search is powered by **embeddings** — vector
+representations of your thoughts. This guide shows how to wire a real embedding
+provider so retrieval actually understands meaning, and how the query side
+works.
+
+> **Embeddings are optional.** With no provider configured, search still works
+> using the bundled lexical FTS5/BM25 index — the vector signal is simply
+> skipped (`HybridSearchResult.backends_used` will not contain `"vector"`). Add
+> a provider to get semantic retrieval.
+
+## Two things a provider gives you
+
+1. **Ingest-time embedding** — with `auto_embed=True`, every thought is embedded
+   on write, so it becomes findable by meaning.
+2. **Query-time embedding** — at search time the query must also be a vector.
+   `search_hybrid` takes the query *text* and, when a provider is configured,
+   embeds it **for you** (unless you pass an explicit `query_vector`).
+   `search_similar` takes a *vector* directly, so you embed the query yourself
+   first. See [The query side](#the-query-side) for both.
+
+The corpus and the query must use **the same model / dimension** — once a store
+has embeddings for one model, writing with a different model raises
+`EmbeddingModelMismatchError`.
+
+## Wiring a provider
+
+Pass the provider to the store constructor (and set `auto_embed=True`):
+
+```python
+import aiosqlite
+from engrava import SqliteEngravaCore, SentenceTransformerProvider
+
+provider = SentenceTransformerProvider(model_name="all-MiniLM-L6-v2")
+async with aiosqlite.connect("engrava.db") as conn:
+    conn.row_factory = aiosqlite.Row
+    store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True)
+    await store.ensure_schema()
+```
+
+Or declare it in `engrava.yaml` and let `from_config` build it (see the
+[`embeddings` section](../configuration.md)):
+
+```yaml
+embeddings:
+  provider: sentence-transformer
+  model: all-MiniLM-L6-v2
+  auto_embed: true
+```
+
+```python
+from engrava import SqliteEngravaCore
+
+async with await SqliteEngravaCore.from_config("engrava.yaml") as store:
+    ...   # provider wired from config, auto_embed honoured
+```
+
+## Providers
+
+Every provider implements the same async interface — `await provider.embed(text)`
+returns a `list[float]` — so they're interchangeable. Pick by where you want the
+model to run.
+
+### `SentenceTransformerProvider` — local model (no API, no network)
+
+Runs a sentence-transformers model on your machine. Requires the
+`embeddings-local` extra (pulls `sentence-transformers` + `torch`).
+
+```bash
+pip install "engrava[embeddings-local]"
+```
+
+```python
+from engrava import SentenceTransformerProvider
+
+provider = SentenceTransformerProvider(
+    model_name="all-MiniLM-L6-v2",   # default: all-MiniLM-L12-v2
+    device="cpu",                    # or "cuda"
+    batch_size=32,
+)
+```
+
+No API key, no network after the first model download. Best default for
+self-hosting.
+
+### `OpenAICompatibleProvider` — OpenAI or any OpenAI-compatible API
+
+Calls an OpenAI-style `/embeddings` endpoint. Requires the `embeddings-openai`
+extra (pulls `httpx`).
+
+```bash
+pip install "engrava[embeddings-openai]"
+```
+
+```python
+import os
+from engrava import OpenAICompatibleProvider
+
+provider = OpenAICompatibleProvider(
+    model_name="text-embedding-3-small",   # this is the default
+    base_url="https://api.openai.com/v1",  # default; point at any compatible API
+    api_key=os.environ["OPENAI_API_KEY"],  # or omit — falls back to $OPENAI_API_KEY
+)
+```
+
+`api_key` defaults to the `OPENAI_API_KEY` environment variable when omitted.
+Set `base_url` to target a compatible gateway (Azure OpenAI, a local proxy, etc.).
+
+### `OllamaProvider` — local Ollama server
+
+Calls a running [Ollama](https://ollama.com) instance. Requires the
+`embeddings-ollama` extra (pulls `httpx`); no API key.
+
+```bash
+pip install "engrava[embeddings-ollama]"
+```
+
+```python
+from engrava import OllamaProvider
+
+provider = OllamaProvider(
+    model_name="nomic-embed-text",          # default
+    base_url="http://localhost:11434",      # default Ollama address
+)
+```
+
+### `HuggingFaceProvider` — HuggingFace Inference API
+
+Calls the HuggingFace Inference API. Requires the `embeddings-hf` extra (pulls
+`huggingface_hub`).
+
+```bash
+pip install "engrava[embeddings-hf]"
+```
+
+```python
+import os
+from engrava import HuggingFaceProvider
+
+provider = HuggingFaceProvider(
+    model_name="sentence-transformers/all-MiniLM-L12-v2",  # default
+    api_key=os.environ["HF_TOKEN"],   # or omit — falls back to $HF_TOKEN
+)
+```
+
+`api_key` defaults to the `HF_TOKEN` environment variable when omitted.
+
+### `CallbackProvider` — bring your own embedding function
+
+Wrap any function `str -> list[float]`. Built-in (no extra). Use it for a custom
+model, a cached lookup, or testing.
+
+```python
+from engrava import CallbackProvider
+
+provider = CallbackProvider(
+    callback=my_embed_fn,   # str -> list[float]
+    dimension=384,          # the length your callback returns
+    model_name="my-model",
+)
+```
+
+> Do **not** ship a placeholder like `lambda text: [0.1] * 384` — a constant
+> vector makes every thought identical, so similarity is meaningless. Use a real
+> model (the providers above) or a genuine embedding function.
+
+## The query side
+
+The two search methods handle the query vector differently — `search_hybrid`
+takes the query **text**, `search_similar` takes a query **vector**.
+
+**`search_hybrid(query_text, query_vector=None, ...)`** — pass the query text.
+When an embedding provider is configured, Engrava embeds that text for you if
+you don't supply a `query_vector`; pass one explicitly only to override:
+
+```python
+# Provider configured → the query text is embedded for you:
+result = await store.search_hybrid("trips to Japan", top_k=5, current_cycle=cycle)
+
+# Or override with a vector you already have:
+query_vec = await provider.embed("trips to Japan")
+result = await store.search_hybrid("trips to Japan", query_vector=query_vec, top_k=5)
+```
+
+If **no** provider is configured **and** you pass no `query_vector`,
+`search_hybrid` skips the vector signal and falls back to the lexical (FTS5/BM25)
+signal — still useful, just keyword-based rather than semantic.
+
+**`search_similar(query_vector, ...)`** — takes a ready vector as its first,
+required argument. It does not accept query text, so there is nothing for it to
+auto-embed: you must embed the query yourself first.
+
+```python
+query_vec = await provider.embed("trips to Japan")   # required — no auto-embed here
+result = await store.search_similar(query_vec, top_k=5)
+```
+
+## Choosing a model and dimension
+
+- **Keep one model per store.** The query and corpus vectors must come from the
+  same model; switching models on an existing store requires re-embedding (see
+  `engrava restore --re-embed`).
+- **Dimension follows the model.** Local/HF providers infer it from the model;
+  `CallbackProvider` requires you to declare `dimension` to match what your
+  callback returns. For the `sqlite-vec` ANN backend, set
+  `extensions.vector.dimension` in config to match.
+
+## Config-driven equivalents
+
+Each provider has a `provider:` name for `engrava.yaml`, resolved by
+`resolve_embedding_provider(config.embeddings)`:
+
+| `provider:` value | Class | Extra |
+|---|---|---|
+| `sentence-transformer` | `SentenceTransformerProvider` | `embeddings-local` |
+| `openai-compatible` | `OpenAICompatibleProvider` | `embeddings-openai` |
+| `ollama` | `OllamaProvider` | `embeddings-ollama` |
+| `huggingface` | `HuggingFaceProvider` | `embeddings-hf` |
+
+`CallbackProvider` takes a Python callable, so it's wired in code (via the
+`embedding_provider=` constructor argument), not YAML.
+
+## Next
+
+- [Configuration](../configuration.md) — the `embeddings` YAML section.
+- [Hybrid Search](../search.md) — how the vector signal fuses with the others.
+- [Building a memory-backed agent](agent-memory.md) — embeddings in the agent loop.
diff --git a/docs/guides/migrating-from-other-memory.md b/docs/guides/migrating-from-other-memory.md
new file mode 100644
index 0000000..2b7954c
--- /dev/null
+++ b/docs/guides/migrating-from-other-memory.md
@@ -0,0 +1,274 @@
+# Migrating from another memory system
+
+This guide helps you move an agent's memory from another store — a hosted
+agent-memory service (mem0, Zep, …), a framework's built-in memory (LangChain,
+…), or a plain vector database (Chroma, Qdrant, pgvector, …) — into Engrava.
+
+It covers three things:
+
+1. [Mapping concepts](#concept-mapping) from other systems onto Engrava's model.
+2. [Porting your write/read calls](#porting-your-calls) with before/after snippets.
+3. [Bulk-importing](#bulk-import) an existing corpus efficiently.
+
+It ends with [filtering, scoping & multi-tenancy](#filtering-scoping-and-multi-tenancy)
+— the one area where Engrava's defaults differ most from a hosted service, and
+what to do about it.
+
+Read [Core Concepts](../concepts.md) first if the terms *thought*, *edge*,
+*cycle*, or *reflection* are unfamiliar, and [Positioning](../positioning.md)
+to confirm Engrava is the right destination for your workload.
+
+## Concept mapping
+
+Other memory systems use different vocabulary for similar ideas. This table maps
+common concepts onto Engrava:
+
+| Concept elsewhere | Engrava equivalent | Notes |
+|---|---|---|
+| "Memory" / "record" / "document" | **`ThoughtRecord`** | The unit you store. Has `essence` (short) + `content` (full). |
+| "Memory type" / "role" | **`thought_type`** (`OBSERVATION`, `BELIEF`, `TASK`, …) | A small fixed taxonomy; see [Core Concepts](../concepts.md). |
+| Free-form metadata / `metadata={...}` | **`ThoughtRecord.metadata`** | An arbitrary JSON dict, persisted and round-tripped. |
+| "User id" / "session id" / namespace | A key inside **`metadata`** (or `source`) | Engrava has no built-in tenant field — see [scoping](#filtering-scoping-and-multi-tenancy). |
+| Relationship / link between memories | **`EdgeRecord`** (typed, weighted) | First-class graph; edges also feed ranking. |
+| Embedding / vector | Stored on write only with `embedding_provider=...` **and** `auto_embed=True`; otherwise call `store_embedding(thought_id, vector)` yourself | See the [Embeddings guide](embeddings.md). |
+| Vector / similarity search | **`search_similar(query_vector, …)`** | Needs a ready query vector. |
+| Keyword / BM25 search | **`search_fts(query, …)`** | Returns `list[(thought_id, score)]`. |
+| Hybrid search | **`search_hybrid(query_text, …)`** | Fuses FTS + vector + recency + priority + graph. |
+| Automatic summarisation / fact extraction | *(none — by design)* | Engrava does no LLM-side extraction; see [Non-goals](../positioning.md#non-goals). |
+| Decay / forgetting | TTL + lifecycle + the recency signal | See [Data lifecycle](../data-lifecycle.md) (TTL, archive-vs-delete, erasure) and the recency signal in [Search](../search.md). |
+| Summaries of clusters | **`REFLECTION`** thoughts via [dreaming](../dreaming.md) | Structural (centroid + keywords), not LLM prose. |
+
+## Porting your calls
+
+The shapes below are illustrative fragments — they assume you already have a
+`store` (see [Quick Start](../quickstart.md) for how to open one).
+
+**Writing a memory.** Where another library takes a string and does extraction
+for you, Engrava takes a fully-formed `ThoughtRecord` — you decide the type,
+priority, and metadata:
+
+```python
+import uuid
+
+from engrava import LifecycleStatus, Priority, ThoughtRecord, ThoughtType
+
+# before (illustrative, another library):
+#   memory.add("User prefers dark mode", user_id="u1")
+
+# after (engrava):
+await store.create_thought(
+    ThoughtRecord(
+        thought_id=str(uuid.uuid4()),
+        thought_type=ThoughtType.OBSERVATION,
+        essence="User prefers dark mode",
+        content="User prefers dark mode",
+        priority=Priority.P3,
+        lifecycle_status=LifecycleStatus.ACTIVE,
+        created_cycle=0,
+        updated_cycle=0,
+        source="chat",
+        metadata={"user_id": "u1"},
+    )
+)
+```
+
+**Searching.** Where another library returns ranked memories from a single
+`search`, pick the Engrava method that matches the signal you want; `search_hybrid`
+is the closest analogue to a managed hybrid search:
+
+```python
+# before (illustrative):
+#   hits = memory.search("what theme does the user like?", user_id="u1")
+
+# after (engrava) — note: search is unscoped; filter by user yourself:
+result = await store.search_hybrid("what theme does the user like?", top_k=10)
+for thought_id, score in result.results:
+    record = await store.get_thought(thought_id)
+    if record is not None and record.metadata.get("user_id") == "u1":
+        print(score, record.essence)
+```
+
+See [filtering, scoping & multi-tenancy](#filtering-scoping-and-multi-tenancy)
+for why the post-filter is there and how to do it better.
+
+## Bulk import
+
+When migrating an existing corpus, insert under a single transaction instead of
+committing once per row. The `suspend_auto_commit()` async context manager
+defers the commit until the block exits — it **commits once on success and rolls
+back the whole batch on any error**. Pair it with `deduplicate=True` so repeated
+`content` collapses into one thought (bumping `confirmation_count`) instead of
+inserting duplicate rows.
+
+The following is a complete, runnable example (it uses an in-memory store and a
+small fake export):
+
+```python
+import asyncio
+import uuid
+
+import aiosqlite
+
+from engrava import LifecycleStatus, Priority, SqliteEngravaCore, ThoughtRecord, ThoughtType
+
+# Pretend this came from your previous memory system's export.
+EXPORTED_MEMORIES = [
+    {"text": "User prefers dark mode", "user": "u1"},
+    {"text": "User is based in Berlin", "user": "u1"},
+    {"text": "User prefers dark mode", "user": "u1"},  # a duplicate
+    {"text": "Project deadline is Friday", "user": "u2"},
+]
+
+
+def to_thought(item: dict[str, str]) -> ThoughtRecord:
+    return ThoughtRecord(
+        thought_id=str(uuid.uuid4()),
+        thought_type=ThoughtType.OBSERVATION,
+        essence=item["text"][:200],
+        content=item["text"],
+        priority=Priority.P3,
+        lifecycle_status=LifecycleStatus.ACTIVE,
+        created_cycle=0,
+        updated_cycle=0,
+        source="import",
+        metadata={"user_id": item["user"]},
+    )
+
+
+async def bulk_import(store, items: list[dict[str, str]]) -> int:
+    # One transaction for the whole batch: commit on success, roll back on error.
+    async with store.suspend_auto_commit():
+        for item in items:
+            # deduplicate=True collapses identical content into one thought.
+            await store.create_thought(to_thought(item), deduplicate=True)
+    return await store.count_thoughts()
+
+
+async def main() -> None:
+    async with aiosqlite.connect(":memory:") as conn:
+        conn.row_factory = aiosqlite.Row
+        store = SqliteEngravaCore(conn)
+        await store.ensure_schema()
+
+        total = await bulk_import(store, EXPORTED_MEMORIES)
+        # 4 exported rows, one duplicate collapsed -> 3 stored.
+        assert total == 3
+        print(f"Imported {total} thoughts.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+For large corpora, import in batches (e.g. a few thousand rows per
+`suspend_auto_commit()` block) to keep each transaction short — long
+transactions block the background SQLite thread (see
+[Known Limitations](../known-limitations.md#aiosqlite-proxy-architecture)).
+If you have an embedding provider configured with `auto_embed=True`, each new
+thought is embedded on write (see the [Embeddings guide](embeddings.md)), so a bulk
+load pays the embedding cost up front — pre-compute vectors or import in batches accordingly.
+See the [Performance guide](../performance.md#write-throughput-and-bulk-ingest)
+for the throughput levers in detail.
+
+## Filtering, scoping & multi-tenancy
+
+This is the most important difference from a hosted memory service. Engrava's
+**`search_hybrid` / `search_similar` / `search_fts` take no scope or metadata
+filter** — they rank across the entire store. There is no `user_id=` or
+`session_id=` argument on the ranked path. You scope retrieval yourself, and
+there are three patterns, with clear tradeoffs.
+
+### Option A — over-fetch, then post-filter (simplest)
+
+Ask for more results than you need, then drop the ones that don't match. Keep
+the scope key in `metadata` when you write.
+
+```python
+# Want the top 5 for user "u1": over-fetch, then filter and trim.
+result = await store.search_hybrid("dark mode", top_k=50)
+scoped = []
+for thought_id, score in result.results:
+    record = await store.get_thought(thought_id)
+    if record is not None and record.metadata.get("user_id") == "u1":
+        scoped.append((thought_id, score))
+    if len(scoped) >= 5:
+        break
+```
+
+- **Pros:** no SQL, works with the high-level API, fine for modest stores.
+- **Cons:** wasteful when one tenant is a small slice of a large store (you may
+  over-fetch a lot, or miss matches if `top_k` is too small). Ranking is still
+  computed over everything.
+
+### Option B — one store per tenant (strongest isolation)
+
+Give each tenant its own database file via [`EngravaManager`](../api-reference.md).
+Each service (one per tenant here) has its own file and its own lock, so retrieval
+is naturally scoped and tenants are physically isolated.
+
+```python
+from engrava import EngravaManager, load_config
+
+config = load_config("engrava.yaml")
+async with EngravaManager.from_config(config.services) as mgr:
+    store_u1 = await mgr.get_store("u1")  # u1.db
+    result = await store_u1.search_hybrid("dark mode", top_k=5)
+```
+
+- **Pros:** true isolation (separate files, separate locks, easy per-tenant
+  backup/delete); search is scoped for free.
+- **Cons:** not suitable for a very large number of tenants (one file each); no
+  cross-tenant query. Best when tenants are coarse (a handful of services), not
+  per-end-user at massive cardinality.
+
+### Option C — pre-filter in raw SQL (scoped recall without over-fetch)
+
+When you need keyword/metadata-scoped recall without over-fetching, query the
+`thought` table directly. The Python `metadata` dict is persisted to a
+`metadata_json` column you can index into with SQLite's `json_extract`:
+
+```sql
+-- thoughts for one user, most recent first
+SELECT thought_id, essence
+FROM thought
+WHERE json_extract(metadata_json, '$.user_id') = :user_id
+ORDER BY updated_cycle DESC
+LIMIT 20;
+```
+
+Run it through the same connection you gave the store:
+
+```python
+cursor = await conn.execute(
+    "SELECT thought_id, essence FROM thought "
+    "WHERE json_extract(metadata_json, '$.user_id') = ? "
+    "ORDER BY updated_cycle DESC LIMIT 20",
+    ("u1",),
+)
+rows = await cursor.fetchall()
+```
+
+- **Pros:** exact scoping, no over-fetch; you can combine it with FTS by joining
+  the `thought_fts` table.
+- **Cons:** you drop below the high-level API to raw SQL against the schema, and
+  this path does **not** apply the hybrid ranking signals (it is a filter, not a
+  ranked search). Treat the schema as semi-stable and re-check it across upgrades.
+
+### Choosing
+
+| Situation | Use |
+|---|---|
+| Small/medium store, occasional scoping | **A** (over-fetch + post-filter) |
+| A handful of coarse tenants needing real isolation | **B** (store per tenant) |
+| Scoped recall over a large store, ranking not required | **C** (raw `json_extract`) |
+
+> **Want a real filter on the ranked path?** Adding a scope/metadata argument to
+> `search_*` is a public-API change under consideration, not a current feature.
+> Until then, the patterns above are the supported approach.
+
+## See also
+
+- [Positioning](../positioning.md) — when Engrava fits, and its non-goals
+- [Core Concepts](../concepts.md) — thoughts, edges, cycles, reflections
+- [Recipes](../recipes/index.md) — short task-oriented snippets, incl. dedup
+- [Known Limitations](../known-limitations.md) — concurrency and scale constraints
diff --git a/docs/observability.md b/docs/observability.md
index 08782f0..970b520 100644
--- a/docs/observability.md
+++ b/docs/observability.md
@@ -17,6 +17,7 @@ storage footprint, and a rolling-window search-latency histogram.
 from engrava import SqliteEngravaCore
 import aiosqlite
 
+
 async def main() -> None:
     conn = await aiosqlite.connect("engrava.db")
     conn.row_factory = aiosqlite.Row
@@ -56,3 +57,123 @@ engrava --db mydata.db --format json info
 - Nested calls inside `search_hybrid()` are suppressed, so one hybrid search
   contributes one latency sample.
 - This snapshot API tracks only aggregate counts and search latency — not individual events.
+
+## Production monitoring
+
+`store.metrics()` is a **pull** snapshot — there is no built-in exporter. To
+monitor a deployment, scrape the snapshot on an interval and feed the fields into
+your metrics system (Prometheus, OpenTelemetry, StatsD, …).
+
+### Exporting the snapshot
+
+The snapshot is a plain dataclass, so mapping it to any client is
+straightforward. A Prometheus example:
+
+```python
+from prometheus_client import Gauge
+
+THOUGHTS = Gauge("engrava_thoughts_total", "Total thoughts")
+DB_BYTES = Gauge("engrava_db_bytes", "Main database size in bytes")
+WAL_BYTES = Gauge("engrava_wal_bytes", "WAL size in bytes")
+SEARCH_P95 = Gauge("engrava_search_p95_ms", "Search p95 latency (ms)")
+SEARCH_P99 = Gauge("engrava_search_p99_ms", "Search p99 latency (ms)")
+
+
+async def collect(store) -> None:
+    m = await store.metrics()
+    THOUGHTS.set(m.thoughts.total)
+    DB_BYTES.set(m.storage.db_bytes)
+    WAL_BYTES.set(m.storage.wal_bytes)
+    SEARCH_P95.set(m.search_latency.p95_ms)
+    SEARCH_P99.set(m.search_latency.p99_ms)
+```
+
+The main metric groups on `EngravaMetrics` are `thoughts` (`total`, `by_type`,
+`by_status`), `edges` (`total`, `by_type`), `storage` (`db_bytes`, `wal_bytes`,
+`vec_index_bytes`, `total_bytes`), and `search_latency` (`sample_count`,
+`p50_ms`, `p95_ms`, `p99_ms`, `min_ms`, `max_ms`, `mean_ms`). The snapshot also
+carries `schema_version` and `snapshot_timestamp` for the snapshot itself.
+
+### Scrape cadence
+
+Treat `metrics()` like any pull endpoint: a **30–60 s** scrape interval is
+typically plenty. Counts and storage change slowly; the latency histogram is a
+rolling window (`metrics.window_size`, default 1000 samples), so it already
+smooths short spikes. Avoid sub-second scrapes — each call runs a few aggregate
+SQL queries.
+
+### What to alert on
+
+| Signal | Source field | Alert when… |
+|---|---|---|
+| Storage growth | `storage.db_bytes`, `storage.total_bytes` | size approaches your disk budget, or grows unexpectedly fast |
+| WAL not checkpointing | `storage.wal_bytes` | the WAL keeps growing and never shrinks (checkpoints not happening) |
+| Search latency | `search_latency.p95_ms` / `p99_ms` | p95/p99 exceeds your budget — often the sign you've passed the brute-force vector ceiling (see [Performance](performance.md)) |
+| Expired backlog | `count_thoughts(include_expired=True)` − `count_thoughts()` | the number of expired-but-not-cleaned thoughts grows (run `engrava gc --expired`) — see [Data Lifecycle](data-lifecycle.md) |
+| Audit integrity | `store.journal.verify_integrity()` (journaling only) | the chain fails verification (tampering or corruption) — see [Audit Trail](audit-trail.md) |
+
+The expired-backlog and audit-integrity signals are **not** in the metrics
+snapshot — compute them from the calls shown above on your own cadence.
+
+The audit-integrity check applies **only when journaling is enabled**
+(`journal.enabled: true`). With journaling off, `store.journal` is `None`, so
+guard the call:
+
+```python
+async def journal_ok(store) -> bool:
+    if store.journal is None:
+        return True  # journaling disabled — nothing to verify
+    result = await store.journal.verify_integrity()
+    return result.valid
+```
+
+### Health check
+
+For a readiness probe you want a call that actually touches the database. Note
+that `metrics()` is **not** reliable for this when metrics are disabled: with
+`metrics.enabled: false`, `store.metrics()` returns a zero-filled snapshot
+**without issuing any SQL**, so it would report healthy even if the database were
+unreadable. Use a lightweight real read instead — `count_thoughts()` always
+queries the database (independent of the metrics setting):
+
+```python
+async def healthcheck(store) -> bool:
+    try:
+        await store.count_thoughts()  # issues SQL — confirms DB + schema are readable
+    except Exception:
+        return False
+    return True
+```
+
+(If you know metrics are enabled in your deployment, `await store.metrics()`
+works too and additionally returns the live counts.)
+
+### Logging
+
+The library logs through the standard `logging` module under the **`engrava.*`**
+namespace (each module uses `logging.getLogger(__name__)`, e.g.
+`engrava.extensions.dreaming`, `engrava.extensions.vector_sqlite_vec`,
+`engrava.config`). It logs at **`WARNING`** (degraded conditions, e.g. sqlite-vec
+unavailable → numpy fallback), **`INFO`** (dreaming progress), and **`DEBUG`**
+(detailed internals) — it does **not** log at `ERROR`/`CRITICAL`; failures are
+raised as typed exceptions for the caller to handle. Configure it like any
+library logger:
+
+```python
+import logging
+
+logging.getLogger("engrava").setLevel(logging.WARNING)  # quiet, production default
+# logging.getLogger("engrava").setLevel(logging.INFO)   # see dreaming activity
+```
+
+### Out of scope
+
+The snapshot is deliberately small. It does **not** include:
+
+- **write / mutation counters** or **error counters** — track those at your
+  application layer (Engrava raises typed exceptions you can count there);
+- **dreaming metrics** — `run_consolidation()` returns a `ConsolidationResult`
+  (promoted / edges / reflections counts) per run; consume that directly;
+- **journal size or per-event audit metrics** — the audit history lives in the
+  [journal](audit-trail.md) itself, which you query and verify directly, not via
+  the metrics snapshot.
diff --git a/docs/performance.md b/docs/performance.md
new file mode 100644
index 0000000..ac36147
--- /dev/null
+++ b/docs/performance.md
@@ -0,0 +1,175 @@
+# Performance & scaling
+
+How Engrava behaves as data grows, where the limits are, and the two levers that
+matter most: the **vector backend** and **batched writes**. The numbers that
+matter for *your* workload depend on corpus size, embedding dimension, query mix,
+and hardware — measure on your own data rather than trusting a single headline
+figure. This page explains *what* drives cost so you know what to measure.
+
+For the dreaming *quality* benchmark (does consolidation help retrieval), see
+[Benchmarks](benchmarks.md). For the hard platform constraints, see
+[Known Limitations](known-limitations.md).
+
+## Where the cost is
+
+A query touches up to five signals; each scales differently:
+
+| Signal | Cost driver | Scaling |
+|---|---|---|
+| **FTS5 / BM25** | SQLite's FTS5 inverted index | Sub-linear; scales well into large corpora. |
+| **Vector** | The vector backend (see below) | Linear in #embeddings for both backends; **sqlite-vec scans a compact `vec0` table with a much smaller constant factor** than the Python path. |
+| **Recency** | A cheap per-candidate arithmetic decay | Negligible. |
+| **Priority** | A per-candidate enum→multiplier lookup | Negligible. |
+| **Graph** | 1-hop neighbour expansion over edges | Proportional to the fusion-pool size × average degree; **opt-in** (`graph_weight=0.0` makes zero graph queries). |
+
+The dominant term at scale is almost always the **vector** signal, because both
+backends compare the query against every stored embedding — the difference is how
+efficiently they do it (see below).
+
+## The brute-force ceiling (and how to pass it)
+
+Without the `vec` extra, vector search is **brute-force cosine similarity in
+Python**: every `search_similar` / `search_hybrid` query scans all embeddings.
+This is simple and dependency-free, and works well up to roughly **100k
+embeddings**. Past that, vector-query latency grows linearly and becomes the
+bottleneck.
+
+The fix is the **sqlite-vec** backend, which stores vectors in a dedicated,
+compact `vec0` virtual table. In the pinned `sqlite-vec` 0.1.x line a `vec0`
+query is still an **exhaustive k-nearest-neighbour scan** — not an approximate or
+sub-linear index — but over a tightly packed, chunked columnar store, so it runs
+with a far smaller constant factor (and lower memory overhead) than the Python
+brute-force path. The practical effect is that a growing corpus stays within
+your latency budget up to a much larger size. FTS5 scales independently and
+usually needs no special handling.
+
+> The ~100k figure is a rule of thumb, not a cliff — see
+> [Known Limitations → sqlite-vec](known-limitations.md#sqlite-vec-pre-v1-status).
+> Measure your own p95 query latency and switch when it stops meeting your budget.
+
+## Switching to sqlite-vec (incl. migrating an existing database)
+
+The migration is designed to be turnkey: your embeddings already live in the
+`embedding` table, so switching backends only builds and backfills the `vec0`
+vector table — you do **not** re-embed anything.
+
+**1. Install the extra.**
+
+```bash
+pip install 'engrava[vec]'
+```
+
+**2. Set the backend in your config.**
+
+```yaml
+extensions:
+  vector:
+    backend: sqlite-vec      # default is "numpy"
+    dimension: 384           # must match your embedding model
+```
+
+**3. Open the store with `from_config`.** On open, Engrava creates the `vec0`
+virtual table and **backfills every existing embedding into it automatically**
+(idempotent — safe to run repeatedly). From then on, new writes keep the index
+in sync.
+
+```python
+from engrava import SqliteEngravaCore
+
+# from_config wires the vector backend; the index is created and backfilled
+# on open. A plain SqliteEngravaCore(conn) constructor stays on numpy.
+async with await SqliteEngravaCore.from_config("engrava.yaml") as store:
+    result = await store.search_similar(query_vector, top_k=5)
+```
+
+That's the whole migration — no manual re-index step, and no re-embedding,
+because the vectors are reused from the existing `embedding` table.
+
+**Important caveats.**
+
+- **Use `from_config`.** Only the `from_config` path configures the vector
+  backend. If you build the store directly with `SqliteEngravaCore(conn)`, it
+  stays on the numpy backend regardless of the YAML.
+- **Graceful fallback, not a hard error.** If the `sqlite-vec` package is missing
+  or the extension can't load, Engrava logs a warning and **falls back to numpy**
+  rather than crashing — so a "switch" that silently kept numpy usually means the
+  extension didn't load.
+- **macOS system SQLite blocks extensions.** The most common load failure is
+  macOS's bundled SQLite, which disables extension loading. Install Python via
+  Homebrew or pyenv (a full-featured SQLite build). See
+  [Known Limitations → macOS](known-limitations.md#macos-sqlite-extension-loading).
+- **Dimension must match.** The index is created for a fixed dimension; it must
+  equal your embedding model's output. Mixing dimensions corrupts results (see
+  [Embedding Dimension Consistency](known-limitations.md#embedding-dimension-consistency)).
+
+## Write throughput and bulk ingest
+
+By default each mutating call commits its own transaction. For a bulk load that
+is the wrong granularity — one commit per row dominates wall-clock time. Wrap
+the batch in `suspend_auto_commit()`, which makes a **single commit on success
+and rolls the whole batch back on any error**:
+
+```python
+async def bulk_load(store, items):
+    async with store.suspend_auto_commit():
+        for item in items:
+            await store.create_thought(item, deduplicate=True)
+    return await store.count_thoughts()
+```
+
+- **`deduplicate=True`** collapses identical `content` into one thought (bumping
+  `confirmation_count`) instead of inserting duplicate rows — cheaper storage and
+  fewer embeddings to compute. (Note the persistence default is
+  `deduplicate=False`; opt in per call.)
+- **Keep each transaction short.** A long-running transaction blocks aiosqlite's
+  background thread (see
+  [Known Limitations → aiosqlite](known-limitations.md#aiosqlite-proxy-architecture)),
+  so for very large imports, batch in chunks (e.g. a few thousand rows per
+  `suspend_auto_commit()` block) rather than one giant transaction.
+- **Embedding cost dominates a bulk load** when a provider is configured with
+  `auto_embed=True`: each new thought is embedded on write. Pre-compute vectors
+  and store them with `store_embedding(...)`, use a batching local provider, or
+  import in chunks so the encoder isn't the bottleneck. See the
+  [Embeddings guide](guides/embeddings.md).
+
+A runnable end-to-end bulk-import example lives in the
+[migration guide](guides/migrating-from-other-memory.md#bulk-import).
+
+## Dreaming cost at scale
+
+[Dreaming](dreaming.md) runs **off the hot path** — you invoke
+`run_consolidation()` on your own cadence, so it never adds latency to CRUD or
+search. Its own cost scales with the number of candidate thoughts and the
+clustering algorithm:
+
+- Run it **periodically**, not every turn (every N cycles, a cron job, or
+  manually).
+- `candidates_limit` caps how many thoughts are evaluated per pass — keep it
+  bounded on large stores.
+- Clustering has two backends via `extensions.dreaming.clustering_backend`
+  (`"numpy"` default, or `"python"`); `numpy` is faster for the similarity math
+  on larger candidate sets.
+- The LPA clustering algorithm is `O(edges × iterations)`; the agglomerative
+  algorithm operates over active thoughts — see [Dreaming](dreaming.md) for the
+  algorithm tradeoffs.
+
+## Checklist: scaling Engrava
+
+1. **Past ~100k embeddings or missing your latency budget?** Switch to
+   `sqlite-vec` (above).
+2. **Bulk loading?** Batch writes with `suspend_auto_commit()` and consider
+   `deduplicate=True`.
+3. **Embedding is the bottleneck?** Use a batching provider or pre-compute
+   vectors.
+4. **Multi-tenant?** One database file per tenant via `EngravaManager` keeps each
+   store smaller and independently lockable (see the
+   [scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy)).
+5. **Dreaming heavy?** Cap `candidates_limit`, run it on a schedule, pick the
+   right `clustering_backend`.
+
+## See also
+
+- [Known Limitations](known-limitations.md) — the brute-force ceiling, macOS, concurrency
+- [Configuration](configuration.md) — the `extensions.vector` and dreaming knobs
+- [Benchmarks](benchmarks.md) — the dreaming retrieval-quality benchmark
+- [Embeddings](guides/embeddings.md) — provider choice and batching
diff --git a/docs/positioning.md b/docs/positioning.md
new file mode 100644
index 0000000..31a0e6e
--- /dev/null
+++ b/docs/positioning.md
@@ -0,0 +1,97 @@
+# Positioning — what Engrava is (and isn't)
+
+Engrava is a **standalone embedded database for AI-agent memory**. It is built on
+SQLite and runs in-process: one `pip install`, no server, no LLM, no external
+services. It gives an agent a durable thought-graph with hybrid retrieval
+(full-text + vector + recency + priority + graph) and an optional tamper-evident
+audit trail.
+
+This page explains **when Engrava is the right tool**, when it isn't, and how it
+relates to the other memory options you might be choosing between.
+
+## When Engrava is a good fit
+
+- **You want memory you own and can inspect.** The whole store is one SQLite
+  file. You can open it with any SQLite tool, back it up with a file copy
+  ([with care around WAL](known-limitations.md#concurrent-write-safety)), and
+  query it with SQL when the high-level API isn't enough.
+- **You want retrieval, not just a vector index.** Engrava fuses FTS5/BM25,
+  vector similarity, recency, priority, and a 1-hop graph signal into one ranked
+  result. See [Search](search.md).
+- **You want a graph, not a flat list.** Thoughts are connected by typed,
+  weighted [edges](concepts.md), and the graph itself contributes to ranking.
+- **You want it embedded.** No network hop, no service to operate, no separate
+  process. It runs anywhere Python and SQLite run.
+- **You want embeddings to be optional and pluggable.** Bring a local model, an
+  OpenAI-compatible endpoint, Ollama, HuggingFace, or your own callback — or run
+  with FTS-only and no embeddings at all. See the
+  [Embeddings guide](guides/embeddings.md).
+- **Small-to-medium corpora.** The default backend brute-forces vector search in
+  Python and works well up to roughly 100k embeddings; beyond that, switch to
+  the `sqlite-vec` backend. See
+  [Known Limitations](known-limitations.md#sqlite-vec-pre-v1-status).
+
+## When Engrava is *not* a good fit
+
+- **You need a managed, horizontally-scaled vector service.** Engrava is a local
+  embedded library, not a clustered database. One store is one SQLite file
+  written by one process. If you need sharding, replication, or a multi-writer
+  service across many machines, use a dedicated vector database.
+- **You need many processes writing the same store concurrently.** SQLite is
+  single-writer. WAL mode lets readers and a single writer coexist, and a
+  single process can drive many async tasks safely, but heavy multi-process
+  write fan-out is out of scope. See
+  [Known Limitations → Concurrent Write Safety](known-limitations.md#concurrent-write-safety).
+- **You want the library to call an LLM for you.** Engrava does no LLM-side fact
+  extraction, summarisation, or entity resolution (see [Non-goals](#non-goals)).
+  It stores and retrieves what you give it; your agent decides what to write.
+- **You need per-tenant retrieval isolation on the ranked path out of the box.**
+  The `search_*` methods take no scope/metadata filter today — retrieval is
+  unscoped by default. There are good workarounds (over-fetch + post-filter,
+  one store per tenant, raw-SQL pre-filter); see the
+  [migration guide's scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy).
+
+## Non-goals
+
+These are deliberate boundaries, not missing features:
+
+- **No LLM-side intelligence.** Engrava never calls a language model. It does no
+  fact extraction, no summarisation, no entity resolution, no automatic
+  "memory writing" from raw text. Those belong in your agent (or a downstream
+  extension), above the storage layer. The one consolidation feature that *does*
+  synthesise — [dreaming](dreaming.md) — is purely structural (clustering +
+  centroids + keyword counts), with **no LLM involved**.
+- **Retrieval is unscoped by default.** `search_hybrid` / `search_similar` /
+  `search_fts` rank across the whole store; they accept no per-user or
+  per-session filter argument. Scoping is an application-level concern today —
+  see the [workarounds](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy).
+- **Not a distributed system.** No clustering, replication, or cross-machine
+  consistency. One file, one writer.
+- **Not an application framework.** Engrava is the memory layer. It does not
+  provide an agent runtime, tool-calling, or prompt orchestration.
+
+## How it compares
+
+A rough orientation, not a feature scorecard. Evaluate the specifics against
+your own workload.
+
+| | Engrava | Hosted agent-memory services (e.g. mem0, Zep) | Framework memory (e.g. LangChain memory) | Standalone vector DBs (e.g. Chroma, Qdrant, pgvector) |
+|---|---|---|---|---|
+| **Deployment** | Embedded library, one SQLite file, in-process | Typically a hosted/managed service or self-hosted server | In-process, tied to the framework | Separate database/service (some have embedded modes) |
+| **Retrieval model** | Hybrid: FTS + vector + recency + priority + graph, fused | Varies; often vector + recency with managed pipelines | Usually buffer/window or a vector-store wrapper | Primarily vector similarity (some add keyword/hybrid) |
+| **Graph** | First-class typed/weighted edges that feed ranking | Some offer entity/graph memory | Generally no | Generally no |
+| **LLM-side extraction** | None — you decide what to store | Often built in (auto fact-extraction/summarisation) | Sometimes, via chains | None |
+| **External services** | None required | Usually yes | Depends on the chosen store | Usually a running service |
+| **Audit trail** | Optional tamper-evident hash-chain journal | Varies | No | Generally no |
+| **Best for** | Owning a local, inspectable, hybrid memory graph for an agent | Offloading memory ops to a managed pipeline | Quick memory inside an existing framework app | Large-scale pure vector retrieval |
+
+If you are currently using one of these and want concept mappings and porting
+snippets, see
+[Migrating from another memory system](guides/migrating-from-other-memory.md).
+
+## See also
+
+- [Core Concepts](concepts.md) — the mental model behind thoughts, edges, and cycles
+- [Search](search.md) — how the hybrid ranking actually works
+- [Known Limitations](known-limitations.md) — the hard constraints in one place
+- [Migrating from another memory system](guides/migrating-from-other-memory.md)
diff --git a/docs/quickstart.md b/docs/quickstart.md
index e1741a8..a1689f7 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -135,6 +135,14 @@ stored = await store.create_thought(observation)
 print(f"Created thought: {stored.thought_id}")
 ```
 
+> **About `created_cycle` / `updated_cycle`.** A *cycle* is a consumer-owned
+> logical clock — Engrava never advances it for you. `0` is fine for this
+> quickstart, but in a real long-running agent you should keep a counter and
+> increment it once per turn, using it for these fields (and for `current_cycle`
+> in search / consolidation). Otherwise recency can't tell old memories from new
+> and dreaming's age gate never opens. See
+> [Cycle (the agent clock)](concepts.md#cycle-the-agent-clock).
+
 ## Link Thoughts with Edges
 
 ```python
@@ -180,19 +188,20 @@ for thought_id, score in await store.search_fts("Python AI", top_k=5):
 
 ### Embedding Similarity Search
 
+Use a real embedding provider so similarity is meaningful (this needs the
+`embeddings-local` extra; see the [Embeddings guide](guides/embeddings.md) for
+all provider options):
+
 ```python
-from engrava import CallbackProvider
+from engrava import SentenceTransformerProvider
 
-# Use any embedding function
-provider = CallbackProvider(
-    callback=lambda text: [0.1] * 384,  # Replace with real embeddings
-    dimension=384,
-    model_name="my-model",
-)
+provider = SentenceTransformerProvider(model_name="all-MiniLM-L6-v2")
 
 # Store an embedding for an existing thought
 vector = await provider.embed(observation.content)
-await store.store_embedding(observation.thought_id, vector, model_name="my-model")
+await store.store_embedding(
+    observation.thought_id, vector, model_name=provider.model_name
+)
 
 # Search by similarity — returns (thought_id, score) tuples
 for thought_id, score in await store.search_similar(vector, top_k=5):
@@ -201,6 +210,10 @@ for thought_id, score in await store.search_similar(vector, top_k=5):
         print(f"  {record.essence}  (score: {score:.3f})")
 ```
 
+> Tip: configure the provider on the store with `auto_embed=True` (or via
+> `engrava.yaml`) and Engrava embeds thoughts on write — and embeds your query
+> for you in `search_hybrid`. See the [Embeddings guide](guides/embeddings.md).
+
 ## Query with MindQL
 
 ```python
@@ -240,7 +253,14 @@ engrava --db my_thoughts.db restore -i backup.jsonl
 
 ## Next Steps
 
+Build something next, then reach for the references:
+
+- [Tutorial](tutorial.md) — build a small notes memory end to end (start here)
+- [Recipes](recipes/index.md) — copy-paste snippets: store a turn, retrieve context, TTL, dedup, session scoping
+- [Building a memory-backed agent](guides/agent-memory.md) — the full agent turn loop
+- [Core Concepts](concepts.md) — the mental model (thought, edge, reflection, cycle)
 - [Configuration](configuration.md) — YAML-based setup for production use
-- [Extensions](extensions.md) — Hook into the thought lifecycle
-- [API Reference](api-reference.md) — Full class and method reference
-- [MindQL](mindql.md) — Complete query language reference
+- [API Reference](api-reference.md) — full class and method reference
+- [MindQL](mindql.md) — complete query language reference
+- [Troubleshooting](troubleshooting.md) — when something doesn't work as expected
+- [FAQ](faq.md) — quick answers to common questions
diff --git a/docs/recipes/index.md b/docs/recipes/index.md
new file mode 100644
index 0000000..79ee5e6
--- /dev/null
+++ b/docs/recipes/index.md
@@ -0,0 +1,190 @@
+# Recipes
+
+Short, copy-paste snippets for the things you actually do with an agent-memory
+database. Each assumes you already have an open `store` (see the
+[Quick Start](../quickstart.md)); imports are shown once per recipe.
+
+> New to the model? Read [Core Concepts](../concepts.md) first. For the full
+> agent turn loop, see [Building a memory-backed agent](../guides/agent-memory.md).
+
+## Store a conversation turn
+
+Persist a user message and the agent's reply, tagged with conversation metadata
+so you can scope retrieval later:
+
+```python
+import uuid
+from engrava import ThoughtRecord, ThoughtType, Priority, LifecycleStatus, percept, utterance
+
+async def store_turn(store, user_text, agent_text, *, cycle, session_id, turn_index, user_id):
+    user_thought = ThoughtRecord(
+        thought_id=str(uuid.uuid4()),
+        thought_type=ThoughtType.OBSERVATION,
+        essence=user_text[:200], content=user_text,
+        priority=Priority.P2, lifecycle_status=LifecycleStatus.ACTIVE,
+        created_cycle=cycle, updated_cycle=cycle, source=user_id,
+        metadata={**percept(source_id=user_id, label="user"),
+                  "session_id": session_id, "turn_index": turn_index},
+    )
+    await store.create_thought(user_thought)
+
+    agent_thought = ThoughtRecord(
+        thought_id=str(uuid.uuid4()),
+        thought_type=ThoughtType.OUTPUT_DRAFT,
+        essence=agent_text[:200], content=agent_text,
+        priority=Priority.P3, lifecycle_status=LifecycleStatus.ACTIVE,
+        created_cycle=cycle, updated_cycle=cycle, source="agent",
+        metadata={**utterance(), "session_id": session_id, "turn_index": turn_index},
+    )
+    await store.create_thought(agent_thought)
+```
+
+## Retrieve context for a prompt
+
+Get the most relevant prior memories and turn them into prompt-ready text. With
+an embedding provider configured, `search_hybrid` embeds the query for you:
+
+```python
+async def context_for(store, query, cycle, top_k=5):
+    result = await store.search_hybrid(query, top_k=top_k, current_cycle=cycle)
+    lines = []
+    for thought_id, _score in result.results:
+        record = await store.get_thought(thought_id)
+        if record is not None:
+            lines.append(record.essence)        # essence = the prompt-facing one-liner
+    return "\n".join(f"- {line}" for line in lines)
+```
+
+## Filter retrieval by session (or user)
+
+The ranked search methods take **no** metadata/scope filter, so "only this
+session's memories" is done by over-fetching and post-filtering on metadata in
+Python:
+
+```python
+async def search_in_session(store, query, session_id, cycle, want=5):
+    # over-fetch, then keep only this session's hits, preserving rank order
+    result = await store.search_hybrid(query, top_k=want * 5, current_cycle=cycle)
+    scoped = []
+    for thought_id, _score in result.results:
+        record = await store.get_thought(thought_id)
+        if record is not None and record.metadata.get("session_id") == session_id:
+            scoped.append(record)
+        if len(scoped) >= want:
+            break
+    return scoped
+```
+
+> For *hard* isolation between users/tenants (separate databases rather than a
+> shared one with a metadata tag), use [`EngravaManager`](../api-reference.md) —
+> one `<name>.db` per service. That trades cross-tenant search for strong
+> isolation; the metadata approach keeps one searchable store.
+
+## Set a TTL on transient memories
+
+Set an expiry on a thought, then clean up expired thoughts. The default
+strategy is `archive` (soft — marks `ARCHIVED`); switch to `delete` for hard removal:
+
+```python
+# expire this thought one hour from now
+await store.create_thought(transient_thought, expires_after_seconds=3600)
+
+# later: process everything past its expiry (archive or delete per ttl_strategy)
+result = await store.cleanup_expired()
+print(f"{result.expired_count} thoughts expired via '{result.strategy_applied}'")
+```
+
+A store-wide default TTL and the archive-vs-delete strategy are set in config —
+see the [`ttl` configuration](../configuration.md). Archived thoughts leave disk
+only on a later `engrava gc`.
+
+## Deduplicate repeated facts
+
+Pass `deduplicate=True` so identical `content` collapses into one thought with a
+bumped `confirmation_count` instead of a duplicate row:
+
+```python
+first = await store.create_thought(fact, deduplicate=True)
+again = await store.create_thought(same_fact, deduplicate=True)
+# again.thought_id == first.thought_id; confirmation_count incremented, no new row
+```
+
+The growing `confirmation_count` is also a reliability signal dreaming uses (a
+fact re-confirmed many times ranks as more trustworthy) — see
+[Core Concepts](../concepts.md#reliability-confidence-vs-confirmation_count).
+
+## Run consolidation on a schedule
+
+In a long-running agent, run dreaming every N turns rather than every turn:
+
+```python
+from engrava import DreamingExtension, DreamingConfig
+
+dreaming = DreamingExtension(config=DreamingConfig(enabled=True))
+
+# inside your turn loop, after advancing the cycle counter:
+if cycle % 20 == 0:
+    result = await dreaming.run_consolidation(store, current_cycle=cycle)
+    print(f"consolidation: promoted {result.promoted_count}")
+```
+
+A fresh store has little to consolidate — REFLECTIONs emerge as memories
+accumulate and repeat. See [Dreaming](../dreaming.md) for the cadence and knobs.
+
+## Inspect what changed (audit trail)
+
+With the [audit journal](../audit-trail.md) enabled, read the history of any
+thought:
+
+```python
+history = await store.journal.get_entries(target_id=some_thought_id)
+for entry in history:
+    print(entry.sequence_number, entry.mutation_type, entry.created_at)
+```
+
+## Record a tool result / action
+
+If your agent *does* things (calls a tool, sends a message), record each as an
+`ActionRecord` linked to the thought that prompted it, so what the agent did —
+and whether it worked — is part of memory:
+
+```python
+import uuid
+from engrava import ActionRecord, ActionType, ActionStatus, VerificationStatus
+
+await store.create_action(
+    ActionRecord(
+        action_id=str(uuid.uuid4()),
+        source_thought_id=prompting_thought_id,
+        action_type=ActionType.TOOL_CALL,     # or MESSAGE / CLI_OUTPUT / STATE_UPDATE
+        intent="search the web for flight prices",
+        status=ActionStatus.CONFIRMED,        # PLANNED → EXECUTING → CONFIRMED / FAILED / BLOCKED
+        verification_status=VerificationStatus.CONFIRMED,
+    )
+)
+
+# read back the actions recorded for that thought:
+actions = await store.get_actions(prompting_thought_id)
+```
+
+## Restore the cycle counter after a restart
+
+The cycle is the agent's logical clock and Engrava does **not** persist it — on
+startup, seed it from the highest cycle already stored so it keeps increasing.
+`list_thoughts` returns rows ordered by `updated_cycle` descending, so the most
+recent thought carries the highest value:
+
+```python
+recent = await store.list_thoughts(limit=1)        # ordered by updated_cycle desc
+cycle = (recent[0].updated_cycle + 1) if recent else 0
+```
+
+See [Cycle (the agent clock)](../concepts.md#cycle-the-agent-clock) for why this
+matters (a frozen clock disables recency and stalls dreaming).
+
+## Next
+
+- [Building a memory-backed agent](../guides/agent-memory.md) — these recipes assembled into a loop.
+- [Tutorial](../tutorial.md) — build a small notes memory from scratch.
+- [Core Concepts](../concepts.md) — the model behind the snippets.
+- [Hybrid Search](../search.md) · [Dreaming](../dreaming.md) · [Configuration](../configuration.md).
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
new file mode 100644
index 0000000..047d084
--- /dev/null
+++ b/docs/troubleshooting.md
@@ -0,0 +1,193 @@
+# Troubleshooting
+
+Common symptoms, their cause, and the fix. Each entry shows the error (or the
+surprising behaviour) you actually see, then what to change.
+
+If your problem is a platform constraint rather than a mistake (macOS extension
+loading, the ~100k brute-force ceiling, FTS5 availability), see
+[Known Limitations](known-limitations.md) instead.
+
+## `AttributeError: 'tuple' object has no attribute 'keys'` on read
+
+**Symptom.** Writes succeed, but the first `get_thought` / search call raises:
+
+```
+AttributeError: 'tuple' object has no attribute 'keys'
+```
+
+**Cause.** The aiosqlite connection has no row factory, so rows come back as
+plain tuples. Engrava maps rows to records by column name and needs
+`aiosqlite.Row`. The failure surfaces on **read**, not on connect or write,
+which makes it look unrelated to setup.
+
+**Fix.** Set the row factory immediately after connecting:
+
+```python
+import aiosqlite
+
+conn = await aiosqlite.connect("engrava.db")
+conn.row_factory = aiosqlite.Row  # required
+```
+
+`SqliteEngravaCore.from_config(...)` opens the connection for you and sets this
+correctly — the manual snippet above only applies when you construct the store
+from your own connection.
+
+## `ValueError: '...' is not a valid ThoughtType` (or `Priority`, `EdgeType`, …)
+
+**Symptom.**
+
+```
+ValueError: 'INSIGHT' is not a valid ThoughtType
+```
+
+**Cause.** A string was passed that is not a member of the enum. The valid
+`ThoughtType` members are `TASK`, `OBSERVATION`, `BELIEF`, `REFLECTION`,
+`OUTPUT_DRAFT`, and `NOTE` — there is no `INSIGHT`. The same applies to
+`Priority` (`P1`–`P4`), `EdgeType`, `LifecycleStatus`, etc.
+
+**Fix.** Use a real enum member, ideally the symbol rather than a string literal:
+
+```python
+from engrava import ThoughtType
+
+ThoughtType.BELIEF  # preferred
+ThoughtType("BELIEF")  # also valid — must match a real member
+```
+
+See [Core Concepts](concepts.md) for the full taxonomy and when to use each type.
+
+## Search returns nothing (or fewer results than expected)
+
+**Symptom.** `search_hybrid` / `search_fts` returns an empty or short result
+list even though matching thoughts exist.
+
+**Cause.** A signal you assumed was active was **silently skipped**, so the query
+ran on fewer signals than you expected. Engrava skips a signal rather than
+erroring when its prerequisite is missing. Work through this checklist:
+
+| If… | then… |
+|---|---|
+| No `embedding_provider` is configured | the **vector** signal is skipped — only the non-vector signals (FTS, priority, and recency when enabled) run. A purely semantic query with no shared keywords may find nothing. |
+| You pass `query_text` but no provider and no `query_vector` | same as above — there is no vector to compare against. |
+| `current_cycle` is `None` | the **recency** signal is skipped (it cannot compute an age). |
+| `recency_weight` is `0.0` | recency is disabled even if `current_cycle` is set. |
+| The query shares no FTS tokens with any thought | FTS legitimately returns nothing — this is a real miss, not a bug. |
+
+Inspect which signals actually ran via `HybridSearchResult.backends_used`:
+
+```python
+result = await store.search_hybrid("python async", top_k=5, current_cycle=10)
+print(sorted(result.backends_used))  # e.g. ['fts5', 'priority', 'recency']
+```
+
+If `'vector'` is missing and you expected semantic matching, configure an
+embedding provider (see the [Embeddings guide](guides/embeddings.md)). If
+`'recency'` is missing, pass a non-`None` `current_cycle` **and** a
+`recency_weight > 0`.
+
+## Dreaming promotes nothing (consolidation is inert)
+
+**Symptom.** `run_consolidation(...)` returns `promoted_count == 0` every time.
+
+**Cause.** Promotion requires a candidate to clear **two independent bars**, and
+either one alone keeps the count at zero:
+
+1. **The age gate.** A thought is eligible only when
+   `current_cycle - created_cycle >= min_age_cycles` (default `1`). If you never
+   advance your cycle counter, `current_cycle` still equals each thought's
+   `created_cycle`, so `0 >= 1` is false and nothing is ever eligible. This is
+   the most common cause. See [Core Concepts → Cycle](concepts.md#cycle-the-agent-clock).
+2. **The promotion threshold.** Even after the gate passes, a candidate's
+   weighted signal score must reach `promote_threshold`. Brand-new, unconfirmed,
+   never-accessed thoughts score low, so a high threshold promotes nothing.
+
+**Fix.**
+
+```python
+from engrava.config import DreamingConfig, DreamingGates
+from engrava.extensions.dreaming import DreamingExtension
+
+config = DreamingConfig(
+    enabled=True,
+    promote_threshold=0.4,  # lower it if nothing clears the bar
+    gates=DreamingGates(
+        allow_zero_confirmation=True,  # essential for single-write ingest
+        min_age_cycles=1,
+    ),
+)
+ext = DreamingExtension(config=config)
+
+# Advance current_cycle past the thoughts' created_cycle so the age gate passes:
+result = await ext.run_consolidation(store, current_cycle=10)
+print(result.promoted_count)
+```
+
+See [Dreaming](dreaming.md) for the full gate-and-signal model.
+
+## `EmbeddingModelMismatchError` when opening an existing database
+
+**Symptom.** A store that worked before now raises `EmbeddingModelMismatchError`
+on startup or first embed.
+
+**Cause.** Engrava records the embedding **model name and dimension** in the
+database the first time it embeds. If you later open that same database with a
+different model name or a different dimension, the stored vectors are
+incompatible with new ones, so it refuses rather than silently mixing
+dimensions (which would corrupt similarity results).
+
+**Fix.** Use the same embedding model the database was created with, or
+re-embed the corpus under the new model. The CLI does this safely:
+
+```bash
+engrava restore --re-embed   # validates model consistency, re-embeds
+```
+
+See [Known Limitations → Embedding Dimension Consistency](known-limitations.md#embedding-dimension-consistency).
+
+## `ReferentialIntegrityError` — and you can't import it from `engrava`
+
+**Symptom.** Creating an edge to a thought that doesn't exist raises:
+
+```
+referential integrity violation: edge.to_thought_id='...' does not reference an existing thought
+```
+
+…and the obvious import fails:
+
+```python
+from engrava import ReferentialIntegrityError  # ImportError!
+```
+
+**Cause (two parts).**
+
+1. **The error itself** means one endpoint of an edge (`from_thought_id` or
+   `to_thought_id`) is not a real thought id. Create both thoughts before the
+   edge that links them.
+2. **The import:** `ReferentialIntegrityError` is **not** re-exported from the
+   top-level `engrava` package. It lives in `engrava.domain.exceptions`.
+
+**Fix.** Import it from its real module, and ensure both endpoints exist first:
+
+```python
+from engrava.domain.exceptions import ReferentialIntegrityError
+
+try:
+    await store.create_edge(edge)
+except ReferentialIntegrityError:
+    ...  # one endpoint is missing — create the thought, then retry
+```
+
+The exceptions that *are* re-exported at the top level are `EngravaError` (the
+base), `ConfigError`, `EmbeddingModelMismatchError`, `ExtensionMigrationError`,
+`InvalidTransitionError`, `MindQLParseError`, `ReadOnlyViolationError`,
+`StaleDataError`, and `ThoughtNotFoundError`. Anything else lives under
+`engrava.domain.exceptions`.
+
+## Still stuck?
+
+- Re-read the relevant guide: [Core Concepts](concepts.md),
+  [Search](search.md), [Embeddings](guides/embeddings.md), [Dreaming](dreaming.md).
+- Check the [FAQ](faq.md) for "is this supposed to work this way?" questions.
+- Confirm it isn't a documented constraint in [Known Limitations](known-limitations.md).
+- Open an issue with a minimal reproduction.
diff --git a/docs/tutorial.md b/docs/tutorial.md
new file mode 100644
index 0000000..ee33427
--- /dev/null
+++ b/docs/tutorial.md
@@ -0,0 +1,153 @@
+# Tutorial: a small notes memory
+
+The [Quick Start](quickstart.md) shows the primitives in isolation. This
+tutorial builds one small, real thing end to end — a personal-notes memory you
+can search by meaning and consolidate — typing each step yourself. By the end
+you'll have a script that runs.
+
+It uses no external services: embeddings come from a tiny deterministic function
+(swap in a real provider from the [Embeddings guide](guides/embeddings.md) for
+production). Read [Core Concepts](concepts.md) first if "thought", "cycle", or
+"reflection" are unfamiliar.
+
+## 1. Imports and a store
+
+Open an in-memory store with a toy embedding provider so the vector signal runs:
+
+```python
+import asyncio
+import hashlib
+import uuid
+
+import aiosqlite
+
+from engrava import (
+    CallbackProvider,
+    EdgeRecord,
+    EdgeType,
+    LifecycleStatus,
+    Priority,
+    SqliteEngravaCore,
+    ThoughtRecord,
+    ThoughtType,
+)
+
+
+def embed(text: str) -> list[float]:
+    """A tiny deterministic stand-in. Use a real provider in production."""
+    digest = hashlib.sha256(text.lower().encode("utf-8")).digest()
+    return [byte / 255.0 for byte in (digest * 2)[:32]]
+```
+
+## 2. Ingest some notes
+
+Each note becomes an `OBSERVATION` thought. We keep the returned records so we
+can link them next:
+
+```python
+NOTES = [
+    "Buy oat milk and coffee beans on the way home.",
+    "The espresso machine descaling is overdue.",
+    "Standup moved to 10am on Thursdays.",
+    "Coffee tastes better with freshly ground beans.",
+]
+
+
+async def ingest(store, notes):
+    records = []
+    for index, text in enumerate(notes):
+        record = ThoughtRecord(
+            thought_id=str(uuid.uuid4()),
+            thought_type=ThoughtType.OBSERVATION,
+            essence=text[:200],
+            content=text,
+            priority=Priority.P3,
+            lifecycle_status=LifecycleStatus.ACTIVE,
+            created_cycle=index,        # one cycle per note here
+            updated_cycle=index,
+            source="notes",
+        )
+        records.append(await store.create_thought(record))
+    return records
+```
+
+With `auto_embed=True` (step 5) each note is embedded on write.
+
+## 3. Link related notes
+
+Connect notes that are about the same thing with an `ASSOCIATED` edge — this is
+what makes the memory a *graph*:
+
+```python
+async def link(store, a, b, weight=0.8):
+    await store.create_edge(
+        EdgeRecord(
+            edge_id=str(uuid.uuid4()),
+            from_thought_id=a.thought_id,
+            to_thought_id=b.thought_id,
+            edge_type=EdgeType.ASSOCIATED,
+            weight=weight,
+            created_cycle=0,
+        )
+    )
+```
+
+## 4. Search by meaning
+
+Ask a question; `search_hybrid` embeds the query for you and returns ranked
+`(thought_id, score)` tuples, which we turn back into text:
+
+```python
+async def search(store, query, cycle):
+    result = await store.search_hybrid(query, top_k=3, current_cycle=cycle)
+    print(f"\nQuery: {query!r}  (signals: {sorted(result.backends_used)})")
+    for thought_id, score in result.results:
+        record = await store.get_thought(thought_id)
+        if record is not None:
+            print(f"  {score:.3f}  {record.essence}")
+```
+
+## 5. Put it together
+
+Wire the pieces into a `main()` and run it:
+
+```python
+async def main():
+    provider = CallbackProvider(callback=embed, dimension=32, model_name="tutorial")
+    async with aiosqlite.connect(":memory:") as conn:
+        conn.row_factory = aiosqlite.Row
+        store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True)
+        await store.ensure_schema()
+
+        notes = await ingest(store, NOTES)
+
+        # link the two coffee-related notes
+        await link(store, notes[0], notes[3])
+
+        await search(store, "anything about coffee?", cycle=len(NOTES))
+
+        total = await store.count_thoughts()
+        print(f"\nStored {total} notes.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+Run it and you'll see the coffee notes rank for the coffee query, plus the total
+count. That's a working memory: ingest, embed, link, search.
+
+The complete script is also shipped as
+[`examples/notes_memory.py`](https://github.com/sovantica/engrava/blob/main/examples/notes_memory.py)
+— run it directly with `python examples/notes_memory.py`.
+
+## Where to go next
+
+- **Make it an agent.** [Building a memory-backed agent](guides/agent-memory.md)
+  turns this into a per-turn loop (retrieve before you answer, store the reply).
+- **More tasks.** The [Recipes](recipes/index.md) cover TTL, dedup, session
+  scoping, and scheduled consolidation.
+- **Real embeddings.** Swap the toy `embed` for a provider in the
+  [Embeddings guide](guides/embeddings.md).
+- **Consolidation.** [Dreaming](dreaming.md) turns accumulating notes into
+  higher-level reflections over time.
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 0ac391a..9891dc3 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -15,16 +15,59 @@ In practice, most applications do not need a separate migration step. If your
 app already calls `ensure_schema()` during startup, that call performs the
 upgrade.
 
+## Rolling upgrades (multiple workers)
+
+If several processes share one database file, whether you can do a **rolling**
+upgrade (start new-version workers while old-version workers are still running)
+depends on whether the new version changes the schema.
+
+How migrations work: the core schema is versioned by SQLite's `PRAGMA
+user_version`. On the first `ensure_schema()`, Engrava runs each pending
+`vN → vN+1` step **inside a transaction** (forward-only). Most steps are
+**additive** (new columns, tables, and indexes), but some rebuild a table in
+place (create a new table, copy rows, drop the old, rename) — so the on-disk
+shape of a table can change across a migration.
+
+What that means for a rolling deploy:
+
+- **Patch upgrades that don't change `user_version`** (e.g. `0.3.0 → 0.3.1`) make
+  no schema change. Old and new workers can run side by side; roll them at will.
+- **Minor upgrades that run migrations are not guaranteed to be
+  backward-readable.** Once the first new-version worker calls `ensure_schema()`
+  and a table is rebuilt, an old-version worker may no longer match the new
+  on-disk shape. Do **not** run old and new workers concurrently across such an
+  upgrade.
+
+Recommended procedure for a schema-changing (minor) upgrade:
+
+1. **Back up** the database (see [Before You Upgrade](#before-you-upgrade)).
+2. **Quiesce writers** — stop the old workers (or take a brief maintenance
+   window) so no old-version process writes during the migration.
+3. **Run the migration once** — let a single new-version process call
+   `ensure_schema()` (or run `engrava migrate`) to completion.
+4. **Start the new workers** against the migrated database.
+
+When you are unsure whether a target release changes the schema, treat it as
+schema-changing and follow the quiesce procedure — it is always safe. The
+[compatibility matrix](#compatibility-matrix) notes which listed upgrades change
+the schema.
+
 ## Before You Upgrade
 
 These steps are recommended, not required:
 
 ```bash
+# Checkpoint the WAL first so the copy is complete, then back up.
+sqlite3 my-data.db "PRAGMA wal_checkpoint(TRUNCATE);"
 cp my-data.db my-data.db.bak
 pip install --upgrade engrava
 ```
 
-- Create a copy of the SQLite database file before the upgrade.
+- Create a copy of the SQLite database file before the upgrade. In WAL mode a
+  bare `cp` of just the `.db` can miss data still in the `-wal` file — checkpoint
+  first (above), or copy `my-data.db` together with `my-data.db-wal` and
+  `my-data.db-shm`. See [Backup & Recovery](backup-and-recovery.md) for all the
+  WAL-safe options.
 - Review [CHANGELOG.md](../CHANGELOG.md) for breaking changes and database notes.
 - If you ship custom extensions, make sure their schema migrations are included
   in the version you are about to install.
@@ -40,8 +83,10 @@ engrava --db my-data.db migrate
 
 - `engrava info` confirms the database is readable and reports current counts.
 - `engrava migrate` is safe to run after upgrade; it re-checks that schema is up to date.
-- `engrava gc` is optional if you want to compact archived or expired data after
-  the upgrade.
+- `engrava gc` is optional if you want to remove archived or expired data after
+  the upgrade. Note that `gc` deletes rows but does **not** shrink the database
+  file — freed pages return to SQLite's free-list. To reclaim file size, run
+  `VACUUM`. See [Data lifecycle → reclaiming disk space](data-lifecycle.md#reclaiming-disk-space).
 
 ## If Migration Fails
 
@@ -70,15 +115,32 @@ engrava --db my-data.db snapshot -o backup.snapshot.jsonl
 engrava --db new-old-version.db restore -i backup.snapshot.jsonl
 ```
 
+> **Note:** a snapshot exports thoughts, edges, embeddings, and actions, but
+> **not** the audit journal (`journal_entry`). A database restored from a
+> snapshot starts with an empty journal. If you need the audit history preserved,
+> take a physical file backup instead — see
+> [Backup & Recovery](backup-and-recovery.md).
+
 ## Compatibility Matrix
 
 | From | To | Supported | Notes |
 |---|---|---|---|
 | 0.2.0 | 0.2.2 | Yes | Patch-level upgrade, no dedicated new extension migration layer |
 | 0.2.2 | 0.3.0 | Yes | Minor upgrade with extension migration tracking and upgrade CI coverage |
+| 0.3.0 | 0.3.1 | Yes | Patch-level upgrade; no schema change (`user_version` unchanged) — safe to roll across workers |
+
+For any upgrade not listed, the rule of thumb is: **patch** upgrades within a
+`0.x.*` line do not change the schema and are low-risk; **minor** upgrades
+(`0.X` → `0.(X+1)`) may run schema migrations — back up first and read the
+[rolling-upgrades](#rolling-upgrades-multiple-workers) note above.
 
 ## Version Notes
 
+### 0.3.0 -> 0.3.1
+
+- Patch release: **no schema change** (`user_version` stays at its 0.3.0 value),
+  so it is safe to roll across multiple workers without a quiesce.
+
 ### 0.2.2 -> 0.3.0
 
 - Extension schema migration tracking is now part of the upgrade path.
diff --git a/examples/agent_loop.py b/examples/agent_loop.py
new file mode 100644
index 0000000..b861486
--- /dev/null
+++ b/examples/agent_loop.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""A memory-backed agent loop using only engrava — no external services.
+
+This is the canonical "wire engrava into an agent" example: a per-turn loop
+that, for each user message,
+
+  1. stores the message as a ``percept`` thought,
+  2. retrieves relevant prior memory with ``search_hybrid``,
+  3. builds a prompt from the retrieved essences and calls an LLM
+     (a deterministic stand-in here — swap in your real model),
+  4. stores the agent's reply as an ``utterance`` thought,
+  5. records the action it took (an ``ActionRecord``),
+  6. advances the cycle counter, and
+  7. runs dreaming consolidation every N turns.
+
+The cycle counter is the agent's logical clock: engrava never advances it for
+you, so this loop owns it and increments it once per turn (see the Core
+Concepts docs). On restart you would recover it from the maximum stored
+``updated_cycle`` plus one; this in-memory demo just starts at 0.
+
+No LLM and no embedding API are required: the "LLM" is a canned responder and
+embeddings come from a deterministic ``CallbackProvider``. Run directly::
+
+    python examples/agent_loop.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+import hashlib
+import uuid
+
+import aiosqlite
+
+from engrava import (
+    ActionRecord,
+    ActionStatus,
+    ActionType,
+    CallbackProvider,
+    DreamingConfig,
+    DreamingExtension,
+    DreamingGates,
+    LifecycleStatus,
+    Priority,
+    SqliteEngravaCore,
+    ThoughtRecord,
+    ThoughtType,
+    VerificationStatus,
+    percept,
+    utterance,
+)
+
+EMBED_DIM = 64
+CONSOLIDATE_EVERY = 3
+RETRIEVE_TOP_K = 3
+
+
+def _deterministic_embed(text: str) -> list[float]:
+    """Map text to a stable pseudo-embedding (no model, fully reproducible).
+
+    A real agent passes a real provider (sentence-transformers, OpenAI, …);
+    this keeps the example dependency-free and deterministic across runs.
+    """
+    digest = hashlib.sha256(text.lower().encode("utf-8")).digest()
+    # Repeat the 32-byte digest to fill EMBED_DIM bytes, then scale to [0, 1].
+    repeats = (EMBED_DIM // len(digest)) + 1
+    stretched = (digest * repeats)[:EMBED_DIM]
+    return [byte / 255.0 for byte in stretched]
+
+
+def _mock_llm(prompt: str) -> str:
+    """Stand in for an LLM call. Replace with your provider."""
+    return f"(reply based on {prompt.count('-')} retrieved memories)"
+
+
+async def _store_percept(
+    store: SqliteEngravaCore,
+    text: str,
+    cycle: int,
+    user_id: str,
+    session_id: str,
+    turn_index: int,
+) -> ThoughtRecord:
+    """Persist an incoming user message as an OBSERVATION percept.
+
+    The percept metadata is extended with ``session_id`` and ``turn_index`` so
+    every memory is anchored to its conversation and position within it — the
+    keys you'd later filter or post-filter on for per-session retrieval.
+    """
+    record = ThoughtRecord(
+        thought_id=str(uuid.uuid4()),
+        thought_type=ThoughtType.OBSERVATION,
+        essence=text[:200],
+        content=text,
+        priority=Priority.P2,
+        lifecycle_status=LifecycleStatus.ACTIVE,
+        created_cycle=cycle,
+        updated_cycle=cycle,
+        source=user_id,
+        metadata={
+            **percept(source_id=user_id, label="user"),
+            "session_id": session_id,
+            "turn_index": turn_index,
+        },
+    )
+    return await store.create_thought(record)
+
+
+async def _retrieve_context(store: SqliteEngravaCore, query: str, cycle: int) -> list[str]:
+    """Return the essences of the most relevant prior memories."""
+    result = await store.search_hybrid(
+        query,
+        query_vector=_deterministic_embed(query),
+        top_k=RETRIEVE_TOP_K,
+        current_cycle=cycle,  # the agent clock — drives the recency signal
+    )
+    essences: list[str] = []
+    for thought_id, _score in result.results:
+        record = await store.get_thought(thought_id)
+        if record is not None:
+            essences.append(record.essence)
+    return essences
+
+
+async def _store_utterance(
+    store: SqliteEngravaCore,
+    reply: str,
+    cycle: int,
+    session_id: str,
+    turn_index: int,
+) -> ThoughtRecord:
+    """Persist the agent's own reply as an OUTPUT_DRAFT utterance.
+
+    Tagged with the same ``session_id``/``turn_index`` as the percept it
+    answered, so a turn's input and output stay linked.
+    """
+    record = ThoughtRecord(
+        thought_id=str(uuid.uuid4()),
+        thought_type=ThoughtType.OUTPUT_DRAFT,
+        essence=reply[:200],
+        content=reply,
+        priority=Priority.P3,
+        lifecycle_status=LifecycleStatus.ACTIVE,
+        created_cycle=cycle,
+        updated_cycle=cycle,
+        source="agent",
+        metadata={
+            **utterance(),
+            "session_id": session_id,
+            "turn_index": turn_index,
+        },
+    )
+    return await store.create_thought(record)
+
+
+async def _record_action(store: SqliteEngravaCore, source_thought_id: str, intent: str) -> None:
+    """Record that the agent took an action, linked to the source thought."""
+    await store.create_action(
+        ActionRecord(
+            action_id=str(uuid.uuid4()),
+            source_thought_id=source_thought_id,
+            action_type=ActionType.MESSAGE,
+            intent=intent,
+            status=ActionStatus.CONFIRMED,
+            verification_status=VerificationStatus.CONFIRMED,
+        )
+    )
+
+
+async def main() -> None:
+    """Run a few turns of a memory-backed agent over an in-memory store."""
+    provider = CallbackProvider(
+        callback=_deterministic_embed,
+        dimension=EMBED_DIM,
+        model_name="demo-deterministic",
+    )
+    async with aiosqlite.connect(":memory:") as conn:
+        conn.row_factory = aiosqlite.Row
+        store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True)
+        await store.ensure_schema()
+
+        dreaming = DreamingExtension(
+            config=DreamingConfig(
+                enabled=True,
+                gates=DreamingGates(min_confirmations=0, min_age_cycles=0),
+            ),
+        )
+
+        user_id = "user-demo"
+        session_id = str(uuid.uuid4())  # one conversation; tag every memory with it
+        conversation = [
+            "I'm planning a trip to Japan in spring.",
+            "What's the weather like in Kyoto in April?",
+            "Remind me which city I'm visiting.",
+            "I prefer trains over flights for getting around.",
+        ]
+
+        cycle = 0  # the agent's logical clock; advance once per turn
+        for turn_index, user_message in enumerate(conversation):
+            # 1. store the incoming message (anchored to session + turn)
+            percept_thought = await _store_percept(
+                store, user_message, cycle, user_id, session_id, turn_index
+            )
+
+            # 2. retrieve relevant prior memory
+            context = await _retrieve_context(store, user_message, cycle)
+
+            # 3. build a prompt and call the LLM (stand-in)
+            prompt = "Context:\n" + "\n".join(f"- {c}" for c in context)
+            prompt += f"\n\nUser: {user_message}\nAssistant:"
+            reply = _mock_llm(prompt)
+
+            # 4. store the agent's reply (same session + turn as its percept)
+            await _store_utterance(store, reply, cycle, session_id, turn_index)
+
+            # 5. record the action taken
+            await _record_action(store, percept_thought.thought_id, intent="answered user")
+
+            print(f"cycle {cycle}: user={user_message!r}")
+            print(f"          retrieved {len(context)} memory(ies); reply={reply!r}")
+
+            # 6. advance the clock
+            cycle += 1
+
+            # 7. consolidate periodically
+            if cycle % CONSOLIDATE_EVERY == 0:
+                result = await dreaming.run_consolidation(store, current_cycle=cycle)
+                print(f"          [dreaming] promoted={result.promoted_count}")
+
+        total = await store.count_thoughts()
+        print(f"\nDone. {total} thoughts stored across {cycle} turns.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/config.yaml b/examples/config.yaml
index fb772ac..2379687 100644
--- a/examples/config.yaml
+++ b/examples/config.yaml
@@ -11,7 +11,7 @@ database:
 
 extensions:
   vector:
-    backend: numpy       # "numpy" (brute-force) or "sqlite-vec" (ANN)
+    backend: numpy       # "numpy" (brute-force) or "sqlite-vec" (faster KNN, not ANN)
     dimension: 384       # must match your embedding model
 
   dreaming:
diff --git a/examples/notes_memory.py b/examples/notes_memory.py
new file mode 100644
index 0000000..af9746a
--- /dev/null
+++ b/examples/notes_memory.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+"""A small notes memory built with engrava — the companion to the tutorial.
+
+This is the complete, runnable version of ``docs/tutorial.md``: ingest a few
+notes, embed them, link related ones with an edge, and search by meaning. It
+uses a tiny deterministic embedding function so it runs with no external
+services (swap in a real provider from the Embeddings guide for production).
+
+Run directly::
+
+    python examples/notes_memory.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+import hashlib
+import uuid
+
+import aiosqlite
+
+from engrava import (
+    CallbackProvider,
+    EdgeRecord,
+    EdgeType,
+    LifecycleStatus,
+    Priority,
+    SqliteEngravaCore,
+    ThoughtRecord,
+    ThoughtType,
+)
+
+EMBED_DIM = 32
+
+NOTES = [
+    "Buy oat milk and coffee beans on the way home.",
+    "The espresso machine descaling is overdue.",
+    "Standup moved to 10am on Thursdays.",
+    "Coffee tastes better with freshly ground beans.",
+]
+
+
+def embed(text: str) -> list[float]:
+    """A tiny deterministic stand-in. Use a real provider in production."""
+    digest = hashlib.sha256(text.lower().encode("utf-8")).digest()
+    return [byte / 255.0 for byte in (digest * 2)[:EMBED_DIM]]
+
+
+async def ingest(store: SqliteEngravaCore, notes: list[str]) -> list[ThoughtRecord]:
+    """Store each note as an OBSERVATION thought; return the persisted records."""
+    records: list[ThoughtRecord] = []
+    for index, text in enumerate(notes):
+        record = ThoughtRecord(
+            thought_id=str(uuid.uuid4()),
+            thought_type=ThoughtType.OBSERVATION,
+            essence=text[:200],
+            content=text,
+            priority=Priority.P3,
+            lifecycle_status=LifecycleStatus.ACTIVE,
+            created_cycle=index,
+            updated_cycle=index,
+            source="notes",
+        )
+        records.append(await store.create_thought(record))
+    return records
+
+
+async def link(
+    store: SqliteEngravaCore,
+    a: ThoughtRecord,
+    b: ThoughtRecord,
+    weight: float = 0.8,
+) -> None:
+    """Connect two related notes with an ASSOCIATED edge."""
+    await store.create_edge(
+        EdgeRecord(
+            edge_id=str(uuid.uuid4()),
+            from_thought_id=a.thought_id,
+            to_thought_id=b.thought_id,
+            edge_type=EdgeType.ASSOCIATED,
+            weight=weight,
+            created_cycle=0,
+        )
+    )
+
+
+async def search(store: SqliteEngravaCore, query: str, cycle: int) -> None:
+    """Print the top matches for a query (search embeds the query for you)."""
+    result = await store.search_hybrid(query, top_k=3, current_cycle=cycle)
+    print(f"\nQuery: {query!r}  (signals: {sorted(result.backends_used)})")
+    for thought_id, score in result.results:
+        record = await store.get_thought(thought_id)
+        if record is not None:
+            print(f"  {score:.3f}  {record.essence}")
+
+
+async def main() -> None:
+    """Build the notes memory and run a search over it."""
+    provider = CallbackProvider(callback=embed, dimension=EMBED_DIM, model_name="tutorial")
+    async with aiosqlite.connect(":memory:") as conn:
+        conn.row_factory = aiosqlite.Row
+        store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True)
+        await store.ensure_schema()
+
+        notes = await ingest(store, NOTES)
+
+        # link the two coffee-related notes
+        await link(store, notes[0], notes[3])
+
+        await search(store, "anything about coffee?", cycle=len(NOTES))
+
+        total = await store.count_thoughts()
+        print(f"\nStored {total} notes.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/src/engrava/config.py b/src/engrava/config.py
index 9a8a96d..cb06b32 100644
--- a/src/engrava/config.py
+++ b/src/engrava/config.py
@@ -727,7 +727,8 @@ class EngravaConfig:
         database_path: Path to the SQLite database file.
         wal_mode: Enable WAL journal mode for concurrent reads.
         hooks_class: Dotted import path to a ``EngravaHooksProtocol`` class.
-        vector_backend: ``"numpy"`` (default brute-force) or ``"sqlite-vec"`` (ANN).
+        vector_backend: ``"numpy"`` (default brute-force) or ``"sqlite-vec"``
+            (compact ``vec0`` vector table — faster brute-force KNN, not ANN).
         embedding_dimension: Dimension of embedding vectors (e.g. 384 for MiniLM).
         dreaming: Optional dreaming-consolidation configuration.
         embeddings: Optional embedding-provider configuration.
diff --git a/src/engrava/extensions/__init__.py b/src/engrava/extensions/__init__.py
index 8cc53f1..1fe81f4 100644
--- a/src/engrava/extensions/__init__.py
+++ b/src/engrava/extensions/__init__.py
@@ -1,6 +1,6 @@
 """engrava extensions package.
 
 Extensions provide optional capabilities to engrava:
-- ``vec``: ANN vector search via sqlite-vec
+- ``vec``: KNN vector search via sqlite-vec (compact ``vec0`` vector table)
 - ``dreaming``: Periodic memory consolidation
 """
diff --git a/src/engrava/extensions/vector_sqlite_vec.py b/src/engrava/extensions/vector_sqlite_vec.py
index 197acd0..39930d8 100644
--- a/src/engrava/extensions/vector_sqlite_vec.py
+++ b/src/engrava/extensions/vector_sqlite_vec.py
@@ -1,9 +1,13 @@
-"""SqliteVecSearchBackend — ANN vector search via sqlite-vec.
+"""SqliteVecSearchBackend — KNN vector search via sqlite-vec.
 
 Drop-in replacement for the brute-force numpy cosine similarity search
 in ``SqliteEngravaCore``.  When ``sqlite-vec`` is installed and its
 extension is loaded, ``search_similar()`` delegates to the ``vec0``
-virtual table for O(log n) approximate nearest-neighbor queries.
+virtual table for k-nearest-neighbour queries.  In the pinned
+``sqlite-vec`` 0.1.x release line, ``vec0`` performs an exhaustive scan over a
+compact, chunked columnar store of the vectors — faster and more
+memory-efficient than the Python brute-force path, but **not** an
+approximate / sub-linear index (no ANN guarantee at this version).
 
 If sqlite-vec is unavailable at runtime the store falls back to the
 existing numpy implementation — no crash, just a warning log.
@@ -23,12 +27,12 @@
 
 
 class SqliteVecSearchBackend:
-    """ANN vector search backend backed by a ``vec0`` virtual table.
+    """KNN vector search backend backed by a ``vec0`` virtual table.
 
     Lifecycle:
         1. ``ensure_index(db, dimension)`` — creates the virtual table.
         2. ``sync_embeddings(db)`` — backfills existing rows.
-        3. ``search(db, query_vector, ...)`` — runs ANN queries.
+        3. ``search(db, query_vector, ...)`` — runs k-nearest-neighbour queries.
 
     All state is kept in SQLite; this class is stateless aside from
     the cached ``dimension``.
@@ -110,7 +114,7 @@ async def search(
         top_k: int = 10,
         threshold: float = 0.0,
     ) -> list[tuple[str, float]]:
-        """ANN search via sqlite-vec ``vec0`` virtual table.
+        """k-nearest-neighbour search via the sqlite-vec ``vec0`` virtual table.
 
         The ``vec0`` table uses cosine distance (``1 - cosine_similarity``).
         Results are converted to cosine similarity via ``1 - distance``
@@ -174,8 +178,8 @@ async def upsert_embedding(
     ) -> None:
         """Insert or replace a single embedding in the ``vec0`` index.
 
-        Used by ``store_embedding()`` to keep the ANN index in sync
-        after each write to the ``embedding`` table.
+        Used by ``store_embedding()`` to keep the ``vec0`` vector table in
+        sync after each write to the ``embedding`` table.
 
         Args:
             db: Active database connection with sqlite-vec loaded.
diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py
index c85d693..bdd3659 100644
--- a/src/engrava/infrastructure/sqlite/engrava_core.py
+++ b/src/engrava/infrastructure/sqlite/engrava_core.py
@@ -2353,7 +2353,7 @@ async def store_embedding(
                 ("THOUGHT", thought_id, model_name, dimension, blob, created_at, eid),
             )
 
-        # Keep the vec0 ANN index in sync when a vector backend is active.
+        # Keep the vec0 vector table in sync when a vector backend is active.
         if self._vector_backend is not None:
             await self._vector_backend.upsert_embedding(
                 self._db,
@@ -2404,9 +2404,9 @@ async def search_similar(
         """Cosine similarity search — delegates to sqlite-vec if available.
 
         When a ``SqliteVecSearchBackend`` is configured (via
-        ``from_config`` with ``vector_backend: "sqlite-vec"``), ANN
-        search is used.  Otherwise falls back to brute-force numpy
-        cosine similarity.
+        ``from_config`` with ``vector_backend: "sqlite-vec"``), the
+        ``vec0`` vector table serves the query.  Otherwise it falls back to
+        brute-force numpy cosine similarity.
 
         Args:
             query_vector: Query embedding vector.
diff --git a/tests/docs/test_docs_examples_execute.py b/tests/docs/test_docs_examples_execute.py
index a1e4105..b5d7bae 100644
--- a/tests/docs/test_docs_examples_execute.py
+++ b/tests/docs/test_docs_examples_execute.py
@@ -37,6 +37,7 @@
 EXECUTABLE_BLOCKS: tuple[tuple[str, str], ...] = (
     ("README.md", "async def main() -> None:"),
     ("docs/quickstart.md", 'print("Store ready!")'),
+    ("docs/guides/migrating-from-other-memory.md", "Imported {total} thoughts."),
 )
 
 
diff --git a/tests/examples/test_quickstart_runs.py b/tests/examples/test_quickstart_runs.py
index 14b9473..fbbe6c5 100644
--- a/tests/examples/test_quickstart_runs.py
+++ b/tests/examples/test_quickstart_runs.py
@@ -49,6 +49,31 @@ def test_quickstart_runs_to_completion() -> None:
     assert "teal" in result.stdout.lower()
 
 
+def test_agent_loop_runs_to_completion() -> None:
+    """``agent_loop.py`` runs the full memory-backed turn loop to a clean exit.
+
+    Unlike the quickstart, it needs no local-embeddings extra — it uses a
+    deterministic ``CallbackProvider`` and a mock LLM — so it always runs.
+    """
+    result = _run_example("agent_loop.py")
+    assert result.returncode == 0, f"non-zero exit; stderr=\n{result.stderr}"
+    assert "cycle 0:" in result.stdout  # stdout markers the example must print
+    assert "[dreaming]" in result.stdout
+    assert "Done." in result.stdout
+
+
+def test_notes_memory_runs_to_completion() -> None:
+    """``notes_memory.py`` (the tutorial companion) runs to a clean exit.
+
+    Uses a deterministic ``CallbackProvider`` — no local-embeddings extra — so it
+    always runs.
+    """
+    result = _run_example("notes_memory.py")
+    assert result.returncode == 0, f"non-zero exit; stderr=\n{result.stderr}"
+    assert "Query:" in result.stdout  # header printed ahead of the search results
+    assert "Stored 4 notes." in result.stdout  # count line printed after the search
+
+
 def test_dreaming_benefit_script_not_shipped() -> None:
     """The fresh-store dreaming walkthrough script is not part of the public surface.