From 2c3fa82f03c809ce92044abd1bd30c78c90ceb8b Mon Sep 17 00:00:00 2001 From: PMarzec <98286080+przemarzec@users.noreply.github.com> Date: Fri, 5 Jun 2026 01:32:44 +0200 Subject: [PATCH] docs: expand and correct the engrava documentation set (#17) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documentation expansion bringing docs/ up to the shipped 0.3.x behaviour: new pages (Core Concepts, Positioning, Migration, Troubleshooting, FAQ, Performance, Data lifecycle, Deployment, Concurrency, Backup & Recovery, CLI, Glossary) + accuracy fixes across existing pages, all verified against the running package. tests/docs/ compiles, phantom-scans, and executes documentation code. docs:/style: only — no release. --- README.md | 32 +- docs/api-reference.md | 92 +++++- docs/audit-trail.md | 261 +++++++++++++++++ docs/backup-and-recovery.md | 129 +++++++++ docs/cli.md | 228 +++++++++++++++ docs/concepts.md | 247 ++++++++++++++++ docs/concurrency.md | 124 ++++++++ docs/configuration.md | 94 +++++- docs/data-lifecycle.md | 170 +++++++++++ docs/deployment.md | 133 +++++++++ docs/dreaming.md | 82 +++++- docs/faq.md | 116 ++++++++ docs/glossary.md | 161 ++++++++++ docs/guides/agent-memory.md | 260 +++++++++++++++++ docs/guides/embeddings.md | 228 +++++++++++++++ docs/guides/migrating-from-other-memory.md | 274 ++++++++++++++++++ docs/observability.md | 121 ++++++++ docs/performance.md | 175 +++++++++++ docs/positioning.md | 97 +++++++ docs/quickstart.md | 42 ++- docs/recipes/index.md | 190 ++++++++++++ docs/troubleshooting.md | 193 ++++++++++++ docs/tutorial.md | 153 ++++++++++ docs/upgrade.md | 68 ++++- examples/agent_loop.py | 235 +++++++++++++++ examples/config.yaml | 2 +- examples/notes_memory.py | 117 ++++++++ src/engrava/config.py | 3 +- src/engrava/extensions/__init__.py | 2 +- src/engrava/extensions/vector_sqlite_vec.py | 18 +- .../infrastructure/sqlite/engrava_core.py | 8 +- tests/docs/test_docs_examples_execute.py | 1 + 
tests/examples/test_quickstart_runs.py | 25 ++ 33 files changed, 4044 insertions(+), 37 deletions(-) create mode 100644 docs/audit-trail.md create mode 100644 docs/backup-and-recovery.md create mode 100644 docs/cli.md create mode 100644 docs/concepts.md create mode 100644 docs/concurrency.md create mode 100644 docs/data-lifecycle.md create mode 100644 docs/deployment.md create mode 100644 docs/faq.md create mode 100644 docs/glossary.md create mode 100644 docs/guides/agent-memory.md create mode 100644 docs/guides/embeddings.md create mode 100644 docs/guides/migrating-from-other-memory.md create mode 100644 docs/performance.md create mode 100644 docs/positioning.md create mode 100644 docs/recipes/index.md create mode 100644 docs/troubleshooting.md create mode 100644 docs/tutorial.md create mode 100644 examples/agent_loop.py create mode 100644 examples/notes_memory.py diff --git a/README.md b/README.md index 0539b04..0b94764 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,17 @@ since 0.3.0. → See [`docs/benchmarks.md`](docs/benchmarks.md) for reproducible evidence (synthetic benchmark suite runnable in ~5 minutes). +### Tamper-Evident Audit Trail + +Opt-in hash-chain **journal** that records every thought/edge mutation as a +SHA-256-linked, before/after entry — off by default, one config flag to enable. +Query history with `store.journal.get_entries(...)` and validate the chain with +`store.journal.verify_integrity()`. + +→ See [`docs/audit-trail.md`](docs/audit-trail.md) for enabling, querying, +verification, and the security model (what "tamper-evident" does and does not +guarantee). + ### Multi-Service Isolation Run multiple independent databases under one `EngravaManager`: @@ -203,6 +214,8 @@ engrava --db mydata.db export -o portable.json `engrava info` now renders the same metrics snapshot contract exposed by `await store.metrics()`. +See the [CLI reference](docs/cli.md) for every command and option. 
+ ## Architecture - **SQLite** with WAL mode for concurrent reads @@ -214,13 +227,30 @@ engrava --db mydata.db export -o portable.json ## Documentation -- [Upgrade Guide](docs/upgrade.md) — compatibility matrix, backups, and troubleshooting +- [Core Concepts](docs/concepts.md) — the mental model (thought, edge, reflection, cycle, …) — start here +- [Positioning](docs/positioning.md) — when Engrava is (and isn't) the right tool, and how it compares - [Quick Start](docs/quickstart.md) — 5-minute setup guide +- [Tutorial](docs/tutorial.md) — build a small notes memory end to end +- [Recipes](docs/recipes/index.md) — copy-paste snippets for common tasks (store a turn, retrieve context, TTL, dedup, …) +- [Building a memory-backed agent](docs/guides/agent-memory.md) — the end-to-end agent turn loop (ingest → retrieve → generate → consolidate) +- [Migrating from another memory system](docs/guides/migrating-from-other-memory.md) — concept mapping, porting calls, bulk import, and scoping/multi-tenancy +- [Embeddings](docs/guides/embeddings.md) — wiring a real embedding provider (local / OpenAI / Ollama / HuggingFace / custom) - [Configuration](docs/configuration.md) — YAML config format and options +- [Upgrade Guide](docs/upgrade.md) — compatibility matrix, backups, and troubleshooting - [Extensions](docs/extensions.md) — Writing custom extensions and hooks - [Observability](docs/observability.md) — Metrics snapshot API +- [Audit Trail](docs/audit-trail.md) — Tamper-evident hash-chain journal (enabling, querying, verifying, security model) - [API Reference](docs/api-reference.md) — Full protocol and class reference +- [CLI Reference](docs/cli.md) — every `engrava` command and option +- [Glossary](docs/glossary.md) — quick definitions of every Engrava term - [MindQL](docs/mindql.md) — Query language syntax and examples +- [Troubleshooting](docs/troubleshooting.md) — symptom → cause → fix for common errors +- [FAQ](docs/faq.md) — quick answers (LLM/keys, embeddings-optional, 
scale, concurrency, backups, …) +- [Performance & Scaling](docs/performance.md) — the vector-backend switch, bulk-ingest, and dreaming cost at scale +- [Data Lifecycle & Retention](docs/data-lifecycle.md) — lifecycle states, TTL, archive-vs-delete, GDPR erasure, disk reclamation +- [Deployment](docs/deployment.md) — process model, database files on disk, containers, graceful shutdown +- [Concurrency](docs/concurrency.md) — the WAL single-writer model, busy timeout, and per-service isolation +- [Backup & Recovery](docs/backup-and-recovery.md) — WAL-safe backups, snapshot vs file copy, restore verification - [Known Limitations](docs/known-limitations.md) — Platform notes and constraints ## Development diff --git a/docs/api-reference.md b/docs/api-reference.md index d3e5651..3c6cc99 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -65,6 +65,7 @@ keyword arguments and does **not** return a UUID string. | `await list_thoughts(...)` | `list[ThoughtRecord]` | List with filters (keyword-only) | | `await count_thoughts(...)` | `int` | Count with filters (keyword-only) | | `await delete_thought(thought_id)` | `bool` | Hard delete; `True` if a row was removed | +| `await record_access(thought_id)` | `None` | Mark a thought as accessed — bumps `access_count` and sets `last_accessed_at`; raises `ThoughtNotFoundError` if missing. Drives the access-frequency dreaming signal. | ```python import uuid @@ -135,6 +136,40 @@ await store.create_edge( ) ``` +#### REFLECTION lineage + +Helpers for navigating the `CONSOLIDATED_FROM` graph that dreaming builds +between a REFLECTION and the source thoughts it summarises. + +| Method | Returns | Description | +|--------|---------|-------------| +| `await consolidated_member_ids(reflection_id)` | `list[str]` | The thought IDs a REFLECTION was consolidated from | +| `await consolidated_source_statuses(reflection_id)` | `list[str]` | The lifecycle statuses of those source thoughts (e.g. 
to detect a fully-archived, orphaned cluster) | +| `await reflections_consolidated_from(source_id)` | `list[str]` | The REFLECTION IDs that consolidated a given source thought (the reverse direction) | +| `await thought_exists_by_source(*, source, thought_type_value)` | `bool` | Whether any thought exists with the given `source` and type — keyword-only | + +```python +# Walk a REFLECTION down to its sources, and back from a source to its REFLECTIONs. +member_ids = await store.consolidated_member_ids(reflection_id) +for thought_id in member_ids: + source = await store.get_thought(thought_id) + if source is not None: + print(source.essence) + +# Detect an orphaned cluster — every source archived/gone: +statuses = await store.consolidated_source_statuses(reflection_id) +is_orphaned = bool(statuses) and all(s != "ACTIVE" for s in statuses) + +# Reverse direction: which REFLECTIONs summarise this source? +parents = await store.reflections_consolidated_from(member_ids[0]) + +# Exact-source existence check (e.g. dreaming's idempotency guard — a REFLECTION's +# source is "dreaming:<suffix>", so match the full value, not a prefix): +exists = await store.thought_exists_by_source( + source="dreaming:abc123def4567890", thought_type_value="REFLECTION" +) +``` + #### Embedding Operations | Method | Returns | Description | @@ -167,11 +202,23 @@ returns a single `HybridSearchResult` container. 
| `await metrics()` | `EngravaMetrics` | Snapshot of thought/edge counts, storage, and search-latency percentiles (see [Observability](observability.md)) | | `await cleanup_expired(now=None, *, exclude_id=None)` | `CleanupResult` | Archive or delete thoughts past their `expires_at` | | `await verify_embedding_model()` | `None` | Raise `EmbeddingModelMismatchError` if the stored model lock disagrees with the configured provider | +| `async with store.suspend_auto_commit():` | context manager | Defer per-call commits so a block of writes commits once (rolls back on error) — use for bulk ingest | | `await close()` | `None` | Close the owned connection (only when the store opened it via `from_config`) | +```python +# Bulk ingest: one transaction instead of one commit per write. +async with store.suspend_auto_commit(): + for record in many_records: + await store.create_thought(record) +# commit happens once on clean exit; any exception rolls the whole block back +``` + ### `ReadOnlyEngrava` -Wrapper that raises `ReadOnlyViolationError` on any write operation. +A composition wrapper that delegates reads to the wrapped store and raises +`ReadOnlyViolationError` on any write. Use it to hand a retrieval-only view of +shared memory to a component that should never mutate it — e.g. a sub-agent or +worker whose job is only to look things up. ```python from engrava import ReadOnlyEngrava @@ -299,14 +346,51 @@ extension is recommended for filtering queries (`json_extract(metadata_json, '$. ### `ActionRecord` +Records an action the agent took (a tool call, a message, …), linked to the +thought that prompted it, with execution and verification state. 
+ | Field | Type | Description | |-------|------|-------------| | `action_id` | `str` | UUID primary key | -| `source_thought_id` | `str` | Linked thought | +| `source_thought_id` | `str` | The thought this action originated from | | `action_type` | `ActionType` | Action classification | -| `intent` | `str` | Description of intent | -| `status` | `ActionStatus` | Current status | +| `intent` | `str` | Description of intent (min length 1) | +| `status` | `ActionStatus` | Current execution status | | `verification_status` | `VerificationStatus` | Verification state | +| `raw_metrics_json` | `str \| None` | Optional ground-truth facts for verification | + +**Store methods** (on `SqliteEngravaCore`): + +| Method | Returns | Description | +|--------|---------|-------------| +| `await create_action(action)` | `ActionRecord` | Persist an `ActionRecord` | +| `await get_actions(thought_id)` | `list[ActionRecord]` | Actions linked to a thought | + +`ActionStatus` is a state machine: `PLANNED → EXECUTING → CONFIRMED` / `FAILED`, +and `PLANNED → BLOCKED → PLANNED`. `can_transition_to(...)` / `evolve(...)` +enforce valid transitions (an illegal change raises `InvalidTransitionError`). 
+ +```python +import uuid +from engrava import ActionRecord, ActionType, ActionStatus, VerificationStatus + +action = ActionRecord( + action_id=str(uuid.uuid4()), + source_thought_id=prompting_thought_id, + action_type=ActionType.TOOL_CALL, + intent="search the web for flight prices", + status=ActionStatus.PLANNED, + verification_status=VerificationStatus.PENDING, +) +await store.create_action(action) + +# advance through the lifecycle (frozen model → evolve returns a new instance): +done = action.evolve(status=ActionStatus.EXECUTING).evolve( + status=ActionStatus.CONFIRMED +) + +actions = await store.get_actions(prompting_thought_id) +``` ### `HybridSearchResult` diff --git a/docs/audit-trail.md b/docs/audit-trail.md new file mode 100644 index 0000000..40015ee --- /dev/null +++ b/docs/audit-trail.md @@ -0,0 +1,261 @@ +# Audit Trail (hash-chain journal) + +Engrava can record every change to your thought-graph in an append-only, +hash-linked **journal** — a tamper-evident audit trail. Each entry captures one +mutation (insert / update / delete of a thought or edge) as a before/after +delta, and is cryptographically chained to the previous entry with SHA-256. + +> **Read the [Security model](#security-model--guarantees) before relying on this +> for compliance.** The chain detects accidental corruption and naive edits, but +> it is a *keyless* chain stored in the same database file — see the boundary +> below. + +## Enabling the journal + +Journaling is **off by default** (zero overhead when disabled — the +`journal_entry` table exists but is never written to). Turn it on either via +configuration or the constructor. 
+ +In `engrava.yaml`: + +```yaml +database: + path: "./engrava.db" + +journal: + enabled: true +``` + +```python +from engrava import SqliteEngravaCore + +async with await SqliteEngravaCore.from_config("engrava.yaml") as store: + assert store.journal is not None # journaling is active +``` + +Or when constructing the store directly: + +```python +import aiosqlite +from engrava import SqliteEngravaCore + +async with aiosqlite.connect("engrava.db") as conn: + conn.row_factory = aiosqlite.Row + store = SqliteEngravaCore(conn, journal_enabled=True) + await store.ensure_schema() +``` + +`store.journal` returns the `JournalWriter` when journaling is enabled, or +`None` when it is off — so a quick `if store.journal is not None:` guards any +journal-specific code. + +## What gets recorded + +When journaling is enabled, the store records a journal entry **automatically** +on every mutation of a thought or an edge — you do not call the journal +yourself. The recorded `mutation_type` values (the `MutationType` enum) are: + +| `MutationType` | When | +|---|---| +| `INSERT_THOUGHT` | `create_thought()` | +| `UPDATE_THOUGHT` | `update_thought()` | +| `DELETE_THOUGHT` | `delete_thought()` (only when a row was actually deleted) | +| `INSERT_EDGE` | `create_edge()` | +| `UPDATE_EDGE` | `update_edge()` | +| `DELETE_EDGE` | `delete_edge()` (only when a row was actually deleted) | + +Each entry's `delta` is a `{"before": ..., "after": ...}` dictionary: inserts +have `before: null`, deletes have `after: null`, and updates carry both sides. + +> **Not recorded:** embeddings (`store_embedding`) and action records +> (`create_action`) are **not** written to the journal — the audit trail covers +> the thought-and-edge graph, not the embedding or action tables. This also +> matters for backups — see [Backup note](#backup--retention-note). 
+ +**TTL expiry is recorded.** `cleanup_expired()` (and the auto-cleanup it +triggers) goes through the same journaled paths, so expiry of a thought is +captured according to the configured TTL strategy: + +- **archive** strategy → an `UPDATE_THOUGHT` entry (the thought's + `lifecycle_status` flips to `ARCHIVED` and `expires_at` is cleared; the delta + carries the before/after). +- **delete** strategy → a `DELETE_THOUGHT` entry (`after: null`). + +(The separate `engrava gc` CLI command, which physically purges already-archived +rows, operates at the storage layer and is not journaled.) + +## The `JournalEntry` schema + +Each entry is an immutable `JournalEntry`: + +| Field | Type | Meaning | +|---|---|---| +| `entry_id` | `str` | Stable UUID for this entry | +| `sequence_number` | `int` | Monotonic, gapless position in the chain (starts at 1) | +| `mutation_type` | `str` | One of the `MutationType` values above | +| `target_id` | `str \| None` | The affected `thought_id` / `edge_id` | +| `delta` | `dict` | `{"before": {...}, "after": {...}}` diff | +| `parent_hash` | `str \| None` | SHA-256 of the previous entry (`None` for the first entry) | +| `entry_hash` | `str` | SHA-256 of this entry's canonical content | +| `created_at` | `str` | ISO-8601 UTC timestamp | + +The hash is computed over the canonical string +`"{sequence_number}|{mutation_type}|{target_id}|{json(delta, sort_keys)}|{parent_hash}"` +via `JournalWriter.compute_hash(...)` (a static method, exposed for callers who +want to recompute a hash independently). + +## Querying history + +Use `store.journal.get_entries(...)` to read the trail. All filters are +optional; results are ordered by `sequence_number` ascending. 
+ +```python +# Everything that ever happened to one thought: +history = await store.journal.get_entries(target_id="thought-001") +for entry in history: + print(entry.sequence_number, entry.mutation_type, entry.created_at) + +# Only deletions, since a timestamp, capped: +deletions = await store.journal.get_entries( + mutation_type="DELETE_THOUGHT", + since="2026-01-01T00:00:00+00:00", + limit=500, +) +``` + +| Parameter | Default | Meaning | +|---|---|---| +| `target_id` | `None` | Filter by the affected entity ID | +| `mutation_type` | `None` | Filter by mutation type string | +| `since` | `None` | ISO-8601 lower bound on `created_at` (inclusive) | +| `limit` | `100` | Maximum entries returned | + +## Verifying integrity + +`store.journal.verify_integrity()` walks the whole chain in order, recomputes +every hash, and checks the parent-hash linkage. It returns a +`JournalIntegrityResult`: + +```python +result = await store.journal.verify_integrity() +if result.valid: + print(f"Chain OK — {result.entries_checked} entries verified.") +else: + print( + f"Tampering or corruption detected at sequence " + f"{result.first_invalid_sequence}: {result.error_message}" + ) +``` + +| Field | Type | Meaning | +|---|---|---| +| `valid` | `bool` | `True` if every hash and link checks out | +| `entries_checked` | `int` | Number of entries verified | +| `first_invalid_sequence` | `int \| None` | Sequence of the first broken entry, or `None` | +| `error_message` | `str \| None` | Description of the first error, or `None` | + +An empty journal verifies as `valid=True` with `entries_checked=0`. + +**Run verification on a schedule** (e.g. before each backup, during incident +response, or as a periodic monitoring check) rather than only ad hoc — that is +what turns the chain from a passive structure into an active control. 
+ +## Worked example + +```python +import aiosqlite +import uuid +from engrava import ( + SqliteEngravaCore, + ThoughtRecord, + ThoughtType, + Priority, + LifecycleStatus, +) + +async with aiosqlite.connect(":memory:") as conn: + conn.row_factory = aiosqlite.Row + store = SqliteEngravaCore(conn, journal_enabled=True) + await store.ensure_schema() + + note = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OBSERVATION, + essence="User prefers email over phone", + content="Stated during onboarding call.", + priority=Priority.P2, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=0, + updated_cycle=0, + source="human", + ) + await store.create_thought(note) + await store.update_thought(note.thought_id, essence="User strongly prefers email") + + # Two entries were recorded automatically (INSERT_THOUGHT, UPDATE_THOUGHT). + entries = await store.journal.get_entries(target_id=note.thought_id) + assert [e.mutation_type for e in entries] == ["INSERT_THOUGHT", "UPDATE_THOUGHT"] + + # The chain verifies. + result = await store.journal.verify_integrity() + assert result.valid and result.entries_checked == 2 +``` + +## Security model & guarantees + +The journal is a **keyless** SHA-256 integrity chain stored **in the same +SQLite file** it protects. `verify_integrity()` recomputes each entry's hash +from that entry's own stored data — there is no secret key, HMAC, signature, or +external anchor. + +**What it protects against (in scope):** + +- **Accidental corruption** — bit-rot, a truncated file, a half-written row: the + recomputed hash or the parent linkage will not match, and verification fails. +- **Naive tampering** — someone who edits, deletes, or reorders a journal row + (or an audited record) *without* recomputing the rest of the chain: the break + is detected at the first inconsistent entry. 
+ +**What it does NOT protect against (out of scope):** + +- **A chain-aware actor with write access to the database file.** Because the + chain is keyless and self-contained, anyone who can write to the `.db` can + edit an entry **and** recompute every subsequent hash, producing a fully + self-consistent chain that passes `verify_integrity()` with `valid=True`. The + journal is **not** forgery-proof against an adversary (including the agent + process itself) who controls the file. + +If you need genuine, multi-party tamper-evidence, treat the in-file chain as one +layer and add at least one of: + +- **Restrict write access** — store the `.db` on a volume only the trusted + writer process can modify (OS file permissions / ownership). +- **Anchor the chain externally** — periodically export the latest + `entry_hash` (the chain tail) to an append-only / WORM store, a signed log, or + another system out of the writer's control. A later `verify_integrity()` plus + a match against the externally-anchored tail hash detects a full-file rewrite. +- **Verify on a schedule** — run `verify_integrity()` from a separate monitored + process so a detected mismatch raises an alert. + +State this boundary plainly to stakeholders: Engrava's journal gives you +**integrity detection for accidental damage and unsophisticated edits**, not +cryptographic non-repudiation against a file-level adversary. + +## Backup & retention note + +The logical snapshot/restore path (`engrava snapshot` / `engrava restore`) +covers the thought / edge / embedding / action tables — it does **not** include +the `journal_entry` table. A snapshot is therefore **not** a backup of the audit +trail, and restoring from one starts a fresh chain. To preserve the journal, +back up the database file itself (see the upgrade/backup guidance), and note +that hard-deleting an audited thought still leaves its content in the journal's +`before`/`after` delta — relevant when handling erasure requests. 
+ +## See also + +- The [Enabling the journal](#enabling-the-journal) section above is the + canonical reference for the `journal.enabled` configuration flag; the general + [Configuration](configuration.md) guide covers the rest of `engrava.yaml`. +- [API Reference](api-reference.md) — the broader public API (the journal + classes `JournalWriter` / `JournalEntry` / `JournalIntegrityResult` and the + `MutationType` enum are documented on this page). diff --git a/docs/backup-and-recovery.md b/docs/backup-and-recovery.md new file mode 100644 index 0000000..4f99f32 --- /dev/null +++ b/docs/backup-and-recovery.md @@ -0,0 +1,129 @@ +# Backup & Recovery + +Two ways to back up an Engrava database, what each one covers, and how to restore +and verify. The most important thing to know up front: a **logical snapshot does +not include the audit journal**, and a **naive file copy in WAL mode can lose +data** — both are explained below. + +## Two kinds of backup + +| Method | What it captures | Portable across versions? | +|---|---|---| +| **Logical snapshot** (`engrava snapshot`) | Thoughts, edges, embeddings, and actions as JSONL records | Yes — it's data, not file format | +| **Physical file backup** | The exact database file(s) — *everything*, including the audit journal | Tied to the SQLite file format (very stable) | + +Pick the logical snapshot for portability and selective restore; pick a physical +backup when you need a byte-exact copy (including the journal) or point-in-time +file recovery. + +## Logical snapshot and restore + +```bash +engrava --db engrava.db snapshot -o backup.jsonl # export +engrava --db fresh.db restore -i backup.jsonl # import into a fresh db +``` + +The snapshot is JSONL: a metadata header line, then one record per +thought / edge / embedding / action. 
+ +> **A snapshot does NOT include the audit journal.** The `journal_entry` table — +> the tamper-evident hash chain — is **not** exported by `engrava snapshot`, and +> therefore is **not** recreated by `restore`. A database restored from a snapshot +> starts with an **empty journal**: the data is intact, but its prior audit +> history is gone. If audit continuity matters, use a **physical file backup** +> (which copies the journal verbatim), not a logical snapshot. See +> [Audit Trail](audit-trail.md). + +`restore` options worth knowing (see the [CLI reference](cli.md#restore) for the +full list): `--clear` to wipe the target first, `--skip-embeddings` / `--re-embed` +to control embedding handling, and `--service` for multi-service targets. + +## Physical file backup (WAL-safe) + +Engrava runs in **WAL mode**, where recently-written data lives in the `-wal` +file until it is checkpointed into the main `.db`. A plain file copy is only safe +under specific conditions, so choose the method by whether the database is +**live** (being written) or **stopped**. + +### If the database is live (writers running) + +A file copy of a database under active writes is **not reliable** — the `.db` and +`-wal` change during the copy and can be captured inconsistently. Use a method +that produces an internally consistent copy *without* stopping writers: + +**SQLite Online Backup API** — a hot, consistent backup driven from your own code +via Python's `sqlite3` backup API (`source.backup(dest)`). This is the +recommended way to back up a running database, and it supports incremental copies. + +**`VACUUM INTO`** — writes a fresh, consistent, compacted copy of the database to +a new file. SQLite serialises it correctly against ongoing activity: + +```bash +sqlite3 engrava.db "VACUUM INTO 'engrava-backup.db';" +``` + +Both produce a single clean `.db` you can store or move; neither requires copying +the `-wal`/`-shm` files. 
+ +### If you can stop or quiesce writers + +When you can take the database offline (or guarantee no writes for the duration), +a file copy is safe — preferably after folding the WAL back into the main file: + +**Checkpoint, then copy the single file:** + +```bash +# with no writers active: +sqlite3 engrava.db "PRAGMA wal_checkpoint(TRUNCATE);" +cp engrava.db engrava.db.bak +``` + +**Or copy the file set** (`engrava.db` + `-wal` + `-shm`) **as one atomic unit** — +e.g. via a filesystem-level snapshot (LVM, ZFS, a cloud volume snapshot) that +captures all three at the same instant. A plain `cp` of the three files of a +*live* database is **not** atomic and can still be inconsistent; only do the +multi-file copy when writers are stopped or behind a consistent snapshot. + +> **Do not** rely on a bare `cp engrava.db backup.db` — or even a non-atomic +> `cp engrava.db engrava.db-wal engrava.db-shm ...` — while the database is being +> written. For a live database use the Online Backup API or `VACUUM INTO`. + +## Restoring + +- **From a snapshot:** `engrava --db <target.db> restore -i backup.jsonl`. Restore + into a **fresh** database (optionally `--clear` an existing one). Remember the + journal is not restored. +- **From a physical backup:** stop the process, put the backed-up file in place, + and start again. A backup made with the Online Backup API, `VACUUM INTO`, or a + checkpoint-then-copy is a single self-contained `.db`. If instead you captured a + multi-file filesystem snapshot, restore `engrava.db`, `engrava.db-wal`, and + `engrava.db-shm` together as the unit they were snapshotted in. + +### Verify a restore + +After restoring, confirm the database is readable and the counts look right: + +```bash +engrava --db restored.db info # reports counts; confirms the schema is readable +``` + +For a snapshot restore you can compare `info` counts against the source. 
If you +rely on the audit journal and restored from a **physical** backup, also re-run +journal verification (see [Audit Trail](audit-trail.md)) to confirm the chain is +intact. + +## Multi-service backups + +With [`EngravaManager`](concurrency.md#per-service-isolation), each service is its +own database file under the shared data directory. Back them up the same way — +either snapshot each service (`snapshot --service <name>`) or take a WAL-safe +physical copy of each `.db` (plus its `-wal`/`-shm`). Because services are +independent files, you can back up, restore, or delete one without touching the +others. + +## See also + +- [Audit Trail](audit-trail.md) — the journal that snapshots exclude +- [Concurrency](concurrency.md) — why WAL needs a WAL-safe backup +- [Data Lifecycle](data-lifecycle.md) — retention, erasure, and VACUUM +- [Upgrade Guide](upgrade.md) — backing up before an upgrade diff --git a/docs/cli.md b/docs/cli.md new file mode 100644 index 0000000..6f0e4b9 --- /dev/null +++ b/docs/cli.md @@ -0,0 +1,228 @@ +# CLI reference + +Engrava ships an `engrava` command-line tool for inspecting, querying, and +maintaining a database without writing code. This page documents every command +and option. + +```bash +engrava [GLOBAL OPTIONS] COMMAND [ARGS]... +``` + +## Global options + +These apply to every command and go **before** the command name: + +| Option | Values / type | Default | Description | +|---|---|---|---| +| `--db` | path | `./engrava.db` | Path to the SQLite database. Falls back to the `ENGRAVA_DB` env var, then the default. | +| `--config` | path | — | Path to `engrava.yaml`. Falls back to the `ENGRAVA_CONFIG` env var. | +| `--format` | `table` \| `json` \| `csv` | `table` | Output format for commands that print records. | +| `--verbose` | flag | off | Enable verbose output. | +| `--help` | flag | — | Show help and exit (works on the root and on every command). 
| + +**Environment variables.** `ENGRAVA_DB` and `ENGRAVA_CONFIG` are CLI fallbacks for +`--db` and `--config` respectively; the explicit flag always wins +(`--db` > `ENGRAVA_DB` > `./engrava.db`). + +```bash +export ENGRAVA_DB=/data/engrava.db +engrava info # uses /data/engrava.db +engrava --db other.db info # flag overrides the env var +``` + +## Commands + +| Command | Purpose | +|---|---| +| [`info`](#info) | Show a metrics snapshot for the database. | +| [`query`](#query) | Run a MindQL query. | +| [`snapshot`](#snapshot) | Export the whole database to a JSONL snapshot. | +| [`restore`](#restore) | Restore a database from a JSONL snapshot. | +| [`gc`](#gc) | Garbage-collect archived thoughts (and optionally expired ones). | +| [`migrate`](#migrate) | Run pending schema migrations. | +| [`export`](#export) | Export thoughts to a portable JSON file. | + +## Service resolution + +The `--service` option on `snapshot` and `restore` resolves the same way in both +commands: + +| `--service` | Services config loaded? | Result | +|---|---|---| +| `--service NAME` (explicit) | either | Targets service **NAME**. Its database is found/created in the services `data_dir` if a config is loaded, otherwise in the **parent directory of `--db`** (i.e. `<db-dir>/NAME.db`). | +| omitted | yes | Falls back to `services.default_service`. | +| omitted | no | Operates on the single `--db` database (not service mode). | + +In short: an explicit `--service` works even without a services config (using +`--db`'s directory as the data directory), while omitting it only enters +multi-service mode when a services config is present. + +### `info` + +Shows a metrics snapshot (counts, etc.) for the current database. Takes no +command-specific options. + +```bash +engrava --db engrava.db info +``` + +Use this after an upgrade or a restore to confirm the database is readable and +the counts look right. 
+ +### `query` + +Executes a [MindQL](mindql.md) query and prints the results in the chosen +`--format`. + +```bash +engrava query "MQL" +``` + +The `MQL` string is a positional argument. It accepts `FIND`, `COUNT`, `SELECT`, +or registered extension commands: + +```bash +engrava query "FIND thoughts WHERE lifecycle_status = 'ACTIVE'" +engrava query "COUNT thoughts WHERE priority = 'P1'" +engrava --format json query "SELECT thought_id, essence FROM thought LIMIT 5" +``` + +### `snapshot` + +Exports the **entire** database to a JSONL snapshot (one record per line). + +| Option | Type | Default | Description | +|---|---|---|---| +| `-o`, `--output` | path | derived (see below) | Output JSONL file path. | +| `--service` | name | see below | The service to snapshot (multi-service mode only). | + +**Default output path** depends on the mode: + +- **Single database:** `DB_STEM.snapshot.jsonl` next to the database — e.g. + `--db engrava.db` → `engrava.snapshot.jsonl` (the `.db` suffix is replaced). +- **Multi-service:** `DATA_DIR/NAME.snapshot.jsonl`. + +**`--service`** resolves in three ways (see [Service resolution](#service-resolution)): + +- **Explicit `--service NAME`** targets that service even with no services config + — the service database is looked up/created in the data directory, which is the + services config's `data_dir` if one is loaded, otherwise the **parent directory + of `--db`**. +- **Omitted, with a services config loaded** → falls back to + `services.default_service`. +- **Omitted, with no services config** → snapshots the single `--db` database. 
+ +```bash +engrava --db engrava.db snapshot -o backup.jsonl +engrava --db engrava.db snapshot # -> engrava.snapshot.jsonl +engrava --db /data/engrava.db snapshot --service tenant_a # -> /data/tenant_a.snapshot.jsonl +engrava --config engrava.yaml snapshot --service tenant_a # data_dir from config +``` + +> A snapshot exports `thought`, `edge`, `embedding`, and `action` records — but +> **not** the audit journal (`journal_entry`). See +> [Backup & Recovery](backup-and-recovery.md) for what this means and when to use +> a physical file backup instead. + +### `restore` + +Restores a database from a JSONL snapshot produced by `snapshot`. + +| Option | Type | Default | Description | +|---|---|---|---| +| `-i`, `--input` | path | **required** | JSONL snapshot file to restore. | +| `--clear` | flag | off | Clear existing data before restoring. | +| `--skip-embeddings` | flag | off | Import without embedding records. | +| `--re-embed` | flag | off | Re-embed all thoughts via the target provider, ignoring source embeddings. | +| `--service` | name | see below | The service to restore into. | + +`--service` resolves exactly as for [`snapshot`](#service-resolution): an explicit +`--service NAME` targets that service even without a services config (its database +resolves in the services `data_dir`, or the **parent directory of `--db`** when no +config is loaded); omitted with a services config falls back to +`services.default_service`; omitted with no services config restores into the +single `--db` database. + +`--skip-embeddings` and `--re-embed` are **mutually exclusive** — passing both +fails with: + +``` +Error: --re-embed and --skip-embeddings are mutually exclusive. +``` + +Use `--re-embed` when the target uses a different embedding model than the +snapshot (the embeddings would otherwise be incompatible — see +[Troubleshooting → EmbeddingModelMismatchError](troubleshooting.md#embeddingmodelmismatcherror-when-opening-an-existing-database)). 
+Use `--skip-embeddings` to import text only. + +```bash +engrava --db fresh.db restore -i backup.jsonl +engrava --db fresh.db restore -i backup.jsonl --clear --re-embed +``` + +> Restore recreates thoughts, edges, embeddings, and actions, **not** the audit +> journal — a restored database starts with an empty journal. + +### `gc` + +Garbage-collects `ARCHIVED` thoughts and their orphaned edges. With `--expired` +it also runs the TTL expiry cleanup first. + +| Option | Type | Default | Description | +|---|---|---|---| +| `--dry-run` | flag | off | Show what would be deleted without changing anything. | +| `--expired` | flag | off | Also run expiry cleanup (archive or delete per `ttl.strategy`) before collecting. | + +```bash +engrava --db engrava.db gc # delete ARCHIVED thoughts + orphaned edges +engrava --db engrava.db gc --expired # run expiry cleanup first (per strategy) +engrava --db engrava.db gc --expired --dry-run +``` + +The behaviour of `gc --expired` depends on `ttl.strategy`: with `delete` it +removes expired rows and then collects pre-existing archived rows; with the +default `archive` it archives the expired rows and stops (it does not collect +them in the same pass). See +[Data lifecycle → running cleanup](data-lifecycle.md#running-cleanup). + +### `migrate` + +Runs pending schema migrations (ensures the core tables exist and are +up to date). Takes no command-specific options. Safe to run after an upgrade. + +```bash +engrava --db engrava.db migrate +``` + +### `export` + +Exports thoughts to a portable JSON file (with edges and metadata). Unlike +`snapshot` (JSONL, whole-database, for backup/restore), `export` writes a single +indented JSON document and can be filtered by lifecycle status. + +| Option | Type | Default | Description | +|---|---|---|---| +| `-o`, `--output` | path | `DB_STEM.export.json` (derived) | Output JSON file path. | +| `--status` | lifecycle status | all | Only export thoughts with this `lifecycle_status` (e.g. `ACTIVE`). 
| + +```bash +engrava --db engrava.db export -o thoughts.json +engrava --db engrava.db export --status ACTIVE +``` + +## Journal verification + +There is **no `engrava verify` command** in this version. To verify the +[audit journal](audit-trail.md)'s hash chain, use the Python API: + +```python +result = await store.journal.verify_integrity() +print(result.valid) +``` + +## See also + +- [MindQL](mindql.md) — the query language `engrava query` runs +- [Backup & Recovery](backup-and-recovery.md) — snapshot/restore vs physical backup +- [Data Lifecycle](data-lifecycle.md) — what `gc` and `gc --expired` do +- [Configuration](configuration.md) — the `engrava.yaml` that `--config` loads diff --git a/docs/concepts.md b/docs/concepts.md new file mode 100644 index 0000000..d186f10 --- /dev/null +++ b/docs/concepts.md @@ -0,0 +1,247 @@ +# Core Concepts + +Engrava models an agent's memory as a **thought-graph**: typed *thoughts* +connected by typed *edges*, made searchable by *embeddings*, and refined over +time by *dreaming* into higher-order *reflections*. This page explains those +pieces as a mental model — what each is, why it exists, and when you'd create +it — before the how-to guides. Read it once and the rest of the docs will make +more sense. + +> For a one-line definition of any term used here (essence, cycle, signal, gate, +> provenance, …), see the [Glossary](glossary.md). + +``` + ┌──────────────────────────────────────────┐ + OBSERVATION │ "User prefers email over phone" │ essence (prompt-facing) + (a thought) │ content: "Stated during onboarding..." 
│ content (full text) + │ priority P2 · lifecycle ACTIVE │ + └───────────────┬──────────────────────────┘ + │ ASSOCIATED (an edge: typed, weighted) + ┌───────────────▼──────────────────────────┐ + BELIEF │ "This user is low-touch" │ + └───────────────┬──────────────────────────┘ + │ CONSOLIDATED_FROM (created by dreaming) + ┌───────────────▼──────────────────────────┐ + REFLECTION │ cluster summary of related thoughts │ (higher-order, system-made) + └──────────────────────────────────────────┘ +``` + +## Thought + +A **thought** (`ThoughtRecord`) is the unit of memory — one idea, fact, +observation, or message. Thoughts are *frozen* (immutable) value objects; you +don't mutate one in place, you `create_thought()` it and later +`update_thought()` to get a new version. + +### `essence` vs `content` (two text fields, on purpose) + +Every thought carries **two** texts, and the split is deliberate: + +- **`essence`** — the compact, canonical, **prompt-facing** one-liner + (1–200 characters, enforced). This is the text you inject into an LLM prompt + when this memory is retrieved. Keep it short and self-contained. +- **`content`** — the **full** source text, retained for full-text search and + provenance. It can be as long as you like. + +> Why it matters: when you retrieve memories to build a prompt, you want the +> tight `essence`, not the whole `content`. Putting the same long text in both +> defeats the purpose. Think *headline* (`essence`) vs *article* (`content`). 
+ +### Thought types + +`ThoughtType` is a closed set — choose the one that fits what you're storing: + +| Type | What it is | Who creates it | +|---|---|---| +| `OBSERVATION` | Something learned from the world (a user message, a fact) | you (ingest) | +| `BELIEF` | A held conclusion or stance derived from observations | you / your agent | +| `TASK` | Something to be done | you / your agent | +| `OUTPUT_DRAFT` | The agent's own outgoing content (a reply it produced) | your agent | +| `NOTE` | A free-form internal note | you / your agent | +| `REFLECTION` | A cluster summary produced by **dreaming** | the system (don't hand-create) | + +There is no `INSIGHT`/`IDEA`/`GOAL` — the set is exactly the six above. Type is +not cosmetic: dreaming only clusters `OBSERVATION`s by default, and +`REFLECTION` is reserved for dreaming's output, so mis-typing changes downstream +behaviour. + +### Priority + +`Priority` is `P1` (highest) … `P4` (lowest). It is one of the signals that +hybrid search fuses into a ranking, so higher-priority thoughts surface more +readily. Set it to reflect how important a memory is to keep at hand. + +### Lifecycle + +A thought moves through a small state machine: + +``` +CREATED → ACTIVE → DONE → ARCHIVED +``` + +`LifecycleStatus` transitions are enforced (`evolve()` rejects illegal jumps). +Most thoughts you create will start `ACTIVE`. `ARCHIVED` is a **soft-retired** +retention state and a marker for garbage collection — an archived regular thought +is **not** automatically hidden from `search_hybrid` / `list_thoughts` / +`count_thoughts`; it stays searchable until you remove it with `engrava gc`. The +only rows search auto-excludes are **expired** thoughts and **retired +REFLECTIONs**. See [Data Lifecycle](data-lifecycle.md) for the full +retention and garbage-collection behavior. + +## Edge + +An **edge** (`EdgeRecord`) is a typed, weighted, directional link between two +thoughts — this is what makes Engrava a *graph*, not just a table. 
The +`EdgeType` set includes `ASSOCIATED`, `DEPENDS_ON`, `DERIVED_FROM`, +`MESSAGE_OF`, `BRIDGE`, `CONSOLIDATED_FROM`, and `CONTESTED_BY`. `weight` (0.0–1.0) +expresses how strong the relation is. + +Create edges when a relationship between two memories is itself meaningful — +e.g. one thought supports, contradicts, or depends on another. Dreaming also +creates edges automatically (`ASSOCIATED` between consolidated thoughts, and +`CONSOLIDATED_FROM` from a reflection back to its sources). + +## Embedding + +An **embedding** is the vector representation of a thought that powers semantic +(meaning-based) search. Embeddings are optional: with no embedding provider +configured, search still works using the bundled lexical (FTS5/BM25) index, and +the vector signal is simply skipped. Configure a provider (and `auto_embed`) to +get semantic retrieval. See [Configuration](configuration.md) and the search +docs for the provider options. + +## Reflection + +A **reflection** is a `ThoughtType.REFLECTION` thought created by **dreaming**: +Engrava clusters semantically related thoughts and writes a higher-order summary +node, linked back to its members by `CONSOLIDATED_FROM` edges, with a centroid +embedding. Reflections are how a pile of individual observations becomes +fewer, more retrievable, higher-level memories over an agent's lifetime. You do +not create reflections by hand — dreaming makes them. See +[Dreaming](dreaming.md). + +## Cycle (the agent clock) + +A **cycle** is a *logical clock* — a monotonically increasing integer tick that +**you own and advance**. It is not wall-clock time and not a database row; +Engrava never increments or stores it for you. Typically one cycle = one agent +turn / interaction / scheduled pass. + +Three fields use it: + +- **`created_cycle`** / **`updated_cycle`** — required on every `ThoughtRecord` + (the model enforces `updated_cycle >= created_cycle`). 
They stamp *when, in + your agent's logical time*, a thought appeared and last changed. +- **`current_cycle`** — the value you pass into `search_hybrid(...)` and + `run_consolidation(...)` to tell Engrava "it is now tick N." + +Why a cycle exists *alongside* timestamps: it gives recency and dreaming +deterministic, wall-clock-independent math. Search's recency signal and all of +dreaming's age/scheduling gates (`min_age_cycles`, `schedule_every_n_cycles`, +`recency_half_life`) are expressed in cycles, not seconds. + +> **The trap to avoid.** Because Engrava does not advance the cycle for you, +> there are two distinct failure modes — and neither raises an error: +> +> - **Omitting it entirely** (`current_cycle=None`, the default in +> `search_hybrid`) makes the recency signal **inactive** — it is dropped from +> the ranking and its weight is redistributed to the other signals. +> - **Passing a constant** (e.g. always `current_cycle=0`, and never advancing +> `created_cycle`/`updated_cycle`) keeps recency active but **useless**: a +> thought's age is `current_cycle - updated_cycle`, so with everything frozen +> at the same value every memory looks equally fresh and recency cannot +> distinguish old from new. The same staleness also means dreaming's age gate +> (`min_age_cycles`) never opens — `created_cycle`/`current_cycle` never grow, +> so no thought ever ages enough to be promoted. +> +> **Do this instead:** keep a counter in your application, increment it once per +> turn, pass it as `current_cycle`, and use it for `created_cycle`/`updated_cycle` +> when building thoughts. On restart, recover it (e.g. from the maximum +> `created_cycle` you've stored) so it stays monotonic across process restarts. + +## Provenance (where a memory came from) + +Two distinct fields record origin, and they are easy to confuse: + +- **`source`** — a free-form **string** identifier of the origin (e.g. + `"human"`, `"ingest"`, your component name). Required, your choice. 
+- **`source_type`** — the **`KnowledgeSource` enum**: how the knowledge was + obtained. + +| `KnowledgeSource` | Set it when the memory came from… | +|---|---| +| `EXPERIENCE` | The agent's own experience / observed reality (the default) | +| `SEEDED_LLM` | Content seeded by an LLM up front | +| `DISTILLED_LLM` | Content distilled/derived by an LLM | +| `DREAMING` | Produced by consolidation — **the system sets this itself** on dream-created edges/reflections | + +Provenance is not decoration: dreaming can filter on it (e.g. preferring +experience-based confirmations), so setting `source_type` honestly lets you tune +what consolidation trusts. + +## Visibility (inner vs outer speech) + +`ThoughtVisibility` marks whether a thought may surface in the agent's **outer +speech** (what it says) or stays **internal** (what it only thinks): + +- **`private`** — never disclosed externally; internal memory only. +- **`selective`** — shared with trusted entities on request (the **default**). +- **`public`** — may appear in the agent's outer speech / output. + +Engrava *stores* the level; **honouring it is your application's +responsibility** (Engrava won't stop you from reading a `private` thought — it +records the intent so your agent can respect it). Use it to keep a privacy +boundary between what the agent knows and what it's allowed to say. + +## Reliability: `confidence` vs `confirmation_count` + +A thought carries **two different** notions of how much to trust it, and they +feed dreaming as separate signals: + +- **`confidence`** — a static `0.0–1.0` belief-strength **you assign** at + creation (nullable; treated as `0.5` when unset). "How sure am I of this?" +- **`confirmation_count`** — a counter of how many times the thought has been + **independently re-encountered / validated** over time. It grows via + `deduplicate=True` on `create_thought` (identical content bumps the count) or + your own logic. "How many times has reality re-confirmed this?" 
+ +Dreaming's `ConfidenceSignal` reads the first and `ConfirmationSignal` reads the +second, so they tune consolidation in different ways. (Relatedly, +`DreamingGates.allow_zero_confirmation` exists so single-write batch ingest — +where `confirmation_count` never grows — can still be consolidated.) + +## Putting it together + +```python +import uuid +from engrava import ( + ThoughtRecord, + ThoughtType, + Priority, + LifecycleStatus, + KnowledgeSource, + ThoughtVisibility, +) + +observation = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OBSERVATION, # learned from the world + essence="User prefers email over phone", # prompt-facing one-liner + content="The user said during onboarding that email is the best way to reach them.", + priority=Priority.P2, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=12, # your agent's logical clock, this turn + updated_cycle=12, + source="onboarding-flow", # free-form origin id + source_type=KnowledgeSource.EXPERIENCE, # how it was obtained + confidence=0.9, # how sure you are + visibility=ThoughtVisibility.SELECTIVE, # inner/outer-speech boundary +) +``` + +## Next + +- [Quick Start](quickstart.md) — create, link, and search in five minutes. +- [Dreaming](dreaming.md) — how consolidation turns observations into reflections. +- [Hybrid Search](search.md) — how the signals (including recency/cycle and priority) fuse into a ranking. +- [API Reference](api-reference.md) — the exact fields, enums, and methods. 
diff --git a/docs/concurrency.md b/docs/concurrency.md new file mode 100644 index 0000000..ca2df89 --- /dev/null +++ b/docs/concurrency.md @@ -0,0 +1,124 @@ +# Concurrency + +Engrava is built on SQLite, so it inherits SQLite's concurrency model: **many +concurrent readers, one writer at a time.** This page explains what that means in +practice — within one process and across processes — and the specific behaviours +to know about (busy timeout, the journal's in-process lock, and per-service +isolation). + +## WAL: many readers, one writer + +File databases opened via `from_config` use **WAL** (write-ahead logging) mode. +Under WAL: + +- **Readers don't block the writer and the writer doesn't block readers.** A + read sees a consistent snapshot while a write is in progress. +- **There is still only one writer at a time.** Two writes are serialised; the + second waits for the first to finish. + +This is ideal for read-heavy agent-memory workloads: retrieval (the hot path) is +all reads and scales freely; writes are comparatively infrequent. + +## Many async tasks, one store + +**A single store instance safely serves many concurrent `asyncio` tasks.** You do +not need a connection pool or multiple stores for in-process concurrency: + +- aiosqlite runs the actual SQLite calls on a dedicated background thread and + marshals every query to it, so concurrent `await`s against one store are + serialised onto that thread rather than racing. +- The store additionally guards order-sensitive operations (deduplication, the + embedding-model check) with internal `asyncio.Lock`s. + +What you must **not** do is share one store across **different event loops** — the +connection is bound to the loop it was created on. One store per loop; within +that loop, share it freely. (See +[Known Limitations](known-limitations.md#aiosqlite-proxy-architecture).) 
+ +## Busy timeout + +When a connection can't immediately get the lock it needs (another writer holds +it), SQLite waits up to the **busy timeout** before giving up with +`database is locked`. Engrava inherits Python's `sqlite3` default of **5000 ms +(5 s)** — it does not override it. + +For workloads with more write contention you can raise it on your own connection +before handing it to the store, or after `from_config` via the store's +connection: + +```python +import aiosqlite +from engrava import SqliteEngravaCore + +conn = await aiosqlite.connect("engrava.db") +conn.row_factory = aiosqlite.Row +await conn.execute("PRAGMA busy_timeout = 15000") # wait up to 15s for a lock +store = SqliteEngravaCore(conn) +await store.ensure_schema() +``` + +A longer busy timeout trades latency-on-contention for fewer `database is locked` +errors; tune it to your write pattern. + +## Multiple processes + +WAL allows multiple **processes** to read concurrently, and one to write — but +heavy multi-process **writing** of the same database file is **out of scope** for +Engrava, for two reasons: + +1. **SQLite is single-writer.** Multiple OS processes writing the same file + contend on the database lock; the busy timeout only papers over light + contention. +2. **The audit journal's lock is in-process only.** When journaling is enabled, + appends are serialised by an `asyncio.Lock` keyed on the connection — which + exists **only within one process**. A second process shares no such lock, so + two processes journaling the same database can race the journal's + monotonic `sequence_number`. The writer retries on the resulting + `UNIQUE` collision up to **5 times**; if contention persists it raises: + + ``` + RuntimeError: Failed to append journal entry after 5 retries due to sequence contention + ``` + + This is the signal that you have more than one process writing a journaled + database — which is unsupported. 
+ +If you need multiple independent writers, don't point them at the same file — +give each its own database (next section). + +## Per-service isolation + +`EngravaManager` runs **one database file per named service**, each with its own +connection and its own lock. This is the supported way to isolate writers (per +tenant, per worker, per logical partition): + +```python +from engrava import EngravaManager, load_config + +config = load_config("engrava.yaml") +async with EngravaManager.from_config(config.services) as mgr: + store_a = await mgr.get_store("tenant_a") # tenant_a.db + store_b = await mgr.get_store("tenant_b") # tenant_b.db +``` + +Because each service is a separate file, writes to `tenant_a` never contend with +writes to `tenant_b`, and each can be backed up or deleted independently. See the +[scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy) +for when to choose per-service isolation over in-store filtering. + +## Summary + +| Scenario | Supported? | Notes | +|---|---|---| +| Many async tasks, one store, one loop | ✅ | The normal case — share the store. | +| Many readers (WAL) | ✅ | Readers never block the writer. | +| One writer at a time | ✅ | SQLite serialises writes. | +| One store across multiple event loops | ❌ | Connection is loop-bound; one store per loop. | +| Many processes reading the same file | ✅ | WAL supports concurrent readers. | +| Many processes writing the same file | ❌ | Single-writer; journal lock is in-process — use `EngravaManager`. 
| + +## See also + +- [Deployment](deployment.md) — process model, files on disk, graceful shutdown +- [Known Limitations](known-limitations.md) — the aiosqlite proxy and write-safety notes +- [Audit Trail](audit-trail.md) — the journal whose lock is discussed above diff --git a/docs/configuration.md b/docs/configuration.md index da6e682..5aadc1a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -167,12 +167,102 @@ is no per-service `db_path` — the file is derived as `/.db`): |-----|------|---------|-------------| | `embeddings` | `dict` | — | Per-service embedding-provider override (same shape as the top-level `embeddings` section) | +### `journal` + +The hash-chain audit trail. Off by default. See [Audit Trail](audit-trail.md). + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `enabled` | `bool` | `false` | Record every thought/edge mutation as a hash-linked journal entry | + +```yaml +journal: + enabled: true +``` + +### `ttl` + +Time-to-live / auto-expiry of thoughts. See the +[data-lifecycle recipes](recipes/index.md). + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `strategy` | `str` | `"archive"` | What `cleanup_expired` does to expired thoughts: `"archive"` (soft, marks `ARCHIVED`) or `"delete"` (hard) | +| `check_every_n_operations` | `int` | `0` | Run auto-cleanup every *N* store operations (`0` = manual only, via `cleanup_expired()` / `engrava gc --expired`) | +| `default_ttl_seconds` | `int \| null` | `null` | Default TTL applied to new thoughts with no explicit `expires_at` (`null` = no default) | + +```yaml +ttl: + strategy: archive # or "delete" + check_every_n_operations: 100 + default_ttl_seconds: 2592000 # 30 days +``` + +### `ingest` + +Ingest-layer behaviour (content-hash deduplication). 
+ +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `deduplication_enabled` | `bool` | `true` | Whether ingest pipelines should pass `deduplicate=True` so identical `content` collapses into one thought (bumping `confirmation_count`) instead of a duplicate row | + +> Note: this flag advises ingest-layer callers; the persistence-layer +> `create_thought` still defaults to `deduplicate=False` — see +> [Recipes → Deduplicate repeated facts](recipes/index.md). + +### `hooks` + +Wire a custom `EngravaHooksProtocol` implementation by dotted path. See +[Extensions](extensions.md). + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `class` | `str \| null` | `null` | Dotted import path to a hooks class, last segment is the class name (e.g. `"my_package.hooks.MyHooks"`), instantiated and used by `from_config` | + +```yaml +hooks: + class: "my_package.hooks.MyHooks" +``` + +The path is split on the final dot (`module.path` + `ClassName`) — this is a +plain dotted path, **not** the `module.path:ATTRIBUTE` colon form used by +[`manifests.paths`](#manifests) below. + +### `manifests` + +Load extension manifests (their hooks + schema migrations). Accepts a plain +list of dotted paths, or a mapping with `discover` / `paths`. See +[Extensions](extensions.md). + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `paths` | `list[str]` | `[]` | Dotted `module.path:ATTRIBUTE` references to `ExtensionManifest` objects | +| `discover` | `bool` | `false` | Also scan the `engrava.extensions` entry-point group for manifests | + +```yaml +# list form +manifests: + - "my_plugin.manifest:MANIFEST" + +# or mapping form +manifests: + discover: true + paths: + - "my_plugin.manifest:MANIFEST" +``` + +> The `metrics:` section (latency window size, enable/disable) is documented in +> [Observability](observability.md). 
+ ## Environment Variables +Both are read by the **`engrava` CLI** only (library callers pass paths +explicitly to `load_config` / `SqliteEngravaCore`). + | Variable | Description | |----------|-------------| -| `ENGRAVA_CONFIG` | Path to the YAML configuration file | -| `ENGRAVA_DB` | Override `db_path` from configuration | +| `ENGRAVA_CONFIG` | Fallback path to the YAML configuration file when `--config` is omitted (`--config` > `ENGRAVA_CONFIG` > none) | +| `ENGRAVA_DB` | Fallback database-file path when `--db` is omitted (`--db` > `ENGRAVA_DB` > `./engrava.db`) | ## Multi-Service Usage diff --git a/docs/data-lifecycle.md b/docs/data-lifecycle.md new file mode 100644 index 0000000..41615c5 --- /dev/null +++ b/docs/data-lifecycle.md @@ -0,0 +1,170 @@ +# Data lifecycle, retention & deletion + +How a thought moves through its lifecycle, how time-to-live expiry works, and — +importantly for privacy and compliance — what it takes to **truly** erase data, +including the residue a naive delete leaves behind. + +> **Compliance note.** This page describes the mechanics honestly so you can build +> a correct retention/erasure process. The default expiry strategy **archives** +> (does not erase), and a hard delete can still leave content in the audit +> journal and in backups. Read the [GDPR / hard deletion](#gdpr-and-hard-deletion) +> section before relying on TTL for "deletion". + +## Lifecycle states + +Every thought carries a `LifecycleStatus`. There are four states: + +| State | Meaning | +|---|---| +| `CREATED` | Just created, not yet promoted into active use. | +| `ACTIVE` | In normal use — the default working state, included in queries. | +| `DONE` | Completed (e.g. a finished task) but retained. | +| `ARCHIVED` | Soft-retired and retained until garbage-collected. **Not a global results filter** — see the note below. | + +You set the status on the `ThoughtRecord` you create, and update it over the +thought's life. 
Archiving is the soft-retire step: an `ARCHIVED` thought still +exists (and its content is still stored) until you garbage-collect it. + +> **`ARCHIVED` does not hide a thought from search or queries.** Marking a +> regular thought `ARCHIVED` is a *retention* state, not a visibility filter: an +> archived `OBSERVATION` still appears in `search_hybrid` / `search_fts` and is +> still counted by `count_thoughts()` / `list_thoughts()`. Only two kinds of rows +> are auto-excluded: **expired** thoughts (dropped by the TTL expiry checks +> described below, unless you pass `include_expired=True`), and **retired +> REFLECTIONs** — a `REFLECTION` whose `lifecycle_status` is no longer `ACTIVE` is +> filtered out of search by a *freshness floor* so a stale cluster centroid can't +> resurface. This REFLECTION gate is type-specific; it does **not** apply to +> ordinary thoughts. To keep archived regular thoughts out of your own results, +> either filter on `lifecycle_status` yourself or remove them with `engrava gc`. + +## Time-to-live (TTL) and expiry + +A thought can carry an expiry time. Three ways to set it: + +- **Per-thought, absolute:** set `ThoughtRecord.expires_at` to a timestamp. +- **Per-thought, relative at create time:** pass `expires_after_seconds=` to + `create_thought(...)`, which computes `expires_at` for you. +- **A default for the whole store:** `ttl.default_ttl_seconds` in config applies a + default TTL to new thoughts that don't set their own (see + [Configuration → ttl](configuration.md#ttl)). + +Expiry is **not** automatic on a timer. Expired thoughts remain until a cleanup +pass runs (see [running cleanup](#running-cleanup) below). By default, expired +thoughts are **excluded** from `count_thoughts(...)` and `list_thoughts(...)` — +pass `include_expired=True` to include them: + +```python +live = await store.count_thoughts() # excludes expired +everything = await store.count_thoughts(include_expired=True) +``` + +## Archive vs. 
delete + +What a cleanup pass *does* to an expired thought is governed by the store's TTL +strategy, set via `ttl.strategy` in config (see +[Configuration → ttl](configuration.md#ttl)): + +| Strategy | Effect on an expired thought | Reversible? | Content erased? | +|---|---|---|---| +| `"archive"` (default) | Flips `lifecycle_status` to `ARCHIVED`; the row and its `content` stay in the database | Yes | **No** | +| `"delete"` | Removes the thought row from the `thought` table | No | From the live table, yes — but see [residue](#gdpr-and-hard-deletion) | + +The default is **`archive`** — chosen so expiry is non-destructive and +auditable. This means **expiry alone does not erase anything** under the default +configuration. To make expiry actually remove rows, set `ttl.strategy: delete`. + +## Running cleanup + +Expiry is applied by an explicit cleanup pass — nothing happens on a timer. + +**From Python:** `cleanup_expired()` returns a `CleanupResult`: + +```python +result = await store.cleanup_expired() +print(result.expired_count) # how many thoughts were expired +print(result.strategy_applied) # "archive" or "delete" (per config) +print(result.timestamp) # ISO-8601 time of the pass +``` + +You can also have the store run cleanup automatically every *N* operations via +`ttl.check_every_n_operations` (default `0` = manual only). + +**From the CLI:** `engrava gc --expired` runs the expiry cleanup per your TTL +strategy. What it does next depends on that strategy: + +```bash +engrava gc --expired # run expiry cleanup (per ttl.strategy) +engrava gc --expired --dry-run # show what would happen, change nothing +engrava gc # delete ARCHIVED thoughts (+ orphaned edges) +``` + +- **With `ttl.strategy: delete`:** the expired rows are deleted outright, and the + same pass then garbage-collects any pre-existing `ARCHIVED` thoughts. 
+- **With `ttl.strategy: archive` (default):** the expired rows are *archived* + (marked `ARCHIVED`), and the pass **stops there** — it does **not** also + garbage-collect archived rows in the same run. (Collecting the rows it just + archived would defeat the soft-retire.) To physically remove archived rows you + must either run a **separate** `engrava gc`, or switch to `ttl.strategy: + delete`. + +Plain `engrava gc` (no `--expired`) removes `ARCHIVED` thoughts and their +orphaned edges. This is how archived data is finally deleted from the live table. + +## GDPR and hard deletion + +If you must erase a user's data (e.g. a GDPR erasure request), be aware that +**neither archiving nor a single delete is sufficient on its own**. Three places +can retain the content: + +1. **Archive does not erase.** Under the default `ttl.strategy: archive`, an + "expired" thought is only marked `ARCHIVED` — the row and its `content` remain + in the database. Note that `engrava gc --expired` under the `archive` strategy + *archives* the rows and stops; it does **not** delete archived rows in the same + pass. To remove the row you must run a **separate** `engrava gc` afterwards, or + use `ttl.strategy: delete` so the row is deleted outright. +2. **The audit journal retains a content delta.** If the + [audit journal](audit-trail.md) is enabled, deleting a thought does **not** + remove its content from the journal. The original `INSERT_THOUGHT` entry holds + the content in its `delta`, and the `DELETE_THOUGHT` entry records the deletion + delta too — so the data survives in `journal_entry` after the thought row is + gone. A true erasure must also purge the relevant journal entries (and doing so + breaks the hash chain from that point — re-baseline if you depend on + verification). +3. **Backups.** Any snapshot or file backup taken before the deletion still + contains the data. Erasure must extend to your backup retention. 
+ +A correct hard-erasure procedure therefore looks like: delete (or +archive-then-gc) the thought rows → purge the matching `journal_entry` rows if +journaling is on → roll the deletion through your backup retention. Don't treat +"the thought no longer appears in search" as "the data is gone." + +## Reclaiming disk space + +Deleting rows — whether via `ttl.strategy: delete`, `engrava gc`, or a hard +erasure — **does not shrink the database file**. SQLite returns the freed pages +to an internal free-list and reuses them for future writes; the file stays the +same size on disk. + +To actually reclaim file size you must run `VACUUM`, which rebuilds the database +into a compact file. Plan for its cost: + +- **Exclusive lock.** `VACUUM` takes an exclusive lock for its whole duration — + no concurrent reads or writes. Run it during a maintenance window. +- **Temporary space.** It writes a fresh copy before swapping, so it needs + roughly **2× the database size** in free disk (temp + final) transiently. +- **Off-peak.** On a large database this can take a while; schedule it off-peak. + +```sql +VACUUM; -- rebuild in place (exclusive lock, ~2x temp space) +VACUUM INTO 'copy.db'; -- write a compacted copy to a new file (the original is not locked for as long) +``` + +Until you `VACUUM`, expect the file size to reflect the high-water mark, not the +live row count — this is normal SQLite behaviour, not a leak. 
+ +## See also + +- [Configuration → ttl](configuration.md#ttl) — the strategy and default-TTL knobs +- [Audit Trail](audit-trail.md) — what the journal records (and its delta residue) +- [CLI](cli.md#gc) — the full `engrava gc` option reference +- [Known Limitations](known-limitations.md) — storage and concurrency constraints diff --git a/docs/deployment.md b/docs/deployment.md new file mode 100644 index 0000000..bf32e0b --- /dev/null +++ b/docs/deployment.md @@ -0,0 +1,133 @@ +# Deployment + +How to run Engrava in production: opening the store, the database files on disk, +multi-worker setups, and shutting down cleanly. Engrava is an embedded library — +there is no server to deploy; "deployment" means how your process opens and owns +the database. + +For the concurrency model behind these recommendations, see +[Concurrency](concurrency.md). For backups, see +[Backup & Recovery](backup-and-recovery.md). + +## One store per process, opened at startup + +Open the store **once at process startup** and reuse it for the process's +lifetime. `from_config` opens and **owns** the connection (it applies the schema +and the right PRAGMAs), so use it as an async context manager that spans your +app's life: + +```python +from engrava import SqliteEngravaCore + + +async def main() -> None: + async with await SqliteEngravaCore.from_config("engrava.yaml") as store: + # Hold this store for the lifetime of the process / app. + await run_app(store) +``` + +- **Do not open a new store per request.** Opening a store applies schema checks + and PRAGMAs; doing it per request is wasteful and multiplies open handles to + the same file. +- **Do not share one store across event loops.** The underlying connection is + bound to the loop/thread that aiosqlite created it on — see + [Known Limitations](known-limitations.md#aiosqlite-proxy-architecture). One + store belongs to one running loop. 
+- **A single store safely serves many concurrent async tasks** within that one + loop — see [Concurrency](concurrency.md). You do **not** need a pool of stores + for in-process concurrency. + +## The database files on disk + +In WAL mode (the default for file databases opened via `from_config`), SQLite +keeps **three** files side by side: + +| File | Purpose | +|---|---| +| `engrava.db` | The main database. | +| `engrava.db-wal` | The write-ahead log — **uncommitted and recently-committed data lives here** until checkpointed. | +| `engrava.db-shm` | Shared-memory index for the WAL. | + +Operational consequences: + +- **Use a WAL-safe backup method** — copying only the `.db` file (or copying the + three files non-atomically while writes continue) can capture inconsistent + state. See [Backup & Recovery](backup-and-recovery.md) for the live-vs-stopped + options. +- **Put them on a real local filesystem.** SQLite + WAL on networked filesystems + (NFS, some container overlay mounts) can corrupt or fail locking. Use a local + disk or a properly-configured volume. +- **Permissions.** The process needs read/write on the directory (SQLite creates + and deletes `-wal`/`-shm`), not just the `.db` file. Lock the directory down to + the service user. + +## Containers + +- **Mount a volume for the database directory**, not just the file — SQLite needs + to create the `-wal`/`-shm` siblings next to the `.db`. +- Point `database.path` in your `engrava.yaml` at the mounted volume — that's the + setting `from_config` reads. (`ENGRAVA_DB` is a **CLI-only** fallback for the + `engrava --db` flag; it does **not** configure `from_config`, so application + code should set `database.path`, not rely on `ENGRAVA_DB`.) +- One container instance = one writer. If you scale to multiple replicas, they + must **not** all write the same database file (see + [multi-process](concurrency.md#multiple-processes)). 
Either run a single writer + replica, or give each replica its own database via + [`EngravaManager`](concurrency.md#per-service-isolation). + +## Multiple workers + +Engrava follows SQLite's single-writer model. For multi-worker app servers +(Gunicorn/Uvicorn workers, etc.): + +- **Reads scale freely** under WAL — many readers and one writer coexist. +- **Concentrate writes.** Heavy write fan-out across many OS processes hitting the + same file is out of scope; see [Concurrency → Multiple processes](concurrency.md#multiple-processes). +- **Per-tenant or per-worker isolation:** give each its own database file via + [`EngravaManager`](concurrency.md#per-service-isolation) when you need + independent writers. + +## Graceful shutdown + +Who closes the connection depends on how you opened the store — because the store +only closes a connection it **owns**: + +- **`from_config` (owned connection).** `from_config` opens and owns the + connection. Leaving the `async with` block closes it for you; equivalently, call + `await store.close()`, which **closes and releases the owned connection + cleanly**. (It does not issue an explicit WAL checkpoint — that is a + backup/maintenance step, `PRAGMA wal_checkpoint(TRUNCATE)`, covered in + [Backup & Recovery](backup-and-recovery.md#if-you-can-stop-or-quiesce-writers).) + + ```python + async with await SqliteEngravaCore.from_config("engrava.yaml") as store: + ... + # connection closed here + + # or, if you hold the store yourself: + await store.close() + ``` + +- **Manual `SqliteEngravaCore(conn)` (caller-managed connection).** The store does + **not** own your connection, so `store.close()` is a **no-op** here — *you* must + close the connection you created: + + ```python + conn = await aiosqlite.connect("engrava.db") + conn.row_factory = aiosqlite.Row + store = SqliteEngravaCore(conn) + ... + await conn.close() # the caller owns and closes the connection + ``` + + (Using `async with aiosqlite.connect(...) 
as conn:` handles this for you.) + +Wire whichever applies into your framework's shutdown hook (e.g. FastAPI +`lifespan`, a signal handler) so an interrupted process still closes cleanly. + +## See also + +- [Concurrency](concurrency.md) — the single-writer model, busy timeout, isolation +- [Backup & Recovery](backup-and-recovery.md) — WAL-safe backup and restore +- [Configuration](configuration.md) — the YAML the deployment loads +- [Known Limitations](known-limitations.md) — filesystem and locking constraints diff --git a/docs/dreaming.md b/docs/dreaming.md index 5c722b6..fdefd18 100644 --- a/docs/dreaming.md +++ b/docs/dreaming.md @@ -8,6 +8,75 @@ Dreaming runs **outside** the normal CRUD path — the consumer decides when to invoke `run_consolidation()` (after N cycles, in a cron job, or manually). +## How memory consolidation works (the dreaming loop) + +Think of a single memory's journey through an agent's lifetime. The first two +steps — ingest and confirmation — happen on the **normal write path** as you use +the store. The consolidation part is **manual**: when you call +`run_consolidation()`, that one call runs promotion → edge creation → reflection +clustering/creation → an orphan sweep, in order. + +``` + ingest you create an OBSERVATION ("user prefers email") (write path) + │ + ▼ + confirm the same fact is re-encountered over time, so its (write path) + │ confirmation_count grows (e.g. via deduplicate=True) + │ + ▼ run_consolidation(current_cycle=N) ── manual ── + │ + ┌─┴───────────────────────────────────────────────────────┐ + │ 1. promote thoughts that pass the gates and clear │ + │ promote_threshold are raised to priority P1 │ + │ 2. link a promoted thought *may* gain ASSOCIATED │ + │ edges to similar neighbours (when enabled) │ + │ 3. reflect related thoughts *may* be clustered into │ + │ REFLECTION meta-thoughts (when enabled) │ + │ 4. 
sweep stale REFLECTIONs whose sources left the │ + │ active set are retired │ + └─┬───────────────────────────────────────────────────────┘ + │ + ▼ + improved later searches rank the P1 memory higher (priority + retrieval signal), follow any new edges (graph signal), and can + surface a REFLECTION instead of many raw thoughts +``` + +Walking the journey: + +1. **Ingest.** You store memories as thoughts (typically `OBSERVATION`s) on the + normal write path. Dreaming does nothing yet. +2. **Confirm.** As the same knowledge recurs, its `confirmation_count` rises — + automatically when you write with `deduplicate=True` (identical content + collapses and bumps the count), or via your own logic. This is *evidence the + memory matters*, and it feeds dreaming's confirmation signal. (Distinct from + `confidence`, the static belief-strength you set — see + [Core Concepts](concepts.md#reliability-confidence-vs-confirmation_count).) +3. **Promote.** When you run consolidation, each candidate must first pass the + [gates](#gates) (e.g. old enough, enough confirmations) and then score above + `promote_threshold` across the weighted [signals](#signals). Survivors are + promoted to **P1**. (Both bars matter: a thought that passes the gates but + scores low is *not* promoted — see + [Troubleshooting](troubleshooting.md#dreaming-promotes-nothing-consolidation-is-inert).) +4. **Link.** A promoted thought *may* gain `ASSOCIATED` [edges](#edge-creation) + to similar neighbours — when edge creation is enabled, the thought has a stored + embedding, and qualifying neighbours (above `min_similarity`) are found. New + edges persist the structure in the graph, idempotently (re-runs don't + duplicate edges). +5. 
**Reflect.** Related thoughts *may* be clustered and summarised into + [`REFLECTION`](#reflections-meta-consolidation) meta-thoughts — a centroid + embedding plus `CONSOLIDATED_FROM` edges back to the members — when reflections + are enabled and eligible clusters pass the clustering/quality gates. This turns + a pile of observations into fewer, higher-level memories. (A REFLECTION whose + source cluster later leaves the active set is automatically retired so a stale + summary can't resurface.) +6. **Improved retrieval.** All of this changes future + [hybrid search](search.md): the P1 memory ranks higher via the priority + signal, any new edges feed the opt-in graph signal, and reflections let one + high-level memory stand in for many raw ones. + +The rest of this page is the knob-by-knob reference for each phase. + ## Quick start ```python @@ -75,6 +144,7 @@ class MySignal: def __call__(self, thought: ThoughtRecord, ctx: DreamingContext) -> float: return 0.42 + ext = DreamingExtension( config=config, custom_signals={"my_signal": MySignal()}, @@ -224,6 +294,14 @@ counts from member text, centroid from member vectors). LLM-generated prose summaries belong in downstream extension hooks, not in the core graph layer. +> **Navigating the lineage.** The `CONSOLIDATED_FROM` edges are queryable +> through dedicated store helpers — `consolidated_member_ids(reflection_id)`, +> `consolidated_source_statuses(reflection_id)`, and the reverse +> `reflections_consolidated_from(source_id)`. Use them to walk from a REFLECTION +> to its sources and back (e.g. for provenance views or orphan detection) +> instead of querying the edge table directly. See +> [REFLECTION lineage](api-reference.md#reflection-lineage) in the API reference. 
+ ### How clustering works Two algorithms are available via `DreamingGates.cluster_algorithm`: @@ -267,8 +345,8 @@ extensions: ```python result = await ext.run_consolidation(store, current_cycle=42) -print(result.promoted_count) # thoughts promoted to P1 -print(result.edges_created) # ASSOCIATED edges created +print(result.promoted_count) # thoughts promoted to P1 +print(result.edges_created) # ASSOCIATED edges created print(result.reflections_created) # new REFLECTION thoughts created ``` diff --git a/docs/faq.md b/docs/faq.md new file mode 100644 index 0000000..a13f870 --- /dev/null +++ b/docs/faq.md @@ -0,0 +1,116 @@ +# FAQ + +Short answers to the questions that come up most. For "something is broken" see +[Troubleshooting](troubleshooting.md); for "is this the right tool" see +[Positioning](positioning.md). + +## Does Engrava call an LLM? Do I need an API key? + +No. Engrava never calls a language model and needs no API key to run. It stores +and retrieves what your agent gives it; deciding *what* to remember (extraction, +summarisation) is your agent's job, above the storage layer. The one feature +that synthesises new thoughts — [dreaming](dreaming.md) — is purely structural +(clustering, centroids, keyword counts), with no LLM involved. See +[Non-goals](positioning.md#non-goals). + +An API key is only relevant if **you** choose a remote embedding provider (e.g. +an OpenAI-compatible endpoint) — and that's for embeddings, not for any +Engrava-side reasoning. See the [Embeddings guide](guides/embeddings.md). + +## Does it need network access or any running service? + +No. Engrava is an embedded library built on SQLite — one `pip install`, runs +in-process, no server, no network. The only time network is involved is if you +configure a remote embedding provider yourself. + +## Are embeddings required? + +No. Without an embedding provider, search runs on FTS5/BM25 (keyword), priority, +and recency signals — semantic vector matching is simply skipped. 
Add a provider +(local or remote) when you want semantic retrieval. See the +[Embeddings guide](guides/embeddings.md). Note that storing on write only embeds +when you set both `embedding_provider=...` **and** `auto_embed=True`. + +## How large a corpus can it handle? + +The default vector backend brute-forces cosine similarity in Python, which works +well up to roughly **100k embeddings**. Beyond that, install the `sqlite-vec` +backend (`pip install engrava[vec]`, then `extensions.vector.backend: +sqlite-vec`) for indexed vector search. FTS5 scales well independently. SQLite +itself has been exercised here into the multi-GB / millions-of-thoughts range. +See [Known Limitations](known-limitations.md#sqlite-vec-pre-v1-status). + +## Can multiple processes or tasks use the same store at once? + +A single process can drive **many async tasks** against one store safely — +aiosqlite serialises them on its background thread, and WAL mode lets readers and +a single writer coexist. SQLite is **single-writer**, so heavy concurrent writes +from **multiple processes** are out of scope. For multi-tenant isolation, give +each tenant its own database file via `EngravaManager` (each has its own lock). +See [Known Limitations → Concurrent Write Safety](known-limitations.md#concurrent-write-safety) +and the [migration guide's scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy). + +## How do I scope search to one user or session? + +The `search_*` methods are **unscoped by default** — they take no `user_id` / +`session_id` filter and rank across the whole store. Scope it yourself with one +of three patterns: over-fetch + post-filter, one store per tenant via +`EngravaManager`, or a raw-SQL pre-filter on `metadata_json` with `json_extract`. +The tradeoffs are laid out in the +[scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy). + +## When should I enable dreaming? 
+ +Enable [dreaming](dreaming.md) when memory **accumulates over time** and you want +the store to surface and link what matters: it promotes important thoughts to P1, +builds associative edges, and clusters related thoughts into +[`REFLECTION`](concepts.md) summaries. It is not useful on a tiny or write-once +store. Run it periodically (every N cycles, a cron +job, or manually) — never on the hot CRUD path. For single-write batch ingest, +keep `allow_zero_confirmation=True` or nothing will ever pass the confirmation +gate. See the agent loop's +[consolidation cadence](guides/agent-memory.md) pattern. + +## What is a "cycle" and do I have to manage it? + +A cycle is a **consumer-owned monotonic logical clock** — your agent's tick. +Engrava never advances or persists it for you; you pass `current_cycle` into +search and consolidation. It drives the recency signal and the dreaming age gate. +On restart, recover it from `max(created_cycle)` in the store. + +Two ways to get it wrong have different effects: passing `current_cycle=None` +(the `search_hybrid` default) makes the recency signal **inactive** — it is +dropped from the ranking. Passing a **constant** (e.g. always `0`, never +advancing `created_cycle`/`updated_cycle`) keeps recency *active but useless* — +every thought's age collapses to the same value, so nothing looks more recent +than anything else, and the dreaming age gate (`min_age_cycles`) never opens. +Advance the cycle each turn. See +[Core Concepts → Cycle](concepts.md) and the related +[Troubleshooting entry](troubleshooting.md#dreaming-promotes-nothing-consolidation-is-inert). + +## How do I back up the database safely? + +Because Engrava uses WAL mode, a naive copy of just the `.db` file can miss +in-flight data in the `-wal` file. Use a WAL-safe approach — checkpoint then +copy, `VACUUM INTO`, or SQLite's backup API. Note that a logical snapshot does +**not** include the audit journal. 
See [Upgrade Guide](upgrade.md) for the +current backup guidance. + +## Is the audit trail tamper-proof? + +It is **tamper-evident**, not tamper-proof. The journal is a keyless in-file +SHA-256 hash chain: it reliably detects accidental corruption and naive edits or +truncation, but a write-capable actor who rewrites the whole file and recomputes +the chain is out of its threat model. Treat it as integrity evidence with OS +file permissions and periodic off-box verification, not as a cryptographic +guarantee against a privileged attacker. It is **off by default** +(`journal.enabled: false`). See [Audit Trail](audit-trail.md). + +## Is Engrava production-ready? + +Engrava is published on PyPI and maintained to a strict quality bar (typed, +linted, high test coverage). For production, the things to plan are the same as +for any embedded SQLite system: pick the right vector backend for your corpus +size, respect the single-writer model, set up WAL-safe backups, and (if you need +it) enable and monitor the audit trail. The [Known Limitations](known-limitations.md) +page is the honest list of constraints to design around. diff --git a/docs/glossary.md b/docs/glossary.md new file mode 100644 index 0000000..61c3568 --- /dev/null +++ b/docs/glossary.md @@ -0,0 +1,161 @@ +# Glossary + +Short definitions of the terms Engrava uses, each linking to the page that +explains it in depth. New to Engrava? Read [Core Concepts](concepts.md) first — +this page is a quick reference, not a tutorial. + +### Thought + +The unit of memory — one idea, fact, observation, or message, stored as a frozen +(immutable) `ThoughtRecord`. You don't mutate a thought in place; you +`create_thought()` it and `update_thought()` to get a new version. See +[Core Concepts → Thought](concepts.md#thought). + +### Essence + +The compact, canonical, **prompt-facing** one-liner of a thought (1–200 +characters, enforced) — the text you inject into an LLM prompt when the memory is +retrieved. Think *headline*. 
See +[Core Concepts → essence vs content](concepts.md#essence-vs-content-two-text-fields-on-purpose). + +### Content + +The **full** source text of a thought, retained for full-text search and +provenance — as long as you like. Think *article* (to the essence's *headline*). +See [Core Concepts → essence vs content](concepts.md#essence-vs-content-two-text-fields-on-purpose). + +### Edge + +A typed, weighted, directional link between two thoughts — what makes Engrava a +*graph* rather than a flat table. The `EdgeType` set is `ASSOCIATED`, +`DEPENDS_ON`, `DERIVED_FROM`, `MESSAGE_OF`, `BRIDGE`, `CONSOLIDATED_FROM`, and +`CONTESTED_BY`; `weight` (0.0–1.0) expresses how strong the relation is. See +[Core Concepts → Edge](concepts.md#edge). + +### Embedding + +The vector representation of a thought that powers semantic (meaning-based) +search. Embeddings are optional — without a provider, search falls back to the +lexical (FTS5) index and the vector signal is skipped. See the +[Embeddings guide](guides/embeddings.md). + +### Reflection + +A higher-order summary thought (`ThoughtType.REFLECTION`) created by **dreaming**: +Engrava clusters semantically related thoughts and writes a centroid-embedded +summary node, linked back to its members by `CONSOLIDATED_FROM` edges. You don't +create reflections by hand. See [Core Concepts → Reflection](concepts.md#reflection) +and [Dreaming](dreaming.md). + +### Dreaming + +The periodic, off-the-hot-path consolidation process you invoke with +`run_consolidation()`: it scores stored thoughts, **promotes** the important ones, +links related ones with edges, and clusters them into reflections. No LLM is +involved — it is purely structural. See [Dreaming](dreaming.md). + +### Consolidation + +Another name for what dreaming does in a single pass — evaluating candidates and +producing promotions, edges, and reflections via `run_consolidation()`. See +[Dreaming](dreaming.md). 
+ +### Promotion + +The act, during consolidation, of marking an important thought by setting its +priority to **P1** so it surfaces more readily in search. Whether a candidate is +promoted depends on the [gates](#gate) and the `promote_threshold`. See +[Dreaming](dreaming.md). + +### Cycle + +A **logical clock** — a monotonically increasing integer tick that *you own and +advance* (typically one cycle per agent turn). It is not wall-clock time and not +a stored row; Engrava never increments it for you. It drives the recency signal +and dreaming's age gates. Leaving it at `None` makes recency inactive; freezing it +at a constant makes recency useless and stalls dreaming. See +[Core Concepts → Cycle](concepts.md#cycle-the-agent-clock). + +### Signal + +One scoring component that [hybrid search](#hybrid-search) computes for a +candidate and fuses into the final rank. Engrava has five: FTS5 keyword, vector +similarity, recency, priority, and graph. A signal whose prerequisite is missing +(e.g. no embeddings) is skipped rather than erroring. See [Search](search.md). + +### Gate + +A cheap boolean check in dreaming that a candidate must pass *before* it is scored +for promotion — e.g. `min_age_cycles` (the thought must be old enough) and the +confirmation gate. Gates filter out clearly ineligible thoughts. See +[Dreaming → Gates](dreaming.md#gates). + +### Priority + +A thought's importance level, `P1` (highest) to `P4` (lowest). It is one of the +hybrid-search signals, so higher-priority thoughts surface more readily; dreaming +**promotes** thoughts to `P1`. See [Core Concepts → Priority](concepts.md#priority). + +### Lifecycle + +The small state machine a thought moves through: `CREATED → ACTIVE → DONE → +ARCHIVED` (`LifecycleStatus`, with transitions enforced). `ARCHIVED` is a +soft-retired state and a thought there remains (and stays searchable) until +garbage-collected — it is a retention marker, not an automatic results filter. 
See +[Core Concepts → Lifecycle](concepts.md#lifecycle) and +[Data Lifecycle](data-lifecycle.md). + +### Provenance + +Where a memory came from, recorded in two fields: `source` (a free-form string id +you choose, e.g. `"onboarding-flow"`) and `source_type` (the `KnowledgeSource` +enum: `EXPERIENCE`, `SEEDED_LLM`, `DISTILLED_LLM`, `DREAMING`). Dreaming can +filter on provenance, so set it honestly. See +[Core Concepts → Provenance](concepts.md#provenance-where-a-memory-came-from). + +### Confirmation + +`confirmation_count` — a counter of how many times a thought has been +independently re-encountered or validated over time (grows via `deduplicate=True` +or your own logic). Distinct from `confidence`, the static belief-strength you +assign at creation. Dreaming reads them as separate signals. See +[Core Concepts → confidence vs confirmation_count](concepts.md#reliability-confidence-vs-confirmation_count). + +### Visibility + +`ThoughtVisibility` — whether a thought may surface in the agent's **outer +speech**: `private` (internal only), `selective` (shared on request — the +default), or `public` (may appear in output). Engrava stores the level; +**honouring it is your application's responsibility**. See +[Core Concepts → Visibility](concepts.md#visibility-inner-vs-outer-speech). + +### Hybrid search + +`search_hybrid()` — retrieval that fuses up to five [signals](#signal) (FTS5 +keyword, vector, recency, priority, graph) into one ranked result, rather than +relying on vector similarity alone. See [Search](search.md). + +### Graph signal + +The fifth, **opt-in** hybrid-search signal: a 1-hop-weighted neighbour boost where +a candidate gains score if its graph neighbours also match the query. Disabled by +default (`default_graph_weight = 0.0`), so no graph queries run unless you enable +it. See [Search](search.md). + +### Percept + +In the agent loop, an incoming observation (e.g. 
a user message) stored as an +`OBSERVATION` thought, typically tagged with the `percept(...)` helper. It is what +the agent *takes in*. See [Building a memory-backed agent](guides/agent-memory.md). + +### Utterance + +In the agent loop, the agent's own outgoing reply, stored as an `OUTPUT_DRAFT` +thought. It is what the agent *produces*. See +[Building a memory-backed agent](guides/agent-memory.md). + +## See also + +- [Core Concepts](concepts.md) — the same ideas as a guided mental model +- [Search](search.md) — the signal model in depth +- [Dreaming](dreaming.md) — consolidation, gates, promotion, reflections diff --git a/docs/guides/agent-memory.md b/docs/guides/agent-memory.md new file mode 100644 index 0000000..becbba8 --- /dev/null +++ b/docs/guides/agent-memory.md @@ -0,0 +1,260 @@ +# Building a memory-backed agent + +This guide shows the canonical way to wire Engrava into an agent's turn loop: +give a chat/agent long-term memory that persists across sessions and surfaces +relevant context on every turn. It's the end-to-end pattern behind Engrava's +one-line pitch — "the memory database for AI agents." + +A complete, runnable version of everything here ships as +[`examples/agent_loop.py`](https://github.com/sovantica/engrava/blob/main/examples/agent_loop.py) +— no LLM or embedding API required (it uses a canned responder and a +deterministic embedder). This page walks through the shape of that loop. + +> New to the model (thought, edge, reflection, **cycle**)? Read +> [Core Concepts](../concepts.md) first — this guide assumes those terms. + +## The loop, in one picture + +Per user turn: + +``` +user message + │ + ▼ +1. store it as a percept ──────────────► create_thought(OBSERVATION) +2. retrieve relevant memory ───────────► search_hybrid(query, current_cycle) +3. build prompt from retrieved essences ─► call your LLM +4. store the reply as an utterance ─────► create_thought(OUTPUT_DRAFT) +5. 
record the action taken ─────────────► create_action(ActionRecord) +6. advance the cycle counter ───────────► cycle += 1 (you own this clock) + │ + └─ every N turns ────────────────────► dreaming.run_consolidation(current_cycle) +``` + +## Setup + +Create one store for the lifetime of the agent. Configure an embedding provider +so retrieval is semantic (the example uses a deterministic stand-in; in +production pass a real provider such as `SentenceTransformerProvider` or +`OpenAICompatibleProvider`, configurable via +[`engrava.yaml`](../configuration.md)): + +```python +import aiosqlite +from engrava import SqliteEngravaCore, CallbackProvider + +provider = CallbackProvider( + callback=my_embed_fn, # swap in a real provider in production + dimension=64, + model_name="demo", +) +conn = await aiosqlite.connect("agent-memory.db") # a file persists across runs +conn.row_factory = aiosqlite.Row +store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True) +await store.ensure_schema() +``` + +`auto_embed=True` means thoughts are embedded on write. At search time you may +pass an explicit `query_vector`; if you omit it, the store embeds the query +text for you **when an embedding provider is configured**. Passing it yourself +is handy when you've already computed the vector or want a different query +representation. + +## Step 1 — store the incoming message (a *percept*) + +Each user message becomes an `OBSERVATION` thought, tagged with `percept(...)` +metadata so its origin is recorded. 
Extend that metadata with a `session_id` +(which conversation) and `turn_index` (position within it) so every memory is +anchored to its conversation — these are the keys you'd later filter on (or +post-filter on) to scope retrieval to one session or user: + +```python +import uuid +from engrava import ThoughtRecord, ThoughtType, Priority, LifecycleStatus, percept + +async def store_percept(store, text, cycle, user_id, session_id, turn_index): + record = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OBSERVATION, + essence=text[:200], # the prompt-facing one-liner + content=text, # the full message + priority=Priority.P2, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=cycle, # the agent clock (see step 6) + updated_cycle=cycle, + source=user_id, + metadata={ + **percept(source_id=user_id, label="user"), + "session_id": session_id, + "turn_index": turn_index, + }, + ) + return await store.create_thought(record) +``` + +## Step 2 — retrieve relevant memory + +Before calling the LLM, pull the most relevant prior memories with +`search_hybrid`. Pass `current_cycle` so the recency signal works, and turn the +returned `(thought_id, score)` tuples back into text via `get_thought`: + +```python +async def retrieve_context(store, query, cycle): + result = await store.search_hybrid( + query, + query_vector=my_embed_fn(query), # optional: omit to let the provider embed `query` + top_k=3, + current_cycle=cycle, + ) + essences = [] + for thought_id, _score in result.results: + record = await store.get_thought(thought_id) + if record is not None: + essences.append(record.essence) # essence = prompt-ready text + return essences +``` + +`result.results` is a list of `(thought_id, score)` — Engrava returns IDs, not +records, so you fetch the ones you want. `result.backends_used` tells you which +signals contributed (e.g. `{"fts5", "vector", "recency"}`). 
+ +## Step 3 — build the prompt and call your LLM + +This is the only step that touches your model. Engrava is LLM-free; you own the +call: + +```python +prompt = "Context:\n" + "\n".join(f"- {c}" for c in context) +prompt += f"\n\nUser: {user_message}\nAssistant:" +reply = await my_llm(prompt) # your provider here +``` + +## Step 4 — store the agent's reply (an *utterance*) + +Persist what the agent said as an `OUTPUT_DRAFT` thought with `utterance(...)` +metadata, so the agent's own outputs are part of memory too: + +```python +from engrava import utterance + +async def store_utterance(store, reply, cycle, session_id, turn_index): + record = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OUTPUT_DRAFT, + essence=reply[:200], + content=reply, + priority=Priority.P3, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=cycle, + updated_cycle=cycle, + source="agent", + metadata={ # same session + turn as the percept it answered + **utterance(), + "session_id": session_id, + "turn_index": turn_index, + }, + ) + return await store.create_thought(record) +``` + +## Step 5 — record the action taken (optional) + +If your agent *does* things (sends a message, calls a tool), record each as an +`ActionRecord` linked to the source thought. This is how the audit/action +surface tracks what the agent did and whether it succeeded: + +```python +from engrava import ActionRecord, ActionType, ActionStatus, VerificationStatus + +await store.create_action( + ActionRecord( + action_id=str(uuid.uuid4()), + source_thought_id=percept_thought.thought_id, + action_type=ActionType.MESSAGE, # or TOOL_CALL / CLI_OUTPUT / STATE_UPDATE + intent="answered user", + status=ActionStatus.CONFIRMED, + verification_status=VerificationStatus.CONFIRMED, + ) +) +``` + +Read them back with `await store.get_actions(thought_id)`. + +## Step 6 — advance the cycle + +A **cycle** is the agent's logical clock, and **you own it** — Engrava never +advances or persists it. 
Increment it once per turn and use it for +`created_cycle`/`updated_cycle` and the `current_cycle` you pass to search and +consolidation: + +```python +cycle = 0 +while running: + ... # steps 1–5 use `cycle` + cycle += 1 +``` + +If you leave it at a constant, recency can't distinguish old memories from new +and dreaming's age gate never opens (see +[Cycle (the agent clock)](../concepts.md#cycle-the-agent-clock)). On restart, +recover it so it stays monotonic — see [Persistence across restarts](#persistence-across-restarts). + +## Step 7 — consolidate periodically + +Dreaming turns accumulated observations into higher-order REFLECTIONs. In a +long-running agent, run it on a cadence — e.g. every N turns — rather than every +turn: + +```python +from engrava import DreamingExtension, DreamingConfig + +dreaming = DreamingExtension(config=DreamingConfig(enabled=True)) + +# inside the loop, after advancing the cycle: +if cycle % 20 == 0: + result = await dreaming.run_consolidation(store, current_cycle=cycle) +``` + +The cadence is yours to choose: every-N-turns (as above), a background asyncio +task on a timer, or an out-of-process job. Engrava is single-writer, so run +consolidation on the same writer that handles turns (or coordinate so they don't +write concurrently). A brand-new store has little to consolidate — REFLECTIONs +emerge as memories accumulate and repeat. See [Dreaming](../dreaming.md) for the +knobs. + +## Persistence across restarts + +- **Embeddings persist.** They are stored in the database; you do **not** + re-embed on a normal restart. (You only need `engrava restore --re-embed` + when you deliberately change the embedding model.) +- **The cycle counter does not persist** — Engrava doesn't store it. Recover it + on startup so it keeps increasing. 
`list_thoughts` returns rows ordered by + `updated_cycle` descending, so the most recent thought carries the highest + cycle you've used; resume one past it: + + ```python + recent = await store.list_thoughts(limit=1) # ordered by updated_cycle desc + cycle = (recent[0].updated_cycle + 1) if recent else 0 + ``` + +- **Model lock.** If you configured an embedding provider, the store remembers + which model produced its vectors; calling `store_embedding` later with a + different model raises `EmbeddingModelMismatchError`. Keep the same provider + across restarts (or migrate deliberately). + +## Full example + +The complete, runnable loop — including the deterministic embedder and the +mock LLM so it runs with zero external dependencies — is in +[`examples/agent_loop.py`](https://github.com/sovantica/engrava/blob/main/examples/agent_loop.py): + +```bash +python examples/agent_loop.py +``` + +## Next + +- [Core Concepts](../concepts.md) — thought / edge / reflection / cycle. +- [Hybrid Search](../search.md) — how the retrieval ranking works. +- [Dreaming](../dreaming.md) — consolidation in depth. +- [Configuration](../configuration.md) — wiring an embedding provider via `engrava.yaml`. diff --git a/docs/guides/embeddings.md b/docs/guides/embeddings.md new file mode 100644 index 0000000..6899d8b --- /dev/null +++ b/docs/guides/embeddings.md @@ -0,0 +1,228 @@ +# Embeddings + +Engrava's semantic (meaning-based) search is powered by **embeddings** — vector +representations of your thoughts. This guide shows how to wire a real embedding +provider so retrieval actually understands meaning, and how the query side +works. + +> **Embeddings are optional.** With no provider configured, search still works +> using the bundled lexical FTS5/BM25 index — the vector signal is simply +> skipped (`HybridSearchResult.backends_used` will not contain `"vector"`). Add +> a provider to get semantic retrieval. + +## Two things a provider gives you + +1. 
**Ingest-time embedding** — with `auto_embed=True`, every thought is embedded + on write, so it becomes findable by meaning. +2. **Query-time embedding** — at search time the query must also be a vector. + `search_hybrid` takes the query *text* and, when a provider is configured, + embeds it **for you** (unless you pass an explicit `query_vector`). + `search_similar` takes a *vector* directly, so you embed the query yourself + first. See [The query side](#the-query-side) for both. + +The corpus and the query must use **the same model / dimension** — once a store +has embeddings for one model, writing with a different model raises +`EmbeddingModelMismatchError`. + +## Wiring a provider + +Pass the provider to the store constructor (and set `auto_embed=True`): + +```python +import aiosqlite +from engrava import SqliteEngravaCore, SentenceTransformerProvider + +provider = SentenceTransformerProvider(model_name="all-MiniLM-L6-v2") +async with aiosqlite.connect("engrava.db") as conn: + conn.row_factory = aiosqlite.Row + store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True) + await store.ensure_schema() +``` + +Or declare it in `engrava.yaml` and let `from_config` build it (see the +[`embeddings` section](../configuration.md)): + +```yaml +embeddings: + provider: sentence-transformer + model: all-MiniLM-L6-v2 + auto_embed: true +``` + +```python +from engrava import SqliteEngravaCore + +async with await SqliteEngravaCore.from_config("engrava.yaml") as store: + ... # provider wired from config, auto_embed honoured +``` + +## Providers + +Every provider implements the same async interface — `await provider.embed(text)` +returns a `list[float]` — so they're interchangeable. Pick by where you want the +model to run. + +### `SentenceTransformerProvider` — local model (no API, no network) + +Runs a sentence-transformers model on your machine. Requires the +`embeddings-local` extra (pulls `sentence-transformers` + `torch`). 
+ +```bash +pip install "engrava[embeddings-local]" +``` + +```python +from engrava import SentenceTransformerProvider + +provider = SentenceTransformerProvider( + model_name="all-MiniLM-L6-v2", # default: all-MiniLM-L12-v2 + device="cpu", # or "cuda" + batch_size=32, +) +``` + +No API key, no network after the first model download. Best default for +self-hosting. + +### `OpenAICompatibleProvider` — OpenAI or any OpenAI-compatible API + +Calls an OpenAI-style `/embeddings` endpoint. Requires the `embeddings-openai` +extra (pulls `httpx`). + +```bash +pip install "engrava[embeddings-openai]" +``` + +```python +import os +from engrava import OpenAICompatibleProvider + +provider = OpenAICompatibleProvider( + model_name="text-embedding-3-small", # this is the default + base_url="https://api.openai.com/v1", # default; point at any compatible API + api_key=os.environ["OPENAI_API_KEY"], # or omit — falls back to $OPENAI_API_KEY +) +``` + +`api_key` defaults to the `OPENAI_API_KEY` environment variable when omitted. +Set `base_url` to target a compatible gateway (Azure OpenAI, a local proxy, etc.). + +### `OllamaProvider` — local Ollama server + +Calls a running [Ollama](https://ollama.com) instance. Requires the +`embeddings-ollama` extra (pulls `httpx`); no API key. + +```bash +pip install "engrava[embeddings-ollama]" +``` + +```python +from engrava import OllamaProvider + +provider = OllamaProvider( + model_name="nomic-embed-text", # default + base_url="http://localhost:11434", # default Ollama address +) +``` + +### `HuggingFaceProvider` — HuggingFace Inference API + +Calls the HuggingFace Inference API. Requires the `embeddings-hf` extra (pulls +`huggingface_hub`). 
+ +```bash +pip install "engrava[embeddings-hf]" +``` + +```python +import os +from engrava import HuggingFaceProvider + +provider = HuggingFaceProvider( + model_name="sentence-transformers/all-MiniLM-L12-v2", # default + api_key=os.environ["HF_TOKEN"], # or omit — falls back to $HF_TOKEN +) +``` + +`api_key` defaults to the `HF_TOKEN` environment variable when omitted. + +### `CallbackProvider` — bring your own embedding function + +Wrap any function `str -> list[float]`. Built-in (no extra). Use it for a custom +model, a cached lookup, or testing. + +```python +from engrava import CallbackProvider + +provider = CallbackProvider( + callback=my_embed_fn, # str -> list[float] + dimension=384, # the length your callback returns + model_name="my-model", +) +``` + +> Do **not** ship a placeholder like `lambda text: [0.1] * 384` — a constant +> vector makes every thought identical, so similarity is meaningless. Use a real +> model (the providers above) or a genuine embedding function. + +## The query side + +The two search methods handle the query vector differently — `search_hybrid` +takes the query **text**, `search_similar` takes a query **vector**. + +**`search_hybrid(query_text, query_vector=None, ...)`** — pass the query text. +When an embedding provider is configured, Engrava embeds that text for you if +you don't supply a `query_vector`; pass one explicitly only to override: + +```python +# Provider configured → the query text is embedded for you: +result = await store.search_hybrid("trips to Japan", top_k=5, current_cycle=cycle) + +# Or override with a vector you already have: +query_vec = await provider.embed("trips to Japan") +result = await store.search_hybrid("trips to Japan", query_vector=query_vec, top_k=5) +``` + +If **no** provider is configured **and** you pass no `query_vector`, +`search_hybrid` skips the vector signal and falls back to the lexical (FTS5/BM25) +signal — still useful, just keyword-based rather than semantic. 
+ +**`search_similar(query_vector, ...)`** — takes a ready vector as its first, +required argument. It does not accept query text, so there is nothing for it to +auto-embed: you must embed the query yourself first. + +```python +query_vec = await provider.embed("trips to Japan") # required — no auto-embed here +result = await store.search_similar(query_vec, top_k=5) +``` + +## Choosing a model and dimension + +- **Keep one model per store.** The query and corpus vectors must come from the + same model; switching models on an existing store requires re-embedding (see + `engrava restore --re-embed`). +- **Dimension follows the model.** Local/HF providers infer it from the model; + `CallbackProvider` requires you to declare `dimension` to match what your + callback returns. For the `sqlite-vec` ANN backend, set + `extensions.vector.dimension` in config to match. + +## Config-driven equivalents + +Each provider has a `provider:` name for `engrava.yaml`, resolved by +`resolve_embedding_provider(config.embeddings)`: + +| `provider:` value | Class | Extra | +|---|---|---| +| `sentence-transformer` | `SentenceTransformerProvider` | `embeddings-local` | +| `openai-compatible` | `OpenAICompatibleProvider` | `embeddings-openai` | +| `ollama` | `OllamaProvider` | `embeddings-ollama` | +| `huggingface` | `HuggingFaceProvider` | `embeddings-hf` | + +`CallbackProvider` takes a Python callable, so it's wired in code (via the +`embedding_provider=` constructor argument), not YAML. + +## Next + +- [Configuration](../configuration.md) — the `embeddings` YAML section. +- [Hybrid Search](../search.md) — how the vector signal fuses with the others. +- [Building a memory-backed agent](agent-memory.md) — embeddings in the agent loop. 
diff --git a/docs/guides/migrating-from-other-memory.md b/docs/guides/migrating-from-other-memory.md new file mode 100644 index 0000000..2b7954c --- /dev/null +++ b/docs/guides/migrating-from-other-memory.md @@ -0,0 +1,274 @@ +# Migrating from another memory system + +This guide helps you move an agent's memory from another store — a hosted +agent-memory service (mem0, Zep, …), a framework's built-in memory (LangChain, +…), or a plain vector database (Chroma, Qdrant, pgvector, …) — into Engrava. + +It covers three things: + +1. [Mapping concepts](#concept-mapping) from other systems onto Engrava's model. +2. [Porting your write/read calls](#porting-your-calls) with before/after snippets. +3. [Bulk-importing](#bulk-import) an existing corpus efficiently. + +It ends with [filtering, scoping & multi-tenancy](#filtering-scoping-and-multi-tenancy) +— the one area where Engrava's defaults differ most from a hosted service, and +what to do about it. + +Read [Core Concepts](../concepts.md) first if the terms *thought*, *edge*, +*cycle*, or *reflection* are unfamiliar, and [Positioning](../positioning.md) +to confirm Engrava is the right destination for your workload. + +## Concept mapping + +Other memory systems use different vocabulary for similar ideas. This table maps +common concepts onto Engrava: + +| Concept elsewhere | Engrava equivalent | Notes | +|---|---|---| +| "Memory" / "record" / "document" | **`ThoughtRecord`** | The unit you store. Has `essence` (short) + `content` (full). | +| "Memory type" / "role" | **`thought_type`** (`OBSERVATION`, `BELIEF`, `TASK`, …) | A small fixed taxonomy; see [Core Concepts](../concepts.md). | +| Free-form metadata / `metadata={...}` | **`ThoughtRecord.metadata`** | An arbitrary JSON dict, persisted and round-tripped. | +| "User id" / "session id" / namespace | A key inside **`metadata`** (or `source`) | Engrava has no built-in tenant field — see [scoping](#filtering-scoping-and-multi-tenancy). 
| +| Relationship / link between memories | **`EdgeRecord`** (typed, weighted) | First-class graph; edges also feed ranking. | +| Embedding / vector | Stored on write only with `embedding_provider=...` **and** `auto_embed=True`; otherwise call `store_embedding(thought_id, vector)` yourself | See the [Embeddings guide](embeddings.md). | +| Vector / similarity search | **`search_similar(query_vector, …)`** | Needs a ready query vector. | +| Keyword / BM25 search | **`search_fts(query, …)`** | Returns `list[(thought_id, score)]`. | +| Hybrid search | **`search_hybrid(query_text, …)`** | Fuses FTS + vector + recency + priority + graph. | +| Automatic summarisation / fact extraction | *(none — by design)* | Engrava does no LLM-side extraction; see [Non-goals](../positioning.md#non-goals). | +| Decay / forgetting | TTL + lifecycle + the recency signal | See [Data lifecycle](../data-lifecycle.md) (TTL, archive-vs-delete, erasure) and the recency signal in [Search](../search.md). | +| Summaries of clusters | **`REFLECTION`** thoughts via [dreaming](../dreaming.md) | Structural (centroid + keywords), not LLM prose. | + +## Porting your calls + +The shapes below are illustrative fragments — they assume you already have a +`store` (see [Quick Start](../quickstart.md) for how to open one). 
+ +**Writing a memory.** Where another library takes a string and does extraction +for you, Engrava takes a fully-formed `ThoughtRecord` — you decide the type, +priority, and metadata: + +```python +import uuid + +from engrava import LifecycleStatus, Priority, ThoughtRecord, ThoughtType + +# before (illustrative, another library): +# memory.add("User prefers dark mode", user_id="u1") + +# after (engrava): +await store.create_thought( + ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OBSERVATION, + essence="User prefers dark mode", + content="User prefers dark mode", + priority=Priority.P3, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=0, + updated_cycle=0, + source="chat", + metadata={"user_id": "u1"}, + ) +) +``` + +**Searching.** Where another library returns ranked memories from a single +`search`, pick the Engrava method that matches the signal you want; `search_hybrid` +is the closest analogue to a managed hybrid search: + +```python +# before (illustrative): +# hits = memory.search("what theme does the user like?", user_id="u1") + +# after (engrava) — note: search is unscoped; filter by user yourself: +result = await store.search_hybrid("what theme does the user like?", top_k=10) +for thought_id, score in result.results: + record = await store.get_thought(thought_id) + if record is not None and record.metadata.get("user_id") == "u1": + print(score, record.essence) +``` + +See [filtering, scoping & multi-tenancy](#filtering-scoping-and-multi-tenancy) +for why the post-filter is there and how to do it better. + +## Bulk import + +When migrating an existing corpus, insert under a single transaction instead of +committing once per row. The `suspend_auto_commit()` async context manager +defers the commit until the block exits — it **commits once on success and rolls +back the whole batch on any error**. 
Pair it with `deduplicate=True` so repeated +`content` collapses into one thought (bumping `confirmation_count`) instead of +inserting duplicate rows. + +The following is a complete, runnable example (it uses an in-memory store and a +small fake export): + +```python +import asyncio +import uuid + +import aiosqlite + +from engrava import LifecycleStatus, Priority, SqliteEngravaCore, ThoughtRecord, ThoughtType + +# Pretend this came from your previous memory system's export. +EXPORTED_MEMORIES = [ + {"text": "User prefers dark mode", "user": "u1"}, + {"text": "User is based in Berlin", "user": "u1"}, + {"text": "User prefers dark mode", "user": "u1"}, # a duplicate + {"text": "Project deadline is Friday", "user": "u2"}, +] + + +def to_thought(item: dict[str, str]) -> ThoughtRecord: + return ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OBSERVATION, + essence=item["text"][:200], + content=item["text"], + priority=Priority.P3, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=0, + updated_cycle=0, + source="import", + metadata={"user_id": item["user"]}, + ) + + +async def bulk_import(store, items: list[dict[str, str]]) -> int: + # One transaction for the whole batch: commit on success, roll back on error. + async with store.suspend_auto_commit(): + for item in items: + # deduplicate=True collapses identical content into one thought. + await store.create_thought(to_thought(item), deduplicate=True) + return await store.count_thoughts() + + +async def main() -> None: + async with aiosqlite.connect(":memory:") as conn: + conn.row_factory = aiosqlite.Row + store = SqliteEngravaCore(conn) + await store.ensure_schema() + + total = await bulk_import(store, EXPORTED_MEMORIES) + # 4 exported rows, one duplicate collapsed -> 3 stored. + assert total == 3 + print(f"Imported {total} thoughts.") + + +if __name__ == "__main__": + asyncio.run(main()) +``` + +For large corpora, import in batches (e.g. 
a few thousand rows per +`suspend_auto_commit()` block) to keep each transaction short — long +transactions block the background SQLite thread (see +[Known Limitations](../known-limitations.md#aiosqlite-proxy-architecture)). +If you have embeddings configured, note that each new thought is embedded on +write (see the [Embeddings guide](embeddings.md)), so a bulk load pays the +embedding cost up front — pre-compute vectors or import in batches accordingly. +See the [Performance guide](../performance.md#write-throughput-and-bulk-ingest) +for the throughput levers in detail. + +## Filtering, scoping & multi-tenancy + +This is the most important difference from a hosted memory service. Engrava's +**`search_hybrid` / `search_similar` / `search_fts` take no scope or metadata +filter** — they rank across the entire store. There is no `user_id=` or +`session_id=` argument on the ranked path. You scope retrieval yourself, and +there are three patterns, with clear tradeoffs. + +### Option A — over-fetch, then post-filter (simplest) + +Ask for more results than you need, then drop the ones that don't match. Keep +the scope key in `metadata` when you write. + +```python +# Want the top 5 for user "u1": over-fetch, then filter and trim. +result = await store.search_hybrid("dark mode", top_k=50) +scoped = [] +for thought_id, score in result.results: + record = await store.get_thought(thought_id) + if record is not None and record.metadata.get("user_id") == "u1": + scoped.append((thought_id, score)) + if len(scoped) >= 5: + break +``` + +- **Pros:** no SQL, works with the high-level API, fine for modest stores. +- **Cons:** wasteful when one tenant is a small slice of a large store (you may + over-fetch a lot, or miss matches if `top_k` is too small). Ranking is still + computed over everything. + +### Option B — one store per tenant (strongest isolation) + +Give each tenant its own database file via +[`EngravaManager`](../api-reference.md). 
Each service has its own file and its +own lock, so retrieval is naturally scoped and tenants are physically isolated. + +```python +from engrava import EngravaManager, load_config + +config = load_config("engrava.yaml") +async with EngravaManager.from_config(config.services) as mgr: + store_u1 = await mgr.get_store("u1") # u1.db + result = await store_u1.search_hybrid("dark mode", top_k=5) +``` + +- **Pros:** true isolation (separate files, separate locks, easy per-tenant + backup/delete); search is scoped for free. +- **Cons:** not suitable for a very large number of tenants (one file each); no + cross-tenant query. Best when tenants are coarse (a handful of services), not + per-end-user at massive cardinality. + +### Option C — pre-filter in raw SQL (scoped recall without over-fetch) + +When you need keyword/metadata-scoped recall without over-fetching, query the +`thought` table directly. The Python `metadata` dict is persisted to a +`metadata_json` column you can index into with SQLite's `json_extract`: + +```sql +-- thoughts for one user, most recent first +SELECT thought_id, essence +FROM thought +WHERE json_extract(metadata_json, '$.user_id') = :user_id +ORDER BY updated_cycle DESC +LIMIT 20; +``` + +Run it through the same connection you gave the store: + +```python +cursor = await conn.execute( + "SELECT thought_id, essence FROM thought " + "WHERE json_extract(metadata_json, '$.user_id') = ? " + "ORDER BY updated_cycle DESC LIMIT 20", + ("u1",), +) +rows = await cursor.fetchall() +``` + +- **Pros:** exact scoping, no over-fetch; you can combine it with FTS by joining + the `thought_fts` table. +- **Cons:** you drop below the high-level API to raw SQL against the schema, and + this path does **not** apply the hybrid ranking signals (it is a filter, not a + ranked search). Treat the schema as semi-stable and re-check it across upgrades. 
+ +### Choosing + +| Situation | Use | +|---|---| +| Small/medium store, occasional scoping | **A** (over-fetch + post-filter) | +| A handful of coarse tenants needing real isolation | **B** (store per tenant) | +| Scoped recall over a large store, ranking not required | **C** (raw `json_extract`) | + +> **Want a real filter on the ranked path?** Adding a scope/metadata argument to +> `search_*` is a public-API change under consideration, not a current feature. +> Until then, the patterns above are the supported approach. + +## See also + +- [Positioning](../positioning.md) — when Engrava fits, and its non-goals +- [Core Concepts](../concepts.md) — thoughts, edges, cycles, reflections +- [Recipes](../recipes/index.md) — short task-oriented snippets, incl. dedup +- [Known Limitations](../known-limitations.md) — concurrency and scale constraints diff --git a/docs/observability.md b/docs/observability.md index 08782f0..970b520 100644 --- a/docs/observability.md +++ b/docs/observability.md @@ -17,6 +17,7 @@ storage footprint, and a rolling-window search-latency histogram. from engrava import SqliteEngravaCore import aiosqlite + async def main() -> None: conn = await aiosqlite.connect("engrava.db") conn.row_factory = aiosqlite.Row @@ -56,3 +57,123 @@ engrava --db mydata.db --format json info - Nested calls inside `search_hybrid()` are suppressed, so one hybrid search contributes one latency sample. - This snapshot API tracks only aggregate counts and search latency — not individual events. + +## Production monitoring + +`store.metrics()` is a **pull** snapshot — there is no built-in exporter. To +monitor a deployment, scrape the snapshot on an interval and feed the fields into +your metrics system (Prometheus, OpenTelemetry, StatsD, …). + +### Exporting the snapshot + +The snapshot is a plain dataclass, so mapping it to any client is +straightforward. 
A Prometheus example: + +```python +from prometheus_client import Gauge + +THOUGHTS = Gauge("engrava_thoughts_total", "Total thoughts") +DB_BYTES = Gauge("engrava_db_bytes", "Main database size in bytes") +WAL_BYTES = Gauge("engrava_wal_bytes", "WAL size in bytes") +SEARCH_P95 = Gauge("engrava_search_p95_ms", "Search p95 latency (ms)") +SEARCH_P99 = Gauge("engrava_search_p99_ms", "Search p99 latency (ms)") + + +async def collect(store) -> None: + m = await store.metrics() + THOUGHTS.set(m.thoughts.total) + DB_BYTES.set(m.storage.db_bytes) + WAL_BYTES.set(m.storage.wal_bytes) + SEARCH_P95.set(m.search_latency.p95_ms) + SEARCH_P99.set(m.search_latency.p99_ms) +``` + +The main metric groups on `EngravaMetrics` are `thoughts` (`total`, `by_type`, +`by_status`), `edges` (`total`, `by_type`), `storage` (`db_bytes`, `wal_bytes`, +`vec_index_bytes`, `total_bytes`), and `search_latency` (`sample_count`, +`p50_ms`, `p95_ms`, `p99_ms`, `min_ms`, `max_ms`, `mean_ms`). The snapshot also +carries `schema_version` and `snapshot_timestamp` for the snapshot itself. + +### Scrape cadence + +Treat `metrics()` like any pull endpoint: a **30–60 s** scrape interval is +typically plenty. Counts and storage change slowly; the latency histogram is a +rolling window (`metrics.window_size`, default 1000 samples), so it already +smooths short spikes. Avoid sub-second scrapes — each call runs a few aggregate +SQL queries. 
+ +### What to alert on + +| Signal | Source field | Alert when… | +|---|---|---| +| Storage growth | `storage.db_bytes`, `storage.total_bytes` | size approaches your disk budget, or grows unexpectedly fast | +| WAL not checkpointing | `storage.wal_bytes` | the WAL keeps growing and never shrinks (checkpoints not happening) | +| Search latency | `search_latency.p95_ms` / `p99_ms` | p95/p99 exceeds your budget — often the sign you've passed the brute-force vector ceiling (see [Performance](performance.md)) | +| Expired backlog | `count_thoughts(include_expired=True)` − `count_thoughts()` | the number of expired-but-not-cleaned thoughts grows (run `engrava gc --expired`) — see [Data Lifecycle](data-lifecycle.md) | +| Audit integrity | `store.journal.verify_integrity()` (journaling only) | the chain fails verification (tampering or corruption) — see [Audit Trail](audit-trail.md) | + +The expired-backlog and audit-integrity signals are **not** in the metrics +snapshot — compute them from the calls shown above on your own cadence. + +The audit-integrity check applies **only when journaling is enabled** +(`journal.enabled: true`). With journaling off, `store.journal` is `None`, so +guard the call: + +```python +async def journal_ok(store) -> bool: + if store.journal is None: + return True # journaling disabled — nothing to verify + result = await store.journal.verify_integrity() + return result.valid +``` + +### Health check + +For a readiness probe you want a call that actually touches the database. Note +that `metrics()` is **not** reliable for this when metrics are disabled: with +`metrics.enabled: false`, `store.metrics()` returns a zero-filled snapshot +**without issuing any SQL**, so it would report healthy even if the database were +unreadable. 
Use a lightweight real read instead — `count_thoughts()` always +queries the database (independent of the metrics setting): + +```python +async def healthcheck(store) -> bool: + try: + await store.count_thoughts() # issues SQL — confirms DB + schema are readable + except Exception: + return False + return True +``` + +(If you know metrics are enabled in your deployment, `await store.metrics()` +works too and additionally returns the live counts.) + +### Logging + +The library logs through the standard `logging` module under the **`engrava.*`** +namespace (each module uses `logging.getLogger(__name__)`, e.g. +`engrava.extensions.dreaming`, `engrava.extensions.vector_sqlite_vec`, +`engrava.config`). It logs at **`WARNING`** (degraded conditions, e.g. sqlite-vec +unavailable → numpy fallback), **`INFO`** (dreaming progress), and **`DEBUG`** +(detailed internals) — it does **not** log at `ERROR`/`CRITICAL`; failures are +raised as typed exceptions for the caller to handle. Configure it like any +library logger: + +```python +import logging + +logging.getLogger("engrava").setLevel(logging.WARNING) # quiet, production default +# logging.getLogger("engrava").setLevel(logging.INFO) # see dreaming activity +``` + +### Out of scope + +The snapshot is deliberately small. It does **not** include: + +- **write / mutation counters** or **error counters** — track those at your + application layer (Engrava raises typed exceptions you can count there); +- **dreaming metrics** — `run_consolidation()` returns a `ConsolidationResult` + (promoted / edges / reflections counts) per run; consume that directly; +- **journal size or per-event audit metrics** — the audit history lives in the + [journal](audit-trail.md) itself, which you query and verify directly, not via + the metrics snapshot. 
diff --git a/docs/performance.md b/docs/performance.md new file mode 100644 index 0000000..ac36147 --- /dev/null +++ b/docs/performance.md @@ -0,0 +1,175 @@ +# Performance & scaling + +How Engrava behaves as data grows, where the limits are, and the two levers that +matter most: the **vector backend** and **batched writes**. The numbers that +matter for *your* workload depend on corpus size, embedding dimension, query mix, +and hardware — measure on your own data rather than trusting a single headline +figure. This page explains *what* drives cost so you know what to measure. + +For the dreaming *quality* benchmark (does consolidation help retrieval), see +[Benchmarks](benchmarks.md). For the hard platform constraints, see +[Known Limitations](known-limitations.md). + +## Where the cost is + +A query touches up to five signals; each scales differently: + +| Signal | Cost driver | Scaling | +|---|---|---| +| **FTS5 / BM25** | SQLite's FTS5 inverted index | Sub-linear; scales well into large corpora. | +| **Vector** | The vector backend (see below) | Linear in #embeddings for both backends; **sqlite-vec scans a compact `vec0` table with a much smaller constant factor** than the Python path. | +| **Recency** | A cheap per-candidate arithmetic decay | Negligible. | +| **Priority** | A per-candidate enum→multiplier lookup | Negligible. | +| **Graph** | 1-hop neighbour expansion over edges | Proportional to the fusion-pool size × average degree; **opt-in** (`graph_weight=0.0` makes zero graph queries). | + +The dominant term at scale is almost always the **vector** signal, because both +backends compare the query against every stored embedding — the difference is how +efficiently they do it (see below). + +## The brute-force ceiling (and how to pass it) + +Without the `vec` extra, vector search is **brute-force cosine similarity in +Python**: every `search_similar` / `search_hybrid` query scans all embeddings. 
+This is simple and dependency-free, and works well up to roughly **100k +embeddings**. Past that, vector-query latency grows linearly and becomes the +bottleneck. + +The fix is the **sqlite-vec** backend, which stores vectors in a dedicated, +compact `vec0` virtual table. In the pinned `sqlite-vec` 0.1.x line a `vec0` +query is still an **exhaustive k-nearest-neighbour scan** — not an approximate or +sub-linear index — but over a tightly packed, chunked columnar store, so it runs +with a far smaller constant factor (and lower memory overhead) than the Python +brute-force path. The practical effect is that the same corpus stays well under +your latency budget for much longer. FTS5 scales independently and usually needs +no special handling. + +> The ~100k figure is a rule of thumb, not a cliff — see +> [Known Limitations → sqlite-vec](known-limitations.md#sqlite-vec-pre-v1-status). +> Measure your own p95 query latency and switch when it stops meeting your budget. + +## Switching to sqlite-vec (incl. migrating an existing database) + +The migration is designed to be turnkey: your embeddings already live in the +`embedding` table, so switching backends only builds and backfills the `vec0` +vector table — you do **not** re-embed anything. + +**1. Install the extra.** + +```bash +pip install 'engrava[vec]' +``` + +**2. Set the backend in your config.** + +```yaml +extensions: + vector: + backend: sqlite-vec # default is "numpy" + dimension: 384 # must match your embedding model +``` + +**3. Open the store with `from_config`.** On open, Engrava creates the `vec0` +virtual table and **backfills every existing embedding into it automatically** +(idempotent — safe to run repeatedly). From then on, new writes keep the index +in sync. + +```python +from engrava import SqliteEngravaCore + +# from_config wires the vector backend; the index is created and back-filled +# on open. A plain SqliteEngravaCore(conn) constructor stays on numpy. 
+async with await SqliteEngravaCore.from_config("engrava.yaml") as store: + result = await store.search_similar(query_vector, top_k=5) +``` + +That's the whole migration — no manual re-index step, and no re-embedding, +because the vectors are reused from the existing `embedding` table. + +**Important caveats.** + +- **Use `from_config`.** Only the `from_config` path configures the vector + backend. If you build the store directly with `SqliteEngravaCore(conn)`, it + stays on the numpy backend regardless of the YAML. +- **Graceful fallback, not a hard error.** If the `sqlite-vec` package is missing + or the extension can't load, Engrava logs a warning and **falls back to numpy** + rather than crashing — so a "switch" that silently kept numpy usually means the + extension didn't load. +- **macOS system SQLite blocks extensions.** The most common load failure is + macOS's bundled SQLite, which disables extension loading. Install Python via + Homebrew or pyenv (a full-featured SQLite build). See + [Known Limitations → macOS](known-limitations.md#macos-sqlite-extension-loading). +- **Dimension must match.** The index is created for a fixed dimension; it must + equal your embedding model's output. Mixing dimensions corrupts results (see + [Embedding Dimension Consistency](known-limitations.md#embedding-dimension-consistency)). + +## Write throughput and bulk ingest + +By default each mutating call commits its own transaction. For a bulk load that +is the wrong granularity — one commit per row dominates wall-clock. 
Wrap the +batch in `suspend_auto_commit()`, which defers to a **single commit on success +and rolls the whole batch back on any error**: + +```python +async def bulk_load(store, items): + async with store.suspend_auto_commit(): + for item in items: + await store.create_thought(item, deduplicate=True) + return await store.count_thoughts() +``` + +- **`deduplicate=True`** collapses identical `content` into one thought (bumping + `confirmation_count`) instead of inserting duplicate rows — cheaper storage and + fewer embeddings to compute. (Note the persistence default is + `deduplicate=False`; opt in per call.) +- **Keep each transaction short.** A long-running transaction blocks aiosqlite's + background thread (see + [Known Limitations → aiosqlite](known-limitations.md#aiosqlite-proxy-architecture)), + so for very large imports, batch in chunks (e.g. a few thousand rows per + `suspend_auto_commit()` block) rather than one giant transaction. +- **Embedding cost dominates a bulk load** when a provider is configured with + `auto_embed=True`: each new thought is embedded on write. Pre-compute vectors + and store them with `store_embedding(...)`, use a batching local provider, or + import in chunks so the encoder isn't the bottleneck. See the + [Embeddings guide](guides/embeddings.md). + +A runnable end-to-end bulk-import example lives in the +[migration guide](guides/migrating-from-other-memory.md#bulk-import). + +## Dreaming cost at scale + +[Dreaming](dreaming.md) runs **off the hot path** — you invoke +`run_consolidation()` on your own cadence, so it never adds latency to CRUD or +search. Its own cost scales with the number of candidate thoughts and the +clustering algorithm: + +- Run it **periodically**, not every turn (every N cycles, a cron job, or + manually). +- `candidates_limit` caps how many thoughts are evaluated per pass — keep it + bounded on large stores. 
+- Clustering has two backends via `extensions.dreaming.clustering_backend` + (`"numpy"` default, or `"python"`); `numpy` is faster for the similarity math + on larger candidate sets. +- The LPA clustering algorithm is `O(edges × iterations)`; the agglomerative + algorithm operates over active thoughts — see [Dreaming](dreaming.md) for the + algorithm tradeoffs. + +## Checklist: scaling Engrava + +1. **Past ~100k embeddings or missing your latency budget?** Switch to + `sqlite-vec` (above). +2. **Bulk loading?** Batch writes with `suspend_auto_commit()` and consider + `deduplicate=True`. +3. **Embedding is the bottleneck?** Use a batching provider or pre-compute + vectors. +4. **Multi-tenant?** One database file per tenant via `EngravaManager` keeps each + store smaller and independently lockable (see the + [scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy)). +5. **Dreaming heavy?** Cap `candidates_limit`, run it on a schedule, pick the + right `clustering_backend`. + +## See also + +- [Known Limitations](known-limitations.md) — the brute-force ceiling, macOS, concurrency +- [Configuration](configuration.md) — the `extensions.vector` and dreaming knobs +- [Benchmarks](benchmarks.md) — the dreaming retrieval-quality benchmark +- [Embeddings](guides/embeddings.md) — provider choice and batching diff --git a/docs/positioning.md b/docs/positioning.md new file mode 100644 index 0000000..31a0e6e --- /dev/null +++ b/docs/positioning.md @@ -0,0 +1,97 @@ +# Positioning — what Engrava is (and isn't) + +Engrava is a **standalone embedded database for AI-agent memory**. It is built on +SQLite and runs in-process: one `pip install`, no server, no LLM, no external +services. It gives an agent a durable thought-graph with hybrid retrieval +(full-text + vector + recency + priority + graph) and an optional tamper-evident +audit trail. 
+ +This page explains **when Engrava is the right tool**, when it isn't, and how it +relates to the other memory options you might be choosing between. + +## When Engrava is a good fit + +- **You want memory you own and can inspect.** The whole store is one SQLite + file. You can open it with any SQLite tool, back it up with a file copy + ([with care around WAL](known-limitations.md#concurrent-write-safety)), and + query it with SQL when the high-level API isn't enough. +- **You want retrieval, not just a vector index.** Engrava fuses FTS5/BM25, + vector similarity, recency, priority, and a 1-hop graph signal into one ranked + result. See [Search](search.md). +- **You want a graph, not a flat list.** Thoughts are connected by typed, + weighted [edges](concepts.md), and the graph itself contributes to ranking. +- **You want it embedded.** No network hop, no service to operate, no separate + process. It runs anywhere Python and SQLite run. +- **You want embeddings to be optional and pluggable.** Bring a local model, an + OpenAI-compatible endpoint, Ollama, HuggingFace, or your own callback — or run + with FTS-only and no embeddings at all. See the + [Embeddings guide](guides/embeddings.md). +- **Small-to-medium corpora.** The default backend brute-forces vector search in + Python and works well up to roughly 100k embeddings; beyond that, switch to + the `sqlite-vec` backend. See + [Known Limitations](known-limitations.md#sqlite-vec-pre-v1-status). + +## When Engrava is *not* a good fit + +- **You need a managed, horizontally-scaled vector service.** Engrava is a local + embedded library, not a clustered database. One store is one SQLite file + written by one process. If you need sharding, replication, or a multi-writer + service across many machines, use a dedicated vector database. +- **You need many processes writing the same store concurrently.** SQLite is + single-writer. 
WAL mode lets readers and a single writer coexist, and a + single process can drive many async tasks safely, but heavy multi-process + write fan-out is out of scope. See + [Known Limitations → Concurrent Write Safety](known-limitations.md#concurrent-write-safety). +- **You want the library to call an LLM for you.** Engrava does no LLM-side fact + extraction, summarisation, or entity resolution (see [Non-goals](#non-goals)). + It stores and retrieves what you give it; your agent decides what to write. +- **You need per-tenant retrieval isolation on the ranked path out of the box.** + The `search_*` methods take no scope/metadata filter today — retrieval is + unscoped by default. There are good workarounds (over-fetch + post-filter, + one store per tenant, raw-SQL pre-filter); see the + [migration guide's scoping section](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy). + +## Non-goals + +These are deliberate boundaries, not missing features: + +- **No LLM-side intelligence.** Engrava never calls a language model. It does no + fact extraction, no summarisation, no entity resolution, no automatic + "memory writing" from raw text. Those belong in your agent (or a downstream + extension), above the storage layer. The one consolidation feature that *does* + synthesise — [dreaming](dreaming.md) — is purely structural (clustering + + centroids + keyword counts), with **no LLM involved**. +- **Retrieval is unscoped by default.** `search_hybrid` / `search_similar` / + `search_fts` rank across the whole store; they accept no per-user or + per-session filter argument. Scoping is an application-level concern today — + see the [workarounds](guides/migrating-from-other-memory.md#filtering-scoping-and-multi-tenancy). +- **Not a distributed system.** No clustering, replication, or cross-machine + consistency. One file, one writer. +- **Not an application framework.** Engrava is the memory layer. 
It does not + provide an agent runtime, tool-calling, or prompt orchestration. + +## How it compares + +A rough orientation, not a feature scorecard. Evaluate the specifics against +your own workload. + +| | Engrava | Hosted agent-memory services (e.g. mem0, Zep) | Framework memory (e.g. LangChain memory) | Standalone vector DBs (e.g. Chroma, Qdrant, pgvector) | +|---|---|---|---|---| +| **Deployment** | Embedded library, one SQLite file, in-process | Typically a hosted/managed service or self-hosted server | In-process, tied to the framework | Separate database/service (some have embedded modes) | +| **Retrieval model** | Hybrid: FTS + vector + recency + priority + graph, fused | Varies; often vector + recency with managed pipelines | Usually buffer/window or a vector-store wrapper | Primarily vector similarity (some add keyword/hybrid) | +| **Graph** | First-class typed/weighted edges that feed ranking | Some offer entity/graph memory | Generally no | Generally no | +| **LLM-side extraction** | None — you decide what to store | Often built in (auto fact-extraction/summarisation) | Sometimes, via chains | None | +| **External services** | None required | Usually yes | Depends on the chosen store | Usually a running service | +| **Audit trail** | Optional tamper-evident hash-chain journal | Varies | No | Generally no | +| **Best for** | Owning a local, inspectable, hybrid memory graph for an agent | Offloading memory ops to a managed pipeline | Quick memory inside an existing framework app | Large-scale pure vector retrieval | + +If you are currently using one of these and want concept mappings and porting +snippets, see +[Migrating from another memory system](guides/migrating-from-other-memory.md). 
+ +## See also + +- [Core Concepts](concepts.md) — the mental model behind thoughts, edges, and cycles +- [Search](search.md) — how the hybrid ranking actually works +- [Known Limitations](known-limitations.md) — the hard constraints in one place +- [Migrating from another memory system](guides/migrating-from-other-memory.md) diff --git a/docs/quickstart.md b/docs/quickstart.md index e1741a8..a1689f7 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -135,6 +135,14 @@ stored = await store.create_thought(observation) print(f"Created thought: {stored.thought_id}") ``` +> **About `created_cycle` / `updated_cycle`.** A *cycle* is a consumer-owned +> logical clock — Engrava never advances it for you. `0` is fine for this +> quickstart, but in a real long-running agent you should keep a counter and +> increment it once per turn, using it for these fields (and for `current_cycle` +> in search / consolidation). Otherwise recency can't tell old memories from new +> and dreaming's age gate never opens. See +> [Cycle (the agent clock)](concepts.md#cycle-the-agent-clock). 
+ ## Link Thoughts with Edges ```python @@ -180,19 +188,20 @@ for thought_id, score in await store.search_fts("Python AI", top_k=5): ### Embedding Similarity Search +Use a real embedding provider so similarity is meaningful (this needs the +`embeddings-local` extra; see the [Embeddings guide](guides/embeddings.md) for +all provider options): + ```python -from engrava import CallbackProvider +from engrava import SentenceTransformerProvider -# Use any embedding function -provider = CallbackProvider( - callback=lambda text: [0.1] * 384, # Replace with real embeddings - dimension=384, - model_name="my-model", -) +provider = SentenceTransformerProvider(model_name="all-MiniLM-L6-v2") # Store an embedding for an existing thought vector = await provider.embed(observation.content) -await store.store_embedding(observation.thought_id, vector, model_name="my-model") +await store.store_embedding( + observation.thought_id, vector, model_name=provider.model_name +) # Search by similarity — returns (thought_id, score) tuples for thought_id, score in await store.search_similar(vector, top_k=5): @@ -201,6 +210,10 @@ for thought_id, score in await store.search_similar(vector, top_k=5): print(f" {record.essence} (score: {score:.3f})") ``` +> Tip: configure the provider on the store with `auto_embed=True` (or via +> `engrava.yaml`) and Engrava embeds thoughts on write — and embeds your query +> for you in `search_hybrid`. See the [Embeddings guide](guides/embeddings.md). 
+ ## Query with MindQL ```python @@ -240,7 +253,14 @@ engrava --db my_thoughts.db restore -i backup.jsonl ## Next Steps +Build something next, then reach for the references: + +- [Tutorial](tutorial.md) — build a small notes memory end to end (start here) +- [Recipes](recipes/index.md) — copy-paste snippets: store a turn, retrieve context, TTL, dedup, session scoping +- [Building a memory-backed agent](guides/agent-memory.md) — the full agent turn loop +- [Core Concepts](concepts.md) — the mental model (thought, edge, reflection, cycle) - [Configuration](configuration.md) — YAML-based setup for production use -- [Extensions](extensions.md) — Hook into the thought lifecycle -- [API Reference](api-reference.md) — Full class and method reference -- [MindQL](mindql.md) — Complete query language reference +- [API Reference](api-reference.md) — full class and method reference +- [MindQL](mindql.md) — complete query language reference +- [Troubleshooting](troubleshooting.md) — when something doesn't work as expected +- [FAQ](faq.md) — quick answers to common questions diff --git a/docs/recipes/index.md b/docs/recipes/index.md new file mode 100644 index 0000000..79ee5e6 --- /dev/null +++ b/docs/recipes/index.md @@ -0,0 +1,190 @@ +# Recipes + +Short, copy-paste snippets for the things you actually do with an agent-memory +database. Each assumes you already have an open `store` (see the +[Quick Start](../quickstart.md)); imports are shown once per recipe. + +> New to the model? Read [Core Concepts](../concepts.md) first. For the full +> agent turn loop, see [Building a memory-backed agent](../guides/agent-memory.md). 
+ +## Store a conversation turn + +Persist a user message and the agent's reply, tagged with conversation metadata +so you can scope retrieval later: + +```python +import uuid +from engrava import ThoughtRecord, ThoughtType, Priority, LifecycleStatus, percept, utterance + +async def store_turn(store, user_text, agent_text, *, cycle, session_id, turn_index, user_id): + user_thought = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OBSERVATION, + essence=user_text[:200], content=user_text, + priority=Priority.P2, lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=cycle, updated_cycle=cycle, source=user_id, + metadata={**percept(source_id=user_id, label="user"), + "session_id": session_id, "turn_index": turn_index}, + ) + await store.create_thought(user_thought) + + agent_thought = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OUTPUT_DRAFT, + essence=agent_text[:200], content=agent_text, + priority=Priority.P3, lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=cycle, updated_cycle=cycle, source="agent", + metadata={**utterance(), "session_id": session_id, "turn_index": turn_index}, + ) + await store.create_thought(agent_thought) +``` + +## Retrieve context for a prompt + +Get the most relevant prior memories and turn them into prompt-ready text. 
With +an embedding provider configured, `search_hybrid` embeds the query for you: + +```python +async def context_for(store, query, cycle, top_k=5): + result = await store.search_hybrid(query, top_k=top_k, current_cycle=cycle) + lines = [] + for thought_id, _score in result.results: + record = await store.get_thought(thought_id) + if record is not None: + lines.append(record.essence) # essence = the prompt-facing one-liner + return "\n".join(f"- {line}" for line in lines) +``` + +## Filter retrieval by session (or user) + +The ranked search methods take **no** metadata/scope filter, so "only this +session's memories" is done by over-fetching and post-filtering on metadata in +Python: + +```python +async def search_in_session(store, query, session_id, cycle, want=5): + # over-fetch, then keep only this session's hits, preserving rank order + result = await store.search_hybrid(query, top_k=want * 5, current_cycle=cycle) + scoped = [] + for thought_id, _score in result.results: + record = await store.get_thought(thought_id) + if record is not None and record.metadata.get("session_id") == session_id: + scoped.append(record) + if len(scoped) >= want: + break + return scoped +``` + +> For *hard* isolation between users/tenants (separate databases rather than a +> shared one with a metadata tag), use [`EngravaManager`](../api-reference.md) — +> one `.db` per service. That trades cross-tenant search for strong +> isolation; the metadata approach keeps one searchable store. + +## Set a TTL on transient memories + +Give a thought an expiry, then expire due thoughts. 
The default strategy is +`archive` (soft — marks `ARCHIVED`); switch to `delete` for hard removal: + +```python +# expire this thought one hour from now +await store.create_thought(transient_thought, expires_after_seconds=3600) + +# later: process everything past its expiry (archive or delete per ttl_strategy) +result = await store.cleanup_expired() +print(f"{result.expired_count} thoughts expired via '{result.strategy_applied}'") +``` + +A store-wide default TTL and the archive-vs-delete strategy are set in config — +see the [`ttl` configuration](../configuration.md). Archived thoughts leave disk +only on a later `engrava gc`. + +## Deduplicate repeated facts + +Pass `deduplicate=True` so identical `content` collapses into one thought with a +bumped `confirmation_count` instead of a duplicate row: + +```python +first = await store.create_thought(fact, deduplicate=True) +again = await store.create_thought(same_fact, deduplicate=True) +# again.thought_id == first.thought_id; confirmation_count incremented, no new row +``` + +The growing `confirmation_count` is also a reliability signal dreaming uses (a +fact re-confirmed many times ranks as more trustworthy) — see +[Core Concepts](../concepts.md#reliability-confidence-vs-confirmation_count). + +## Run consolidation on a schedule + +In a long-running agent, run dreaming every N turns rather than every turn: + +```python +from engrava import DreamingExtension, DreamingConfig + +dreaming = DreamingExtension(config=DreamingConfig(enabled=True)) + +# inside your turn loop, after advancing the cycle counter: +if cycle % 20 == 0: + result = await dreaming.run_consolidation(store, current_cycle=cycle) + print(f"consolidation: promoted {result.promoted_count}") +``` + +A fresh store has little to consolidate — REFLECTIONs emerge as memories +accumulate and repeat. See [Dreaming](../dreaming.md) for the cadence and knobs. 
+ +## Inspect what changed (audit trail) + +With the [audit journal](../audit-trail.md) enabled, read the history of any +thought: + +```python +history = await store.journal.get_entries(target_id=some_thought_id) +for entry in history: + print(entry.sequence_number, entry.mutation_type, entry.created_at) +``` + +## Record a tool result / action + +If your agent *does* things (calls a tool, sends a message), record each as an +`ActionRecord` linked to the thought that prompted it, so what the agent did — +and whether it worked — is part of memory: + +```python +import uuid +from engrava import ActionRecord, ActionType, ActionStatus, VerificationStatus + +await store.create_action( + ActionRecord( + action_id=str(uuid.uuid4()), + source_thought_id=prompting_thought_id, + action_type=ActionType.TOOL_CALL, # or MESSAGE / CLI_OUTPUT / STATE_UPDATE + intent="search the web for flight prices", + status=ActionStatus.CONFIRMED, # PLANNED → EXECUTING → CONFIRMED / FAILED / BLOCKED + verification_status=VerificationStatus.CONFIRMED, + ) +) + +# read an entity's actions back: +actions = await store.get_actions(prompting_thought_id) +``` + +## Restore the cycle counter after a restart + +The cycle is the agent's logical clock and Engrava does **not** persist it — on +startup, seed it from the highest cycle already stored so it keeps increasing. +`list_thoughts` returns rows ordered by `updated_cycle` descending, so the most +recent thought carries the highest value: + +```python +recent = await store.list_thoughts(limit=1) # ordered by updated_cycle desc +cycle = (recent[0].updated_cycle + 1) if recent else 0 +``` + +See [Cycle (the agent clock)](../concepts.md#cycle-the-agent-clock) for why this +matters (a frozen clock disables recency and stalls dreaming). + +## Next + +- [Building a memory-backed agent](../guides/agent-memory.md) — these recipes assembled into a loop. +- [Tutorial](../tutorial.md) — build a small notes memory from scratch. 
+- [Core Concepts](../concepts.md) — the model behind the snippets. +- [Hybrid Search](../search.md) · [Dreaming](../dreaming.md) · [Configuration](../configuration.md). diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 0000000..047d084 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,193 @@ +# Troubleshooting + +Common symptoms, their cause, and the fix. Each entry shows the error (or the +surprising behaviour) you actually see, then what to change. + +If your problem is a platform constraint rather than a mistake (macOS extension +loading, the ~100k brute-force ceiling, FTS5 availability), see +[Known Limitations](known-limitations.md) instead. + +## `AttributeError: 'tuple' object has no attribute 'keys'` on read + +**Symptom.** Writes succeed, but the first `get_thought` / search call raises: + +``` +AttributeError: 'tuple' object has no attribute 'keys' +``` + +**Cause.** The aiosqlite connection has no row factory, so rows come back as +plain tuples. Engrava maps rows to records by column name and needs +`aiosqlite.Row`. The failure surfaces on **read**, not on connect or write, +which makes it look unrelated to setup. + +**Fix.** Set the row factory immediately after connecting: + +```python +import aiosqlite + +conn = await aiosqlite.connect("engrava.db") +conn.row_factory = aiosqlite.Row # required +``` + +`SqliteEngravaCore.from_config(...)` opens the connection for you and sets this +correctly — the manual snippet above only applies when you construct the store +from your own connection. + +## `ValueError: '...' is not a valid ThoughtType` (or `Priority`, `EdgeType`, …) + +**Symptom.** + +``` +ValueError: 'INSIGHT' is not a valid ThoughtType +``` + +**Cause.** A string was passed that is not a member of the enum. The valid +`ThoughtType` members are `TASK`, `OBSERVATION`, `BELIEF`, `REFLECTION`, +`OUTPUT_DRAFT`, and `NOTE` — there is no `INSIGHT`. 
The same applies to +`Priority` (`P1`–`P4`), `EdgeType`, `LifecycleStatus`, etc. + +**Fix.** Use a real enum member, ideally the symbol rather than a string literal: + +```python +from engrava import ThoughtType + +ThoughtType.BELIEF # preferred +ThoughtType("BELIEF") # also valid — must match a real member +``` + +See [Core Concepts](concepts.md) for the full taxonomy and when to use each type. + +## Search returns nothing (or fewer results than expected) + +**Symptom.** `search_hybrid` / `search_fts` returns an empty or short result +list even though matching thoughts exist. + +**Cause.** A signal you assumed was active was **silently skipped**, so the query +ran on fewer signals than you expected. Engrava skips a signal rather than +erroring when its prerequisite is missing. Work through this checklist: + +| If… | then… | +|---|---| +| No `embedding_provider` is configured | the **vector** signal is skipped — only FTS/priority run. A purely semantic query with no shared keywords may find nothing. | +| You pass `query_text` but no provider and no `query_vector` | same as above — there is no vector to compare against. | +| `current_cycle` is `None` | the **recency** signal is skipped (it cannot compute an age). | +| `recency_weight` is `0.0` | recency is disabled even if `current_cycle` is set. | +| The query shares no FTS tokens with any thought | FTS legitimately returns nothing — this is a real miss, not a bug. | + +Inspect which signals actually ran via `HybridSearchResult.backends_used`: + +```python +result = await store.search_hybrid("python async", top_k=5, current_cycle=10) +print(sorted(result.backends_used)) # e.g. ['fts5', 'priority', 'recency'] +``` + +If `'vector'` is missing and you expected semantic matching, configure an +embedding provider (see the [Embeddings guide](guides/embeddings.md)). If +`'recency'` is missing, pass a non-`None` `current_cycle` **and** a +`recency_weight > 0`. 
+ +## Dreaming promotes nothing (consolidation is inert) + +**Symptom.** `run_consolidation(...)` returns `promoted_count == 0` every time. + +**Cause.** Promotion requires a candidate to clear **two independent bars**, and +failing either one keeps the count at zero: + +1. **The age gate.** A thought is eligible only when + `current_cycle - created_cycle >= min_age_cycles` (default `1`). If you never + advance your cycle counter — so `current_cycle` always equals each thought's + `created_cycle` — `0 >= 1` is false and nothing is ever eligible. This is + the most common cause. See [Core Concepts → Cycle](concepts.md). +2. **The promotion threshold.** Even after the gate passes, a candidate's + weighted signal score must reach `promote_threshold`. Brand-new, unconfirmed, + never-accessed thoughts score low, so a high threshold promotes nothing. + +**Fix.** + +```python +from engrava.config import DreamingConfig, DreamingGates +from engrava.extensions.dreaming import DreamingExtension + +config = DreamingConfig( + enabled=True, + promote_threshold=0.4, # lower it if nothing clears the bar + gates=DreamingGates( + allow_zero_confirmation=True, # essential for single-write ingest + min_age_cycles=1, + ), +) +ext = DreamingExtension(config=config) + +# Advance current_cycle past the thoughts' created_cycle so the age gate passes: +result = await ext.run_consolidation(store, current_cycle=10) +print(result.promoted_count) +``` + +See [Dreaming](dreaming.md) for the full gate-and-signal model. + +## `EmbeddingModelMismatchError` when opening an existing database + +**Symptom.** A store that worked before now raises `EmbeddingModelMismatchError` +on startup or first embed. + +**Cause.** Engrava records the embedding **model name and dimension** in the +database the first time it embeds.
If you later open that same database with a +different model name or a different dimension, the stored vectors are +incompatible with new ones, so it refuses rather than silently mixing +dimensions (which would corrupt similarity results). + +**Fix.** Use the same embedding model the database was created with, or +re-embed the corpus under the new model. The CLI does this safely: + +```bash +engrava restore --re-embed # validates model consistency, re-embeds +``` + +See [Known Limitations → Embedding Dimension Consistency](known-limitations.md#embedding-dimension-consistency). + +## `ReferentialIntegrityError` — and you can't import it from `engrava` + +**Symptom.** Creating an edge to a thought that doesn't exist raises: + +``` +referential integrity violation: edge.to_thought_id='...' does not reference an existing thought +``` + +…and the obvious import fails: + +```python +from engrava import ReferentialIntegrityError # ImportError! +``` + +**Cause (two parts).** + +1. **The error itself** means one endpoint of an edge (`from_thought_id` or + `to_thought_id`) is not a real thought id. Create both thoughts before the + edge that links them. +2. **The import:** `ReferentialIntegrityError` is **not** re-exported from the + top-level `engrava` package. It lives in `engrava.domain.exceptions`. + +**Fix.** Import it from its real module, and ensure both endpoints exist first: + +```python +from engrava.domain.exceptions import ReferentialIntegrityError + +try: + await store.create_edge(edge) +except ReferentialIntegrityError: + ... # one endpoint is missing — create the thought, then retry +``` + +The exceptions that *are* re-exported at the top level are `EngravaError` (the +base), `ConfigError`, `EmbeddingModelMismatchError`, `ExtensionMigrationError`, +`InvalidTransitionError`, `MindQLParseError`, `ReadOnlyViolationError`, +`StaleDataError`, and `ThoughtNotFoundError`. Anything else lives under +`engrava.domain.exceptions`. + +## Still stuck? 
+ +- Re-read the relevant guide: [Core Concepts](concepts.md), + [Search](search.md), [Embeddings](guides/embeddings.md), [Dreaming](dreaming.md). +- Check the [FAQ](faq.md) for "is this supposed to work this way?" questions. +- Confirm it isn't a documented constraint in [Known Limitations](known-limitations.md). +- Open an issue with a minimal reproduction. diff --git a/docs/tutorial.md b/docs/tutorial.md new file mode 100644 index 0000000..ee33427 --- /dev/null +++ b/docs/tutorial.md @@ -0,0 +1,153 @@ +# Tutorial: a small notes memory + +The [Quick Start](quickstart.md) shows the primitives in isolation. This +tutorial builds one small, real thing end to end — a personal-notes memory you +can search by meaning and consolidate — typing each step yourself. By the end +you'll have a script that runs. + +It uses no external services: embeddings come from a tiny deterministic function +(swap in a real provider from the [Embeddings guide](guides/embeddings.md) for +production). Read [Core Concepts](concepts.md) first if "thought", "cycle", or +"reflection" are unfamiliar. + +## 1. Imports and a store + +Start with the imports and a (toy) embedding function; the in-memory store that uses it is opened in step 5. The toy vectors are hash-based, so they exercise the vector signal but carry no real semantic meaning: + +```python +import asyncio +import hashlib +import uuid + +import aiosqlite + +from engrava import ( + CallbackProvider, + EdgeRecord, + EdgeType, + LifecycleStatus, + Priority, + SqliteEngravaCore, + ThoughtRecord, + ThoughtType, +) + + +def embed(text: str) -> list[float]: + """A tiny deterministic stand-in. Use a real provider in production.""" + digest = hashlib.sha256(text.lower().encode("utf-8")).digest() + return [byte / 255.0 for byte in (digest * 2)[:32]] +``` + +## 2. Ingest some notes + +Each note becomes an `OBSERVATION` thought.
We keep the returned records so we +can link them next: + +```python +NOTES = [ + "Buy oat milk and coffee beans on the way home.", + "The espresso machine descaling is overdue.", + "Standup moved to 10am on Thursdays.", + "Coffee tastes better with freshly ground beans.", +] + + +async def ingest(store, notes): + records = [] + for index, text in enumerate(notes): + record = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OBSERVATION, + essence=text[:200], + content=text, + priority=Priority.P3, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=index, # one cycle per note here + updated_cycle=index, + source="notes", + ) + records.append(await store.create_thought(record)) + return records +``` + +With `auto_embed=True` (step 5) each note is embedded on write. + +## 3. Link related notes + +Connect notes that are about the same thing with an `ASSOCIATED` edge — this is +what makes the memory a *graph*: + +```python +async def link(store, a, b, weight=0.8): + await store.create_edge( + EdgeRecord( + edge_id=str(uuid.uuid4()), + from_thought_id=a.thought_id, + to_thought_id=b.thought_id, + edge_type=EdgeType.ASSOCIATED, + weight=weight, + created_cycle=0, + ) + ) +``` + +## 4. Search by meaning + +Ask a question; `search_hybrid` embeds the query for you and returns ranked +`(thought_id, score)` tuples, which we turn back into text: + +```python +async def search(store, query, cycle): + result = await store.search_hybrid(query, top_k=3, current_cycle=cycle) + print(f"\nQuery: {query!r} (signals: {sorted(result.backends_used)})") + for thought_id, score in result.results: + record = await store.get_thought(thought_id) + if record is not None: + print(f" {score:.3f} {record.essence}") +``` + +## 5. 
Put it together + +Wire the pieces into a `main()` and run it: + +```python +async def main(): + provider = CallbackProvider(callback=embed, dimension=32, model_name="tutorial") + async with aiosqlite.connect(":memory:") as conn: + conn.row_factory = aiosqlite.Row + store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True) + await store.ensure_schema() + + notes = await ingest(store, NOTES) + + # link the two coffee-related notes + await link(store, notes[0], notes[3]) + + await search(store, "anything about coffee?", cycle=len(NOTES)) + + total = await store.count_thoughts() + print(f"\nStored {total} notes.") + + +if __name__ == "__main__": + asyncio.run(main()) +``` + +Run it and you'll see the coffee notes rank for the coffee query, plus the total +count. That's a working memory: ingest, embed, link, search. + +The complete script is also shipped as +[`examples/notes_memory.py`](https://github.com/sovantica/engrava/blob/main/examples/notes_memory.py) +— run it directly with `python examples/notes_memory.py`. + +## Where to go next + +- **Make it an agent.** [Building a memory-backed agent](guides/agent-memory.md) + turns this into a per-turn loop (retrieve before you answer, store the reply). +- **More tasks.** The [Recipes](recipes/index.md) cover TTL, dedup, session + scoping, and scheduled consolidation. +- **Real embeddings.** Swap the toy `embed` for a provider in the + [Embeddings guide](guides/embeddings.md). +- **Consolidation.** [Dreaming](dreaming.md) turns accumulating notes into + higher-level reflections over time. diff --git a/docs/upgrade.md b/docs/upgrade.md index 0ac391a..9891dc3 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -15,16 +15,59 @@ In practice, most applications do not need a separate migration step. If your app already calls `ensure_schema()` during startup, that call performs the upgrade. 
+## Rolling upgrades (multiple workers) + +If several processes share one database file, whether you can do a **rolling** +upgrade (start new-version workers while old-version workers are still running) +depends on whether the new version changes the schema. + +How migrations work: the core schema is versioned by SQLite's `PRAGMA +user_version`. On the first `ensure_schema()`, Engrava runs each pending +`vN → vN+1` step **inside a transaction** (forward-only). Most steps are +**additive** (new columns, tables, and indexes), but some rebuild a table in +place (create a new table, copy rows, drop the old, rename) — so the on-disk +shape of a table can change across a migration. + +What that means for a rolling deploy: + +- **Patch upgrades that don't change `user_version`** (e.g. `0.3.0 → 0.3.1`) make + no schema change. Old and new workers can run side by side; roll them at will. +- **Minor upgrades that run migrations are not guaranteed to be + backward-readable.** Once the first new-version worker calls `ensure_schema()` + and a table is rebuilt, an old-version worker may no longer match the new + on-disk shape. Do **not** run old and new workers concurrently across such an + upgrade. + +Recommended procedure for a schema-changing (minor) upgrade: + +1. **Back up** the database (see [Before You Upgrade](#before-you-upgrade)). +2. **Quiesce writers** — stop the old workers (or take a brief maintenance + window) so no old-version process writes during the migration. +3. **Run the migration once** — let a single new-version process call + `ensure_schema()` (or run `engrava migrate`) to completion. +4. **Start the new workers** against the migrated database. + +When you are unsure whether a target release changes the schema, treat it as +schema-changing and follow the quiesce procedure — it is always safe. The +[compatibility matrix](#compatibility-matrix) notes which listed upgrades change +the schema. 
+ ## Before You Upgrade These steps are recommended, not required: ```bash +# Checkpoint the WAL first so the copy is complete, then back up. +sqlite3 my-data.db "PRAGMA wal_checkpoint(TRUNCATE);" cp my-data.db my-data.db.bak pip install --upgrade engrava ``` -- Create a copy of the SQLite database file before the upgrade. +- Create a copy of the SQLite database file before the upgrade. In WAL mode a + bare `cp` of just the `.db` can miss data still in the `-wal` file — checkpoint + first (above), or copy `my-data.db` together with `my-data.db-wal` and + `my-data.db-shm`. See [Backup & Recovery](backup-and-recovery.md) for all the + WAL-safe options. - Review [CHANGELOG.md](../CHANGELOG.md) for breaking changes and database notes. - If you ship custom extensions, make sure their schema migrations are included in the version you are about to install. @@ -40,8 +83,10 @@ engrava --db my-data.db migrate - `engrava info` confirms the database is readable and reports current counts. - `engrava migrate` is safe to run after upgrade; it re-checks that schema is up to date. -- `engrava gc` is optional if you want to compact archived or expired data after - the upgrade. +- `engrava gc` is optional if you want to remove archived or expired data after + the upgrade. Note that `gc` deletes rows but does **not** shrink the database + file — freed pages return to SQLite's free-list. To reclaim file size, run + `VACUUM`. See [Data lifecycle → reclaiming disk space](data-lifecycle.md#reclaiming-disk-space). ## If Migration Fails @@ -70,15 +115,32 @@ engrava --db my-data.db snapshot -o backup.snapshot.jsonl engrava --db new-old-version.db restore -i backup.snapshot.jsonl ``` +> **Note:** a snapshot exports thoughts, edges, embeddings, and actions, but +> **not** the audit journal (`journal_entry`). A database restored from a +> snapshot starts with an empty journal. 
If you need the audit history preserved, +> take a physical file backup instead — see +> [Backup & Recovery](backup-and-recovery.md). + ## Compatibility Matrix | From | To | Supported | Notes | |---|---|---|---| | 0.2.0 | 0.2.2 | Yes | Patch-level upgrade, no dedicated new extension migration layer | | 0.2.2 | 0.3.0 | Yes | Minor upgrade with extension migration tracking and upgrade CI coverage | +| 0.3.0 | 0.3.1 | Yes | Patch-level upgrade; no schema change (`user_version` unchanged) — safe to roll across workers | + +For any upgrade not listed, the rule of thumb is: **patch** upgrades within a +`0.x.*` line do not change the schema and are low-risk; **minor** upgrades +(`0.X` → `0.(X+1)`) may run schema migrations — back up first and read the +[rolling-upgrades](#rolling-upgrades-multiple-workers) note above. ## Version Notes +### 0.3.0 -> 0.3.1 + +- Patch release: **no schema change** (`user_version` stays at its 0.3.0 value), + so it is safe to roll across multiple workers without a quiesce. + ### 0.2.2 -> 0.3.0 - Extension schema migration tracking is now part of the upgrade path. diff --git a/examples/agent_loop.py b/examples/agent_loop.py new file mode 100644 index 0000000..b861486 --- /dev/null +++ b/examples/agent_loop.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 +"""A memory-backed agent loop using only engrava — no external services. + +This is the canonical "wire engrava into an agent" example: a per-turn loop +that, for each user message, + + 1. stores the message as a ``percept`` thought, + 2. retrieves relevant prior memory with ``search_hybrid``, + 3. builds a prompt from the retrieved essences and calls an LLM + (a deterministic stand-in here — swap in your real model), + 4. stores the agent's reply as an ``utterance`` thought, + 5. records the action it took (an ``ActionRecord``), + 6. advances the cycle counter, and + 7. runs dreaming consolidation every N turns.
+ +The cycle counter is the agent's logical clock: engrava never advances it for +you, so this loop owns it and increments it once per turn (see the Core +Concepts docs). On restart you would recover it from the maximum stored +``created_cycle``; this in-memory demo just starts at 0. + +No LLM and no embedding API are required: the "LLM" is a canned responder and +embeddings come from a deterministic ``CallbackProvider``. Run directly:: + + python examples/agent_loop.py +""" + +from __future__ import annotations + +import asyncio +import hashlib +import uuid + +import aiosqlite + +from engrava import ( + ActionRecord, + ActionStatus, + ActionType, + CallbackProvider, + DreamingConfig, + DreamingExtension, + DreamingGates, + LifecycleStatus, + Priority, + SqliteEngravaCore, + ThoughtRecord, + ThoughtType, + VerificationStatus, + percept, + utterance, +) + +EMBED_DIM = 64 +CONSOLIDATE_EVERY = 3 +RETRIEVE_TOP_K = 3 + + +def _deterministic_embed(text: str) -> list[float]: + """Map text to a stable pseudo-embedding (no model, fully reproducible). + + A real agent passes a real provider (sentence-transformers, OpenAI, …); + this keeps the example dependency-free and deterministic across runs. + """ + digest = hashlib.sha256(text.lower().encode("utf-8")).digest() + # Repeat the 32-byte digest to fill EMBED_DIM bytes, then scale to [0, 1]. + repeats = (EMBED_DIM // len(digest)) + 1 + stretched = (digest * repeats)[:EMBED_DIM] + return [byte / 255.0 for byte in stretched] + + +def _mock_llm(prompt: str) -> str: + """Stand in for an LLM call. Replace with your provider.""" + return f"(reply based on {prompt.count('-')} retrieved memories)" + + +async def _store_percept( + store: SqliteEngravaCore, + text: str, + cycle: int, + user_id: str, + session_id: str, + turn_index: int, +) -> ThoughtRecord: + """Persist an incoming user message as an OBSERVATION percept. 
+ + The percept metadata is extended with ``session_id`` and ``turn_index`` so + every memory is anchored to its conversation and position within it — the + keys you'd later filter or post-filter on for per-session retrieval. + """ + record = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OBSERVATION, + essence=text[:200], + content=text, + priority=Priority.P2, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=cycle, + updated_cycle=cycle, + source=user_id, + metadata={ + **percept(source_id=user_id, label="user"), + "session_id": session_id, + "turn_index": turn_index, + }, + ) + return await store.create_thought(record) + + +async def _retrieve_context(store: SqliteEngravaCore, query: str, cycle: int) -> list[str]: + """Return the essences of the most relevant prior memories.""" + result = await store.search_hybrid( + query, + query_vector=_deterministic_embed(query), + top_k=RETRIEVE_TOP_K, + current_cycle=cycle, # the agent clock — drives the recency signal + ) + essences: list[str] = [] + for thought_id, _score in result.results: + record = await store.get_thought(thought_id) + if record is not None: + essences.append(record.essence) + return essences + + +async def _store_utterance( + store: SqliteEngravaCore, + reply: str, + cycle: int, + session_id: str, + turn_index: int, +) -> ThoughtRecord: + """Persist the agent's own reply as an OUTPUT_DRAFT utterance. + + Tagged with the same ``session_id``/``turn_index`` as the percept it + answered, so a turn's input and output stay linked. 
+ """ + record = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OUTPUT_DRAFT, + essence=reply[:200], + content=reply, + priority=Priority.P3, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=cycle, + updated_cycle=cycle, + source="agent", + metadata={ + **utterance(), + "session_id": session_id, + "turn_index": turn_index, + }, + ) + return await store.create_thought(record) + + +async def _record_action(store: SqliteEngravaCore, source_thought_id: str, intent: str) -> None: + """Record that the agent took an action, linked to the source thought.""" + await store.create_action( + ActionRecord( + action_id=str(uuid.uuid4()), + source_thought_id=source_thought_id, + action_type=ActionType.MESSAGE, + intent=intent, + status=ActionStatus.CONFIRMED, + verification_status=VerificationStatus.CONFIRMED, + ) + ) + + +async def main() -> None: + """Run a few turns of a memory-backed agent over an in-memory store.""" + provider = CallbackProvider( + callback=_deterministic_embed, + dimension=EMBED_DIM, + model_name="demo-deterministic", + ) + async with aiosqlite.connect(":memory:") as conn: + conn.row_factory = aiosqlite.Row + store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True) + await store.ensure_schema() + + dreaming = DreamingExtension( + config=DreamingConfig( + enabled=True, + gates=DreamingGates(min_confirmations=0, min_age_cycles=0), + ), + ) + + user_id = "user-demo" + session_id = str(uuid.uuid4()) # one conversation; tag every memory with it + conversation = [ + "I'm planning a trip to Japan in spring.", + "What's the weather like in Kyoto in April?", + "Remind me which city I'm visiting.", + "I prefer trains over flights for getting around.", + ] + + cycle = 0 # the agent's logical clock; advance once per turn + for turn_index, user_message in enumerate(conversation): + # 1. 
store the incoming message (anchored to session + turn) + percept_thought = await _store_percept( + store, user_message, cycle, user_id, session_id, turn_index + ) + + # 2. retrieve relevant prior memory + context = await _retrieve_context(store, user_message, cycle) + + # 3. build a prompt and call the LLM (stand-in) + prompt = "Context:\n" + "\n".join(f"- {c}" for c in context) + prompt += f"\n\nUser: {user_message}\nAssistant:" + reply = _mock_llm(prompt) + + # 4. store the agent's reply (same session + turn as its percept) + await _store_utterance(store, reply, cycle, session_id, turn_index) + + # 5. record the action taken + await _record_action(store, percept_thought.thought_id, intent="answered user") + + print(f"cycle {cycle}: user={user_message!r}") + print(f" retrieved {len(context)} memory(ies); reply={reply!r}") + + # 6. advance the clock + cycle += 1 + + # 7. consolidate periodically + if cycle % CONSOLIDATE_EVERY == 0: + result = await dreaming.run_consolidation(store, current_cycle=cycle) + print(f" [dreaming] promoted={result.promoted_count}") + + total = await store.count_thoughts() + print(f"\nDone. {total} thoughts stored across {cycle} turns.") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/config.yaml b/examples/config.yaml index fb772ac..2379687 100644 --- a/examples/config.yaml +++ b/examples/config.yaml @@ -11,7 +11,7 @@ database: extensions: vector: - backend: numpy # "numpy" (brute-force) or "sqlite-vec" (ANN) + backend: numpy # "numpy" (brute-force) or "sqlite-vec" (faster KNN, not ANN) dimension: 384 # must match your embedding model dreaming: diff --git a/examples/notes_memory.py b/examples/notes_memory.py new file mode 100644 index 0000000..af9746a --- /dev/null +++ b/examples/notes_memory.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +"""A small notes memory built with engrava — the companion to the tutorial. 
+ +This is the complete, runnable version of ``docs/tutorial.md``: ingest a few +notes, embed them, link related ones with an edge, and search by meaning. It +uses a tiny deterministic embedding function so it runs with no external +services (swap in a real provider from the Embeddings guide for production). + +Run directly:: + + python examples/notes_memory.py +""" + +from __future__ import annotations + +import asyncio +import hashlib +import uuid + +import aiosqlite + +from engrava import ( + CallbackProvider, + EdgeRecord, + EdgeType, + LifecycleStatus, + Priority, + SqliteEngravaCore, + ThoughtRecord, + ThoughtType, +) + +EMBED_DIM = 32 + +NOTES = [ + "Buy oat milk and coffee beans on the way home.", + "The espresso machine descaling is overdue.", + "Standup moved to 10am on Thursdays.", + "Coffee tastes better with freshly ground beans.", +] + + +def embed(text: str) -> list[float]: + """A tiny deterministic stand-in. Use a real provider in production.""" + digest = hashlib.sha256(text.lower().encode("utf-8")).digest() + return [byte / 255.0 for byte in (digest * 2)[:EMBED_DIM]] + + +async def ingest(store: SqliteEngravaCore, notes: list[str]) -> list[ThoughtRecord]: + """Store each note as an OBSERVATION thought; return the persisted records.""" + records: list[ThoughtRecord] = [] + for index, text in enumerate(notes): + record = ThoughtRecord( + thought_id=str(uuid.uuid4()), + thought_type=ThoughtType.OBSERVATION, + essence=text[:200], + content=text, + priority=Priority.P3, + lifecycle_status=LifecycleStatus.ACTIVE, + created_cycle=index, + updated_cycle=index, + source="notes", + ) + records.append(await store.create_thought(record)) + return records + + +async def link( + store: SqliteEngravaCore, + a: ThoughtRecord, + b: ThoughtRecord, + weight: float = 0.8, +) -> None: + """Connect two related notes with an ASSOCIATED edge.""" + await store.create_edge( + EdgeRecord( + edge_id=str(uuid.uuid4()), + from_thought_id=a.thought_id, + 
to_thought_id=b.thought_id, + edge_type=EdgeType.ASSOCIATED, + weight=weight, + created_cycle=0, + ) + ) + + +async def search(store: SqliteEngravaCore, query: str, cycle: int) -> None: + """Print the top matches for a query (search embeds the query for you).""" + result = await store.search_hybrid(query, top_k=3, current_cycle=cycle) + print(f"\nQuery: {query!r} (signals: {sorted(result.backends_used)})") + for thought_id, score in result.results: + record = await store.get_thought(thought_id) + if record is not None: + print(f" {score:.3f} {record.essence}") + + +async def main() -> None: + """Build the notes memory and run a search over it.""" + provider = CallbackProvider(callback=embed, dimension=EMBED_DIM, model_name="tutorial") + async with aiosqlite.connect(":memory:") as conn: + conn.row_factory = aiosqlite.Row + store = SqliteEngravaCore(conn, embedding_provider=provider, auto_embed=True) + await store.ensure_schema() + + notes = await ingest(store, NOTES) + + # link the two coffee-related notes + await link(store, notes[0], notes[3]) + + await search(store, "anything about coffee?", cycle=len(NOTES)) + + total = await store.count_thoughts() + print(f"\nStored {total} notes.") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/engrava/config.py b/src/engrava/config.py index 9a8a96d..cb06b32 100644 --- a/src/engrava/config.py +++ b/src/engrava/config.py @@ -727,7 +727,8 @@ class EngravaConfig: database_path: Path to the SQLite database file. wal_mode: Enable WAL journal mode for concurrent reads. hooks_class: Dotted import path to a ``EngravaHooksProtocol`` class. - vector_backend: ``"numpy"`` (default brute-force) or ``"sqlite-vec"`` (ANN). + vector_backend: ``"numpy"`` (default brute-force) or ``"sqlite-vec"`` + (compact ``vec0`` vector table — faster brute-force KNN, not ANN). embedding_dimension: Dimension of embedding vectors (e.g. 384 for MiniLM). dreaming: Optional dreaming-consolidation configuration. 
embeddings: Optional embedding-provider configuration. diff --git a/src/engrava/extensions/__init__.py b/src/engrava/extensions/__init__.py index 8cc53f1..1fe81f4 100644 --- a/src/engrava/extensions/__init__.py +++ b/src/engrava/extensions/__init__.py @@ -1,6 +1,6 @@ """engrava extensions package. Extensions provide optional capabilities to engrava: -- ``vec``: ANN vector search via sqlite-vec +- ``vec``: KNN vector search via sqlite-vec (compact ``vec0`` vector table) - ``dreaming``: Periodic memory consolidation """ diff --git a/src/engrava/extensions/vector_sqlite_vec.py b/src/engrava/extensions/vector_sqlite_vec.py index 197acd0..39930d8 100644 --- a/src/engrava/extensions/vector_sqlite_vec.py +++ b/src/engrava/extensions/vector_sqlite_vec.py @@ -1,9 +1,13 @@ -"""SqliteVecSearchBackend — ANN vector search via sqlite-vec. +"""SqliteVecSearchBackend — KNN vector search via sqlite-vec. Drop-in replacement for the brute-force numpy cosine similarity search in ``SqliteEngravaCore``. When ``sqlite-vec`` is installed and its extension is loaded, ``search_similar()`` delegates to the ``vec0`` -virtual table for O(log n) approximate nearest-neighbor queries. +virtual table for k-nearest-neighbour queries. In the pinned +``sqlite-vec`` 0.1.x line ``vec0`` performs an exhaustive scan over a +compact, chunked columnar store of the vectors — faster and more +memory-efficient than the Python brute-force path, but **not** an +approximate / sub-linear index (no ANN guarantee at this version). If sqlite-vec is unavailable at runtime the store falls back to the existing numpy implementation — no crash, just a warning log. @@ -23,12 +27,12 @@ class SqliteVecSearchBackend: - """ANN vector search backend backed by a ``vec0`` virtual table. + """KNN vector search backend backed by a ``vec0`` virtual table. Lifecycle: 1. ``ensure_index(db, dimension)`` — creates the virtual table. 2. ``sync_embeddings(db)`` — backfills existing rows. - 3. 
``search(db, query_vector, ...)`` — runs ANN queries. + 3. ``search(db, query_vector, ...)`` — runs k-nearest-neighbour queries. All state is kept in SQLite; this class is stateless aside from the cached ``dimension``. @@ -110,7 +114,7 @@ async def search( top_k: int = 10, threshold: float = 0.0, ) -> list[tuple[str, float]]: - """ANN search via sqlite-vec ``vec0`` virtual table. + """k-nearest-neighbour search via the sqlite-vec ``vec0`` virtual table. The ``vec0`` table uses cosine distance (``1 - cosine_similarity``). Results are converted to cosine similarity via ``1 - distance`` @@ -174,8 +178,8 @@ async def upsert_embedding( ) -> None: """Insert or replace a single embedding in the ``vec0`` index. - Used by ``store_embedding()`` to keep the ANN index in sync - after each write to the ``embedding`` table. + Used by ``store_embedding()`` to keep the ``vec0`` vector table in + sync after each write to the ``embedding`` table. Args: db: Active database connection with sqlite-vec loaded. diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py index c85d693..bdd3659 100644 --- a/src/engrava/infrastructure/sqlite/engrava_core.py +++ b/src/engrava/infrastructure/sqlite/engrava_core.py @@ -2353,7 +2353,7 @@ async def store_embedding( ("THOUGHT", thought_id, model_name, dimension, blob, created_at, eid), ) - # Keep the vec0 ANN index in sync when a vector backend is active. + # Keep the vec0 vector table in sync when a vector backend is active. if self._vector_backend is not None: await self._vector_backend.upsert_embedding( self._db, @@ -2404,9 +2404,9 @@ async def search_similar( """Cosine similarity search — delegates to sqlite-vec if available. When a ``SqliteVecSearchBackend`` is configured (via - ``from_config`` with ``vector_backend: "sqlite-vec"``), ANN - search is used. Otherwise falls back to brute-force numpy - cosine similarity. 
+ ``from_config`` with ``vector_backend: "sqlite-vec"``), the + ``vec0`` vector table serves the query. Otherwise falls back to + brute-force numpy cosine similarity. Args: query_vector: Query embedding vector. diff --git a/tests/docs/test_docs_examples_execute.py b/tests/docs/test_docs_examples_execute.py index a1e4105..b5d7bae 100644 --- a/tests/docs/test_docs_examples_execute.py +++ b/tests/docs/test_docs_examples_execute.py @@ -37,6 +37,7 @@ EXECUTABLE_BLOCKS: tuple[tuple[str, str], ...] = ( ("README.md", "async def main() -> None:"), ("docs/quickstart.md", 'print("Store ready!")'), + ("docs/guides/migrating-from-other-memory.md", "Imported {total} thoughts."), ) diff --git a/tests/examples/test_quickstart_runs.py b/tests/examples/test_quickstart_runs.py index 14b9473..fbbe6c5 100644 --- a/tests/examples/test_quickstart_runs.py +++ b/tests/examples/test_quickstart_runs.py @@ -49,6 +49,31 @@ def test_quickstart_runs_to_completion() -> None: assert "teal" in result.stdout.lower() +def test_agent_loop_runs_to_completion() -> None: + """``agent_loop.py`` runs the full memory-backed turn loop to a clean exit. + + Unlike the quickstart it needs no local-embeddings extra — it uses a + deterministic ``CallbackProvider`` and a mock LLM — so it always runs. + """ + result = _run_example("agent_loop.py") + assert result.returncode == 0, f"non-zero exit; stderr=\n{result.stderr}" + assert "cycle 0:" in result.stdout + assert "[dreaming]" in result.stdout + assert "Done." in result.stdout + + +def test_notes_memory_runs_to_completion() -> None: + """``notes_memory.py`` (the tutorial companion) runs to a clean exit. + + Uses a deterministic ``CallbackProvider`` — no local-embeddings extra — so it + always runs. + """ + result = _run_example("notes_memory.py") + assert result.returncode == 0, f"non-zero exit; stderr=\n{result.stderr}" + assert "Query:" in result.stdout + assert "Stored 4 notes." 
in result.stdout + + def test_dreaming_benefit_script_not_shipped() -> None: """The fresh-store dreaming walkthrough script is not part of the public surface.