From d7eaff0e32916442c84fb5b1350da37c7ee23d9d Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sun, 7 Jun 2026 19:29:15 +0200
Subject: [PATCH 01/40] build(deps): raise pydantic floor to >=2.11
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The MCP server extra (engrava[mcp], v0.4.0) depends on the mcp SDK, which
requires pydantic>=2.11. Raise the floor uniformly so the whole package stays
on one pydantic minimum. Declaration change only — pyproject + uv.lock metadata
both declare >=2.11.0; no upper cap; no code change (already runs on >=2.12).
---
pyproject.toml | 2 +-
uv.lock | 6 ++++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 788e470..c88f3f3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ classifiers = [
]
dependencies = [
- "pydantic>=2.9.0",
+ "pydantic>=2.11.0",
"aiosqlite>=0.20.0",
"numpy>=1.26.0",
"click>=8.1.0",
diff --git a/uv.lock b/uv.lock
index 780c21e..7e224f3 100644
--- a/uv.lock
+++ b/uv.lock
@@ -253,7 +253,7 @@ nvtx = [
[[package]]
name = "engrava"
-version = "0.3.0"
+version = "0.3.1"
source = { editable = "." }
dependencies = [
{ name = "aiosqlite" },
@@ -271,6 +271,7 @@ dev = [
{ name = "pytest-cov" },
{ name = "ruff" },
{ name = "sentence-transformers" },
+ { name = "sqlite-vec" },
{ name = "types-pyyaml" },
]
embeddings-hf = [
@@ -299,7 +300,7 @@ requires-dist = [
{ name = "huggingface-hub", marker = "extra == 'embeddings-hf'", specifier = ">=0.24.0" },
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.0" },
{ name = "numpy", specifier = ">=1.26.0" },
- { name = "pydantic", specifier = ">=2.9.0" },
+ { name = "pydantic", specifier = ">=2.11.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" },
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=5.0.0" },
@@ -307,6 +308,7 @@ requires-dist = [
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.7.0" },
{ name = "sentence-transformers", marker = "extra == 'dev'", specifier = ">=3.0.0" },
{ name = "sentence-transformers", marker = "extra == 'embeddings-local'", specifier = ">=3.0.0" },
+ { name = "sqlite-vec", marker = "extra == 'dev'", specifier = ">=0.1.0,<0.2.0" },
{ name = "sqlite-vec", marker = "extra == 'vec'", specifier = ">=0.1.0,<0.2.0" },
{ name = "torch", marker = "extra == 'embeddings-local'", specifier = ">=2.0.0" },
{ name = "types-pyyaml", marker = "extra == 'dev'", specifier = ">=6.0" },
From 1c8ffb4b18c3ed817f6e229744e943262608413c Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sun, 7 Jun 2026 23:40:15 +0200
Subject: [PATCH 02/40] feat(mindql): add store-level execute_mindql entry
point
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
SqliteEngravaCore gains a public, policy-free async method that executes an
already-parsed MindQL query against the store's own connection and returns the
executor's MindQLResult. It applies no command-level filtering — callers that
expose a restricted command set (e.g. FIND-only over the wire) validate
query.command before calling. This lets consumers run MindQL without reaching
into the private connection.
---
.../infrastructure/sqlite/engrava_core.py | 50 +++++++++++
tests/test_mindql.py | 86 +++++++++++++++++++
2 files changed, 136 insertions(+)
diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py
index bdd3659..16c5b6d 100644
--- a/src/engrava/infrastructure/sqlite/engrava_core.py
+++ b/src/engrava/infrastructure/sqlite/engrava_core.py
@@ -62,7 +62,10 @@
from engrava.domain.models.metrics import EngravaMetrics, LatencyHistogram
from engrava.domain.models.search import HybridSearchResult
from engrava.domain.protocols.embedding_provider import EmbeddingProviderProtocol
+ from engrava.domain.protocols.hooks import MindQLExtension
from engrava.extensions.vector_sqlite_vec import SqliteVecSearchBackend
+ from engrava.mindql.executor import MindQLResult
+ from engrava.mindql.parser import MindQLQuery
logger = logging.getLogger(__name__)
@@ -2596,6 +2599,53 @@ async def search_fts(
await self._record_search_latency((_time.perf_counter() - _t_start) * 1000)
return results
+ # ------------------------------------------------------------------
+ # MindQL execution
+ # ------------------------------------------------------------------
+
+ async def execute_mindql(
+ self,
+ query: MindQLQuery,
+ *,
+ extensions: dict[str, MindQLExtension] | None = None,
+ ) -> MindQLResult:
+ """Execute an already-parsed MindQL query against this store's connection.
+
+ This is the store-level entry point for the MindQL execution
+ contract: it lets a caller whose connection is owned by this store
+ run MindQL without reaching into store internals. Parse the query
+ first with :func:`engrava.mindql.parse`.
+
+ This method performs **no command-level policy filtering** — it will
+ execute whatever command the parsed query carries (``FIND``,
+ ``COUNT``, ``SELECT``, or a registered extension command). Callers
+ that need to restrict the command set (for example an
+ over-the-wire consumer exposing ``FIND`` only) **must** validate
+ ``query.command`` *before* calling this method.
+
+ Args:
+ query: A parsed MindQL query.
+ extensions: Optional registered MindQL extension commands. When
+ omitted, no extension commands are available. Callers that
+ expose extension commands supply their own map (the store
+ holds no extension-command registry of its own).
+
+ Returns:
+ The ``MindQLResult`` produced by the executor, carrying
+ ``columns``, ``rows``, ``count``, and the executed ``command``.
+
+ Raises:
+ MindQLParseError: If the executor rejects the query at
+ execution time (for example a ``SELECT`` whose raw SQL is
+ not a ``SELECT`` statement, or a ``FIND`` referencing an
+ invalid column).
+
+ """
+ from engrava.mindql.executor import MindQLExecutor # noqa: PLC0415
+
+ executor = MindQLExecutor(self._db, extensions=extensions or {})
+ return await executor.execute(query)
+
# ------------------------------------------------------------------
# Hybrid search (FTS5 + vector + recency fusion)
# ------------------------------------------------------------------
diff --git a/tests/test_mindql.py b/tests/test_mindql.py
index a9fe517..9048b64 100644
--- a/tests/test_mindql.py
+++ b/tests/test_mindql.py
@@ -239,6 +239,92 @@ async def test_count_all(self, populated_db: aiosqlite.Connection) -> None:
result = await executor.execute(q)
assert result.count == 5
+
+# ---------------------------------------------------------------------------
+# SqliteEngravaCore.execute_mindql — store-level execution entry point
+# ---------------------------------------------------------------------------
+
+
+class TestStoreExecuteMindql:
+ """Test the store-level ``execute_mindql`` entry point.
+
+ The method is deliberately **policy-free**: it executes whatever command
+ the parsed query carries (FIND / COUNT / SELECT / extension). Command-set
+ restriction (e.g. FIND-only) is a consumer concern, not the core's.
+ """
+
+ async def test_find_executes(self, populated_db: aiosqlite.Connection) -> None:
+ store = SqliteEngravaCore(populated_db)
+ result = await store.execute_mindql(parse("FIND thoughts WHERE priority = 'P1'"))
+ assert result.command == MindQLCommand.FIND
+ assert len(result.rows) == 2
+
+ async def test_count_executes_not_rejected(
+ self,
+ populated_db: aiosqlite.Connection,
+ ) -> None:
+ # Proves the method does NOT enforce FIND-only: COUNT runs.
+ store = SqliteEngravaCore(populated_db)
+ result = await store.execute_mindql(parse("COUNT thoughts"))
+ assert result.command == MindQLCommand.COUNT
+ assert result.count == 5
+
+ async def test_select_executes_not_rejected(
+ self,
+ populated_db: aiosqlite.Connection,
+ ) -> None:
+ # Proves neutrality further: raw-SQL SELECT passthrough runs.
+ store = SqliteEngravaCore(populated_db)
+ result = await store.execute_mindql(
+ parse("SELECT thought_id FROM thought WHERE lifecycle_status = 'ACTIVE'"),
+ )
+ assert result.command == MindQLCommand.SELECT
+ assert len(result.rows) == 4
+
+ async def test_extensions_param_routes_command(
+ self,
+ populated_db: aiosqlite.Connection,
+ ) -> None:
+ # The optional extensions map is wired through to the executor.
+ received: dict[str, object] = {}
+
+ async def _handler(
+ conn: aiosqlite.Connection,
+ args: object,
+ ) -> list[dict[str, object]]:
+ received["args"] = args
+ return [{"ok": True}]
+
+ extensions = {
+ "PING": MindQLExtension(
+ command_name="PING",
+ handler=_handler,
+ description="echo",
+ ),
+ }
+ store = SqliteEngravaCore(populated_db)
+ parsed = parse("PING hello", known_extensions={"PING"})
+ result = await store.execute_mindql(parsed, extensions=extensions)
+ # The extensions map was wired through and the registered handler ran.
+ assert result.command == "PING"
+ assert result.rows == [{"ok": True}]
+ assert received["args"] is not None
+
+ async def test_invalid_find_column_raises(
+ self,
+ populated_db: aiosqlite.Connection,
+ ) -> None:
+ store = SqliteEngravaCore(populated_db)
+ q = MindQLQuery(
+ command=MindQLCommand.FIND,
+ table="thought",
+ conditions=[
+ Condition(field="nonexistent_col", operator=MindQLOperator.EQ, value="x"),
+ ],
+ )
+ with pytest.raises(MindQLParseError, match="not allowed"):
+ await store.execute_mindql(q)
+
async def test_count_with_filter(self, populated_db: aiosqlite.Connection) -> None:
executor = MindQLExecutor(populated_db)
q = parse("COUNT thoughts WHERE priority = 'P1'")
From 68a8085c601eb6343ee21622ff422f0234cdc294 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sun, 7 Jun 2026 23:40:32 +0200
Subject: [PATCH 03/40] feat(mcp): add MCP server with read tools (engrava[mcp]
extra)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add an in-tree MCP (Model Context Protocol) server exposing engrava memory
over stdio for any MCP-capable agent. Read tools: get_thought, search_memory
(hybrid), search_keywords (FTS/BM25), query_memory (MindQL FIND-only),
memory_stats — all read-only, wrapping only the public store API. query_memory
runs via the public execute_mindql entry point and keeps a FIND-only guard at
the server boundary (rejects SELECT/COUNT/extension before execution). Ships as
the optional engrava[mcp] extra (FastMCP/stdio); a plain install is unaffected.
Entry points: python -m engrava.mcp and the engrava-mcp console script.
---
pyproject.toml | 5 +
src/engrava/mcp/__init__.py | 16 ++
src/engrava/mcp/__main__.py | 12 +
src/engrava/mcp/config.py | 129 +++++++++++
src/engrava/mcp/server.py | 413 +++++++++++++++++++++++++++++++++++
tests/mcp/__init__.py | 1 +
tests/mcp/conftest.py | 97 ++++++++
tests/mcp/test_read_tools.py | 152 +++++++++++++
tests/mcp/test_server.py | 211 ++++++++++++++++++
9 files changed, 1036 insertions(+)
create mode 100644 src/engrava/mcp/__init__.py
create mode 100644 src/engrava/mcp/__main__.py
create mode 100644 src/engrava/mcp/config.py
create mode 100644 src/engrava/mcp/server.py
create mode 100644 tests/mcp/__init__.py
create mode 100644 tests/mcp/conftest.py
create mode 100644 tests/mcp/test_read_tools.py
create mode 100644 tests/mcp/test_server.py
diff --git a/pyproject.toml b/pyproject.toml
index c88f3f3..7d298bd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,7 @@ dependencies = [
[project.scripts]
engrava = "engrava.cli.main:main"
+engrava-mcp = "engrava.mcp.server:main"
[project.urls]
Homepage = "https://engrava.ai"
@@ -48,6 +49,7 @@ Changelog = "https://github.com/sovantica/engrava/blob/main/CHANGELOG.md"
[project.optional-dependencies]
vec = ["sqlite-vec>=0.1.0,<0.2.0"]
+mcp = ["mcp>=1.27.0"]
dreaming = []
embeddings-local = ["sentence-transformers>=3.0.0", "torch>=2.0.0"]
embeddings-openai = ["httpx>=0.27.0"]
@@ -76,6 +78,9 @@ dev = [
# not ANN at the pinned 0.1.x line — not just the numpy fallback. Mirrors
# the constraint declared in the ``vec`` extra above.
"sqlite-vec>=0.1.0,<0.2.0",
+ # MCP SDK so the server module and its tests are importable in CI.
+ # Mirrors the constraint declared in the ``mcp`` extra above.
+ "mcp>=1.27.0",
]
[tool.setuptools.packages.find]
diff --git a/src/engrava/mcp/__init__.py b/src/engrava/mcp/__init__.py
new file mode 100644
index 0000000..b9ae522
--- /dev/null
+++ b/src/engrava/mcp/__init__.py
@@ -0,0 +1,16 @@
+"""Model Context Protocol server for engrava.
+
+Exposes engrava's public read API as MCP tools over stdio, so MCP-aware
+agents can fetch thoughts, search memory, and run structured queries.
+The server is a standalone API consumer — it registers no engrava hooks,
+manifests, or extensions.
+
+Run it with ``python -m engrava.mcp`` or the ``engrava-mcp`` console
+script.
+"""
+
+from __future__ import annotations
+
+from engrava.mcp.server import build_server, main
+
+__all__ = ["build_server", "main"]
diff --git a/src/engrava/mcp/__main__.py b/src/engrava/mcp/__main__.py
new file mode 100644
index 0000000..d9e9406
--- /dev/null
+++ b/src/engrava/mcp/__main__.py
@@ -0,0 +1,12 @@
+"""``python -m engrava.mcp`` entry point.
+
+Delegates to :func:`engrava.mcp.server.main`, which builds the MCP server
+and serves it over stdio.
+"""
+
+from __future__ import annotations
+
+from engrava.mcp.server import main
+
+if __name__ == "__main__":
+ main()
diff --git a/src/engrava/mcp/config.py b/src/engrava/mcp/config.py
new file mode 100644
index 0000000..c5b7b4f
--- /dev/null
+++ b/src/engrava/mcp/config.py
@@ -0,0 +1,129 @@
+"""Store resolution for the engrava MCP server.
+
+The MCP server is a standalone process that wraps engrava's public async
+API. It resolves a :class:`~engrava.SqliteEngravaCore` from environment
+variables so the same server entry point can target either a fully
+configured deployment (``engrava.yaml``) or a bare database file.
+
+Two environment variables are recognised, in priority order:
+
+``ENGRAVA_MCP_CONFIG``
+ Path to an ``engrava.yaml`` file. When set, the store is built with
+ :meth:`SqliteEngravaCore.from_config`, which applies the configured
+ embedding provider, vector backend, journal, and TTL settings.
+
+``ENGRAVA_DB_PATH``
+ Path to a SQLite database file. When set (and ``ENGRAVA_MCP_CONFIG``
+ is not), a connection is opened directly and the core schema is
+ ensured. No embedding provider or vector backend is configured, so
+ hybrid search degrades to its lexical backend.
+
+:func:`resolve_store` returns a :class:`ResolvedStore` that bundles the
+store with an :meth:`~ResolvedStore.aclose` coroutine. Closing the
+``ResolvedStore`` always releases the underlying connection regardless of
+which resolution path produced it, so callers never depend on store
+connection-ownership internals.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import aiosqlite
+
+from engrava.infrastructure.sqlite.engrava_core import SqliteEngravaCore
+
+if TYPE_CHECKING:
+ from collections.abc import Awaitable, Callable
+
+#: Environment variable naming an ``engrava.yaml`` config file.
+CONFIG_ENV_VAR = "ENGRAVA_MCP_CONFIG"
+
+#: Environment variable naming a bare SQLite database file.
+DB_PATH_ENV_VAR = "ENGRAVA_DB_PATH"
+
+
+class StoreResolutionError(RuntimeError):
+ """Raised when no store can be resolved from the environment.
+
+ Args:
+ message: Human-readable description of the resolution failure.
+
+ """
+
+
+@dataclass(frozen=True)
+class ResolvedStore:
+ """A resolved store paired with its connection-cleanup coroutine.
+
+ Attributes:
+ store: The schema-ready ``SqliteEngravaCore`` to serve queries.
+ _closer: Async callback that releases the underlying connection.
+
+ """
+
+ store: SqliteEngravaCore
+ _closer: Callable[[], Awaitable[None]]
+
+ async def aclose(self) -> None:
+ """Release the underlying database connection."""
+ await self._closer()
+
+
+async def resolve_store() -> ResolvedStore:
+ """Resolve a store from the environment.
+
+ Resolution honours :data:`CONFIG_ENV_VAR` first, then
+ :data:`DB_PATH_ENV_VAR`.
+
+ Returns:
+ A :class:`ResolvedStore` whose connection is released by
+ :meth:`ResolvedStore.aclose`.
+
+ Raises:
+ StoreResolutionError: If neither environment variable is set.
+ ConfigError: If the configured ``engrava.yaml`` is invalid.
+
+ """
+ config_path = os.environ.get(CONFIG_ENV_VAR)
+ if config_path:
+ store = await SqliteEngravaCore.from_config(config_path)
+ return ResolvedStore(store=store, _closer=store.close)
+
+ db_path = os.environ.get(DB_PATH_ENV_VAR)
+ if db_path:
+ return await _resolve_from_db_path(db_path)
+
+ msg = (
+ "No engrava store configured. Set "
+ f"{CONFIG_ENV_VAR} to an engrava.yaml path or "
+ f"{DB_PATH_ENV_VAR} to a SQLite database path."
+ )
+ raise StoreResolutionError(msg)
+
+
+async def _resolve_from_db_path(db_path: str) -> ResolvedStore:
+ """Open a database file and build a store over it.
+
+ Args:
+ db_path: Filesystem path to a SQLite database file.
+
+ Returns:
+ A :class:`ResolvedStore` whose cleanup closes the opened
+ connection.
+
+ """
+ connection = await aiosqlite.connect(str(Path(db_path)))
+ try:
+ await connection.execute("PRAGMA journal_mode=WAL")
+ await connection.execute("PRAGMA foreign_keys=ON")
+ connection.row_factory = aiosqlite.Row
+ store = SqliteEngravaCore(connection)
+ await store.ensure_schema()
+ except Exception:
+ await connection.close()
+ raise
+ return ResolvedStore(store=store, _closer=connection.close)
diff --git a/src/engrava/mcp/server.py b/src/engrava/mcp/server.py
new file mode 100644
index 0000000..47cfb45
--- /dev/null
+++ b/src/engrava/mcp/server.py
@@ -0,0 +1,413 @@
+"""FastMCP server exposing engrava's read API as agent tools.
+
+This module builds a Model Context Protocol server that wraps the public
+async read API of :class:`~engrava.SqliteEngravaCore`. It is an *API
+consumer*, not an engrava extension: it registers no hooks, manifests, or
+MindQL extension commands. Think of it as a sibling of the command-line
+interface that speaks MCP over stdio.
+
+Five read-only tools are exposed:
+
+``get_thought``
+ Fetch a single thought by identifier.
+``search_memory``
+ Hybrid (lexical + vector + recency) ranked search.
+``search_keywords``
+ Pure full-text BM25 keyword search.
+``query_memory``
+ Structured ``FIND`` queries in the MindQL query language. Only the
+ ``FIND`` command is accepted; raw-SQL passthrough and every other
+ command are rejected.
+``memory_stats``
+ Aggregate counts and store-health metrics.
+
+The active store is supplied to tool calls through a :class:`StoreProvider`
+that the server's lifespan populates on startup and clears on shutdown.
+Each tool delegates to a module-level implementation function that takes an
+explicit store argument, which keeps the query logic unit-testable without a
+running server.
+"""
+
+from __future__ import annotations
+
+from contextlib import asynccontextmanager
+from dataclasses import replace
+from typing import TYPE_CHECKING, Any
+
+import anyio
+from mcp.server.fastmcp import FastMCP
+from mcp.types import ToolAnnotations
+
+from engrava.mcp.config import ResolvedStore, resolve_store
+from engrava.mindql.parser import MindQLCommand, MindQLQuery, parse
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator
+
+ from engrava.infrastructure.sqlite.engrava_core import SqliteEngravaCore
+
+#: Server name advertised to MCP clients.
+SERVER_NAME = "engrava"
+
+#: Default number of results returned by search tools.
+DEFAULT_TOP_K = 10
+
+_READ_ONLY = ToolAnnotations(readOnlyHint=True)
+
+
+class StoreNotReadyError(RuntimeError):
+ """Raised when a tool is invoked before a store has been provided.
+
+ This indicates a lifecycle bug — tools should only run while the
+ server lifespan is active.
+ """
+
+
+class UnsupportedQueryError(ValueError):
+ """Raised when ``query_memory`` receives a non-``FIND`` command.
+
+ The MCP read surface deliberately accepts only the MindQL ``FIND``
+ command. Raw-SQL passthrough (``SELECT``), aggregate ``COUNT``, and
+ extension commands are rejected so the tool cannot be used to run
+ arbitrary statements against the database.
+
+ Args:
+ command: The rejected command verb.
+
+ """
+
+ def __init__(self, command: str) -> None:
+ self.command = command
+ super().__init__(
+ f"query_memory accepts only FIND queries; received {command!r}. "
+ "Use the FIND command, for example: "
+ "FIND thoughts WHERE lifecycle_status = 'ACTIVE' LIMIT 10"
+ )
+
+
+class StoreProvider:
+ """Holds the active store for the lifetime of a running server.
+
+ The server lifespan calls :meth:`set` on startup and :meth:`clear`
+ on shutdown. Registered tools call :meth:`require` to obtain the
+ store, which raises if the server is not currently serving.
+ """
+
+ def __init__(self) -> None:
+ self._store: SqliteEngravaCore | None = None
+
+ def set(self, store: SqliteEngravaCore) -> None:
+ """Record the active store.
+
+ Args:
+ store: The store that tools should query.
+
+ """
+ self._store = store
+
+ def clear(self) -> None:
+ """Forget the active store after shutdown."""
+ self._store = None
+
+ def require(self) -> SqliteEngravaCore:
+ """Return the active store.
+
+ Returns:
+ The store recorded by the lifespan.
+
+ Raises:
+ StoreNotReadyError: If no store is currently active.
+
+ """
+ if self._store is None:
+ msg = "No active engrava store; the server lifespan is not running."
+ raise StoreNotReadyError(msg)
+ return self._store
+
+
+async def get_thought_impl(store: SqliteEngravaCore, thought_id: str) -> dict[str, Any]:
+ """Fetch a single thought by identifier.
+
+ Args:
+ store: The store to query.
+ thought_id: Identifier of the thought to retrieve.
+
+ Returns:
+ A dict with a ``found`` flag and a ``thought`` entry. ``thought``
+ is the JSON-serialisable thought when it exists, otherwise
+ ``None``.
+
+ """
+ thought = await store.get_thought(thought_id)
+ if thought is None:
+ return {"found": False, "thought": None}
+ return {"found": True, "thought": thought.model_dump(mode="json")}
+
+
+async def search_memory_impl(
+ store: SqliteEngravaCore,
+ query_text: str,
+ *,
+ top_k: int = DEFAULT_TOP_K,
+ include_reflections: bool = True,
+) -> dict[str, Any]:
+ """Run a hybrid ranked search over stored memory.
+
+ Args:
+ store: The store to query.
+ query_text: Natural-language query text.
+ top_k: Maximum number of ranked results to return.
+ include_reflections: Whether consolidated reflection thoughts may
+ appear in the results.
+
+ Returns:
+ A dict with a ``results`` list of ``{"thought_id", "score"}``
+ entries and a ``backends_used`` list naming the search backends
+ that were available for the query.
+
+ """
+ result = await store.search_hybrid(
+ query_text,
+ top_k=top_k,
+ include_reflections=include_reflections,
+ )
+ return {
+ "results": [
+ {"thought_id": thought_id, "score": score} for thought_id, score in result.results
+ ],
+ "backends_used": sorted(result.backends_used),
+ }
+
+
+async def search_keywords_impl(
+ store: SqliteEngravaCore,
+ query: str,
+ *,
+ top_k: int = DEFAULT_TOP_K,
+) -> dict[str, Any]:
+ """Run a full-text BM25 keyword search over stored memory.
+
+ Args:
+ store: The store to query.
+ query: Full-text query string (supports ``AND``, ``OR``, ``NOT``
+ and prefix ``*`` operators).
+ top_k: Maximum number of ranked results to return.
+
+ Returns:
+ A dict with a ``results`` list of ``{"thought_id", "score"}``
+ entries ordered by descending relevance.
+
+ """
+ matches = await store.search_fts(query, top_k=top_k)
+ return {
+ "results": [{"thought_id": thought_id, "score": score} for thought_id, score in matches],
+ }
+
+
+async def query_memory_impl(
+ store: SqliteEngravaCore,
+ query: str,
+ *,
+ limit: int | None = None,
+) -> dict[str, Any]:
+ """Run a MindQL ``FIND`` query over stored memory.
+
+ Only the ``FIND`` command is accepted. The grammar is
+ ``FIND
WHERE '' [LIMIT n]``.
+
+ Args:
+ store: The store to query.
+ query: A MindQL ``FIND`` query string.
+ limit: Optional row cap. When provided, it overrides any
+ ``LIMIT`` clause present in ``query``.
+
+ Returns:
+ A dict with the result ``columns`` and matching ``rows``.
+
+ Raises:
+ UnsupportedQueryError: If the query is not a ``FIND`` command.
+ MindQLParseError: If the query is malformed.
+
+ """
+ parsed = parse(query)
+ if parsed.command is not MindQLCommand.FIND:
+ raise UnsupportedQueryError(parsed.command.value)
+
+ effective = parsed if limit is None else _with_limit(parsed, limit)
+
+ # Execute via the public store-level entry point. The store owns the
+ # connection; this consumer must not reach into it. The FIND-only guard
+ # above is intentionally kept here (a consumer exposure policy), and no
+ # ``extensions`` map is passed — both keep the over-the-wire surface
+ # restricted to FIND.
+ result = await store.execute_mindql(effective)
+ return {"columns": result.columns, "rows": result.rows}
+
+
+async def memory_stats_impl(store: SqliteEngravaCore) -> dict[str, Any]:
+ """Return aggregate counts and store-health metrics.
+
+ Args:
+ store: The store to inspect.
+
+ Returns:
+ A dict with the live ``thought_count`` plus a ``metrics`` block
+ carrying thought/edge counts and a storage-byte total.
+
+ """
+ thought_count = await store.count_thoughts()
+ metrics = await store.metrics()
+ return {
+ "thought_count": thought_count,
+ "metrics": {
+ "thoughts": {
+ "total": metrics.thoughts.total,
+ "by_type": metrics.thoughts.by_type,
+ "by_status": metrics.thoughts.by_status,
+ },
+ "edges": {
+ "total": metrics.edges.total,
+ "by_type": metrics.edges.by_type,
+ },
+ "storage_total_bytes": metrics.storage.total_bytes,
+ },
+ }
+
+
+def _with_limit(parsed: MindQLQuery, limit: int) -> MindQLQuery:
+ """Return a copy of a parsed query with its ``limit`` replaced.
+
+ Args:
+ parsed: The parsed ``MindQLQuery``.
+ limit: The row cap to apply.
+
+ Returns:
+ A new ``MindQLQuery`` identical to ``parsed`` but with ``limit``
+ set to the supplied value.
+
+ """
+ return replace(parsed, limit=limit)
+
+
+def build_server() -> FastMCP:
+ """Build the engrava MCP server with all read tools registered.
+
+ The returned server resolves its store from the environment when its
+ lifespan starts and releases the connection when the lifespan ends.
+
+ Returns:
+ A configured :class:`FastMCP` server ready to ``run()``.
+
+ """
+ provider = StoreProvider()
+
+ @asynccontextmanager
+ async def lifespan(_server: FastMCP) -> AsyncIterator[None]:
+ resolved: ResolvedStore = await resolve_store()
+ provider.set(resolved.store)
+ try:
+ yield
+ finally:
+ provider.clear()
+ # Shield the connection teardown so it runs to completion even
+ # when the surrounding server task is being cancelled (as it is
+ # on stdio EOF). Without the shield the database worker thread
+ # can outlive the event loop and raise on a late callback.
+ with anyio.CancelScope(shield=True):
+ await resolved.aclose()
+
+ server: FastMCP = FastMCP(
+ SERVER_NAME,
+ instructions=(
+ "Read-only access to an engrava agent-memory store: fetch "
+ "thoughts, run hybrid and keyword search, run structured "
+ "MindQL FIND queries, and read store statistics."
+ ),
+ lifespan=lifespan,
+ )
+ register_tools(server, provider)
+ return server
+
+
+def register_tools(server: FastMCP, provider: StoreProvider) -> None:
+ """Register the five read tools on a server.
+
+ Args:
+ server: The server to register tools on.
+ provider: Supplies the active store to each tool at call time.
+
+ """
+
+ @server.tool(
+ name="get_thought",
+ description="Fetch a single thought by its identifier.",
+ annotations=_READ_ONLY,
+ )
+ async def get_thought(thought_id: str) -> dict[str, Any]:
+ return await get_thought_impl(provider.require(), thought_id)
+
+ @server.tool(
+ name="search_memory",
+ description=(
+ "Hybrid ranked search (lexical + vector + recency) over stored "
+ "memory. Returns ranked thought identifiers with scores and the "
+ "search backends that were available."
+ ),
+ annotations=_READ_ONLY,
+ )
+ async def search_memory(
+ query_text: str,
+ top_k: int = DEFAULT_TOP_K,
+ *,
+ include_reflections: bool = True,
+ ) -> dict[str, Any]:
+ return await search_memory_impl(
+ provider.require(),
+ query_text,
+ top_k=top_k,
+ include_reflections=include_reflections,
+ )
+
+ @server.tool(
+ name="search_keywords",
+ description=(
+ "Full-text BM25 keyword search over stored memory. Returns ranked "
+ "thought identifiers with scores."
+ ),
+ annotations=_READ_ONLY,
+ )
+ async def search_keywords(query: str, top_k: int = DEFAULT_TOP_K) -> dict[str, Any]:
+ return await search_keywords_impl(provider.require(), query, top_k=top_k)
+
+ @server.tool(
+ name="query_memory",
+ description=(
+ "Run a structured MindQL FIND query over stored memory, e.g. "
+ "\"FIND thoughts WHERE lifecycle_status = 'ACTIVE' LIMIT 10\". "
+ "Only the FIND command is supported."
+ ),
+ annotations=_READ_ONLY,
+ )
+ async def query_memory(query: str, limit: int | None = None) -> dict[str, Any]:
+ return await query_memory_impl(provider.require(), query, limit=limit)
+
+ @server.tool(
+ name="memory_stats",
+ description=(
+ "Return aggregate statistics about the memory store: thought and "
+ "edge counts and total storage size."
+ ),
+ annotations=_READ_ONLY,
+ )
+ async def memory_stats() -> dict[str, Any]:
+ return await memory_stats_impl(provider.require())
+
+
+def main() -> None:
+ """Run the engrava MCP server over stdio.
+
+ Builds the server and serves it on the stdio transport (the FastMCP
+ default). This is the console-script and ``python -m engrava.mcp``
+ entry point.
+ """
+ build_server().run()
diff --git a/tests/mcp/__init__.py b/tests/mcp/__init__.py
new file mode 100644
index 0000000..0cc12e6
--- /dev/null
+++ b/tests/mcp/__init__.py
@@ -0,0 +1 @@
+"""Tests for the engrava MCP server."""
diff --git a/tests/mcp/conftest.py b/tests/mcp/conftest.py
new file mode 100644
index 0000000..45e8078
--- /dev/null
+++ b/tests/mcp/conftest.py
@@ -0,0 +1,97 @@
+"""Shared fixtures for the MCP server tests.
+
+Builds a real in-memory ``SqliteEngravaCore`` (no vector backend, so
+hybrid search degrades to its lexical backend) seeded with a couple of
+thoughts. Tools are exercised directly against this store.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import aiosqlite
+import pytest
+
+from engrava import (
+ CoreThoughtRecord,
+ LifecycleStatus,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtType,
+)
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator
+
+#: Identifiers of the thoughts seeded by the ``store`` fixture.
+SEEDED_THOUGHT_IDS = ("thought-alpha", "thought-beta")
+
+
+def make_thought(
+ thought_id: str,
+ *,
+ essence: str,
+ content: str,
+ lifecycle_status: LifecycleStatus = LifecycleStatus.ACTIVE,
+ priority: Priority = Priority.P2,
+) -> CoreThoughtRecord:
+ """Build a core thought record for seeding.
+
+ Args:
+ thought_id: Stable identifier for the thought.
+ essence: Compact canonical text.
+ content: Full stored content.
+ lifecycle_status: Lifecycle state to persist.
+ priority: Priority level.
+
+ Returns:
+ A constructed ``CoreThoughtRecord``.
+
+ """
+ return CoreThoughtRecord(
+ thought_id=thought_id,
+ thought_type=ThoughtType.BELIEF,
+ essence=essence,
+ content=content,
+ priority=priority,
+ lifecycle_status=lifecycle_status,
+ created_cycle=0,
+ updated_cycle=0,
+ source="test",
+ )
+
+
+@pytest.fixture
+async def store() -> AsyncIterator[SqliteEngravaCore]:
+ """Yield a seeded in-memory store with no vector backend.
+
+ Yields:
+ A ``SqliteEngravaCore`` containing two active thoughts.
+
+ """
+ connection = await aiosqlite.connect(":memory:")
+ connection.row_factory = aiosqlite.Row
+ await connection.execute("PRAGMA foreign_keys=ON")
+ backend = SqliteEngravaCore(connection)
+ await backend.ensure_schema()
+
+ await backend.create_thought(
+ make_thought(
+ "thought-alpha",
+ essence="Coffee brewing notes",
+ content="Pour-over coffee extracts best between 90 and 96 degrees.",
+ )
+ )
+ await backend.create_thought(
+ make_thought(
+ "thought-beta",
+ essence="Tea steeping notes",
+ content="Green tea steeps best below boiling to avoid bitterness.",
+ priority=Priority.P1,
+ )
+ )
+
+ try:
+ yield backend
+ finally:
+ await connection.close()
diff --git a/tests/mcp/test_read_tools.py b/tests/mcp/test_read_tools.py
new file mode 100644
index 0000000..aa2cd86
--- /dev/null
+++ b/tests/mcp/test_read_tools.py
@@ -0,0 +1,152 @@
+"""Unit tests for the MCP read-tool implementations.
+
+Each tool implementation is exercised directly against a seeded
+in-memory store (see ``conftest.store``).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+from engrava.mcp.server import (
+ DEFAULT_TOP_K,
+ StoreNotReadyError,
+ StoreProvider,
+ UnsupportedQueryError,
+ get_thought_impl,
+ memory_stats_impl,
+ query_memory_impl,
+ search_keywords_impl,
+ search_memory_impl,
+)
+from engrava.mindql.parser import MindQLParseError
+
+if TYPE_CHECKING:
+ from engrava.infrastructure.sqlite.engrava_core import SqliteEngravaCore
+
+
+class TestGetThought:
+ """Tests for the ``get_thought`` tool."""
+
+ async def test_returns_serialised_thought(self, store: SqliteEngravaCore) -> None:
+ result = await get_thought_impl(store, "thought-alpha")
+ assert result["found"] is True
+ thought = result["thought"]
+ assert thought is not None
+ assert thought["thought_id"] == "thought-alpha"
+ assert thought["essence"] == "Coffee brewing notes"
+ # The payload must be JSON-friendly (the enum serialised to its value).
+ assert thought["lifecycle_status"] == "ACTIVE"
+
+ async def test_missing_thought_reports_not_found(self, store: SqliteEngravaCore) -> None:
+ result = await get_thought_impl(store, "does-not-exist")
+ assert result == {"found": False, "thought": None}
+
+
+class TestSearchMemory:
+ """Tests for the ``search_memory`` tool."""
+
+ async def test_returns_results_and_backends(self, store: SqliteEngravaCore) -> None:
+ result = await search_memory_impl(store, "coffee")
+
+ assert [entry["thought_id"] for entry in result["results"]] == ["thought-alpha"]
+ assert all(isinstance(entry["score"], float) for entry in result["results"])
+ # No embedding provider / vector backend was configured, so the
+ # vector backend must not appear in the diagnostics.
+ assert "vector" not in result["backends_used"]
+ assert "fts5" in result["backends_used"]
+
+ async def test_respects_top_k(self, store: SqliteEngravaCore) -> None:
+ result = await search_memory_impl(store, "notes", top_k=1)
+ assert len(result["results"]) <= 1
+
+ async def test_exclude_reflections_flag_is_passed(self, store: SqliteEngravaCore) -> None:
+ result = await search_memory_impl(store, "tea", include_reflections=False)
+ assert [entry["thought_id"] for entry in result["results"]] == ["thought-beta"]
+
+
+class TestSearchKeywords:
+ """Tests for the ``search_keywords`` tool."""
+
+ async def test_returns_ranked_matches(self, store: SqliteEngravaCore) -> None:
+ result = await search_keywords_impl(store, "tea")
+ assert [entry["thought_id"] for entry in result["results"]] == ["thought-beta"]
+ assert all(isinstance(entry["score"], float) for entry in result["results"])
+
+ async def test_no_match_returns_empty(self, store: SqliteEngravaCore) -> None:
+ result = await search_keywords_impl(store, "spaceship")
+ assert result["results"] == []
+
+
+class TestQueryMemory:
+ """Tests for the ``query_memory`` MindQL tool (FIND only)."""
+
+ async def test_find_returns_rows(self, store: SqliteEngravaCore) -> None:
+ result = await query_memory_impl(
+ store,
+ "FIND thoughts WHERE lifecycle_status = 'ACTIVE'",
+ )
+ ids = {row["thought_id"] for row in result["rows"]}
+ assert ids == {"thought-alpha", "thought-beta"}
+ assert "thought_id" in result["columns"]
+
+ async def test_find_with_explicit_limit_override(self, store: SqliteEngravaCore) -> None:
+ result = await query_memory_impl(
+ store,
+ "FIND thoughts WHERE lifecycle_status = 'ACTIVE' LIMIT 5",
+ limit=1,
+ )
+ assert len(result["rows"]) == 1
+
+ async def test_select_is_rejected(self, store: SqliteEngravaCore) -> None:
+ with pytest.raises(UnsupportedQueryError) as excinfo:
+ await query_memory_impl(store, "SELECT * FROM thought")
+ assert excinfo.value.command == "SELECT"
+
+ async def test_count_is_rejected(self, store: SqliteEngravaCore) -> None:
+ with pytest.raises(UnsupportedQueryError) as excinfo:
+ await query_memory_impl(store, "COUNT thoughts")
+ assert excinfo.value.command == "COUNT"
+
+ async def test_malformed_query_raises_parse_error(self, store: SqliteEngravaCore) -> None:
+ with pytest.raises(MindQLParseError):
+ await query_memory_impl(store, "")
+
+
+class TestMemoryStats:
+ """Tests for the ``memory_stats`` tool."""
+
+ async def test_reports_counts(self, store: SqliteEngravaCore) -> None:
+ result = await memory_stats_impl(store)
+ assert result["thought_count"] == 2
+ assert result["metrics"]["thoughts"]["total"] == 2
+ assert result["metrics"]["edges"]["total"] == 0
+ assert result["metrics"]["thoughts"]["by_status"]["ACTIVE"] == 2
+ assert isinstance(result["metrics"]["storage_total_bytes"], int)
+
+
+class TestStoreProvider:
+ """Tests for the ``StoreProvider`` lifecycle holder."""
+
+ def test_require_without_store_raises(self) -> None:
+ provider = StoreProvider()
+ with pytest.raises(StoreNotReadyError):
+ provider.require()
+
+ def test_set_then_require(self, store: SqliteEngravaCore) -> None:
+ provider = StoreProvider()
+ provider.set(store)
+ assert provider.require() is store
+
+ def test_clear_resets(self, store: SqliteEngravaCore) -> None:
+ provider = StoreProvider()
+ provider.set(store)
+ provider.clear()
+ with pytest.raises(StoreNotReadyError):
+ provider.require()
+
+
+def test_default_top_k_is_ten() -> None:
+ assert DEFAULT_TOP_K == 10
diff --git a/tests/mcp/test_server.py b/tests/mcp/test_server.py
new file mode 100644
index 0000000..18a5ed9
--- /dev/null
+++ b/tests/mcp/test_server.py
@@ -0,0 +1,211 @@
+"""End-to-end and store-resolution tests for the MCP server.
+
+Exercises the server through the in-memory MCP client transport (so the
+lifespan, tool registration, and JSON serialisation all run for real) and
+covers store resolution from environment variables.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import aiosqlite
+import pytest
+from mcp.shared.memory import create_connected_server_and_client_session as connect_client
+
+from engrava import (
+ CoreThoughtRecord,
+ LifecycleStatus,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtType,
+)
+from engrava.mcp import build_server
+from engrava.mcp.config import (
+ CONFIG_ENV_VAR,
+ DB_PATH_ENV_VAR,
+ ResolvedStore,
+ StoreResolutionError,
+ resolve_store,
+)
+
+if TYPE_CHECKING:
+ from pathlib import Path
+
+EXPECTED_TOOL_NAMES = frozenset(
+ {"get_thought", "search_memory", "search_keywords", "query_memory", "memory_stats"}
+)
+
+
+async def _seed_database(path: Path) -> None:
+ """Create a database file with a single active thought.
+
+ Args:
+ path: Filesystem path for the new database.
+
+ """
+ connection = await aiosqlite.connect(str(path))
+ connection.row_factory = aiosqlite.Row
+ store = SqliteEngravaCore(connection)
+ await store.ensure_schema()
+ await store.create_thought(
+ CoreThoughtRecord(
+ thought_id="seeded-1",
+ thought_type=ThoughtType.BELIEF,
+ essence="Persisted note",
+ content="A note that survives a fresh connection.",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=0,
+ updated_cycle=0,
+ source="test",
+ )
+ )
+ await connection.close()
+
+
+class TestServerEndToEnd:
+ """Drive the server through a connected in-memory client."""
+
+ async def test_lists_exactly_five_tools(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "tools.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ listed = await client.list_tools()
+
+ assert {tool.name for tool in listed.tools} == EXPECTED_TOOL_NAMES
+ assert all(
+ tool.annotations is not None and tool.annotations.readOnlyHint for tool in listed.tools
+ )
+
+ async def test_get_thought_round_trip(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "seeded.db"
+ await _seed_database(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ result = await client.call_tool("get_thought", {"thought_id": "seeded-1"})
+
+ assert result.isError is False
+ assert result.structuredContent is not None
+ assert result.structuredContent["found"] is True
+ assert result.structuredContent["thought"]["thought_id"] == "seeded-1"
+
+ async def test_query_memory_rejects_select_over_transport(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "reject.db"
+ await _seed_database(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ result = await client.call_tool(
+ "query_memory",
+ {"query": "SELECT * FROM thought"},
+ )
+
+ assert result.isError is True
+ assert "FIND" in result.content[0].text # type: ignore[union-attr]
+
+ async def test_memory_stats_reports_seeded_count(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "stats.db"
+ await _seed_database(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ result = await client.call_tool("memory_stats", {})
+
+ assert result.structuredContent is not None
+ assert result.structuredContent["thought_count"] == 1
+
+
+class TestStoreResolution:
+ """Tests for environment-driven store resolution."""
+
+ async def test_db_path_resolution(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "resolve.db"
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ resolved = await resolve_store()
+ assert isinstance(resolved, ResolvedStore)
+ try:
+ assert await resolved.store.count_thoughts() == 0
+ finally:
+ await resolved.aclose()
+ assert db_path.exists()
+
+ async def test_config_resolution(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "from_config.db"
+ config_path = tmp_path / "engrava.yaml"
+ config_path.write_text(
+ f"database:\n path: {db_path.as_posix()}\n",
+ encoding="utf-8",
+ )
+ monkeypatch.setenv(CONFIG_ENV_VAR, str(config_path))
+ monkeypatch.delenv(DB_PATH_ENV_VAR, raising=False)
+
+ resolved = await resolve_store()
+ try:
+ assert await resolved.store.count_thoughts() == 0
+ finally:
+ await resolved.aclose()
+
+ async def test_config_takes_priority_over_db_path(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ config_db = tmp_path / "config_priority.db"
+ config_path = tmp_path / "priority.yaml"
+ config_path.write_text(
+ f"database:\n path: {config_db.as_posix()}\n",
+ encoding="utf-8",
+ )
+ monkeypatch.setenv(CONFIG_ENV_VAR, str(config_path))
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "ignored.db"))
+
+ resolved = await resolve_store()
+ await resolved.aclose()
+ # The config path's database is the one that gets created.
+ assert config_db.exists()
+ assert not (tmp_path / "ignored.db").exists()
+
+ async def test_no_configuration_raises(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ ) -> None:
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(DB_PATH_ENV_VAR, raising=False)
+ with pytest.raises(StoreResolutionError):
+ await resolve_store()
From 79d7604aa492aeab8b2e8dacbbaab1231738d5c7 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 8 Jun 2026 12:15:14 +0200
Subject: [PATCH 04/40] feat(mcp): add write tools, opt-in read-only mode, and
per-tool safety annotations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Expose store_thought, update_thought and link_thoughts so an MCP agent can
write to the memory graph, alongside the existing read tools. Each tool wraps
only the public store API.
Add an opt-in ENGRAVA_MCP_READ_ONLY flag (truthy: 1/true/yes): when set, the
write tools are not registered, so the server advertises a retrieval-only
surface rather than erroring on call.
Annotate every tool with MCP safety hints so clients can reason about each
call. link_thoughts is non-idempotent: an edge is unique per
(source, target, type), so repeating an identical link is rejected rather than
converging — its annotation and docstring state this explicitly.
---
src/engrava/mcp/server.py | 358 +++++++++++++++++++++++++++++++++-
tests/mcp/test_gating.py | 85 ++++++++
tests/mcp/test_server.py | 100 +++++++++-
tests/mcp/test_write_tools.py | 219 +++++++++++++++++++++
4 files changed, 749 insertions(+), 13 deletions(-)
create mode 100644 tests/mcp/test_gating.py
create mode 100644 tests/mcp/test_write_tools.py
diff --git a/src/engrava/mcp/server.py b/src/engrava/mcp/server.py
index 47cfb45..b69fc63 100644
--- a/src/engrava/mcp/server.py
+++ b/src/engrava/mcp/server.py
@@ -21,15 +21,31 @@
``memory_stats``
Aggregate counts and store-health metrics.
+Three write tools complete the surface:
+
+``store_thought``
+ Create a new thought node.
+``update_thought``
+ Mutate selected fields of an existing thought.
+``link_thoughts``
+ Create a typed edge between two existing thoughts.
+
+The write tools are gated by the :data:`READ_ONLY_ENV_VAR` environment
+variable. When it is set to a truthy value the write tools are not
+registered at all, so a read-only deployment never advertises them to
+clients. The read tools are always available.
+
The active store is supplied to tool calls through a :class:`StoreProvider`
that the server's lifespan populates on startup and clears on shutdown.
Each tool delegates to a module-level implementation function that takes an
-explicit store argument, which keeps the query logic unit-testable without a
-running server.
+explicit store argument, which keeps the query and mutation logic
+unit-testable without a running server.
"""
from __future__ import annotations
+import os
+import uuid
from contextlib import asynccontextmanager
from dataclasses import replace
from typing import TYPE_CHECKING, Any
@@ -38,6 +54,9 @@
from mcp.server.fastmcp import FastMCP
from mcp.types import ToolAnnotations
+from engrava.domain.enums import EdgeType, LifecycleStatus, Priority, ThoughtType
+from engrava.domain.models.edge import EdgeRecord
+from engrava.domain.models.thought import ThoughtRecord
from engrava.mcp.config import ResolvedStore, resolve_store
from engrava.mindql.parser import MindQLCommand, MindQLQuery, parse
@@ -52,8 +71,36 @@
#: Default number of results returned by search tools.
DEFAULT_TOP_K = 10
+#: Default edge weight when a caller does not supply one.
+DEFAULT_EDGE_WEIGHT = 1.0
+
+#: Cycle counter assigned to thoughts and edges created through the MCP
+#: write surface. This API consumer has no notion of a cognitive cycle
+#: clock, so new records start at the origin cycle.
+INITIAL_CYCLE = 0
+
+#: Environment variable that, when truthy, suppresses registration of the
+#: write tools so the server exposes a read-only surface.
+READ_ONLY_ENV_VAR = "ENGRAVA_MCP_READ_ONLY"
+
+#: Values that enable read-only mode (compared case-insensitively after
+#: stripping surrounding whitespace). Any other value — including unset
+#: or empty — leaves the full read and write surface enabled.
+READ_ONLY_TRUTHY_VALUES = frozenset({"1", "true", "yes"})
+
_READ_ONLY = ToolAnnotations(readOnlyHint=True)
+#: Annotation for a non-idempotent, non-destructive write. Covers both
+#: creating a new thought node (repeating the call creates another node)
+#: and creating a typed edge (an edge is unique per source/target/type, so
+#: repeating an identical link is rejected rather than converging) — neither
+#: is safe for a client to blindly retry.
+_WRITE = ToolAnnotations(readOnlyHint=False, destructiveHint=False, idempotentHint=False)
+
+#: Annotation for an idempotent, non-destructive write (updating a thought —
+#: repeating with the same arguments converges on the same end state).
+_WRITE_IDEMPOTENT = ToolAnnotations(readOnlyHint=False, destructiveHint=False, idempotentHint=True)
+
class StoreNotReadyError(RuntimeError):
"""Raised when a tool is invoked before a store has been provided.
@@ -274,6 +321,202 @@ async def memory_stats_impl(store: SqliteEngravaCore) -> dict[str, Any]:
}
+async def store_thought_impl(
+ store: SqliteEngravaCore,
+ essence: str,
+ content: str,
+ *,
+ thought_type: ThoughtType = ThoughtType.NOTE,
+ priority: Priority = Priority.P3,
+ source: str = "agent",
+ confidence: float | None = None,
+ thought_id: str | None = None,
+ deduplicate: bool = False,
+) -> dict[str, Any]:
+ """Create a new thought node in the store.
+
+ A :class:`~engrava.ThoughtRecord` is constructed from the supplied
+ fields and persisted. The remaining record fields take their model
+ defaults. New thoughts start in the ``CREATED`` lifecycle state at
+ the origin cycle.
+
+ Args:
+ store: The store to write to.
+ essence: Compact canonical text used in prompts (1-200 chars).
+ content: Full stored content (non-empty).
+ thought_type: Classification of the thought content.
+ priority: Urgency level (``P1`` highest).
+ source: Origin label for the thought (e.g. ``"agent"``, ``"human"``).
+ confidence: Optional reliability estimate in ``[0.0, 1.0]``.
+ thought_id: Optional caller-supplied identifier. When omitted a
+ fresh UUID4 is generated.
+ deduplicate: When ``True``, an existing thought whose content hash
+ matches has its confirmation count incremented and is returned
+ instead of inserting a duplicate.
+
+ Returns:
+ A dict with a ``thought`` entry carrying the persisted thought's
+ ``thought_id``, ``essence``, ``thought_type``, ``priority`` and
+ ``lifecycle_status``. When deduplication collapses onto an
+ existing record, its identifier is returned.
+
+ """
+ record = ThoughtRecord(
+ thought_id=thought_id if thought_id is not None else str(uuid.uuid4()),
+ thought_type=thought_type,
+ essence=essence,
+ content=content,
+ priority=priority,
+ lifecycle_status=LifecycleStatus.CREATED,
+ created_cycle=INITIAL_CYCLE,
+ updated_cycle=INITIAL_CYCLE,
+ source=source,
+ confidence=confidence,
+ )
+ created = await store.create_thought(record, deduplicate=deduplicate)
+ return {
+ "thought": {
+ "thought_id": created.thought_id,
+ "essence": created.essence,
+ "thought_type": created.thought_type.value,
+ "priority": created.priority.value,
+ "lifecycle_status": created.lifecycle_status.value,
+ }
+ }
+
+
+async def update_thought_impl(
+ store: SqliteEngravaCore,
+ thought_id: str,
+ *,
+ essence: str | None = None,
+ content: str | None = None,
+ priority: Priority | None = None,
+ lifecycle_status: LifecycleStatus | None = None,
+ confidence: float | None = None,
+) -> dict[str, Any]:
+ """Update selected fields of an existing thought.
+
+ Only the fields the caller supplies are changed; every omitted
+ argument leaves its stored value untouched. Field changes are
+ applied with the store's optimistic-concurrency guard.
+
+ Args:
+ store: The store to write to.
+ thought_id: Identifier of the thought to update.
+ essence: New compact canonical text, if changing.
+ content: New full content, if changing.
+ priority: New urgency level, if changing.
+ lifecycle_status: New lifecycle state, if changing. The store
+ validates that the transition is allowed.
+ confidence: New reliability estimate in ``[0.0, 1.0]``, if changing.
+
+ Returns:
+ A dict with a ``thought`` entry carrying the updated thought's
+ ``thought_id``, ``essence``, ``priority`` and ``lifecycle_status``.
+
+ Raises:
+ ThoughtNotFoundError: If no thought has the given identifier.
+ StaleDataError: If the thought changed concurrently.
+ InvalidTransitionError: If a lifecycle change is not permitted.
+
+ """
+ changes: dict[str, object] = {}
+ if essence is not None:
+ changes["essence"] = essence
+ if content is not None:
+ changes["content"] = content
+ if priority is not None:
+ changes["priority"] = priority
+ if lifecycle_status is not None:
+ changes["lifecycle_status"] = lifecycle_status
+ if confidence is not None:
+ changes["confidence"] = confidence
+
+ updated = await store.update_thought(thought_id, **changes)
+ return {
+ "thought": {
+ "thought_id": updated.thought_id,
+ "essence": updated.essence,
+ "priority": updated.priority.value,
+ "lifecycle_status": updated.lifecycle_status.value,
+ }
+ }
+
+
+async def link_thoughts_impl(
+ store: SqliteEngravaCore,
+ from_thought_id: str,
+ to_thought_id: str,
+ edge_type: EdgeType,
+ *,
+ weight: float = DEFAULT_EDGE_WEIGHT,
+ edge_id: str | None = None,
+) -> dict[str, Any]:
+ """Create a typed edge between two existing thoughts.
+
+ An :class:`~engrava.EdgeRecord` is constructed from the supplied
+ endpoints and persisted. Both endpoints must already exist.
+
+ Args:
+ store: The store to write to.
+ from_thought_id: Identifier of the source thought.
+ to_thought_id: Identifier of the target thought.
+ edge_type: Classification of the relationship.
+ weight: Relation strength in ``[0.0, 1.0]``.
+ edge_id: Optional caller-supplied identifier. When omitted a
+ fresh UUID4 is generated.
+
+ Returns:
+ A dict with an ``edge`` entry carrying the persisted edge's
+ ``edge_id``, ``from_thought_id``, ``to_thought_id``, ``edge_type``
+ and ``weight``.
+
+ Raises:
+ ReferentialIntegrityError: If either endpoint does not exist.
+ IntegrityError: If an edge with the same source, target and type
+ already exists. Edges are unique per ``(from, to, type)``, so
+ this write is not idempotent — repeating an identical link is
+ rejected rather than ignored.
+
+ """
+ record = EdgeRecord(
+ edge_id=edge_id if edge_id is not None else str(uuid.uuid4()),
+ from_thought_id=from_thought_id,
+ to_thought_id=to_thought_id,
+ edge_type=edge_type,
+ weight=weight,
+ created_cycle=INITIAL_CYCLE,
+ )
+ created = await store.create_edge(record)
+ return {
+ "edge": {
+ "edge_id": created.edge_id,
+ "from_thought_id": created.from_thought_id,
+ "to_thought_id": created.to_thought_id,
+ "edge_type": created.edge_type.value,
+ "weight": created.weight,
+ }
+ }
+
+
+def _read_only_enabled() -> bool:
+ """Report whether the server should expose a read-only surface.
+
+ Reads :data:`READ_ONLY_ENV_VAR` and compares it against
+ :data:`READ_ONLY_TRUTHY_VALUES` after stripping surrounding whitespace
+ and lower-casing. An unset or empty value is treated as not
+ read-only.
+
+ Returns:
+ ``True`` when the environment requests a read-only surface,
+ otherwise ``False``.
+
+ """
+ raw = os.environ.get(READ_ONLY_ENV_VAR, "")
+ return raw.strip().lower() in READ_ONLY_TRUTHY_VALUES
+
+
def _with_limit(parsed: MindQLQuery, limit: int) -> MindQLQuery:
"""Return a copy of a parsed query with its ``limit`` replaced.
@@ -290,10 +533,12 @@ def _with_limit(parsed: MindQLQuery, limit: int) -> MindQLQuery:
def build_server() -> FastMCP:
- """Build the engrava MCP server with all read tools registered.
+ """Build the engrava MCP server with its tools registered.
The returned server resolves its store from the environment when its
lifespan starts and releases the connection when the lifespan ends.
+ The read tools are always registered; the write tools are registered
+ unless :func:`_read_only_enabled` reports a read-only deployment.
Returns:
A configured :class:`FastMCP` server ready to ``run()``.
@@ -319,9 +564,11 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[None]:
server: FastMCP = FastMCP(
SERVER_NAME,
instructions=(
- "Read-only access to an engrava agent-memory store: fetch "
- "thoughts, run hybrid and keyword search, run structured "
- "MindQL FIND queries, and read store statistics."
+ "Access to an engrava agent-memory store: fetch thoughts, run "
+ "hybrid and keyword search, run structured MindQL FIND queries, "
+ "and read store statistics. Unless the server is started in "
+ "read-only mode, you can also store new thoughts, update existing "
+ "thoughts, and link thoughts with typed edges."
),
lifespan=lifespan,
)
@@ -330,7 +577,12 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[None]:
def register_tools(server: FastMCP, provider: StoreProvider) -> None:
- """Register the five read tools on a server.
+ """Register the MCP tools on a server.
+
+ The five read tools are always registered. The three write tools are
+ registered only when the server is not in read-only mode (see
+ :func:`_read_only_enabled`); in read-only mode they are never
+ advertised to clients.
Args:
server: The server to register tools on.
@@ -402,6 +654,98 @@ async def query_memory(query: str, limit: int | None = None) -> dict[str, Any]:
async def memory_stats() -> dict[str, Any]:
return await memory_stats_impl(provider.require())
+ if _read_only_enabled():
+ return
+
+ @server.tool(
+ name="store_thought",
+ description=(
+ "Create a new thought node. Provide its essence (short canonical "
+ "text) and full content; optionally set the thought type, "
+ "priority, source, and confidence. Returns the created thought's "
+ "identifier and key fields."
+ ),
+ annotations=_WRITE,
+ )
+ async def store_thought(
+ essence: str,
+ content: str,
+ thought_type: ThoughtType = ThoughtType.NOTE,
+ priority: Priority = Priority.P3,
+ source: str = "agent",
+ *,
+ confidence: float | None = None,
+ thought_id: str | None = None,
+ deduplicate: bool = False,
+ ) -> dict[str, Any]:
+ return await store_thought_impl(
+ provider.require(),
+ essence,
+ content,
+ thought_type=thought_type,
+ priority=priority,
+ source=source,
+ confidence=confidence,
+ thought_id=thought_id,
+ deduplicate=deduplicate,
+ )
+
+ @server.tool(
+ name="update_thought",
+ description=(
+ "Update fields of an existing thought by identifier. Only the "
+ "fields you supply change; omit the rest. Can change essence, "
+ "content, priority, lifecycle status, and confidence."
+ ),
+ annotations=_WRITE_IDEMPOTENT,
+ )
+ async def update_thought(
+ thought_id: str,
+ essence: str | None = None,
+ content: str | None = None,
+ priority: Priority | None = None,
+ lifecycle_status: LifecycleStatus | None = None,
+ *,
+ confidence: float | None = None,
+ ) -> dict[str, Any]:
+ return await update_thought_impl(
+ provider.require(),
+ thought_id,
+ essence=essence,
+ content=content,
+ priority=priority,
+ lifecycle_status=lifecycle_status,
+ confidence=confidence,
+ )
+
+ @server.tool(
+ name="link_thoughts",
+ description=(
+ "Create a typed edge between two existing thoughts, identified by "
+ "their identifiers. Choose the edge type and optionally a weight "
+ "in [0.0, 1.0]. Both endpoints must already exist. An edge is "
+ "unique per (source, target, type): linking the same pair with the "
+ "same type twice is rejected rather than ignored."
+ ),
+ annotations=_WRITE,
+ )
+ async def link_thoughts(
+ from_thought_id: str,
+ to_thought_id: str,
+ edge_type: EdgeType,
+ weight: float = DEFAULT_EDGE_WEIGHT,
+ *,
+ edge_id: str | None = None,
+ ) -> dict[str, Any]:
+ return await link_thoughts_impl(
+ provider.require(),
+ from_thought_id,
+ to_thought_id,
+ edge_type,
+ weight=weight,
+ edge_id=edge_id,
+ )
+
def main() -> None:
"""Run the engrava MCP server over stdio.
diff --git a/tests/mcp/test_gating.py b/tests/mcp/test_gating.py
new file mode 100644
index 0000000..eef9c30
--- /dev/null
+++ b/tests/mcp/test_gating.py
@@ -0,0 +1,85 @@
+"""Tests for read-only gating of the MCP write tools.
+
+The write tools must not be *registered* when the server is in read-only
+mode, so a read-only deployment never advertises them to clients. These
+tests build a server under each mode and inspect the registered tool list,
+and exercise the truthy parsing of :data:`READ_ONLY_ENV_VAR` directly.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from engrava.mcp.server import (
+ READ_ONLY_ENV_VAR,
+ _read_only_enabled,
+ build_server,
+)
+
+#: Tool names that are always registered.
+READ_TOOL_NAMES = frozenset(
+ {
+ "get_thought",
+ "search_memory",
+ "search_keywords",
+ "query_memory",
+ "memory_stats",
+ }
+)
+
+#: Tool names that are gated behind write access.
+WRITE_TOOL_NAMES = frozenset(
+ {
+ "store_thought",
+ "update_thought",
+ "link_thoughts",
+ }
+)
+
+
+async def _registered_tool_names() -> set[str]:
+ """Build a server and return the set of its registered tool names.
+
+ Returns:
+ The names of every tool the freshly built server advertises.
+
+ """
+ server = build_server()
+ return {tool.name for tool in await server.list_tools()}
+
+
+class TestRegistrationGating:
+ """Tests for which tools register under each mode."""
+
+ async def test_unset_registers_all_eight_tools(self, monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+ names = await _registered_tool_names()
+ assert names == READ_TOOL_NAMES | WRITE_TOOL_NAMES
+
+ async def test_read_only_hides_write_tools(self, monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.setenv(READ_ONLY_ENV_VAR, "true")
+ names = await _registered_tool_names()
+ # Read tools stay; write tools are absent entirely.
+ assert names >= READ_TOOL_NAMES
+ assert names.isdisjoint(WRITE_TOOL_NAMES)
+ assert names == READ_TOOL_NAMES
+
+
+class TestTruthyParsing:
+ """Tests for :func:`_read_only_enabled` truthy parsing."""
+
+ @pytest.mark.parametrize("value", ["1", "true", "yes", "TRUE", "Yes", " true "])
+ def test_truthy_values_enable_read_only(
+ self, monkeypatch: pytest.MonkeyPatch, value: str
+ ) -> None:
+ monkeypatch.setenv(READ_ONLY_ENV_VAR, value)
+ assert _read_only_enabled() is True
+
+ @pytest.mark.parametrize("value", ["0", "false", "no", "off", "", " ", "maybe"])
+ def test_falsy_values_keep_writes(self, monkeypatch: pytest.MonkeyPatch, value: str) -> None:
+ monkeypatch.setenv(READ_ONLY_ENV_VAR, value)
+ assert _read_only_enabled() is False
+
+ def test_unset_keeps_writes(self, monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+ assert _read_only_enabled() is False
diff --git a/tests/mcp/test_server.py b/tests/mcp/test_server.py
index 18a5ed9..5a8f61a 100644
--- a/tests/mcp/test_server.py
+++ b/tests/mcp/test_server.py
@@ -28,13 +28,16 @@
StoreResolutionError,
resolve_store,
)
+from engrava.mcp.server import READ_ONLY_ENV_VAR
if TYPE_CHECKING:
from pathlib import Path
-EXPECTED_TOOL_NAMES = frozenset(
+READ_TOOL_NAMES = frozenset(
{"get_thought", "search_memory", "search_keywords", "query_memory", "memory_stats"}
)
+WRITE_TOOL_NAMES = frozenset({"store_thought", "update_thought", "link_thoughts"})
+EXPECTED_TOOL_NAMES = READ_TOOL_NAMES | WRITE_TOOL_NAMES
async def _seed_database(path: Path) -> None:
@@ -67,22 +70,107 @@ async def _seed_database(path: Path) -> None:
class TestServerEndToEnd:
"""Drive the server through a connected in-memory client."""
- async def test_lists_exactly_five_tools(
+ async def test_lists_read_and_write_tools_by_default(
self,
monkeypatch: pytest.MonkeyPatch,
tmp_path: Path,
) -> None:
monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "tools.db"))
monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
server = build_server()
async with connect_client(server) as client:
listed = await client.list_tools()
- assert {tool.name for tool in listed.tools} == EXPECTED_TOOL_NAMES
- assert all(
- tool.annotations is not None and tool.annotations.readOnlyHint for tool in listed.tools
- )
+ read_only_by_name: dict[str, bool | None] = {}
+ idempotent_by_name: dict[str, bool | None] = {}
+ for tool in listed.tools:
+ # Every tool must carry an annotation block.
+ assert tool.annotations is not None
+ read_only_by_name[tool.name] = tool.annotations.readOnlyHint
+ idempotent_by_name[tool.name] = tool.annotations.idempotentHint
+
+ assert set(read_only_by_name) == EXPECTED_TOOL_NAMES
+ # The read tools are read-only and the write tools are not.
+ assert all(read_only_by_name[name] for name in READ_TOOL_NAMES)
+ assert all(read_only_by_name[name] is False for name in WRITE_TOOL_NAMES)
+
+ # Idempotency hints must match the real store semantics a client
+ # would rely on for safe retries:
+ # - update_thought converges on the same end state -> idempotent
+ # - store_thought creates a fresh node each call -> NOT idempotent
+ # - link_thoughts rejects a duplicate (from,to,type) -> NOT idempotent
+ assert idempotent_by_name["update_thought"] is True
+ assert idempotent_by_name["store_thought"] is False
+ assert idempotent_by_name["link_thoughts"] is False
+
+ async def test_read_only_mode_hides_write_tools(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "ro.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.setenv(READ_ONLY_ENV_VAR, "1")
+
+ server = build_server()
+ async with connect_client(server) as client:
+ listed = await client.list_tools()
+
+ assert {tool.name for tool in listed.tools} == READ_TOOL_NAMES
+
+ async def test_write_tools_round_trip_over_transport(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "writes.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ created = await client.call_tool(
+ "store_thought",
+ {"essence": "Live note", "content": "Stored over the transport."},
+ )
+ assert created.isError is False
+ assert created.structuredContent is not None
+ first_id = created.structuredContent["thought"]["thought_id"]
+
+ second = await client.call_tool(
+ "store_thought",
+ {"essence": "Second note", "content": "Another stored note."},
+ )
+ assert second.structuredContent is not None
+ second_id = second.structuredContent["thought"]["thought_id"]
+
+ updated = await client.call_tool(
+ "update_thought",
+ {"thought_id": first_id, "essence": "Edited note"},
+ )
+ assert updated.isError is False
+ assert updated.structuredContent is not None
+ assert updated.structuredContent["thought"]["essence"] == "Edited note"
+
+ linked = await client.call_tool(
+ "link_thoughts",
+ {
+ "from_thought_id": first_id,
+ "to_thought_id": second_id,
+ "edge_type": "ASSOCIATED",
+ },
+ )
+ assert linked.isError is False
+ assert linked.structuredContent is not None
+ assert linked.structuredContent["edge"]["from_thought_id"] == first_id
+
+ fetched = await client.call_tool("get_thought", {"thought_id": first_id})
+
+ assert fetched.structuredContent is not None
+ assert fetched.structuredContent["found"] is True
+ assert fetched.structuredContent["thought"]["essence"] == "Edited note"
async def test_get_thought_round_trip(
self,
diff --git a/tests/mcp/test_write_tools.py b/tests/mcp/test_write_tools.py
new file mode 100644
index 0000000..222b39e
--- /dev/null
+++ b/tests/mcp/test_write_tools.py
@@ -0,0 +1,219 @@
+"""Unit tests for the MCP write-tool implementations.
+
+Each tool implementation is exercised directly against a seeded
+in-memory store (see ``conftest.store``).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import aiosqlite
+import pytest
+
+from engrava import (
+ EdgeType,
+ LifecycleStatus,
+ Priority,
+ ThoughtNotFoundError,
+ ThoughtType,
+)
+from engrava.domain.exceptions import ReferentialIntegrityError
+from engrava.mcp.server import (
+ get_thought_impl,
+ link_thoughts_impl,
+ store_thought_impl,
+ update_thought_impl,
+)
+
+if TYPE_CHECKING:
+ from engrava.infrastructure.sqlite.engrava_core import SqliteEngravaCore
+
+
+class TestStoreThought:
+ """Tests for the ``store_thought`` tool."""
+
+ async def test_creates_and_reads_back(self, store: SqliteEngravaCore) -> None:
+ result = await store_thought_impl(
+ store,
+ essence="Sourdough starter notes",
+ content="Feed the starter twice daily at a 1:1:1 ratio.",
+ )
+ new_id = result["thought"]["thought_id"]
+ assert isinstance(new_id, str)
+ assert new_id
+
+ read_back = await get_thought_impl(store, new_id)
+ assert read_back["found"] is True
+ thought = read_back["thought"]
+ assert thought is not None
+ assert thought["essence"] == "Sourdough starter notes"
+ # New thoughts start in the CREATED lifecycle state.
+ assert thought["lifecycle_status"] == "CREATED"
+
+ async def test_generates_unique_ids(self, store: SqliteEngravaCore) -> None:
+ first = await store_thought_impl(store, essence="One", content="First body.")
+ second = await store_thought_impl(store, essence="Two", content="Second body.")
+ assert first["thought"]["thought_id"] != second["thought"]["thought_id"]
+
+ async def test_honours_supplied_id_and_fields(self, store: SqliteEngravaCore) -> None:
+ result = await store_thought_impl(
+ store,
+ essence="Explicit identity",
+ content="A thought with a caller-chosen id.",
+ thought_type=ThoughtType.TASK,
+ priority=Priority.P1,
+ thought_id="thought-explicit",
+ )
+ thought = result["thought"]
+ assert thought["thought_id"] == "thought-explicit"
+ assert thought["thought_type"] == "TASK"
+ assert thought["priority"] == "P1"
+
+ async def test_deduplicate_collapses_on_content_hash(self, store: SqliteEngravaCore) -> None:
+ content = "Identical body used to trigger content-hash dedup."
+ first = await store_thought_impl(
+ store, essence="Dedup A", content=content, deduplicate=True
+ )
+ second = await store_thought_impl(
+ store, essence="Dedup B", content=content, deduplicate=True
+ )
+ assert first["thought"]["thought_id"] == second["thought"]["thought_id"]
+
+
+class TestUpdateThought:
+ """Tests for the ``update_thought`` tool."""
+
+ async def test_changes_supplied_field(self, store: SqliteEngravaCore) -> None:
+ result = await update_thought_impl(
+ store,
+ "thought-alpha",
+ essence="Updated coffee notes",
+ )
+ assert result["thought"]["essence"] == "Updated coffee notes"
+
+ read_back = await get_thought_impl(store, "thought-alpha")
+ thought = read_back["thought"]
+ assert thought is not None
+ assert thought["essence"] == "Updated coffee notes"
+
+ async def test_changes_lifecycle_status(self, store: SqliteEngravaCore) -> None:
+ # The seeded thought is ACTIVE; ACTIVE -> DONE is a valid transition.
+ result = await update_thought_impl(
+ store,
+ "thought-alpha",
+ lifecycle_status=LifecycleStatus.DONE,
+ )
+ assert result["thought"]["lifecycle_status"] == "DONE"
+
+ async def test_changes_content_and_confidence(self, store: SqliteEngravaCore) -> None:
+ await update_thought_impl(
+ store,
+ "thought-alpha",
+ content="Rewritten brewing guidance.",
+ confidence=0.75,
+ )
+ read_back = await get_thought_impl(store, "thought-alpha")
+ thought = read_back["thought"]
+ assert thought is not None
+ assert thought["content"] == "Rewritten brewing guidance."
+ assert thought["confidence"] == 0.75
+
+ async def test_omitted_fields_are_untouched(self, store: SqliteEngravaCore) -> None:
+ before = await get_thought_impl(store, "thought-beta")
+ before_thought = before["thought"]
+ assert before_thought is not None
+ original_content = before_thought["content"]
+
+ await update_thought_impl(store, "thought-beta", priority=Priority.P3)
+
+ after = await get_thought_impl(store, "thought-beta")
+ after_thought = after["thought"]
+ assert after_thought is not None
+ assert after_thought["content"] == original_content
+ assert after_thought["priority"] == "P3"
+
+ async def test_missing_thought_raises(self, store: SqliteEngravaCore) -> None:
+ with pytest.raises(ThoughtNotFoundError):
+ await update_thought_impl(store, "does-not-exist", essence="x")
+
+
+class TestLinkThoughts:
+ """Tests for the ``link_thoughts`` tool."""
+
+ async def test_creates_edge_between_existing_thoughts(self, store: SqliteEngravaCore) -> None:
+ result = await link_thoughts_impl(
+ store,
+ "thought-alpha",
+ "thought-beta",
+ EdgeType.ASSOCIATED,
+ weight=0.5,
+ )
+ edge = result["edge"]
+ assert edge["from_thought_id"] == "thought-alpha"
+ assert edge["to_thought_id"] == "thought-beta"
+ assert edge["edge_type"] == "ASSOCIATED"
+ assert edge["weight"] == 0.5
+
+ edges = await store.get_edges("thought-alpha", direction="OUT")
+ assert any(
+ e.to_thought_id == "thought-beta" and e.edge_type is EdgeType.ASSOCIATED for e in edges
+ )
+
+ async def test_generates_edge_id_when_omitted(self, store: SqliteEngravaCore) -> None:
+ result = await link_thoughts_impl(
+ store,
+ "thought-alpha",
+ "thought-beta",
+ EdgeType.DEPENDS_ON,
+ )
+ edge_id = result["edge"]["edge_id"]
+ assert isinstance(edge_id, str)
+ assert edge_id
+
+ async def test_default_weight_applied(self, store: SqliteEngravaCore) -> None:
+ result = await link_thoughts_impl(
+ store,
+ "thought-beta",
+ "thought-alpha",
+ EdgeType.DERIVED_FROM,
+ )
+ assert result["edge"]["weight"] == 1.0
+
+ async def test_missing_endpoint_raises(self, store: SqliteEngravaCore) -> None:
+ with pytest.raises(ReferentialIntegrityError):
+ await link_thoughts_impl(
+ store,
+ "thought-alpha",
+ "ghost-thought",
+ EdgeType.ASSOCIATED,
+ )
+
+ async def test_duplicate_link_is_rejected_not_idempotent(
+ self, store: SqliteEngravaCore
+ ) -> None:
+ # An edge is unique per (from, to, type). Linking the same pair with
+ # the same type twice must be rejected rather than silently ignored or
+ # converged — this is why link_thoughts is annotated idempotentHint=False.
+ await link_thoughts_impl(
+ store,
+ "thought-alpha",
+ "thought-beta",
+ EdgeType.ASSOCIATED,
+ )
+ with pytest.raises(aiosqlite.IntegrityError):
+ await link_thoughts_impl(
+ store,
+ "thought-alpha",
+ "thought-beta",
+ EdgeType.ASSOCIATED,
+ )
+
+ # The failed retry left exactly one edge — the write did not converge.
+ edges = await store.get_edges("thought-alpha", direction="OUT")
+ matching = [
+ e
+ for e in edges
+ if e.to_thought_id == "thought-beta" and e.edge_type is EdgeType.ASSOCIATED
+ ]
+ assert len(matching) == 1
From 84b87f6569958052605a87fbed1aebb3fe734043 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 8 Jun 2026 16:20:23 +0200
Subject: [PATCH 05/40] feat(mcp): add delete_thought and delete_edge tools
Expose two destructive write tools that wrap the existing core delete
methods so an agent can remove a wrong thought or edge over MCP.
Both carry a destructive-but-idempotent annotation: deleting an absent
identifier is a no-op returning deleted=false and converges on the same
end state, so a retry is safe. They register only outside read-only mode,
alongside the other write tools, and stay hidden when the read-only
surface is requested.
---
src/engrava/mcp/server.py | 89 +++++++++++++++++++++++++++++--
tests/mcp/test_delete_tools.py | 95 ++++++++++++++++++++++++++++++++++
tests/mcp/test_gating.py | 4 +-
tests/mcp/test_server.py | 76 ++++++++++++++++++++++++++-
4 files changed, 258 insertions(+), 6 deletions(-)
create mode 100644 tests/mcp/test_delete_tools.py
diff --git a/src/engrava/mcp/server.py b/src/engrava/mcp/server.py
index b69fc63..4b933e9 100644
--- a/src/engrava/mcp/server.py
+++ b/src/engrava/mcp/server.py
@@ -21,7 +21,7 @@
``memory_stats``
Aggregate counts and store-health metrics.
-Three write tools complete the surface:
+Five write tools complete the surface:
``store_thought``
Create a new thought node.
@@ -29,6 +29,10 @@
Mutate selected fields of an existing thought.
``link_thoughts``
Create a typed edge between two existing thoughts.
+``delete_thought``
+ Remove a thought by identifier.
+``delete_edge``
+ Remove an edge by identifier.
The write tools are gated by the :data:`READ_ONLY_ENV_VAR` environment
variable. When it is set to a truthy value the write tools are not
@@ -101,6 +105,13 @@
#: repeating with the same arguments converges on the same end state).
_WRITE_IDEMPOTENT = ToolAnnotations(readOnlyHint=False, destructiveHint=False, idempotentHint=True)
+#: Annotation for a destructive but idempotent write (deleting a thought or
+#: edge). It is marked idempotent because deleting an already-absent
+#: identifier is a no-op that returns ``deleted=False`` and leaves the same
+#: end state — the record is gone either way — so a client may safely retry a
+#: delete that appeared to fail.
+_WRITE_DESTRUCTIVE = ToolAnnotations(readOnlyHint=False, destructiveHint=True, idempotentHint=True)
+
class StoreNotReadyError(RuntimeError):
"""Raised when a tool is invoked before a store has been provided.
@@ -500,6 +511,44 @@ async def link_thoughts_impl(
}
+async def delete_thought_impl(store: SqliteEngravaCore, thought_id: str) -> dict[str, Any]:
+ """Delete a thought by identifier.
+
+ Deleting an identifier that is not present is a no-op rather than an
+ error: the call simply reports that nothing was removed.
+
+ Args:
+ store: The store to write to.
+ thought_id: Identifier of the thought to delete.
+
+ Returns:
+ A dict with a ``deleted`` flag: ``True`` when a thought was
+ removed, ``False`` when no thought had the given identifier.
+
+ """
+ deleted = await store.delete_thought(thought_id)
+ return {"deleted": deleted}
+
+
+async def delete_edge_impl(store: SqliteEngravaCore, edge_id: str) -> dict[str, Any]:
+ """Delete an edge by identifier.
+
+ Deleting an identifier that is not present is a no-op rather than an
+ error: the call simply reports that nothing was removed.
+
+ Args:
+ store: The store to write to.
+ edge_id: Identifier of the edge to delete.
+
+ Returns:
+ A dict with a ``deleted`` flag: ``True`` when an edge was removed,
+ ``False`` when no edge had the given identifier.
+
+ """
+ deleted = await store.delete_edge(edge_id)
+ return {"deleted": deleted}
+
+
def _read_only_enabled() -> bool:
"""Report whether the server should expose a read-only surface.
@@ -568,7 +617,8 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[None]:
"hybrid and keyword search, run structured MindQL FIND queries, "
"and read store statistics. Unless the server is started in "
"read-only mode, you can also store new thoughts, update existing "
- "thoughts, and link thoughts with typed edges."
+ "thoughts, link thoughts with typed edges, and delete thoughts or "
+ "edges."
),
lifespan=lifespan,
)
@@ -576,10 +626,15 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[None]:
return server
-def register_tools(server: FastMCP, provider: StoreProvider) -> None:
+# C901: the mccabe count is inflated by the nested ``@server.tool`` handler
+# definitions (one trivial delegating wrapper per tool), not by branching logic
+# — this function has a single branch, the read-only guard. Splitting the flat
+# registration list would hurt readability, so the complexity cap is waived here
+# deliberately.
+def register_tools(server: FastMCP, provider: StoreProvider) -> None: # noqa: C901
"""Register the MCP tools on a server.
- The five read tools are always registered. The three write tools are
+ The five read tools are always registered. The five write tools are
registered only when the server is not in read-only mode (see
:func:`_read_only_enabled`); in read-only mode they are never
advertised to clients.
@@ -746,6 +801,32 @@ async def link_thoughts(
edge_id=edge_id,
)
+ @server.tool(
+ name="delete_thought",
+ description=(
+ "Delete a thought by its identifier. Use this to remove a memory "
+ "that is wrong or no longer wanted. Returns whether a thought was "
+ "removed; deleting an identifier that does not exist is not an "
+ "error and simply reports that nothing was removed."
+ ),
+ annotations=_WRITE_DESTRUCTIVE,
+ )
+ async def delete_thought(thought_id: str) -> dict[str, Any]:
+ return await delete_thought_impl(provider.require(), thought_id)
+
+ @server.tool(
+ name="delete_edge",
+ description=(
+ "Delete an edge between two thoughts by its identifier. Use this to "
+ "remove a relationship that is wrong or no longer wanted. Returns "
+ "whether an edge was removed; deleting an identifier that does not "
+ "exist is not an error and simply reports that nothing was removed."
+ ),
+ annotations=_WRITE_DESTRUCTIVE,
+ )
+ async def delete_edge(edge_id: str) -> dict[str, Any]:
+ return await delete_edge_impl(provider.require(), edge_id)
+
def main() -> None:
"""Run the engrava MCP server over stdio.
diff --git a/tests/mcp/test_delete_tools.py b/tests/mcp/test_delete_tools.py
new file mode 100644
index 0000000..9b5ecad
--- /dev/null
+++ b/tests/mcp/test_delete_tools.py
@@ -0,0 +1,95 @@
+"""Unit tests for the MCP delete-tool implementations.
+
+Each tool implementation is exercised directly against a seeded
+in-memory store (see ``conftest.store``). The repeated-delete cases are
+the evidence that the delete tools are genuinely idempotent: a delete of
+an absent identifier returns ``{"deleted": False}`` without raising, so a
+client may safely retry a delete that appeared to fail.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from engrava import EdgeType
+from engrava.domain.models.edge import EdgeRecord
+from engrava.mcp.server import (
+ delete_edge_impl,
+ delete_thought_impl,
+ get_thought_impl,
+ link_thoughts_impl,
+)
+
+if TYPE_CHECKING:
+ from engrava.infrastructure.sqlite.engrava_core import SqliteEngravaCore
+
+
+class TestDeleteThought:
+ """Tests for the ``delete_thought`` tool."""
+
+ async def test_removes_existing_thought(self, store: SqliteEngravaCore) -> None:
+ result = await delete_thought_impl(store, "thought-alpha")
+ assert result == {"deleted": True}
+
+ # The thought is gone: a read-back reports it missing.
+ read_back = await get_thought_impl(store, "thought-alpha")
+ assert read_back["found"] is False
+ assert read_back["thought"] is None
+
+ async def test_repeated_delete_is_idempotent(self, store: SqliteEngravaCore) -> None:
+ # First delete removes the thought; the second converges on the same
+ # end state (already gone) and reports it without raising. This is the
+ # evidence behind the destructive-but-idempotent annotation.
+ first = await delete_thought_impl(store, "thought-alpha")
+ assert first == {"deleted": True}
+
+ second = await delete_thought_impl(store, "thought-alpha")
+ assert second == {"deleted": False}
+
+ async def test_unknown_id_is_not_an_error(self, store: SqliteEngravaCore) -> None:
+ result = await delete_thought_impl(store, "never-existed")
+ assert result == {"deleted": False}
+
+
+class TestDeleteEdge:
+ """Tests for the ``delete_edge`` tool."""
+
+ async def test_removes_existing_edge(self, store: SqliteEngravaCore) -> None:
+ created = await link_thoughts_impl(
+ store,
+ "thought-alpha",
+ "thought-beta",
+ EdgeType.ASSOCIATED,
+ )
+ edge_id = created["edge"]["edge_id"]
+
+ result = await delete_edge_impl(store, edge_id)
+ assert result == {"deleted": True}
+
+ # The edge is gone: the source thought has no outgoing edges left.
+ edges = await store.get_edges("thought-alpha", direction="OUT")
+ assert edges == []
+
+ async def test_repeated_delete_is_idempotent(self, store: SqliteEngravaCore) -> None:
+ edge_id = "edge-to-remove"
+ await store.create_edge(
+ EdgeRecord(
+ edge_id=edge_id,
+ from_thought_id="thought-alpha",
+ to_thought_id="thought-beta",
+ edge_type=EdgeType.ASSOCIATED,
+ weight=1.0,
+ created_cycle=0,
+ )
+ )
+
+ first = await delete_edge_impl(store, edge_id)
+ assert first == {"deleted": True}
+
+ # The repeated delete converges on the same end state without raising.
+ second = await delete_edge_impl(store, edge_id)
+ assert second == {"deleted": False}
+
+ async def test_unknown_id_is_not_an_error(self, store: SqliteEngravaCore) -> None:
+ result = await delete_edge_impl(store, "never-existed")
+ assert result == {"deleted": False}
diff --git a/tests/mcp/test_gating.py b/tests/mcp/test_gating.py
index eef9c30..7aab76a 100644
--- a/tests/mcp/test_gating.py
+++ b/tests/mcp/test_gating.py
@@ -33,6 +33,8 @@
"store_thought",
"update_thought",
"link_thoughts",
+ "delete_thought",
+ "delete_edge",
}
)
@@ -51,7 +53,7 @@ async def _registered_tool_names() -> set[str]:
class TestRegistrationGating:
"""Tests for which tools register under each mode."""
- async def test_unset_registers_all_eight_tools(self, monkeypatch: pytest.MonkeyPatch) -> None:
+ async def test_unset_registers_all_tools(self, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
names = await _registered_tool_names()
assert names == READ_TOOL_NAMES | WRITE_TOOL_NAMES
diff --git a/tests/mcp/test_server.py b/tests/mcp/test_server.py
index 5a8f61a..3525274 100644
--- a/tests/mcp/test_server.py
+++ b/tests/mcp/test_server.py
@@ -36,7 +36,12 @@
READ_TOOL_NAMES = frozenset(
{"get_thought", "search_memory", "search_keywords", "query_memory", "memory_stats"}
)
-WRITE_TOOL_NAMES = frozenset({"store_thought", "update_thought", "link_thoughts"})
+WRITE_TOOL_NAMES = frozenset(
+ {"store_thought", "update_thought", "link_thoughts", "delete_thought", "delete_edge"}
+)
+#: The subset of write tools that remove data and therefore carry
+#: ``destructiveHint=True``.
+DESTRUCTIVE_TOOL_NAMES = frozenset({"delete_thought", "delete_edge"})
EXPECTED_TOOL_NAMES = READ_TOOL_NAMES | WRITE_TOOL_NAMES
@@ -85,11 +90,13 @@ async def test_lists_read_and_write_tools_by_default(
read_only_by_name: dict[str, bool | None] = {}
idempotent_by_name: dict[str, bool | None] = {}
+ destructive_by_name: dict[str, bool | None] = {}
for tool in listed.tools:
# Every tool must carry an annotation block.
assert tool.annotations is not None
read_only_by_name[tool.name] = tool.annotations.readOnlyHint
idempotent_by_name[tool.name] = tool.annotations.idempotentHint
+ destructive_by_name[tool.name] = tool.annotations.destructiveHint
assert set(read_only_by_name) == EXPECTED_TOOL_NAMES
# The read tools are read-only and the write tools are not.
@@ -101,9 +108,18 @@ async def test_lists_read_and_write_tools_by_default(
# - update_thought converges on the same end state -> idempotent
# - store_thought creates a fresh node each call -> NOT idempotent
# - link_thoughts rejects a duplicate (from,to,type) -> NOT idempotent
+ # - delete_* of an absent id is a no-op, same end state -> idempotent
assert idempotent_by_name["update_thought"] is True
assert idempotent_by_name["store_thought"] is False
assert idempotent_by_name["link_thoughts"] is False
+ assert idempotent_by_name["delete_thought"] is True
+ assert idempotent_by_name["delete_edge"] is True
+
+ # Only the delete tools remove data, so only they are destructive.
+ assert all(destructive_by_name[name] is True for name in DESTRUCTIVE_TOOL_NAMES)
+ assert all(
+ destructive_by_name[name] is False for name in WRITE_TOOL_NAMES - DESTRUCTIVE_TOOL_NAMES
+ )
async def test_read_only_mode_hides_write_tools(
self,
@@ -172,6 +188,64 @@ async def test_write_tools_round_trip_over_transport(
assert fetched.structuredContent["found"] is True
assert fetched.structuredContent["thought"]["essence"] == "Edited note"
+ async def test_delete_tools_round_trip_over_transport(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "deletes.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ first = await client.call_tool(
+ "store_thought",
+ {"essence": "From note", "content": "Source thought."},
+ )
+ assert first.structuredContent is not None
+ first_id = first.structuredContent["thought"]["thought_id"]
+
+ second = await client.call_tool(
+ "store_thought",
+ {"essence": "To note", "content": "Target thought."},
+ )
+ assert second.structuredContent is not None
+ second_id = second.structuredContent["thought"]["thought_id"]
+
+ linked = await client.call_tool(
+ "link_thoughts",
+ {
+ "from_thought_id": first_id,
+ "to_thought_id": second_id,
+ "edge_type": "ASSOCIATED",
+ },
+ )
+ assert linked.structuredContent is not None
+ edge_id = linked.structuredContent["edge"]["edge_id"]
+
+ deleted_edge = await client.call_tool("delete_edge", {"edge_id": edge_id})
+ assert deleted_edge.isError is False
+ assert deleted_edge.structuredContent is not None
+ assert deleted_edge.structuredContent["deleted"] is True
+
+ deleted_thought = await client.call_tool("delete_thought", {"thought_id": first_id})
+ assert deleted_thought.isError is False
+ assert deleted_thought.structuredContent is not None
+ assert deleted_thought.structuredContent["deleted"] is True
+
+ # Deleting the same thought again converges on the same end state
+ # (already gone) and reports it without erroring.
+ again = await client.call_tool("delete_thought", {"thought_id": first_id})
+ assert again.isError is False
+ assert again.structuredContent is not None
+ assert again.structuredContent["deleted"] is False
+
+ fetched = await client.call_tool("get_thought", {"thought_id": first_id})
+
+ assert fetched.structuredContent is not None
+ assert fetched.structuredContent["found"] is False
+
async def test_get_thought_round_trip(
self,
monkeypatch: pytest.MonkeyPatch,
From c54dcf77180a31c7bcb06d815de316ae1c93d488 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 8 Jun 2026 17:19:00 +0200
Subject: [PATCH 06/40] feat(mcp): expose memory as resources (thought, stats,
recent)
Add three read-only MCP resources alongside the existing tools, so
clients can attach memory as context (resources are addressable URIs,
distinct from invokable tools):
- engrava://thought/{thought_id} - a single thought as JSON; an unknown
id returns a graceful not-found payload rather than erroring.
- engrava://stats - store-health counts and size. Reuses the same
memory_stats_impl as the memory_stats tool, so the two agree by
construction (no duplicate stats logic).
- engrava://recent - the most-recently-updated thoughts as JSON.
Resources are reads by definition, so they are not gated by the
read-only environment flag and are advertised in both the default and
read-only deployments. Registration lives in a new register_resources()
wired into build_server() before the write-tool guard.
---
src/engrava/mcp/server.py | 133 ++++++++++++++++--
tests/mcp/test_resources.py | 272 ++++++++++++++++++++++++++++++++++++
2 files changed, 397 insertions(+), 8 deletions(-)
create mode 100644 tests/mcp/test_resources.py
diff --git a/src/engrava/mcp/server.py b/src/engrava/mcp/server.py
index 4b933e9..28c92f3 100644
--- a/src/engrava/mcp/server.py
+++ b/src/engrava/mcp/server.py
@@ -39,15 +39,33 @@
registered at all, so a read-only deployment never advertises them to
clients. The read tools are always available.
-The active store is supplied to tool calls through a :class:`StoreProvider`
-that the server's lifespan populates on startup and clears on shutdown.
-Each tool delegates to a module-level implementation function that takes an
-explicit store argument, which keeps the query and mutation logic
-unit-testable without a running server.
+Three read-only *resources* round out the surface. Where tools are
+*invoked*, resources are addressable ``engrava://`` URIs that clients
+surface as attachable context:
+
+``engrava://thought/{thought_id}``
+ A single thought as a JSON document. Reading an unknown identifier
+ yields a graceful not-found payload rather than an error.
+``engrava://stats``
+ Store-health counts and size, identical to the ``memory_stats`` tool
+ (both share :func:`memory_stats_impl`).
+``engrava://recent``
+ The most-recently-updated thoughts as a JSON document.
+
+Resources are reads by definition, so — unlike the write tools — they are
+*not* gated by :data:`READ_ONLY_ENV_VAR`; they are advertised in both the
+default and read-only deployments.
+
+The active store is supplied to tool and resource calls through a
+:class:`StoreProvider` that the server's lifespan populates on startup and
+clears on shutdown. Each tool delegates to a module-level implementation
+function that takes an explicit store argument, which keeps the query and
+mutation logic unit-testable without a running server.
"""
from __future__ import annotations
+import json
import os
import uuid
from contextlib import asynccontextmanager
@@ -75,6 +93,14 @@
#: Default number of results returned by search tools.
DEFAULT_TOP_K = 10
+#: Default number of thoughts returned by the ``engrava://recent`` resource.
+DEFAULT_RECENT_LIMIT = 10
+
+#: MIME type advertised for every ``engrava://`` resource. Resource
+#: handlers return a JSON document as text, so clients receive a stable,
+#: machine-parseable content type.
+RESOURCE_MIME_TYPE = "application/json"
+
#: Default edge weight when a caller does not supply one.
DEFAULT_EDGE_WEIGHT = 1.0
@@ -332,6 +358,33 @@ async def memory_stats_impl(store: SqliteEngravaCore) -> dict[str, Any]:
}
+async def recent_thoughts_impl(
+ store: SqliteEngravaCore,
+ *,
+ limit: int = DEFAULT_RECENT_LIMIT,
+) -> dict[str, Any]:
+ """Return the most-recently-updated thoughts.
+
+ Wraps the public :meth:`~engrava.SqliteEngravaCore.list_thoughts`,
+ which orders by descending ``updated_cycle`` — so the first entry is
+ the thought touched most recently.
+
+ Args:
+ store: The store to query.
+ limit: Maximum number of thoughts to return, newest first.
+
+ Returns:
+ A dict with a ``thoughts`` list of JSON-serialisable thoughts
+ (newest first) and the ``limit`` that was applied.
+
+ """
+ thoughts = await store.list_thoughts(limit=limit)
+ return {
+ "thoughts": [thought.model_dump(mode="json") for thought in thoughts],
+ "limit": limit,
+ }
+
+
async def store_thought_impl(
store: SqliteEngravaCore,
essence: str,
@@ -586,8 +639,9 @@ def build_server() -> FastMCP:
The returned server resolves its store from the environment when its
lifespan starts and releases the connection when the lifespan ends.
- The read tools are always registered; the write tools are registered
- unless :func:`_read_only_enabled` reports a read-only deployment.
+ The read tools and the resources are always registered; the write
+ tools are registered unless :func:`_read_only_enabled` reports a
+ read-only deployment.
Returns:
A configured :class:`FastMCP` server ready to ``run()``.
@@ -618,14 +672,77 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[None]:
"and read store statistics. Unless the server is started in "
"read-only mode, you can also store new thoughts, update existing "
"thoughts, link thoughts with typed edges, and delete thoughts or "
- "edges."
+ "edges. Read-only resources are also available as attachable "
+ "context: a single thought (engrava://thought/{thought_id}), store "
+ "statistics (engrava://stats), and the most recent thoughts "
+ "(engrava://recent)."
),
lifespan=lifespan,
)
+ register_resources(server, provider)
register_tools(server, provider)
return server
+def register_resources(server: FastMCP, provider: StoreProvider) -> None:
+ """Register the read-only MCP resources on a server.
+
+ Three resources are registered. They are reads by definition, so —
+ unlike the write tools — they are *not* gated by the read-only
+ environment flag and are advertised in every deployment:
+
+ ``engrava://thought/{thought_id}``
+ A single thought as a JSON document. An unknown identifier
+ yields a graceful not-found payload rather than an error.
+ ``engrava://stats``
+ Store-health counts and size. Shares :func:`memory_stats_impl`
+ with the ``memory_stats`` tool, so the two agree by construction.
+ ``engrava://recent``
+ The most-recently-updated thoughts as a JSON document.
+
+ Each handler returns a JSON string with the ``application/json`` MIME
+ type, so clients receive a stable, machine-parseable payload.
+
+ Args:
+ server: The server to register resources on.
+ provider: Supplies the active store to each resource at read time.
+
+ """
+
+ @server.resource(
+ "engrava://thought/{thought_id}",
+ name="thought",
+ title="Thought",
+ description="A single thought by its identifier, as a JSON document.",
+ mime_type=RESOURCE_MIME_TYPE,
+ )
+ async def thought_resource(thought_id: str) -> str:
+ payload = await get_thought_impl(provider.require(), thought_id)
+ return json.dumps(payload)
+
+ @server.resource(
+ "engrava://stats",
+ name="stats",
+ title="Store statistics",
+ description="Aggregate thought and edge counts and total storage size.",
+ mime_type=RESOURCE_MIME_TYPE,
+ )
+ async def stats_resource() -> str:
+ payload = await memory_stats_impl(provider.require())
+ return json.dumps(payload)
+
+ @server.resource(
+ "engrava://recent",
+ name="recent",
+ title="Recent thoughts",
+ description="The most-recently-updated thoughts, newest first, as a JSON document.",
+ mime_type=RESOURCE_MIME_TYPE,
+ )
+ async def recent_resource() -> str:
+ payload = await recent_thoughts_impl(provider.require())
+ return json.dumps(payload)
+
+
# C901: the mccabe count is inflated by the nested ``@server.tool`` handler
# definitions (one trivial delegating wrapper per tool), not by branching logic
# — this function has a single branch, the read-only guard. Splitting the flat
diff --git a/tests/mcp/test_resources.py b/tests/mcp/test_resources.py
new file mode 100644
index 0000000..09d4ccd
--- /dev/null
+++ b/tests/mcp/test_resources.py
@@ -0,0 +1,272 @@
+"""End-to-end tests for the MCP read-only resources.
+
+Exercises the resources through the in-memory MCP client transport (so
+registration, URI-template binding, and JSON serialisation all run for
+real), mirroring the tool tests in :mod:`tests.mcp.test_server`.
+
+Resources are reads by definition, so they are advertised in both the
+default and the read-only deployment; the read-only cases below assert
+that independence directly.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING
+
+import aiosqlite
+from mcp.shared.memory import create_connected_server_and_client_session as connect_client
+
+from engrava import (
+ CoreThoughtRecord,
+ LifecycleStatus,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtType,
+)
+from engrava.mcp import build_server
+from engrava.mcp.config import CONFIG_ENV_VAR, DB_PATH_ENV_VAR
+from engrava.mcp.server import READ_ONLY_ENV_VAR
+
+if TYPE_CHECKING:
+ from pathlib import Path
+
+ import pytest
+
+#: Static resource URIs the server must advertise via ``list_resources``.
+STATIC_RESOURCE_URIS = frozenset({"engrava://stats", "engrava://recent"})
+#: Templated resource URI advertised via ``list_resource_templates``.
+THOUGHT_TEMPLATE_URI = "engrava://thought/{thought_id}"
+
+
+async def _seed_two_thoughts(path: Path) -> None:
+ """Create a database file with two thoughts updated in a known order.
+
+ The second thought carries the larger ``updated_cycle`` so it is the
+ most recent — ``list_thoughts`` orders by descending ``updated_cycle``,
+ so ``engrava://recent`` must return it first.
+
+ Args:
+ path: Filesystem path for the new database.
+
+ """
+ connection = await aiosqlite.connect(str(path))
+ connection.row_factory = aiosqlite.Row
+ store = SqliteEngravaCore(connection)
+ await store.ensure_schema()
+ await store.create_thought(
+ CoreThoughtRecord(
+ thought_id="older-thought",
+ thought_type=ThoughtType.BELIEF,
+ essence="Older note",
+ content="The earlier of the two seeded thoughts.",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=1,
+ updated_cycle=1,
+ source="test",
+ )
+ )
+ await store.create_thought(
+ CoreThoughtRecord(
+ thought_id="newer-thought",
+ thought_type=ThoughtType.BELIEF,
+ essence="Newer note",
+ content="The later of the two seeded thoughts.",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=2,
+ updated_cycle=2,
+ source="test",
+ )
+ )
+ await connection.close()
+
+
+def _decode_single(result: object) -> dict[str, object]:
+ """Parse the single JSON text payload of a ``read_resource`` result.
+
+ Args:
+ result: The ``ReadResourceResult`` returned by ``read_resource``.
+
+ Returns:
+ The decoded JSON object carried by the result's sole content
+ block.
+
+ """
+ contents = result.contents # type: ignore[attr-defined]
+ assert len(contents) == 1
+ block = contents[0]
+ assert block.mimeType == "application/json"
+ decoded = json.loads(block.text)
+ assert isinstance(decoded, dict)
+ return decoded
+
+
+class TestResourceListing:
+ """List resources and templates through a connected client."""
+
+ async def test_static_resources_are_listed(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "list.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ listed = await client.list_resources()
+
+ assert {str(resource.uri) for resource in listed.resources} == STATIC_RESOURCE_URIS
+
+ async def test_thought_template_is_listed(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "templates.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ listed = await client.list_resource_templates()
+
+ templates = {template.uriTemplate for template in listed.resourceTemplates}
+ assert THOUGHT_TEMPLATE_URI in templates
+
+
+class TestResourceReads:
+ """Read each resource through a connected client."""
+
+ async def test_thought_resource_returns_seeded_thought(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "thought.db"
+ await _seed_two_thoughts(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ result = await client.read_resource("engrava://thought/newer-thought")
+
+ payload = _decode_single(result)
+ assert payload["found"] is True
+ thought = payload["thought"]
+ assert isinstance(thought, dict)
+ assert thought["thought_id"] == "newer-thought"
+ assert thought["essence"] == "Newer note"
+
+ async def test_thought_resource_unknown_id_is_graceful(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "missing.db"
+ await _seed_two_thoughts(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ # An unknown identifier must not raise over the transport; it
+ # returns a not-found payload, mirroring the get_thought tool.
+ async with connect_client(server) as client:
+ result = await client.read_resource("engrava://thought/no-such-id")
+
+ payload = _decode_single(result)
+ assert payload == {"found": False, "thought": None}
+
+ async def test_recent_resource_orders_newest_first(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "recent.db"
+ await _seed_two_thoughts(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ result = await client.read_resource("engrava://recent")
+
+ payload = _decode_single(result)
+ thoughts = payload["thoughts"]
+ assert isinstance(thoughts, list)
+ ids = [thought["thought_id"] for thought in thoughts]
+ # list_thoughts orders by descending updated_cycle, so the newer
+ # thought comes first.
+ assert ids == ["newer-thought", "older-thought"]
+ assert payload["limit"] == 10
+
+ async def test_stats_resource_matches_memory_stats_tool(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "stats.db"
+ await _seed_two_thoughts(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ resource_result = await client.read_resource("engrava://stats")
+ tool_result = await client.call_tool("memory_stats", {})
+
+ resource_payload = _decode_single(resource_result)
+ assert tool_result.structuredContent is not None
+ # The resource and the memory_stats tool share memory_stats_impl,
+ # so they must agree field-for-field (no duplicate stats logic).
+ assert resource_payload == tool_result.structuredContent
+ assert resource_payload["thought_count"] == 2
+
+
+class TestResourcesInReadOnlyMode:
+ """Resources are reads, so they survive the write-tool gate."""
+
+ async def test_resources_listed_in_read_only_mode(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "ro_list.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.setenv(READ_ONLY_ENV_VAR, "1")
+
+ server = build_server()
+ async with connect_client(server) as client:
+ static = await client.list_resources()
+ templates = await client.list_resource_templates()
+
+ # Read-only mode hides the write tools but must not hide resources.
+ assert {str(resource.uri) for resource in static.resources} == STATIC_RESOURCE_URIS
+ assert THOUGHT_TEMPLATE_URI in {
+ template.uriTemplate for template in templates.resourceTemplates
+ }
+
+ async def test_resources_readable_in_read_only_mode(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "ro_read.db"
+ await _seed_two_thoughts(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.setenv(READ_ONLY_ENV_VAR, "1")
+
+ server = build_server()
+ async with connect_client(server) as client:
+ stats = await client.read_resource("engrava://stats")
+ recent = await client.read_resource("engrava://recent")
+ thought = await client.read_resource("engrava://thought/newer-thought")
+
+ assert _decode_single(stats)["thought_count"] == 2
+ assert len(_decode_single(recent)["thoughts"]) == 2
+ assert _decode_single(thought)["found"] is True
From 1f6fa36d1b1fef6d63fa2f949c6f18441c84f25b Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 8 Jun 2026 20:21:09 +0200
Subject: [PATCH 07/40] feat(mcp): add guided memory prompts
Register three read-oriented MCP prompts that scaffold common retrieval
workflows as client slash-commands: summarize_recent_memory (optional
limit), find_related (topic), and reflect_on_topic (topic). Each renders
a ready-to-send instruction steering the assistant to the read tools and
resources; summarize_recent_memory also embeds a read-only snapshot of
the most recent thoughts.
Prompts are reads, so like the resources they are registered outside the
write-tool guard and stay available in read-only mode.
---
src/engrava/mcp/server.py | 179 +++++++++++++++++++++-
tests/mcp/test_prompts.py | 303 ++++++++++++++++++++++++++++++++++++++
2 files changed, 478 insertions(+), 4 deletions(-)
create mode 100644 tests/mcp/test_prompts.py
diff --git a/src/engrava/mcp/server.py b/src/engrava/mcp/server.py
index 28c92f3..24e870a 100644
--- a/src/engrava/mcp/server.py
+++ b/src/engrava/mcp/server.py
@@ -56,6 +56,23 @@
*not* gated by :data:`READ_ONLY_ENV_VAR`; they are advertised in both the
default and read-only deployments.
+Three *prompts* complete the surface. Prompts are parameterised templates
+that a client surfaces as slash-commands or buttons; each one renders a
+ready-to-send instruction that guides the assistant to gather context with
+the read tools and resources above. They are templates only — they open no
+write path and call no store method:
+
+``summarize_recent_memory``
+ Summarise the most recently stored thoughts. Takes an optional
+ ``limit`` (how many recent thoughts to consider).
+``find_related``
+ Find and synthesise thoughts related to a required ``topic``.
+``reflect_on_topic``
+ Reflect over what memory holds about a required ``topic``.
+
+Prompts are read-oriented, so — like the resources — they are *not* gated by
+:data:`READ_ONLY_ENV_VAR` and are advertised in both deployments.
+
The active store is supplied to tool and resource calls through a
:class:`StoreProvider` that the server's lifespan populates on startup and
clears on shutdown. Each tool delegates to a module-level implementation
@@ -96,6 +113,11 @@
#: Default number of thoughts returned by the ``engrava://recent`` resource.
DEFAULT_RECENT_LIMIT = 10
+#: Default number of recent thoughts the ``summarize_recent_memory`` prompt
+#: asks the assistant to consider when the caller omits ``limit``. Kept
+#: small so the summary stays focused on the latest activity.
+DEFAULT_SUMMARY_LIMIT = 5
+
#: MIME type advertised for every ``engrava://`` resource. Resource
#: handlers return a JSON document as text, so clients receive a stable,
#: machine-parseable content type.
@@ -634,14 +656,103 @@ def _with_limit(parsed: MindQLQuery, limit: int) -> MindQLQuery:
return replace(parsed, limit=limit)
+def _summarize_recent_prompt(limit: int, recent: dict[str, Any]) -> str:
+ """Build the ``summarize_recent_memory`` prompt text.
+
+ The text embeds the recent thoughts already gathered from the store so
+ the assistant can summarise them directly, while still naming the read
+ tools and resources it can use to widen the picture. Embedding is
+ read-only: ``recent`` is the output of :func:`recent_thoughts_impl`.
+
+ Args:
+ limit: Number of recent thoughts the summary should cover.
+ recent: The payload returned by :func:`recent_thoughts_impl`,
+ carrying a ``thoughts`` list newest-first.
+
+ Returns:
+ A ready-to-send instruction asking for a concise summary of the
+ most recent stored memory.
+
+ """
+ thoughts = recent.get("thoughts", [])
+ if thoughts:
+ snapshot = json.dumps(thoughts, indent=2)
+ data_section = (
+ f"Here are the {len(thoughts)} most recent thoughts "
+ f"(newest first), as JSON:\n\n{snapshot}\n\n"
+ )
+ else:
+ data_section = "The store currently holds no thoughts to summarise.\n\n"
+ return (
+ f"Summarise the {limit} most recently stored memories in this "
+ "engrava store.\n\n"
+ f"{data_section}"
+ "If you need more detail or want to confirm the latest activity, "
+ "read the `engrava://recent` resource or call the `memory_stats` "
+ "tool; use `get_thought` to expand any single thought by its "
+ "identifier. Produce a concise summary that highlights the main "
+ "themes, any recurring topics, and anything that looks important "
+ "or unresolved. Keep it brief — a short paragraph or a few bullet "
+ "points."
+ )
+
+
+def _find_related_prompt(topic: str) -> str:
+ """Build the ``find_related`` prompt text.
+
+ Args:
+ topic: The subject to find related thoughts about.
+
+ Returns:
+ A ready-to-send instruction asking the assistant to gather and
+ synthesise thoughts related to ``topic`` using ``search_memory``.
+
+ """
+ return (
+ f"Find and synthesise what this engrava memory store holds about "
+ f"{topic!r}.\n\n"
+ f"Use the `search_memory` tool with a query for {topic!r} (it ranks "
+ "results by lexical, vector, and recency signals); you can also try "
+ "`search_keywords` for an exact-term pass. Expand the most relevant "
+ "hits with `get_thought` to read their full content. Then synthesise "
+ "the findings into a short, organised summary of what is known about "
+ f"{topic!r}, grouping related points and noting any gaps or "
+ "contradictions."
+ )
+
+
+def _reflect_on_topic_prompt(topic: str) -> str:
+ """Build the ``reflect_on_topic`` prompt text.
+
+ Args:
+ topic: The subject to reflect on.
+
+ Returns:
+ A ready-to-send instruction that scaffolds a structured reflection
+ over what the store holds about ``topic``.
+
+ """
+ return (
+ f"Reflect on what this engrava memory store holds about {topic!r}.\n\n"
+ f"First gather the relevant memories: call `search_memory` for "
+ f"{topic!r} and read the strongest hits in full with `get_thought`. "
+ "Then reflect rather than merely listing: structure your response "
+ "around (1) what is well established about the topic, (2) open "
+ "questions or gaps in what is stored, and (3) any tensions or "
+ "contradictions between thoughts. Close with one or two concrete "
+ "follow-ups worth recording. Ground every observation in the "
+ "retrieved thoughts."
+ )
+
+
def build_server() -> FastMCP:
"""Build the engrava MCP server with its tools registered.
The returned server resolves its store from the environment when its
lifespan starts and releases the connection when the lifespan ends.
- The read tools and the resources are always registered; the write
- tools are registered unless :func:`_read_only_enabled` reports a
- read-only deployment.
+ The read tools, the resources, and the prompts are always registered;
+ the write tools are registered unless :func:`_read_only_enabled`
+ reports a read-only deployment.
Returns:
A configured :class:`FastMCP` server ready to ``run()``.
@@ -675,11 +786,14 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[None]:
"edges. Read-only resources are also available as attachable "
"context: a single thought (engrava://thought/{thought_id}), store "
"statistics (engrava://stats), and the most recent thoughts "
- "(engrava://recent)."
+ "(engrava://recent). Guided prompts scaffold common retrieval "
+ "workflows: summarize_recent_memory, find_related, and "
+ "reflect_on_topic."
),
lifespan=lifespan,
)
register_resources(server, provider)
+ register_prompts(server, provider)
register_tools(server, provider)
return server
@@ -743,6 +857,63 @@ async def recent_resource() -> str:
return json.dumps(payload)
+def register_prompts(server: FastMCP, provider: StoreProvider) -> None:
+ """Register the guided retrieval prompts on a server.
+
+ Three prompts are registered. They are parameterised templates that a
+ client surfaces as slash-commands or buttons; each renders a
+ ready-to-send instruction guiding the assistant to gather context with
+ the read tools and resources before answering. Prompts are
+ read-oriented, so — like the resources and unlike the write tools —
+ they are *not* gated by the read-only environment flag and are
+ advertised in every deployment:
+
+ ``summarize_recent_memory``
+ Summarise the most recent thoughts. Takes an optional ``limit``;
+ this is the one prompt that reads the store, embedding the recent
+ thoughts (read-only) so the assistant can summarise them inline.
+ ``find_related``
+ Find and synthesise thoughts related to a required ``topic``.
+ ``reflect_on_topic``
+ Reflect over what memory holds about a required ``topic``.
+
+ Args:
+ server: The server to register prompts on.
+ provider: Supplies the active store to ``summarize_recent_memory``
+ at render time; the topic prompts are pure templates and do not
+ use it.
+
+ """
+
+ @server.prompt(
+ name="summarize_recent_memory",
+ title="Summarise recent memory",
+ description=(
+ "Summarise the most recently stored thoughts. Optionally set "
+ "how many recent thoughts to consider."
+ ),
+ )
+ async def summarize_recent_memory(limit: int = DEFAULT_SUMMARY_LIMIT) -> str:
+ recent = await recent_thoughts_impl(provider.require(), limit=limit)
+ return _summarize_recent_prompt(limit, recent)
+
+ @server.prompt(
+ name="find_related",
+ title="Find related thoughts",
+ description="Find and synthesise stored thoughts related to a topic.",
+ )
+ def find_related(topic: str) -> str:
+ return _find_related_prompt(topic)
+
+ @server.prompt(
+ name="reflect_on_topic",
+ title="Reflect on a topic",
+ description="Reflect on what stored memory holds about a topic.",
+ )
+ def reflect_on_topic(topic: str) -> str:
+ return _reflect_on_topic_prompt(topic)
+
+
# C901: the mccabe count is inflated by the nested ``@server.tool`` handler
# definitions (one trivial delegating wrapper per tool), not by branching logic
# — this function has a single branch, the read-only guard. Splitting the flat
diff --git a/tests/mcp/test_prompts.py b/tests/mcp/test_prompts.py
new file mode 100644
index 0000000..0d1aa5b
--- /dev/null
+++ b/tests/mcp/test_prompts.py
@@ -0,0 +1,303 @@
+"""End-to-end tests for the MCP guided retrieval prompts.
+
+Exercises the prompts through the in-memory MCP client transport (so
+registration, argument-schema derivation, and message rendering all run
+for real), mirroring the tool tests in :mod:`tests.mcp.test_server` and
+the resource tests in :mod:`tests.mcp.test_resources`.
+
+Prompts are read-oriented, so they are advertised in both the default and
+the read-only deployment; the read-only cases below assert that
+independence directly.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import aiosqlite
+from mcp.shared.memory import create_connected_server_and_client_session as connect_client
+from mcp.types import TextContent
+
+from engrava import (
+ CoreThoughtRecord,
+ LifecycleStatus,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtType,
+)
+from engrava.mcp import build_server
+from engrava.mcp.config import CONFIG_ENV_VAR, DB_PATH_ENV_VAR
+from engrava.mcp.server import (
+ DEFAULT_SUMMARY_LIMIT,
+ READ_ONLY_ENV_VAR,
+ _find_related_prompt,
+ _reflect_on_topic_prompt,
+ _summarize_recent_prompt,
+)
+
+if TYPE_CHECKING:
+ from pathlib import Path
+
+ import pytest
+
+#: Names every prompt the server must advertise via ``list_prompts``.
+EXPECTED_PROMPT_NAMES = frozenset({"summarize_recent_memory", "find_related", "reflect_on_topic"})
+
+
+async def _seed_two_thoughts(path: Path) -> None:
+ """Create a database file with two thoughts updated in a known order.
+
+ The second thought carries the larger ``updated_cycle`` so it is the
+ most recent, which lets the ``summarize_recent_memory`` prompt embed a
+ deterministic newest-first snapshot.
+
+ Args:
+ path: Filesystem path for the new database.
+
+ """
+ connection = await aiosqlite.connect(str(path))
+ connection.row_factory = aiosqlite.Row
+ store = SqliteEngravaCore(connection)
+ await store.ensure_schema()
+ await store.create_thought(
+ CoreThoughtRecord(
+ thought_id="older-thought",
+ thought_type=ThoughtType.BELIEF,
+ essence="Older note",
+ content="The earlier of the two seeded thoughts.",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=1,
+ updated_cycle=1,
+ source="test",
+ )
+ )
+ await store.create_thought(
+ CoreThoughtRecord(
+ thought_id="newer-thought",
+ thought_type=ThoughtType.BELIEF,
+ essence="Newer note",
+ content="The later of the two seeded thoughts.",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=2,
+ updated_cycle=2,
+ source="test",
+ )
+ )
+ await connection.close()
+
+
+def _single_text(result: object) -> str:
+ """Return the text of a ``get_prompt`` result's single user message.
+
+ Args:
+ result: The ``GetPromptResult`` returned by ``get_prompt``.
+
+ Returns:
+ The text carried by the result's sole message, which must be a
+ ``user``-role text block.
+
+ """
+ messages = result.messages # type: ignore[attr-defined]
+ assert len(messages) == 1
+ message = messages[0]
+ assert message.role == "user"
+ content = message.content
+ assert isinstance(content, TextContent)
+ assert content.text
+ return content.text
+
+
+class TestPromptListing:
+ """List prompts and their declared arguments through a client."""
+
+ async def test_all_prompts_are_listed(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "list.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ listed = await client.list_prompts()
+
+ assert {prompt.name for prompt in listed.prompts} == EXPECTED_PROMPT_NAMES
+
+ async def test_prompt_arguments_are_declared(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "args.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ listed = await client.list_prompts()
+
+ required_by_name: dict[str, dict[str, bool]] = {}
+ for prompt in listed.prompts:
+ required_by_name[prompt.name] = {
+ argument.name: bool(argument.required) for argument in (prompt.arguments or [])
+ }
+
+ # topic is required for both topic prompts; limit is optional.
+ assert required_by_name["find_related"] == {"topic": True}
+ assert required_by_name["reflect_on_topic"] == {"topic": True}
+ assert required_by_name["summarize_recent_memory"] == {"limit": False}
+
+
+class TestPromptRendering:
+ """Render each prompt through a connected client."""
+
+ async def test_find_related_reflects_topic(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "related.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ result = await client.get_prompt("find_related", {"topic": "pour-over coffee"})
+
+ text = _single_text(result)
+ assert "pour-over coffee" in text
+ # The prompt must steer the model toward the search tool.
+ assert "search_memory" in text
+
+ async def test_reflect_on_topic_reflects_topic(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "reflect.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ result = await client.get_prompt("reflect_on_topic", {"topic": "green tea"})
+
+ text = _single_text(result)
+ assert "green tea" in text
+ assert "search_memory" in text
+
+ async def test_summarize_recent_uses_default_limit(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "summary_default.db"
+ await _seed_two_thoughts(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ result = await client.get_prompt("summarize_recent_memory", {})
+
+ text = _single_text(result)
+ # With no limit supplied the prompt falls back to the default.
+ assert f"{DEFAULT_SUMMARY_LIMIT} most recently stored" in text
+ assert "engrava://recent" in text
+ # The embedded snapshot is read-only data drawn from the store.
+ assert "newer-thought" in text
+
+ async def test_summarize_recent_honours_explicit_limit(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "summary_limit.db"
+ await _seed_two_thoughts(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ # Arguments arrive as strings over the wire; FastMCP coerces
+ # the declared ``int`` parameter.
+ result = await client.get_prompt("summarize_recent_memory", {"limit": "2"})
+
+ text = _single_text(result)
+ assert "2 most recently stored" in text
+ # limit=2 covers both seeded thoughts, so both appear in the snapshot.
+ assert "newer-thought" in text
+ assert "older-thought" in text
+
+
+class TestPromptsInReadOnlyMode:
+ """Prompts are reads, so they survive the write-tool gate."""
+
+ async def test_prompts_listed_in_read_only_mode(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(tmp_path / "ro_list.db"))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.setenv(READ_ONLY_ENV_VAR, "1")
+
+ server = build_server()
+ async with connect_client(server) as client:
+ listed = await client.list_prompts()
+
+ # Read-only mode hides the write tools but must not hide prompts.
+ assert {prompt.name for prompt in listed.prompts} == EXPECTED_PROMPT_NAMES
+
+ async def test_prompts_gettable_in_read_only_mode(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "ro_get.db"
+ await _seed_two_thoughts(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.setenv(READ_ONLY_ENV_VAR, "1")
+
+ server = build_server()
+ async with connect_client(server) as client:
+ summary = await client.get_prompt("summarize_recent_memory", {})
+ related = await client.get_prompt("find_related", {"topic": "tea"})
+ reflect = await client.get_prompt("reflect_on_topic", {"topic": "tea"})
+
+ # Each prompt renders normally even with writes disabled.
+ assert "newer-thought" in _single_text(summary)
+ assert "tea" in _single_text(related)
+ assert "tea" in _single_text(reflect)
+
+
+class TestPromptTextBuilders:
+ """Unit-cover the pure prompt-text builders directly.
+
+ The end-to-end cases above always seed thoughts, so the builders'
+ empty-store path is exercised here without standing up a server.
+ """
+
+ def test_summarize_recent_handles_empty_store(self) -> None:
+ text = _summarize_recent_prompt(7, {"thoughts": [], "limit": 7})
+ assert "7 most recently stored" in text
+ assert "no thoughts to summarise" in text
+
+ def test_summarize_recent_embeds_thoughts(self) -> None:
+ recent = {"thoughts": [{"thought_id": "abc", "essence": "Note"}], "limit": 1}
+ text = _summarize_recent_prompt(1, recent)
+ assert "1 most recent thoughts" in text
+ assert "abc" in text
+
+ def test_find_related_builder_includes_topic_and_tool(self) -> None:
+ text = _find_related_prompt("databases")
+ assert "databases" in text
+ assert "search_memory" in text
+
+ def test_reflect_builder_includes_topic_and_tool(self) -> None:
+ text = _reflect_on_topic_prompt("databases")
+ assert "databases" in text
+ assert "search_memory" in text
From 57357b711b584e4b3b5a2d206344d33230f35a9e Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 8 Jun 2026 22:11:03 +0200
Subject: [PATCH 08/40] feat(mcp): add memory filters and pagination
Widen the MCP read surface to the filters and pagination the public
store API already supports, without changing any core method signature.
- search_memory: optional thought_type / lifecycle_status / priority
filters applied as a post-filter on the ranked hybrid results
(the hybrid ranker cannot filter). Ranking order and scores are
preserved untouched; the unfiltered path is unchanged. When a filter
is supplied the response adds a "filtered" block reporting the active
criteria and scanned/matched/dropped counts, so a short or empty list
is never mistaken for "nothing ranked".
- list_memory: new deterministic, unranked browse tool exposing the
full list_thoughts filter matrix (type, status, priority, cycle range)
with limit/offset pagination. Read-only tool, available in both
default and read-only modes.
- query_memory already exposes limit; the MindQL grammar has no OFFSET,
so pagination stays at limit only (documented).
Surface docs (module docstring, server instructions, register_tools
docstring) updated to enumerate the six read tools.
---
src/engrava/mcp/server.py | 273 +++++++++++++++++++++++++++++++++--
tests/mcp/test_gating.py | 1 +
tests/mcp/test_read_tools.py | 244 ++++++++++++++++++++++++++++++-
tests/mcp/test_server.py | 132 ++++++++++++++++-
4 files changed, 634 insertions(+), 16 deletions(-)
diff --git a/src/engrava/mcp/server.py b/src/engrava/mcp/server.py
index 24e870a..f159558 100644
--- a/src/engrava/mcp/server.py
+++ b/src/engrava/mcp/server.py
@@ -6,18 +6,29 @@
MindQL extension commands. Think of it as a sibling of the command-line
interface that speaks MCP over stdio.
-Five read-only tools are exposed:
+Six read-only tools are exposed:
``get_thought``
Fetch a single thought by identifier.
``search_memory``
- Hybrid (lexical + vector + recency) ranked search.
+ Hybrid (lexical + vector + recency) ranked search. Optional
+ ``thought_type`` / ``lifecycle_status`` / ``priority`` filters narrow
+ the ranked hits *after* ranking (the hybrid ranker cannot filter), so
+ a filtered call may return fewer than ``top_k`` results and reports
+ how many ranked hits it dropped.
``search_keywords``
Pure full-text BM25 keyword search.
+``list_memory``
+ Deterministic, unranked browse over stored thoughts with the full
+ filter matrix (``thought_type``, ``lifecycle_status``, ``priority``,
+ updated-cycle range) and ``limit`` / ``offset`` pagination. Returns
+ thoughts newest-first with no score — the clean home for "list memory
+ by structured field", complementing the ranked ``search_memory``.
``query_memory``
Structured ``FIND`` queries in the MindQL query language. Only the
``FIND`` command is accepted; raw-SQL passthrough and every other
- command are rejected.
+ command are rejected. Accepts an optional ``limit`` (the MindQL
+ grammar has no ``OFFSET``, so this tool paginates by ``limit`` only).
``memory_stats``
Aggregate counts and store-health metrics.
@@ -113,6 +124,11 @@
#: Default number of thoughts returned by the ``engrava://recent`` resource.
DEFAULT_RECENT_LIMIT = 10
+#: Default page size for the ``list_memory`` browse tool. Matches the
+#: store's own ``list_thoughts`` default so an unpaged listing behaves the
+#: same whether driven through MCP or the core API directly.
+DEFAULT_LIST_LIMIT = 50
+
#: Default number of recent thoughts the ``summarize_recent_memory`` prompt
#: asks the assistant to consider when the caller omits ``limit``. Kept
#: small so the summary stays focused on the latest activity.
@@ -250,26 +266,109 @@ async def get_thought_impl(store: SqliteEngravaCore, thought_id: str) -> dict[st
return {"found": True, "thought": thought.model_dump(mode="json")}
+def _filter_criteria(
+ *,
+ thought_type: ThoughtType | None,
+ lifecycle_status: LifecycleStatus | None,
+ priority: Priority | None,
+) -> dict[str, str]:
+ """Collect the active thought filters as a JSON-friendly mapping.
+
+ Only the filters the caller actually supplied appear in the result;
+ each enum is reduced to its string value so the mapping serialises
+ cleanly into a tool response.
+
+ Args:
+ thought_type: Thought-type filter, or ``None`` if not filtering.
+ lifecycle_status: Lifecycle-status filter, or ``None``.
+ priority: Priority filter, or ``None``.
+
+ Returns:
+ A dict mapping each supplied filter's field name to its string
+ value. Empty when no filter was supplied.
+
+ """
+ criteria: dict[str, str] = {}
+ if thought_type is not None:
+ criteria["thought_type"] = thought_type.value
+ if lifecycle_status is not None:
+ criteria["lifecycle_status"] = lifecycle_status.value
+ if priority is not None:
+ criteria["priority"] = priority.value
+ return criteria
+
+
+def _thought_matches(
+ thought: ThoughtRecord,
+ *,
+ thought_type: ThoughtType | None,
+ lifecycle_status: LifecycleStatus | None,
+ priority: Priority | None,
+) -> bool:
+ """Report whether a thought satisfies every supplied filter.
+
+ A ``None`` filter is not applied, so a thought matches when it equals
+ each filter that *was* supplied (logical AND). With no filters
+ supplied this trivially returns ``True``.
+
+ Args:
+ thought: The thought record to test.
+ thought_type: Required thought type, or ``None`` to ignore.
+ lifecycle_status: Required lifecycle state, or ``None`` to ignore.
+ priority: Required priority level, or ``None`` to ignore.
+
+ Returns:
+ ``True`` when the thought matches every supplied filter.
+
+ """
+ if thought_type is not None and thought.thought_type is not thought_type:
+ return False
+ if lifecycle_status is not None and thought.lifecycle_status is not lifecycle_status:
+ return False
+ return not (priority is not None and thought.priority is not priority)
+
+
async def search_memory_impl(
store: SqliteEngravaCore,
query_text: str,
*,
top_k: int = DEFAULT_TOP_K,
include_reflections: bool = True,
+ thought_type: ThoughtType | None = None,
+ lifecycle_status: LifecycleStatus | None = None,
+ priority: Priority | None = None,
) -> dict[str, Any]:
"""Run a hybrid ranked search over stored memory.
+ The hybrid ranker itself does not filter by type, status, or
+ priority, so any of those filters are applied *after* ranking: the
+ ranked hits are fetched and the ones that do not match every supplied
+ filter are dropped. Ranking order is preserved and scores are never
+ altered or fabricated — a filtered response simply carries fewer
+ entries than ``top_k`` and reports how many were dropped (see the
+ ``filtered`` block below) so the caller is never misled into reading
+ an empty or short list as "nothing was found".
+
Args:
store: The store to query.
query_text: Natural-language query text.
- top_k: Maximum number of ranked results to return.
+ top_k: Maximum number of ranked results to consider. Filters are
+ applied to this ranked window, so the returned list may be
+ shorter when filters drop hits.
include_reflections: Whether consolidated reflection thoughts may
appear in the results.
+ thought_type: When set, keep only hits of this type.
+ lifecycle_status: When set, keep only hits in this lifecycle state.
+ priority: When set, keep only hits at this priority level.
Returns:
A dict with a ``results`` list of ``{"thought_id", "score"}``
- entries and a ``backends_used`` list naming the search backends
- that were available for the query.
+ entries (ranking order preserved) and a ``backends_used`` list
+ naming the search backends that were available for the query.
+ When at least one filter is supplied, a ``filtered`` block is
+ added carrying the active ``criteria`` and the ``scanned`` /
+ ``matched`` / ``dropped`` counts over the ranked window, so a
+ short or empty list is never mistaken for "no hits ranked".
"""
result = await store.search_hybrid(
@@ -277,11 +376,43 @@ async def search_memory_impl(
top_k=top_k,
include_reflections=include_reflections,
)
+ backends_used = sorted(result.backends_used)
+
+ criteria = _filter_criteria(
+ thought_type=thought_type,
+ lifecycle_status=lifecycle_status,
+ priority=priority,
+ )
+ if not criteria:
+ # Unfiltered path: byte-for-byte the original response shape.
+ return {
+ "results": [
+ {"thought_id": thought_id, "score": score} for thought_id, score in result.results
+ ],
+ "backends_used": backends_used,
+ }
+
+ kept: list[dict[str, Any]] = []
+ for thought_id, score in result.results:
+ thought = await store.get_thought(thought_id)
+ if thought is not None and _thought_matches(
+ thought,
+ thought_type=thought_type,
+ lifecycle_status=lifecycle_status,
+ priority=priority,
+ ):
+ kept.append({"thought_id": thought_id, "score": score})
+
+ scanned = len(result.results)
return {
- "results": [
- {"thought_id": thought_id, "score": score} for thought_id, score in result.results
- ],
- "backends_used": sorted(result.backends_used),
+ "results": kept,
+ "backends_used": backends_used,
+ "filtered": {
+ "criteria": criteria,
+ "scanned": scanned,
+ "matched": len(kept),
+ "dropped": scanned - len(kept),
+ },
}
@@ -407,6 +538,67 @@ async def recent_thoughts_impl(
}
+async def list_memory_impl(
+ store: SqliteEngravaCore,
+ *,
+ thought_type: ThoughtType | None = None,
+ lifecycle_status: LifecycleStatus | None = None,
+ priority: Priority | None = None,
+ min_cycle: int | None = None,
+ max_cycle: int | None = None,
+ include_expired: bool = False,
+ limit: int = DEFAULT_LIST_LIMIT,
+ offset: int = 0,
+) -> dict[str, Any]:
+ """List thoughts deterministically with filters and pagination.
+
+ A direct pass-through to the public
+ :meth:`~engrava.SqliteEngravaCore.list_thoughts`, which orders by
+ descending ``updated_cycle`` (newest first) and applies every filter
+ server-side. Unlike :func:`search_memory_impl` this is a plain
+ browse: there is no relevance ranking and therefore no score. It is
+ the right tool when a caller wants an exhaustive, paginated slice of
+ memory narrowed by structured fields rather than the best matches for
+ a query.
+
+ Args:
+ store: The store to query.
+ thought_type: When set, keep only thoughts of this type.
+ lifecycle_status: When set, keep only thoughts in this state.
+ priority: When set, keep only thoughts at this priority level.
+ min_cycle: Inclusive lower bound on ``updated_cycle``.
+ max_cycle: Inclusive upper bound on ``updated_cycle``.
+ include_expired: When ``True``, expired thoughts are included.
+ Defaults to ``False`` so expired thoughts stay hidden.
+ limit: Maximum number of thoughts to return (page size).
+ offset: Number of leading thoughts to skip (page start).
+
+ Returns:
+ A dict with a ``thoughts`` list of JSON-serialisable thoughts
+ (newest first), the ``count`` of thoughts on this page, and the
+ ``limit`` / ``offset`` that were applied so the caller can drive
+ pagination.
+
+ """
+ thoughts = await store.list_thoughts(
+ thought_type=thought_type.value if thought_type is not None else None,
+ lifecycle_status=lifecycle_status.value if lifecycle_status is not None else None,
+ priority=priority.value if priority is not None else None,
+ min_cycle=min_cycle,
+ max_cycle=max_cycle,
+ include_expired=include_expired,
+ limit=limit,
+ offset=offset,
+ )
+ serialised = [thought.model_dump(mode="json") for thought in thoughts]
+ return {
+ "thoughts": serialised,
+ "count": len(serialised),
+ "limit": limit,
+ "offset": offset,
+ }
+
+
async def store_thought_impl(
store: SqliteEngravaCore,
essence: str,
@@ -779,8 +971,12 @@ async def lifespan(_server: FastMCP) -> AsyncIterator[None]:
SERVER_NAME,
instructions=(
"Access to an engrava agent-memory store: fetch thoughts, run "
- "hybrid and keyword search, run structured MindQL FIND queries, "
- "and read store statistics. Unless the server is started in "
+ "hybrid and keyword search, list thoughts with structured filters "
+ "and pagination, run structured MindQL FIND queries, and read "
+ "store statistics. Hybrid search (search_memory) can also be "
+ "narrowed by thought type, lifecycle status, or priority, but it "
+ "filters after ranking; for an exhaustive unranked listing by "
+ "those fields use list_memory. Unless the server is started in "
"read-only mode, you can also store new thoughts, update existing "
"thoughts, link thoughts with typed edges, and delete thoughts or "
"edges. Read-only resources are also available as attachable "
@@ -922,7 +1118,9 @@ def reflect_on_topic(topic: str) -> str:
def register_tools(server: FastMCP, provider: StoreProvider) -> None: # noqa: C901
"""Register the MCP tools on a server.
- The five read tools are always registered. The five write tools are
+ The six read tools (``get_thought``, ``search_memory``,
+ ``search_keywords``, ``list_memory``, ``query_memory``,
+ ``memory_stats``) are always registered. The five write tools are
registered only when the server is not in read-only mode (see
:func:`_read_only_enabled`); in read-only mode they are never
advertised to clients.
@@ -946,7 +1144,12 @@ async def get_thought(thought_id: str) -> dict[str, Any]:
description=(
"Hybrid ranked search (lexical + vector + recency) over stored "
"memory. Returns ranked thought identifiers with scores and the "
- "search backends that were available."
+ "search backends that were available. Optionally narrow the "
+ "ranked hits by thought type, lifecycle status, or priority; "
+ "these filters are applied after ranking, so a filtered call may "
+ "return fewer than top_k results and reports how many ranked hits "
+ "were dropped. For an exhaustive, unranked, paginated listing by "
+ "those same fields, use list_memory instead."
),
annotations=_READ_ONLY,
)
@@ -955,12 +1158,54 @@ async def search_memory(
top_k: int = DEFAULT_TOP_K,
*,
include_reflections: bool = True,
+ thought_type: ThoughtType | None = None,
+ lifecycle_status: LifecycleStatus | None = None,
+ priority: Priority | None = None,
) -> dict[str, Any]:
return await search_memory_impl(
provider.require(),
query_text,
top_k=top_k,
include_reflections=include_reflections,
+ thought_type=thought_type,
+ lifecycle_status=lifecycle_status,
+ priority=priority,
+ )
+
+ @server.tool(
+ name="list_memory",
+ description=(
+ "List stored thoughts deterministically with optional filters and "
+ "pagination. Unlike search_memory this does no relevance ranking "
+ "and returns no scores: it is a plain browse over memory, ordered "
+ "newest first. Filter by thought type, lifecycle status, priority, "
+ "and an updated-cycle range; page through results with limit and "
+ "offset. Use this to enumerate memory by structured fields; use "
+ "search_memory when you want the best matches for a query."
+ ),
+ annotations=_READ_ONLY,
+ )
+ async def list_memory(
+ thought_type: ThoughtType | None = None,
+ lifecycle_status: LifecycleStatus | None = None,
+ priority: Priority | None = None,
+ *,
+ min_cycle: int | None = None,
+ max_cycle: int | None = None,
+ include_expired: bool = False,
+ limit: int = DEFAULT_LIST_LIMIT,
+ offset: int = 0,
+ ) -> dict[str, Any]:
+ return await list_memory_impl(
+ provider.require(),
+ thought_type=thought_type,
+ lifecycle_status=lifecycle_status,
+ priority=priority,
+ min_cycle=min_cycle,
+ max_cycle=max_cycle,
+ include_expired=include_expired,
+ limit=limit,
+ offset=offset,
)
@server.tool(
diff --git a/tests/mcp/test_gating.py b/tests/mcp/test_gating.py
index 7aab76a..51c32f6 100644
--- a/tests/mcp/test_gating.py
+++ b/tests/mcp/test_gating.py
@@ -22,6 +22,7 @@
"get_thought",
"search_memory",
"search_keywords",
+ "list_memory",
"query_memory",
"memory_stats",
}
diff --git a/tests/mcp/test_read_tools.py b/tests/mcp/test_read_tools.py
index aa2cd86..e840e20 100644
--- a/tests/mcp/test_read_tools.py
+++ b/tests/mcp/test_read_tools.py
@@ -8,14 +8,23 @@
from typing import TYPE_CHECKING
+import aiosqlite
import pytest
+from engrava import (
+ CoreThoughtRecord,
+ LifecycleStatus,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtType,
+)
from engrava.mcp.server import (
DEFAULT_TOP_K,
StoreNotReadyError,
StoreProvider,
UnsupportedQueryError,
get_thought_impl,
+ list_memory_impl,
memory_stats_impl,
query_memory_impl,
search_keywords_impl,
@@ -24,7 +33,7 @@
from engrava.mindql.parser import MindQLParseError
if TYPE_CHECKING:
- from engrava.infrastructure.sqlite.engrava_core import SqliteEngravaCore
+ from collections.abc import AsyncIterator
class TestGetThought:
@@ -148,5 +157,238 @@ def test_clear_resets(self, store: SqliteEngravaCore) -> None:
provider.require()
+class TestSearchMemoryFilters:
+ """Tests for the optional filters on the ``search_memory`` tool.
+
+ The shared ``store`` fixture seeds two ``BELIEF`` thoughts, both
+ ``ACTIVE``: ``thought-alpha`` at ``P2`` and ``thought-beta`` at ``P1``.
+ A query for "notes" ranks both (each essence ends in "notes"), which
+ lets a single filter drop exactly one ranked hit.
+ """
+
+ async def test_unfiltered_call_is_unchanged(self, store: SqliteEngravaCore) -> None:
+ # The unfiltered response must keep its original shape exactly: a
+ # results list and backends_used, and crucially no ``filtered``
+ # block (that key only appears once a filter is supplied).
+ result = await search_memory_impl(store, "notes")
+ assert set(result) == {"results", "backends_used"}
+ assert "filtered" not in result
+ assert {entry["thought_id"] for entry in result["results"]} == {
+ "thought-alpha",
+ "thought-beta",
+ }
+
+ async def test_priority_filter_keeps_only_matching(self, store: SqliteEngravaCore) -> None:
+ result = await search_memory_impl(store, "notes", priority=Priority.P1)
+ assert [entry["thought_id"] for entry in result["results"]] == ["thought-beta"]
+
+ async def test_lifecycle_filter_keeps_only_matching(self, store: SqliteEngravaCore) -> None:
+ # Both seeded thoughts are ACTIVE, so an ACTIVE filter keeps both
+ # while a DONE filter keeps none.
+ active = await search_memory_impl(store, "notes", lifecycle_status=LifecycleStatus.ACTIVE)
+ assert {entry["thought_id"] for entry in active["results"]} == {
+ "thought-alpha",
+ "thought-beta",
+ }
+ done = await search_memory_impl(store, "notes", lifecycle_status=LifecycleStatus.DONE)
+ assert done["results"] == []
+
+ async def test_thought_type_filter_keeps_only_matching(self, store: SqliteEngravaCore) -> None:
+ # Both seeded thoughts are BELIEF; a TASK filter matches nothing.
+ belief = await search_memory_impl(store, "notes", thought_type=ThoughtType.BELIEF)
+ assert {entry["thought_id"] for entry in belief["results"]} == {
+ "thought-alpha",
+ "thought-beta",
+ }
+ task = await search_memory_impl(store, "notes", thought_type=ThoughtType.TASK)
+ assert task["results"] == []
+
+ async def test_combined_filters_apply_as_and(self, store: SqliteEngravaCore) -> None:
+ # P1 AND ACTIVE matches only beta; P1 AND DONE matches nothing.
+ match = await search_memory_impl(
+ store,
+ "notes",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ )
+ assert [entry["thought_id"] for entry in match["results"]] == ["thought-beta"]
+ miss = await search_memory_impl(
+ store,
+ "notes",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.DONE,
+ )
+ assert miss["results"] == []
+
+ async def test_ranking_honesty_reports_dropped_hits(self, store: SqliteEngravaCore) -> None:
+ # Filtering "notes" to P1 drops alpha (P2). The response must say so
+ # truthfully rather than silently returning a short list.
+ result = await search_memory_impl(store, "notes", priority=Priority.P1)
+ filtered = result["filtered"]
+ assert filtered["criteria"] == {"priority": "P1"}
+ assert filtered["scanned"] == 2
+ assert filtered["matched"] == 1
+ assert filtered["dropped"] == 1
+ # matched must equal the number of returned results — no padding.
+ assert filtered["matched"] == len(result["results"])
+
+ async def test_ranking_honesty_when_filter_removes_all(self, store: SqliteEngravaCore) -> None:
+ # A filter that matches nothing returns an empty list, but the
+ # counts make clear hits *were* ranked and then dropped (so an empty
+ # result is not mistaken for "the query ranked nothing").
+ result = await search_memory_impl(store, "notes", thought_type=ThoughtType.TASK)
+ assert result["results"] == []
+ assert result["filtered"]["scanned"] == 2
+ assert result["filtered"]["matched"] == 0
+ assert result["filtered"]["dropped"] == 2
+
+ async def test_filtered_results_preserve_score_and_order(
+ self, store: SqliteEngravaCore
+ ) -> None:
+ # Scores are carried through from the ranker, never fabricated, and
+ # the surviving hits keep their ranked order.
+ unfiltered = await search_memory_impl(store, "notes")
+ ranked_order = [entry["thought_id"] for entry in unfiltered["results"]]
+ scores = {entry["thought_id"]: entry["score"] for entry in unfiltered["results"]}
+
+ filtered = await search_memory_impl(store, "notes", lifecycle_status=LifecycleStatus.ACTIVE)
+ kept_order = [entry["thought_id"] for entry in filtered["results"]]
+ # Order is the unfiltered order restricted to the survivors.
+ assert kept_order == [tid for tid in ranked_order if tid in set(kept_order)]
+ for entry in filtered["results"]:
+ assert entry["score"] == scores[entry["thought_id"]]
+
+
+@pytest.fixture
+async def varied_store() -> AsyncIterator[SqliteEngravaCore]:
+ """Yield a store seeded with thoughts spanning the filter matrix.
+
+ Five thoughts vary by type, lifecycle status, priority, and
+ ``updated_cycle`` so the ``list_memory`` filters and pagination can be
+ exercised independently.
+
+ Yields:
+ A ``SqliteEngravaCore`` seeded with five varied thoughts.
+
+ """
+ connection = await aiosqlite.connect(":memory:")
+ connection.row_factory = aiosqlite.Row
+ await connection.execute("PRAGMA foreign_keys=ON")
+ backend = SqliteEngravaCore(connection)
+ await backend.ensure_schema()
+
+ seeds = [
+ ("note-1", ThoughtType.NOTE, LifecycleStatus.CREATED, Priority.P3, 1),
+ ("task-1", ThoughtType.TASK, LifecycleStatus.ACTIVE, Priority.P1, 2),
+ ("task-2", ThoughtType.TASK, LifecycleStatus.ACTIVE, Priority.P2, 3),
+ ("belief-1", ThoughtType.BELIEF, LifecycleStatus.ACTIVE, Priority.P1, 4),
+ ("note-2", ThoughtType.NOTE, LifecycleStatus.CREATED, Priority.P4, 5),
+ ]
+ for thought_id, thought_type, status, priority, cycle in seeds:
+ await backend.create_thought(
+ CoreThoughtRecord(
+ thought_id=thought_id,
+ thought_type=thought_type,
+ essence=f"Essence for {thought_id}",
+ content=f"Content body for {thought_id}.",
+ priority=priority,
+ lifecycle_status=status,
+ created_cycle=cycle,
+ updated_cycle=cycle,
+ source="test",
+ )
+ )
+
+ try:
+ yield backend
+ finally:
+ await connection.close()
+
+
+class TestListMemory:
+ """Tests for the deterministic ``list_memory`` browse tool."""
+
+ async def test_unfiltered_lists_all_newest_first(self, varied_store: SqliteEngravaCore) -> None:
+ result = await list_memory_impl(varied_store)
+ ids = [thought["thought_id"] for thought in result["thoughts"]]
+ # list_thoughts orders by descending updated_cycle (newest first).
+ assert ids == ["note-2", "belief-1", "task-2", "task-1", "note-1"]
+ assert result["count"] == len(ids)
+ # Browse results never carry a relevance score.
+ assert all("score" not in thought for thought in result["thoughts"])
+
+ async def test_filter_by_thought_type(self, varied_store: SqliteEngravaCore) -> None:
+ result = await list_memory_impl(varied_store, thought_type=ThoughtType.TASK)
+ ids = {thought["thought_id"] for thought in result["thoughts"]}
+ assert ids == {"task-1", "task-2"}
+
+ async def test_filter_by_lifecycle_status(self, varied_store: SqliteEngravaCore) -> None:
+ result = await list_memory_impl(varied_store, lifecycle_status=LifecycleStatus.CREATED)
+ ids = {thought["thought_id"] for thought in result["thoughts"]}
+ assert ids == {"note-1", "note-2"}
+
+ async def test_filter_by_priority(self, varied_store: SqliteEngravaCore) -> None:
+ result = await list_memory_impl(varied_store, priority=Priority.P1)
+ ids = {thought["thought_id"] for thought in result["thoughts"]}
+ assert ids == {"task-1", "belief-1"}
+
+ async def test_combined_filters_apply_as_and(self, varied_store: SqliteEngravaCore) -> None:
+ result = await list_memory_impl(
+ varied_store,
+ thought_type=ThoughtType.TASK,
+ priority=Priority.P1,
+ )
+ ids = [thought["thought_id"] for thought in result["thoughts"]]
+ assert ids == ["task-1"]
+
+ async def test_cycle_range_filters(self, varied_store: SqliteEngravaCore) -> None:
+ result = await list_memory_impl(varied_store, min_cycle=2, max_cycle=4)
+ ids = {thought["thought_id"] for thought in result["thoughts"]}
+ assert ids == {"task-1", "task-2", "belief-1"}
+
+ async def test_pagination_limit_and_offset(self, varied_store: SqliteEngravaCore) -> None:
+ # Newest-first order: note-2, belief-1, task-2, task-1, note-1.
+ first = await list_memory_impl(varied_store, limit=2, offset=0)
+ assert [t["thought_id"] for t in first["thoughts"]] == ["note-2", "belief-1"]
+ assert first["count"] == 2
+ assert first["limit"] == 2
+ assert first["offset"] == 0
+
+ second = await list_memory_impl(varied_store, limit=2, offset=2)
+ assert [t["thought_id"] for t in second["thoughts"]] == ["task-2", "task-1"]
+ assert second["offset"] == 2
+
+ third = await list_memory_impl(varied_store, limit=2, offset=4)
+ assert [t["thought_id"] for t in third["thoughts"]] == ["note-1"]
+ assert third["count"] == 1
+
+ async def test_offset_past_end_is_empty(self, varied_store: SqliteEngravaCore) -> None:
+ result = await list_memory_impl(varied_store, offset=100)
+ assert result["thoughts"] == []
+ assert result["count"] == 0
+
+
+class TestQueryMemoryLimit:
+ """Tests that ``query_memory`` paginates by ``limit`` (MindQL has no OFFSET)."""
+
+ async def test_limit_argument_caps_rows(self, store: SqliteEngravaCore) -> None:
+ # Both seeded thoughts are ACTIVE; an explicit limit caps the rows.
+ result = await query_memory_impl(
+ store,
+ "FIND thoughts WHERE lifecycle_status = 'ACTIVE'",
+ limit=1,
+ )
+ assert len(result["rows"]) == 1
+
+ async def test_limit_argument_overrides_clause(self, store: SqliteEngravaCore) -> None:
+ # The limit argument wins over a larger LIMIT clause in the query.
+ result = await query_memory_impl(
+ store,
+ "FIND thoughts WHERE lifecycle_status = 'ACTIVE' LIMIT 10",
+ limit=1,
+ )
+ assert len(result["rows"]) == 1
+
+
def test_default_top_k_is_ten() -> None:
assert DEFAULT_TOP_K == 10
diff --git a/tests/mcp/test_server.py b/tests/mcp/test_server.py
index 3525274..46c9f96 100644
--- a/tests/mcp/test_server.py
+++ b/tests/mcp/test_server.py
@@ -34,7 +34,14 @@
from pathlib import Path
READ_TOOL_NAMES = frozenset(
- {"get_thought", "search_memory", "search_keywords", "query_memory", "memory_stats"}
+ {
+ "get_thought",
+ "search_memory",
+ "search_keywords",
+ "list_memory",
+ "query_memory",
+ "memory_stats",
+ }
)
WRITE_TOOL_NAMES = frozenset(
{"store_thought", "update_thought", "link_thoughts", "delete_thought", "delete_edge"}
@@ -303,6 +310,129 @@ async def test_memory_stats_reports_seeded_count(
assert result.structuredContent["thought_count"] == 1
+async def _seed_varied_database(path: Path) -> None:
+ """Create a database file spanning several types, statuses, priorities.
+
+ The seed gives the filter tools something to discriminate: a mix of
+ ``TASK``/``NOTE`` thoughts, ``ACTIVE``/``CREATED`` states, and ``P1``/
+ ``P3`` priorities, all sharing the keyword "widget" so a single query
+ ranks every row.
+
+ Args:
+ path: Filesystem path for the new database.
+
+ """
+ connection = await aiosqlite.connect(str(path))
+ connection.row_factory = aiosqlite.Row
+ store = SqliteEngravaCore(connection)
+ await store.ensure_schema()
+ seeds = [
+ ("active-task", ThoughtType.TASK, LifecycleStatus.ACTIVE, Priority.P1, 1),
+ ("created-note", ThoughtType.NOTE, LifecycleStatus.CREATED, Priority.P3, 2),
+ ("active-note", ThoughtType.NOTE, LifecycleStatus.ACTIVE, Priority.P3, 3),
+ ]
+ for thought_id, thought_type, status, priority, cycle in seeds:
+ await store.create_thought(
+ CoreThoughtRecord(
+ thought_id=thought_id,
+ thought_type=thought_type,
+ essence=f"Widget note {thought_id}",
+ content=f"A widget thought stored as {thought_id}.",
+ priority=priority,
+ lifecycle_status=status,
+ created_cycle=cycle,
+ updated_cycle=cycle,
+ source="test",
+ )
+ )
+ await connection.close()
+
+
+class TestFilterAndListOverTransport:
+ """Drive the new filter and browse surface through a connected client."""
+
+ async def test_search_memory_filter_round_trip(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "search_filter.db"
+ await _seed_varied_database(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ unfiltered = await client.call_tool("search_memory", {"query_text": "widget"})
+ filtered = await client.call_tool(
+ "search_memory",
+ {"query_text": "widget", "thought_type": "NOTE"},
+ )
+
+ assert unfiltered.structuredContent is not None
+ # The unfiltered response carries no ``filtered`` block.
+ assert "filtered" not in unfiltered.structuredContent
+
+ assert filtered.structuredContent is not None
+ kept = {entry["thought_id"] for entry in filtered.structuredContent["results"]}
+ assert kept == {"created-note", "active-note"}
+ # Ranking honesty: the dropped TASK hit is accounted for truthfully.
+ block = filtered.structuredContent["filtered"]
+ assert block["criteria"] == {"thought_type": "NOTE"}
+ assert block["matched"] == 2
+ assert block["dropped"] == 1
+
+ async def test_list_memory_round_trip(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "list.db"
+ await _seed_varied_database(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.delenv(READ_ONLY_ENV_VAR, raising=False)
+
+ server = build_server()
+ async with connect_client(server) as client:
+ listed = await client.call_tool(
+ "list_memory",
+ {"lifecycle_status": "ACTIVE", "limit": 10},
+ )
+ paged = await client.call_tool("list_memory", {"limit": 1, "offset": 1})
+
+ assert listed.structuredContent is not None
+ ids = {thought["thought_id"] for thought in listed.structuredContent["thoughts"]}
+ assert ids == {"active-task", "active-note"}
+
+ assert paged.structuredContent is not None
+ # Newest first (created-note at cycle 2 is the second row), one per page.
+ assert paged.structuredContent["count"] == 1
+ assert [t["thought_id"] for t in paged.structuredContent["thoughts"]] == ["created-note"]
+
+ async def test_list_memory_available_in_read_only_mode(
+ self,
+ monkeypatch: pytest.MonkeyPatch,
+ tmp_path: Path,
+ ) -> None:
+ db_path = tmp_path / "ro_list_tool.db"
+ await _seed_varied_database(db_path)
+ monkeypatch.setenv(DB_PATH_ENV_VAR, str(db_path))
+ monkeypatch.delenv(CONFIG_ENV_VAR, raising=False)
+ monkeypatch.setenv(READ_ONLY_ENV_VAR, "1")
+
+ server = build_server()
+ async with connect_client(server) as client:
+ listed = await client.list_tools()
+ result = await client.call_tool("list_memory", {})
+
+ # list_memory is a read tool, so it survives the write-tool gate.
+ assert "list_memory" in {tool.name for tool in listed.tools}
+ assert result.structuredContent is not None
+ assert result.structuredContent["count"] == 3
+
+
class TestStoreResolution:
"""Tests for environment-driven store resolution."""
From 8b615cc58c5b81adf33a29de16be8060f2bf9bfe Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 8 Jun 2026 23:38:25 +0200
Subject: [PATCH 09/40] feat(mcp): map known failures to typed, actionable tool
errors
Translate the typed exceptions a tool can hit (store-not-ready, a
non-FIND query_memory command, a malformed FIND, an update of a missing
thought, and a link to a missing endpoint) into a clean ToolError with a
helpful hint, instead of letting a raw exception surface.
This is presentation only: it adds no capability and relaxes no guard.
The FIND-only contract on query_memory is preserved verbatim and the
rejection message never suggests raw SQL is runnable. Messages name only
the documented configuration environment variables and carry no
filesystem paths, stack frames, or internal symbol names.
Add tests/mcp/test_errors.py covering each mapped condition, guard
preservation, and the no-leak property over the in-process transport.
---
src/engrava/mcp/server.py | 200 +++++++++++++++------
tests/mcp/test_errors.py | 355 ++++++++++++++++++++++++++++++++++++++
2 files changed, 498 insertions(+), 57 deletions(-)
create mode 100644 tests/mcp/test_errors.py
diff --git a/src/engrava/mcp/server.py b/src/engrava/mcp/server.py
index f159558..3ff4ce6 100644
--- a/src/engrava/mcp/server.py
+++ b/src/engrava/mcp/server.py
@@ -102,13 +102,15 @@
import anyio
from mcp.server.fastmcp import FastMCP
+from mcp.server.fastmcp.exceptions import ToolError
from mcp.types import ToolAnnotations
from engrava.domain.enums import EdgeType, LifecycleStatus, Priority, ThoughtType
+from engrava.domain.exceptions import ReferentialIntegrityError, ThoughtNotFoundError
from engrava.domain.models.edge import EdgeRecord
from engrava.domain.models.thought import ThoughtRecord
from engrava.mcp.config import ResolvedStore, resolve_store
-from engrava.mindql.parser import MindQLCommand, MindQLQuery, parse
+from engrava.mindql.parser import MindQLCommand, MindQLParseError, MindQLQuery, parse
if TYPE_CHECKING:
from collections.abc import AsyncIterator
@@ -147,6 +149,13 @@
#: clock, so new records start at the origin cycle.
INITIAL_CYCLE = 0
+#: A valid MindQL ``FIND`` query, embedded verbatim in the actionable hints
+#: that ``query_memory`` returns when a caller sends a malformed or
+#: unsupported query. Showing one correct example is the fastest way to get
+#: a client back onto the supported path; it deliberately demonstrates only
+#: the ``FIND`` command, never raw SQL.
+FIND_QUERY_EXAMPLE = "FIND thoughts WHERE lifecycle_status = 'ACTIVE' LIMIT 10"
+
#: Environment variable that, when truthy, suppresses registration of the
#: write tools so the server exposes a read-only surface.
READ_ONLY_ENV_VAR = "ENGRAVA_MCP_READ_ONLY"
@@ -202,9 +211,75 @@ def __init__(self, command: str) -> None:
self.command = command
super().__init__(
f"query_memory accepts only FIND queries; received {command!r}. "
- "Use the FIND command, for example: "
- "FIND thoughts WHERE lifecycle_status = 'ACTIVE' LIMIT 10"
+ f"Use the FIND command, for example: {FIND_QUERY_EXAMPLE}"
+ )
+
+
+@asynccontextmanager
+async def _tool_errors() -> AsyncIterator[None]:
+ """Translate known typed failures into clean, actionable MCP errors.
+
+ Wraps the body of a tool handler so that the typed exceptions raised by
+ the store, the MindQL parser, and this module's own consumer-policy
+ guard surface to the client as a :class:`ToolError` carrying a curated,
+ agent-facing message instead of an internal exception. FastMCP reports
+ a :class:`ToolError` to the client with ``isError`` set and the message
+ as text, so the client receives an actionable hint rather than a raw
+ traceback or an internal class name.
+
+ This is *presentation only*: it adds no new capability and relaxes no
+ guard. Each branch re-raises an existing failure with a better message;
+ the ``UnsupportedQueryError`` branch in particular preserves the
+ ``FIND``-only contract verbatim and never suggests that raw SQL is
+ runnable over the wire. Conditions this module does not recognise are
+ left to propagate unchanged.
+
+ The messages name only the documented configuration *environment
+ variables* (never a filesystem path), carry no stack frames, and expose
+ no internal symbol names, so a misuse reply leaks nothing about the
+ deployment.
+
+ Yields:
+ ``None``; the caller runs the guarded tool body inside the ``with``.
+
+ Raises:
+ ToolError: With an actionable message when a recognised typed
+ failure occurs while the body runs.
+
+ """
+ try:
+ yield
+ except StoreNotReadyError as exc:
+ msg = (
+ "The engrava memory store is not available yet. Start the server "
+ "with a store configured: set ENGRAVA_DB_PATH to a database file, "
+ "or point ENGRAVA_MCP_CONFIG at an engrava.yaml that names one."
+ )
+ raise ToolError(msg) from exc
+ except UnsupportedQueryError as exc:
+ # The exception text already states the FIND-only contract and shows
+ # a valid FIND example; echoing it keeps the guard's wording intact
+ # and never invites raw SQL.
+ raise ToolError(str(exc)) from exc
+ except MindQLParseError as exc:
+ msg = (
+ f"That query could not be parsed: {exc}. query_memory runs MindQL "
+ f"FIND queries; for example: {FIND_QUERY_EXAMPLE}"
+ )
+ raise ToolError(msg) from exc
+ except ThoughtNotFoundError as exc:
+ msg = (
+ f"No thought exists with id {exc.thought_id!r}. Check the "
+ "identifier, or use search_memory or list_memory to find it."
+ )
+ raise ToolError(msg) from exc
+ except ReferentialIntegrityError as exc:
+ msg = (
+ f"Cannot link thoughts: no thought exists with id "
+ f"{exc.referenced_id!r}. Create that thought first, or correct "
+ "the identifier."
)
+ raise ToolError(msg) from exc
class StoreProvider:
@@ -1137,7 +1212,8 @@ def register_tools(server: FastMCP, provider: StoreProvider) -> None: # noqa: C
annotations=_READ_ONLY,
)
async def get_thought(thought_id: str) -> dict[str, Any]:
- return await get_thought_impl(provider.require(), thought_id)
+ async with _tool_errors():
+ return await get_thought_impl(provider.require(), thought_id)
@server.tool(
name="search_memory",
@@ -1162,15 +1238,16 @@ async def search_memory(
lifecycle_status: LifecycleStatus | None = None,
priority: Priority | None = None,
) -> dict[str, Any]:
- return await search_memory_impl(
- provider.require(),
- query_text,
- top_k=top_k,
- include_reflections=include_reflections,
- thought_type=thought_type,
- lifecycle_status=lifecycle_status,
- priority=priority,
- )
+ async with _tool_errors():
+ return await search_memory_impl(
+ provider.require(),
+ query_text,
+ top_k=top_k,
+ include_reflections=include_reflections,
+ thought_type=thought_type,
+ lifecycle_status=lifecycle_status,
+ priority=priority,
+ )
@server.tool(
name="list_memory",
@@ -1196,17 +1273,18 @@ async def list_memory(
limit: int = DEFAULT_LIST_LIMIT,
offset: int = 0,
) -> dict[str, Any]:
- return await list_memory_impl(
- provider.require(),
- thought_type=thought_type,
- lifecycle_status=lifecycle_status,
- priority=priority,
- min_cycle=min_cycle,
- max_cycle=max_cycle,
- include_expired=include_expired,
- limit=limit,
- offset=offset,
- )
+ async with _tool_errors():
+ return await list_memory_impl(
+ provider.require(),
+ thought_type=thought_type,
+ lifecycle_status=lifecycle_status,
+ priority=priority,
+ min_cycle=min_cycle,
+ max_cycle=max_cycle,
+ include_expired=include_expired,
+ limit=limit,
+ offset=offset,
+ )
@server.tool(
name="search_keywords",
@@ -1217,7 +1295,8 @@ async def list_memory(
annotations=_READ_ONLY,
)
async def search_keywords(query: str, top_k: int = DEFAULT_TOP_K) -> dict[str, Any]:
- return await search_keywords_impl(provider.require(), query, top_k=top_k)
+ async with _tool_errors():
+ return await search_keywords_impl(provider.require(), query, top_k=top_k)
@server.tool(
name="query_memory",
@@ -1229,7 +1308,8 @@ async def search_keywords(query: str, top_k: int = DEFAULT_TOP_K) -> dict[str, A
annotations=_READ_ONLY,
)
async def query_memory(query: str, limit: int | None = None) -> dict[str, Any]:
- return await query_memory_impl(provider.require(), query, limit=limit)
+ async with _tool_errors():
+ return await query_memory_impl(provider.require(), query, limit=limit)
@server.tool(
name="memory_stats",
@@ -1240,7 +1320,8 @@ async def query_memory(query: str, limit: int | None = None) -> dict[str, Any]:
annotations=_READ_ONLY,
)
async def memory_stats() -> dict[str, Any]:
- return await memory_stats_impl(provider.require())
+ async with _tool_errors():
+ return await memory_stats_impl(provider.require())
if _read_only_enabled():
return
@@ -1266,17 +1347,18 @@ async def store_thought(
thought_id: str | None = None,
deduplicate: bool = False,
) -> dict[str, Any]:
- return await store_thought_impl(
- provider.require(),
- essence,
- content,
- thought_type=thought_type,
- priority=priority,
- source=source,
- confidence=confidence,
- thought_id=thought_id,
- deduplicate=deduplicate,
- )
+ async with _tool_errors():
+ return await store_thought_impl(
+ provider.require(),
+ essence,
+ content,
+ thought_type=thought_type,
+ priority=priority,
+ source=source,
+ confidence=confidence,
+ thought_id=thought_id,
+ deduplicate=deduplicate,
+ )
@server.tool(
name="update_thought",
@@ -1296,15 +1378,16 @@ async def update_thought(
*,
confidence: float | None = None,
) -> dict[str, Any]:
- return await update_thought_impl(
- provider.require(),
- thought_id,
- essence=essence,
- content=content,
- priority=priority,
- lifecycle_status=lifecycle_status,
- confidence=confidence,
- )
+ async with _tool_errors():
+ return await update_thought_impl(
+ provider.require(),
+ thought_id,
+ essence=essence,
+ content=content,
+ priority=priority,
+ lifecycle_status=lifecycle_status,
+ confidence=confidence,
+ )
@server.tool(
name="link_thoughts",
@@ -1325,14 +1408,15 @@ async def link_thoughts(
*,
edge_id: str | None = None,
) -> dict[str, Any]:
- return await link_thoughts_impl(
- provider.require(),
- from_thought_id,
- to_thought_id,
- edge_type,
- weight=weight,
- edge_id=edge_id,
- )
+ async with _tool_errors():
+ return await link_thoughts_impl(
+ provider.require(),
+ from_thought_id,
+ to_thought_id,
+ edge_type,
+ weight=weight,
+ edge_id=edge_id,
+ )
@server.tool(
name="delete_thought",
@@ -1345,7 +1429,8 @@ async def link_thoughts(
annotations=_WRITE_DESTRUCTIVE,
)
async def delete_thought(thought_id: str) -> dict[str, Any]:
- return await delete_thought_impl(provider.require(), thought_id)
+ async with _tool_errors():
+ return await delete_thought_impl(provider.require(), thought_id)
@server.tool(
name="delete_edge",
@@ -1358,7 +1443,8 @@ async def delete_thought(thought_id: str) -> dict[str, Any]:
annotations=_WRITE_DESTRUCTIVE,
)
async def delete_edge(edge_id: str) -> dict[str, Any]:
- return await delete_edge_impl(provider.require(), edge_id)
+ async with _tool_errors():
+ return await delete_edge_impl(provider.require(), edge_id)
def main() -> None:
diff --git a/tests/mcp/test_errors.py b/tests/mcp/test_errors.py
new file mode 100644
index 0000000..8a57138
--- /dev/null
+++ b/tests/mcp/test_errors.py
@@ -0,0 +1,355 @@
+"""Tests for the MCP tool error contract.
+
+When a tool hits a known failure condition, the client must receive a
+clean, typed, actionable error — a message with a helpful hint and
+``isError`` set — rather than a raw Python traceback or an internal class
+name. These tests drive the real tool boundary through the in-process MCP
+client transport (so FastMCP's error wrapping runs for real) and assert on
+the message the client actually sees.
+
+The conditions covered are:
+
+* the store is not yet available (a misconfigured deployment),
+* ``query_memory`` receives a non-``FIND`` command (``SELECT`` / ``COUNT``),
+* ``query_memory`` receives a malformed ``FIND``,
+* ``update_thought`` names a thought that does not exist,
+* ``link_thoughts`` names an endpoint that does not exist.
+
+Two cross-cutting properties are asserted in addition to per-condition
+hints: the ``FIND``-only guard on ``query_memory`` is preserved (a
+``SELECT`` is still rejected and the message never invites raw SQL), and no
+error message leaks a filesystem path, a stack frame, or an internal symbol
+name.
+
+The server and client are built inside each test (rather than via a
+yielding fixture) so the in-process transport's task-bound cancel scopes
+enter and exit within the same task — the pattern the end-to-end server
+tests already use.
+"""
+
+from __future__ import annotations
+
+import re
+from contextlib import asynccontextmanager
+from typing import TYPE_CHECKING
+
+from mcp.server.fastmcp import FastMCP
+from mcp.shared.memory import create_connected_server_and_client_session as connect_client
+
+from engrava.mcp.server import SERVER_NAME, StoreProvider, register_tools
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator
+
+ from mcp import ClientSession
+
+ from engrava.infrastructure.sqlite.engrava_core import SqliteEngravaCore
+
+#: Substrings that would indicate a leaked traceback or internal symbol.
+#: Error messages shown to a client must contain none of them.
+_LEAK_MARKERS = (
+ "Traceback",
+ 'File "',
+ "StoreNotReadyError",
+ "UnsupportedQueryError",
+ "MindQLParseError",
+ "ThoughtNotFoundError",
+ "ReferentialIntegrityError",
+ "SqliteEngravaCore",
+ "lifespan",
+)
+
+#: Phrases that would wrongly suggest raw SQL is runnable over the wire.
+#: The ``FIND``-only rejection message must contain none of them.
+_SQL_INVITATIONS = (
+ "use select",
+ "run select",
+ "raw sql",
+ "arbitrary sql",
+ "try select",
+ "select is",
+ "select instead",
+)
+
+
+@asynccontextmanager
+async def _client_for(store: SqliteEngravaCore) -> AsyncIterator[ClientSession]:
+ """Open a connected client whose tools query the given store.
+
+ Builds a server, points a :class:`StoreProvider` at ``store``, registers
+ the tools, and connects the in-process client so the real tool boundary
+ (and FastMCP's error wrapping) runs end to end.
+
+ Args:
+ store: The seeded store the tools should query.
+
+ Yields:
+ A connected client session wired to ``store``.
+
+ """
+ server: FastMCP = FastMCP(SERVER_NAME)
+ provider = StoreProvider()
+ provider.set(store)
+ register_tools(server, provider)
+ async with connect_client(server) as client:
+ yield client
+
+
+@asynccontextmanager
+async def _store_less_client() -> AsyncIterator[ClientSession]:
+ """Open a connected client whose provider never received a store.
+
+ Registering the tools against an unpopulated :class:`StoreProvider`
+ reproduces a deployment whose store has not been configured: the first
+ tool call hits the store-not-ready condition at the real boundary.
+
+ Yields:
+ A connected client session backed by a store-less provider.
+
+ """
+ server: FastMCP = FastMCP(SERVER_NAME)
+ register_tools(server, StoreProvider())
+ async with connect_client(server) as client:
+ yield client
+
+
+def _error_text(content: object) -> str:
+ """Extract the text of a tool error result's first content block.
+
+ Args:
+ content: The ``content`` sequence of a ``CallToolResult``.
+
+ Returns:
+ The ``text`` attribute of the first content block.
+
+ """
+ assert isinstance(content, list)
+ assert content, "an error result must carry a content block"
+ text = content[0].text # type: ignore[union-attr]
+ assert isinstance(text, str)
+ return text
+
+
+def _assert_no_leak(text: str) -> None:
+ """Assert an error message leaks no path, stack frame, or symbol name.
+
+ Args:
+ text: The client-facing error message to inspect.
+
+ """
+ for marker in _LEAK_MARKERS:
+ assert marker not in text, f"error message leaked {marker!r}: {text!r}"
+ # No forward-slash path segment ...
+ assert not re.search(r"/\w", text), f"error message leaked a '/' path: {text!r}"
+ # ... and no backslash path segment.
+ assert "\\" not in text, f"error message leaked a '\\' path: {text!r}"
+
+
+class TestStoreNotReady:
+ """The store-not-ready condition surfaces an actionable config hint."""
+
+ async def test_reports_missing_store_with_env_var_hint(self) -> None:
+ async with _store_less_client() as client:
+ result = await client.call_tool("memory_stats", {})
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ # Actionable: it names the two documented configuration env vars ...
+ assert "ENGRAVA_DB_PATH" in text
+ assert "ENGRAVA_MCP_CONFIG" in text
+ # ... and leaks no path, stack frame, or internal symbol.
+ _assert_no_leak(text)
+
+
+class TestUnsupportedQuery:
+ """A non-``FIND`` ``query_memory`` is rejected with the FIND contract."""
+
+ async def test_select_is_rejected_and_message_states_find_only(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ async with _client_for(store) as client:
+ result = await client.call_tool(
+ "query_memory",
+ {"query": "SELECT thought_id FROM thought"},
+ )
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ # The guard still rejects the query and states the FIND-only contract.
+ assert "FIND" in text
+ assert "only FIND" in text
+ _assert_no_leak(text)
+
+ async def test_count_is_rejected_with_find_example(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ async with _client_for(store) as client:
+ result = await client.call_tool("query_memory", {"query": "COUNT thoughts"})
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ assert "FIND" in text
+ # A valid FIND example is offered to get the caller back on track.
+ assert "FIND thoughts WHERE" in text
+ _assert_no_leak(text)
+
+
+class TestGuardPreservation:
+ """The FIND-only guard must reject SELECT without ever inviting SQL."""
+
+ async def test_select_rejection_does_not_invite_sql(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ async with _client_for(store) as client:
+ result = await client.call_tool(
+ "query_memory",
+ {"query": "SELECT * FROM thought WHERE 1=1"},
+ )
+
+ # The rejection itself is intact: a SELECT still fails.
+ assert result.isError is True
+ text = _error_text(result.content)
+ lowered = text.lower()
+
+ # The message asserts the FIND-only contract ...
+ assert "find" in lowered
+ assert "only find" in lowered
+ # ... and must NOT suggest that raw SQL / SELECT is runnable.
+ for invite in _SQL_INVITATIONS:
+ assert invite not in lowered, f"message invited SQL via {invite!r}: {text!r}"
+ # The only mention of SELECT permitted is echoing the rejected verb.
+ # Stripping that quoted echo, no bare "SELECT" remains — so the
+ # message never presents SELECT as a usable command.
+ assert "SELECT" not in text.replace("'SELECT'", "")
+
+ async def test_extension_command_is_also_rejected(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ # A made-up verb parses as an unknown command and is rejected too —
+ # the surface stays restricted to FIND, not just "not SELECT".
+ async with _client_for(store) as client:
+ result = await client.call_tool("query_memory", {"query": "DROP thoughts"})
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ assert "FIND" in text
+ _assert_no_leak(text)
+
+
+class TestMalformedFind:
+ """A malformed ``FIND`` returns the parse problem plus a valid example."""
+
+ async def test_incomplete_find_reports_problem_and_example(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ async with _client_for(store) as client:
+ result = await client.call_tool("query_memory", {"query": "FIND"})
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ # The specific parse problem is surfaced ...
+ assert "table name" in text
+ # ... alongside a valid FIND example to copy.
+ assert "FIND thoughts WHERE" in text
+ _assert_no_leak(text)
+
+ async def test_unknown_table_reports_problem(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ async with _client_for(store) as client:
+ result = await client.call_tool(
+ "query_memory",
+ {"query": "FIND widgets WHERE x = '1'"},
+ )
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ assert "FIND thoughts WHERE" in text
+ _assert_no_leak(text)
+
+
+class TestUpdateMissingThought:
+ """Updating an absent thought names the missing identifier."""
+
+ async def test_missing_thought_reports_id_with_hint(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ async with _client_for(store) as client:
+ result = await client.call_tool(
+ "update_thought",
+ {"thought_id": "ghost-thought", "essence": "x"},
+ )
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ # The offending id is echoed so the caller knows which one is wrong.
+ assert "ghost-thought" in text
+ # An actionable next step is offered.
+ assert "search_memory" in text or "list_memory" in text
+ _assert_no_leak(text)
+
+
+class TestLinkMissingEndpoint:
+ """Linking to an absent endpoint names the missing identifier."""
+
+ async def test_missing_endpoint_reports_id_with_hint(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ async with _client_for(store) as client:
+ result = await client.call_tool(
+ "link_thoughts",
+ {
+ "from_thought_id": "thought-alpha",
+ "to_thought_id": "ghost-endpoint",
+ "edge_type": "ASSOCIATED",
+ },
+ )
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ # The dangling endpoint id is echoed ...
+ assert "ghost-endpoint" in text
+ # ... and the message leaks no path, stack frame, or class name.
+ _assert_no_leak(text)
+
+
+class TestSuccessPathUnchanged:
+ """Mapping errors must not alter what a successful tool call returns."""
+
+ async def test_valid_find_still_succeeds(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ # The seeded store has two ACTIVE thoughts; a valid FIND returns rows
+ # with no error, confirming the wrapper is presentation-only.
+ async with _client_for(store) as client:
+ result = await client.call_tool(
+ "query_memory",
+ {"query": "FIND thoughts WHERE lifecycle_status = 'ACTIVE'"},
+ )
+
+ assert result.isError is False
+ assert result.structuredContent is not None
+ assert "thought_id" in result.structuredContent["columns"]
+ assert len(result.structuredContent["rows"]) == 2
+
+ async def test_valid_keyword_search_still_succeeds(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ # A well-formed call through a wrapped read tool returns its normal
+ # payload unchanged — the error wrapper adds nothing on the happy path.
+ async with _client_for(store) as client:
+ result = await client.call_tool("search_keywords", {"query": "coffee"})
+
+ assert result.isError is False
+ assert result.structuredContent is not None
+ assert "results" in result.structuredContent
From 5f4ea2009d4e4527c3af1fc429055c61f38e509c Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Tue, 9 Jun 2026 06:14:36 +0200
Subject: [PATCH 10/40] fix(mcp): keep query_memory parse errors FIND-only
An unrecognised command verb (e.g. DROP) raised a MindQL parse error whose
raw text named the full command set ("Expected FIND, COUNT, SELECT, or
extension command"). The MCP query_memory surface exposes only FIND, so that
message leaked commands the surface deliberately hides. The parse-error
mapping now emits a generic FIND-only message with a valid FIND example and
never echoes the parser's command list. Regression tests cover unknown verbs
and malformed FIND.
---
src/engrava/mcp/server.py | 10 ++++++++--
tests/mcp/test_errors.py | 30 ++++++++++++++++++++++++------
2 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/src/engrava/mcp/server.py b/src/engrava/mcp/server.py
index 3ff4ce6..4e40657 100644
--- a/src/engrava/mcp/server.py
+++ b/src/engrava/mcp/server.py
@@ -262,9 +262,15 @@ async def _tool_errors() -> AsyncIterator[None]:
# and never invites raw SQL.
raise ToolError(str(exc)) from exc
except MindQLParseError as exc:
+ # Do NOT echo the parser's raw message: for an unrecognised verb the
+ # parser names the full MindQL command set ("Expected FIND, COUNT,
+ # SELECT, or extension command"), which would leak commands the MCP
+ # surface deliberately does not expose. query_memory accepts only
+ # FIND, so the client-facing message states that and shows a valid
+ # FIND example — never the parser's command list.
msg = (
- f"That query could not be parsed: {exc}. query_memory runs MindQL "
- f"FIND queries; for example: {FIND_QUERY_EXAMPLE}"
+ "query_memory accepts only FIND queries and the query could not "
+ f"be parsed as one. Use the FIND command, for example: {FIND_QUERY_EXAMPLE}"
)
raise ToolError(msg) from exc
except ThoughtNotFoundError as exc:
diff --git a/tests/mcp/test_errors.py b/tests/mcp/test_errors.py
index 8a57138..26dc3c6 100644
--- a/tests/mcp/test_errors.py
+++ b/tests/mcp/test_errors.py
@@ -236,14 +236,30 @@ async def test_extension_command_is_also_rejected(
assert result.isError is True
text = _error_text(result.content)
- assert "FIND" in text
+ lowered = text.lower()
+ assert "only find" in lowered
_assert_no_leak(text)
+ # The message must NOT leak the parser's full command set. For an
+ # unrecognised verb the raw parser error reads "Expected FIND, COUNT,
+ # SELECT, or extension command" — naming COUNT / SELECT / extension
+ # would advertise commands the MCP surface deliberately hides. The
+ # input verb ("DROP") is not echoed, so none of these may appear.
+ assert "COUNT" not in text
+ assert "SELECT" not in text
+ assert "extension" not in lowered
class TestMalformedFind:
- """A malformed ``FIND`` returns the parse problem plus a valid example."""
+ """A malformed query is rejected with a FIND-only message + example.
+
+ The message is deliberately generic (FIND-only + a valid example) and
+ does NOT echo the parser's raw text, because the parser names the full
+ MindQL command set for an unrecognised verb — which the MCP surface must
+ not advertise. The trade-off (a malformed FIND loses the precise parse
+ detail) is accepted to keep the over-the-wire surface FIND-only.
+ """
- async def test_incomplete_find_reports_problem_and_example(
+ async def test_incomplete_find_reports_find_only_and_example(
self,
store: SqliteEngravaCore,
) -> None:
@@ -252,10 +268,12 @@ async def test_incomplete_find_reports_problem_and_example(
assert result.isError is True
text = _error_text(result.content)
- # The specific parse problem is surfaced ...
- assert "table name" in text
- # ... alongside a valid FIND example to copy.
+ # FIND-only contract is stated, with a valid FIND example to copy ...
+ assert "only find" in text.lower()
assert "FIND thoughts WHERE" in text
+ # ... and the parser's command set is never leaked.
+ assert "COUNT" not in text
+ assert "extension" not in text.lower()
_assert_no_leak(text)
async def test_unknown_table_reports_problem(
From b2bc18bf46aa138c530684b4366ef706f591a6c5 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Tue, 9 Jun 2026 18:04:28 +0200
Subject: [PATCH 11/40] docs(mcp): add MCP server guide and client-config
examples
Document the shipped MCP server surface: install via the `mcp` extra,
run over stdio (engrava-mcp / python -m engrava.mcp), store resolution
through ENGRAVA_MCP_CONFIG / ENGRAVA_DB_PATH, the 11 tools (6 read +
5 write), 3 engrava:// resources, 3 prompts, and ENGRAVA_MCP_READ_ONLY.
Add copy-paste mcpServers client configs for Claude Desktop, Claude
Code, Cursor, Windsurf, and VS Code, plus sample config files under
examples/. Link the guide from the README and examples index.
---
README.md | 16 +
docs/guides/mcp.md | 354 +++++++++++++++++++++++
examples/README.md | 19 ++
examples/mcp-client-config.db-path.json | 10 +
examples/mcp-client-config.json | 10 +
examples/mcp-client-config.readonly.json | 11 +
examples/mcp-client-config.vscode.json | 12 +
7 files changed, 432 insertions(+)
create mode 100644 docs/guides/mcp.md
create mode 100644 examples/mcp-client-config.db-path.json
create mode 100644 examples/mcp-client-config.json
create mode 100644 examples/mcp-client-config.readonly.json
create mode 100644 examples/mcp-client-config.vscode.json
diff --git a/README.md b/README.md
index 0b94764..9f9532e 100644
--- a/README.md
+++ b/README.md
@@ -199,6 +199,21 @@ async with EngravaManager(data_dir=Path("./data")) as mgr:
# Completely isolated databases
```
+### MCP Server
+
+Expose a store to any MCP client — Claude Desktop, Claude Code, Cursor,
+Windsurf, VS Code — via the `engrava[mcp]` extra. A native stdio server (no HTTP
+shim) with read tools, optional write tools, attachable `engrava://` resources,
+and guided prompts:
+
+```bash
+pip install "engrava[mcp]"
+engrava-mcp # spawned by your MCP client over stdio
+```
+
+→ See [`docs/guides/mcp.md`](docs/guides/mcp.md) for install, client
+configuration, the full tool/resource/prompt reference, and read-only mode.
+
## CLI
```bash
@@ -235,6 +250,7 @@ See the [CLI reference](docs/cli.md) for every command and option.
- [Building a memory-backed agent](docs/guides/agent-memory.md) — the end-to-end agent turn loop (ingest → retrieve → generate → consolidate)
- [Migrating from another memory system](docs/guides/migrating-from-other-memory.md) — concept mapping, porting calls, bulk import, and scoping/multi-tenancy
- [Embeddings](docs/guides/embeddings.md) — wiring a real embedding provider (local / OpenAI / Ollama / HuggingFace / custom)
+- [MCP server](docs/guides/mcp.md) — expose a store to MCP clients (Claude Desktop, Claude Code, Cursor, Windsurf, VS Code): install, run, client config, tools/resources/prompts
- [Configuration](docs/configuration.md) — YAML config format and options
- [Upgrade Guide](docs/upgrade.md) — compatibility matrix, backups, and troubleshooting
- [Extensions](docs/extensions.md) — Writing custom extensions and hooks
diff --git a/docs/guides/mcp.md b/docs/guides/mcp.md
new file mode 100644
index 0000000..b251d67
--- /dev/null
+++ b/docs/guides/mcp.md
@@ -0,0 +1,354 @@
+# MCP server
+
+Engrava ships a [Model Context Protocol](https://modelcontextprotocol.io) (MCP)
+server that exposes a memory store to any MCP-capable client — Claude Desktop,
+Claude Code, Cursor, Windsurf, VS Code, and others. Point a client at it and the
+assistant can search, read, and (optionally) write the same engrava store your
+application uses, with no glue code.
+
+The server is an *API consumer*, not an engrava extension: it wraps engrava's
+public async API and speaks MCP over **stdio**. Think of it as a sibling of the
+[CLI](../cli.md) that talks to MCP clients instead of a terminal.
+
+> New to the model (thought, edge, reflection, cycle)? Read
+> [Core Concepts](../concepts.md) first — this guide uses those terms.
+
+## Install
+
+The server lives behind the `mcp` extra:
+
+```bash
+pip install "engrava[mcp]"
+```
+
+The extra pulls the MCP SDK and its transport stack. It installs **only** with
+the extra — plain `pip install engrava` is unaffected and stays dependency-light,
+so applications that embed engrava as a library never pay for the server they
+do not run.
+
+## Run
+
+The server is a standalone process served over stdio. Two equivalent entry
+points are installed with the extra:
+
+```bash
+engrava-mcp
+```
+
+```bash
+python -m engrava.mcp
+```
+
+Both build the same server and serve it on stdio. You normally do **not** launch
+it by hand — an MCP client spawns it as a subprocess using one of these commands
+(see [Client configuration](#client-configuration) below). Running it directly
+in a terminal is mostly useful for a quick smoke test; it will wait for an MCP
+client to speak to it over stdin and exits on EOF.
+
+### Pointing the server at a store
+
+The server resolves its store from the environment when it starts. Two variables
+are recognised, in priority order:
+
+| Variable | Value | Effect |
+|---|---|---|
+| `ENGRAVA_MCP_CONFIG` | Path to an `engrava.yaml` | Builds the store with the configured embedding provider, vector backend, journal, and TTL settings (`SqliteEngravaCore.from_config`). |
+| `ENGRAVA_DB_PATH` | Path to a SQLite database file | Opens that file directly and ensures the schema. No embedding provider or vector backend is configured, so hybrid search degrades to its lexical backend. |
+
+`ENGRAVA_MCP_CONFIG` takes precedence: if both are set, the config file wins. If
+**neither** is set, the server has no store to serve and tool calls return an
+actionable error telling you to set one of the two variables.
+
+Use `ENGRAVA_MCP_CONFIG` whenever you want semantic (vector) search or any
+non-default storage settings; the database created by your application via
+[`engrava.yaml`](../configuration.md) is the same file the server should open.
+Use `ENGRAVA_DB_PATH` for a quick lexical-only connection to a bare database
+file.
+
+```bash
+# Full configuration — semantic search, journal, TTL, etc.
+export ENGRAVA_MCP_CONFIG=/path/to/engrava.yaml
+engrava-mcp
+
+# Or a bare database file — lexical search only
+export ENGRAVA_DB_PATH=/path/to/agent-memory.db
+engrava-mcp
+```
+
+In a client configuration these become `env` entries on the server block, shown
+next.
+
+## Client configuration
+
+Every MCP client that speaks stdio uses the same `mcpServers` shape: a command,
+its arguments, and an environment block. Engrava is a **native stdio server**, so
+clients spawn `engrava-mcp` directly. There is no HTTP endpoint to host and —
+unlike HTTP-only MCP servers — **no `npx mcp-remote` shim** to wedge between the
+client and the server. Fewer moving parts, one process, local by default.
+
+A ready-to-copy sample for each client below lives in
+[`examples/`](https://github.com/sovantica/engrava/blob/main/examples). Replace
+the `ENGRAVA_MCP_CONFIG` path (or swap it for `ENGRAVA_DB_PATH`) with your own
+store.
+
+### Claude Desktop
+
+Edit `claude_desktop_config.json` (Settings → Developer → Edit Config):
+
+```json
+{
+ "mcpServers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_MCP_CONFIG": "/absolute/path/to/engrava.yaml"
+ }
+ }
+ }
+}
+```
+
+Restart Claude Desktop; "engrava" appears in the tools menu.
+
+### Claude Code
+
+Register the server from the project root:
+
+```bash
+claude mcp add engrava --env ENGRAVA_MCP_CONFIG=/absolute/path/to/engrava.yaml -- engrava-mcp
+```
+
+That writes an `mcpServers` entry of the same shape into your Claude Code
+configuration. Equivalent JSON, if you prefer to edit it directly:
+
+```json
+{
+ "mcpServers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_MCP_CONFIG": "/absolute/path/to/engrava.yaml"
+ }
+ }
+ }
+}
+```
+
+### Cursor
+
+Add an entry to `.cursor/mcp.json` (project-scoped) or the global
+`~/.cursor/mcp.json`:
+
+```json
+{
+ "mcpServers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_MCP_CONFIG": "/absolute/path/to/engrava.yaml"
+ }
+ }
+ }
+}
+```
+
+### Windsurf
+
+Add an entry to `~/.codeium/windsurf/mcp_config.json`:
+
+```json
+{
+ "mcpServers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_MCP_CONFIG": "/absolute/path/to/engrava.yaml"
+ }
+ }
+ }
+}
+```
+
+### VS Code
+
+VS Code's MCP support nests the servers under an `mcp` key. Add this to your
+user `settings.json` or a workspace `.vscode/mcp.json`:
+
+```json
+{
+ "mcp": {
+ "servers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_MCP_CONFIG": "/absolute/path/to/engrava.yaml"
+ }
+ }
+ }
+ }
+}
+```
+
+> **Other clients.** Cline, Codex, and most other stdio MCP clients use the same
+> `command` / `args` / `env` block as Claude Desktop and Cursor above — copy any
+> of those entries. If a client cannot find `engrava-mcp` on its `PATH`, set
+> `command` to the absolute path of the script inside your virtual environment
+> (for example `/path/to/.venv/bin/engrava-mcp`), or use
+> `"command": "python"` with `"args": ["-m", "engrava.mcp"]`.
+
+## Tool reference
+
+The server registers **eleven tools**: six read tools that are always available,
+and five write tools that are available unless the server is started in
+[read-only mode](#read-only-mode). Tools return JSON; thought and edge mutations
+return the key fields of the affected record.
+
+Tools carry MCP *annotations* so clients can present them safely: the read tools
+are marked read-only, the write tools are marked as writes, and the two `delete`
+tools additionally carry a **destructive** hint so a client can warn before it
+runs them.
+
+### Read tools (always available)
+
+| Tool | Purpose | Key arguments |
+|---|---|---|
+| `get_thought` | Fetch a single thought by its identifier. Returns a `found` flag and the thought (or `null`). | `thought_id` |
+| `search_memory` | Hybrid ranked search (lexical + vector + recency). Returns ranked `thought_id`/`score` pairs and the `backends_used`. | `query_text`; `top_k` (default 10); `include_reflections` (default true); optional `thought_type`, `lifecycle_status`, `priority` |
+| `search_keywords` | Pure full-text BM25 keyword search. Supports `AND`, `OR`, `NOT`, and prefix `*`. Returns ranked `thought_id`/`score` pairs. | `query`; `top_k` (default 10) |
+| `list_memory` | Deterministic, unranked browse over stored thoughts — no score, newest first. The home for "list memory by structured field". | `thought_type`, `lifecycle_status`, `priority`, `min_cycle`, `max_cycle`, `include_expired` (default false); `limit` (default 50), `offset` (default 0) |
+| `query_memory` | Run a structured MindQL `FIND` query, e.g. `FIND thoughts WHERE lifecycle_status = 'ACTIVE' LIMIT 10`. **Only `FIND`** is accepted. Returns `columns` and `rows`. | `query`; optional `limit` (overrides any `LIMIT` in the query) |
+| `memory_stats` | Aggregate counts and store-health metrics: thought and edge counts (by type/status) and total storage size. | *(none)* |
+
+A note on `search_memory` filters: the hybrid ranker itself cannot filter, so a
+supplied `thought_type` / `lifecycle_status` / `priority` is applied **after**
+ranking. A filtered call may therefore return fewer than `top_k` results and adds
+a `filtered` block reporting how many ranked hits were `scanned`, `matched`, and
+`dropped` — so a short list is never mistaken for "nothing was found". When you
+want an exhaustive, paginated listing by those same fields, use `list_memory`
+instead.
+
+`query_memory` deliberately accepts **only** the MindQL `FIND` command; raw-SQL
+passthrough (`SELECT`), aggregate `COUNT`, and any extension commands are
+rejected over the wire. See [MindQL](../mindql.md) for the `FIND` grammar.
+
+### Write tools (hidden in read-only mode)
+
+| Tool | Purpose | Key arguments | Annotation |
+|---|---|---|---|
+| `store_thought` | Create a new thought node. New thoughts start in `CREATED` lifecycle state. Returns the created thought's identifier and key fields. | `essence`, `content`; optional `thought_type` (default `NOTE`), `priority` (default `P3`), `source` (default `"agent"`), `confidence`, `thought_id`, `deduplicate` | write |
+| `update_thought` | Update selected fields of an existing thought. Only supplied fields change; the rest are untouched. | `thought_id`; optional `essence`, `content`, `priority`, `lifecycle_status`, `confidence` | write (idempotent) |
+| `link_thoughts` | Create a typed edge between two existing thoughts. Both endpoints must already exist. | `from_thought_id`, `to_thought_id`, `edge_type`; optional `weight` (default 1.0), `edge_id` | write |
+| `delete_thought` | Delete a thought by its identifier. Deleting an absent id is a no-op (returns `deleted: false`), not an error. | `thought_id` | **destructive** |
+| `delete_edge` | Delete an edge by its identifier. Deleting an absent id is a no-op (returns `deleted: false`), not an error. | `edge_id` | **destructive** |
+
+`link_thoughts` edges are unique per `(source, target, type)`: linking the same
+pair with the same type twice is rejected rather than ignored, so this write is
+not idempotent. The valid `thought_type`, `lifecycle_status`, `priority`, and
+`edge_type` values are the engrava enums documented in the
+[Glossary](../glossary.md) and [Core Concepts](../concepts.md) (for example
+`thought_type` is one of `TASK`, `OBSERVATION`, `BELIEF`, `REFLECTION`,
+`OUTPUT_DRAFT`, `NOTE`).
+
+## Resources
+
+Where tools are *invoked*, **resources** are addressable `engrava://` URIs that a
+client surfaces as attachable context (drop them into a conversation, no tool
+call). Three resources are registered. They are reads by definition, so they are
+**always available** — they are *not* hidden by read-only mode — and each returns
+a JSON document (`application/json`).
+
+| Resource | Returns |
+|---|---|
+| `engrava://thought/{thought_id}` | A single thought as JSON. Reading an unknown identifier yields a graceful not-found payload rather than an error. |
+| `engrava://stats` | Store-health counts and total size — the same payload as the `memory_stats` tool (both share one implementation, so they always agree). |
+| `engrava://recent` | The most-recently-updated thoughts (newest first) as JSON. |
+
+## Prompts
+
+**Prompts** are parameterised templates a client surfaces as slash-commands or
+buttons. Each renders a ready-to-send instruction that guides the assistant to
+gather context with the read tools and resources above before answering. They
+are templates only — they open no write path and call no mutation. Like
+resources, prompts are read-oriented and are **always available**, including in
+read-only mode.
+
+| Prompt | What it scaffolds | Arguments |
+|---|---|---|
+| `summarize_recent_memory` | A concise summary of the most recently stored thoughts, highlighting themes and anything unresolved. | optional `limit` — how many recent thoughts to consider (default 5) |
+| `find_related` | Gather and synthesise stored thoughts related to a topic, grouping related points and noting gaps or contradictions. | required `topic` |
+| `reflect_on_topic` | A structured reflection over what memory holds about a topic: what is established, open questions, and tensions, with concrete follow-ups. | required `topic` |
+
+## Read-only mode
+
+Set `ENGRAVA_MCP_READ_ONLY` to a truthy value (`1`, `true`, or `yes`,
+case-insensitive) to start the server with a **retrieval-only** surface:
+
+```bash
+export ENGRAVA_MCP_READ_ONLY=true
+export ENGRAVA_MCP_CONFIG=/absolute/path/to/engrava.yaml
+engrava-mcp
+```
+
+In read-only mode the five write tools — `store_thought`, `update_thought`,
+`link_thoughts`, `delete_thought`, `delete_edge` — are **not registered at all**,
+so they are never advertised to the client. The six read tools, all three
+resources, and all three prompts remain available.
+
+Use it for any deployment that should only retrieve from memory and must not be
+able to change it — a shared read-only store, a demo, an analytics or
+question-answering client. Because the write tools are absent rather than merely
+guarded, a client in read-only mode has no path to mutate the store.
+
+As an `env` block on a server entry (any client):
+
+```json
+{
+ "mcpServers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_MCP_CONFIG": "/absolute/path/to/engrava.yaml",
+ "ENGRAVA_MCP_READ_ONLY": "true"
+ }
+ }
+ }
+}
+```
+
+## Optional vector search
+
+Hybrid search (`search_memory`) combines lexical, vector, and recency signals.
+Whether the **vector** signal is active depends on how the store is configured:
+
+- With `ENGRAVA_MCP_CONFIG` pointing at an `engrava.yaml` that configures an
+ embedding provider, `search_memory` uses semantic vectors. Installing the
+ [`vec` extra](../search.md) (`pip install "engrava[vec,mcp]"`) adds the
+ `sqlite-vec` backend for faster KNN; without it the vector signal still works
+ via the built-in numpy backend.
+- With `ENGRAVA_DB_PATH` (a bare database file) — or any store without an
+ embedding provider — there is no vector signal, and `search_memory` degrades
+ gracefully to lexical (BM25) ranking. `search_keywords` is pure BM25 either
+ way.
+
+The `backends_used` field on a `search_memory` response tells you which signals
+actually contributed to a given query, so you can confirm whether vectors were in
+play.
+
+## Notes
+
+- The server is **single-writer**, like engrava itself — point it at a store that
+ is not being written concurrently by another process (see
+ [Concurrency](../concurrency.md)).
+- Tool errors are returned as clean, actionable messages (for example, an unknown
+ `thought_id`, or a non-`FIND` query) rather than raw tracebacks.
+- Thoughts and edges created through the write tools start at cycle `0`: this API
+ consumer has no notion of the agent [cycle clock](../concepts.md#cycle-the-agent-clock),
+ which your application owns.
+
+## Next
+
+- [Core Concepts](../concepts.md) — thought / edge / reflection / cycle.
+- [Hybrid Search](../search.md) — how the retrieval ranking works.
+- [MindQL](../mindql.md) — the `FIND` grammar behind `query_memory`.
+- [Configuration](../configuration.md) — wiring an embedding provider via `engrava.yaml`.
+- [CLI Reference](../cli.md) — the terminal-facing sibling of this server.
diff --git a/examples/README.md b/examples/README.md
index ebf6f21..0c5c503 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -23,6 +23,22 @@ download.
| [`quickstart.py`](quickstart.py) | 5-minute end-to-end tour: in-memory store, percepts + utterances ingest, one dreaming cycle, hybrid-search query, top-K print. |
| [`simple_agent.py`](simple_agent.py) | Lower-level walkthrough using a custom scoring hook, manual edges, and a fake embedding function — useful for understanding the API surface without the local-encoder dependency. |
+## MCP client configuration
+
+Sample `mcpServers` blocks for pointing an MCP client (Claude Desktop, Claude
+Code, Cursor, Windsurf, VS Code, …) at the engrava
+[MCP server](../docs/guides/mcp.md). Copy the one that matches your client and
+replace the store path with your own.
+
+| File | For |
+|---|---|
+| [`mcp-client-config.json`](mcp-client-config.json) | The default stdio block (Claude Desktop, Claude Code, Cursor, Windsurf, Cline, Codex, …). Points at an `engrava.yaml`. |
+| [`mcp-client-config.db-path.json`](mcp-client-config.db-path.json) | Same shape, but points at a bare SQLite file via `ENGRAVA_DB_PATH` (lexical search only). |
+| [`mcp-client-config.readonly.json`](mcp-client-config.readonly.json) | Read-only deployment — `ENGRAVA_MCP_READ_ONLY=true` hides the write tools. |
+| [`mcp-client-config.vscode.json`](mcp-client-config.vscode.json) | VS Code, which nests servers under an `mcp` key. |
+
+These require the MCP extra: `pip install "engrava[mcp]"`.
+
Run them directly with the Python interpreter:
```bash
@@ -57,3 +73,6 @@ teal.`.
- [`docs/benchmarks.md`](../docs/benchmarks.md) — the synthetic
benchmark suite that reports dreaming's measured REFLECTION
coverage on a representative workload.
+- [`docs/guides/mcp.md`](../docs/guides/mcp.md) — the MCP server:
+ install, run, client configuration, and the full
+ tool/resource/prompt reference.
diff --git a/examples/mcp-client-config.db-path.json b/examples/mcp-client-config.db-path.json
new file mode 100644
index 0000000..e6877fa
--- /dev/null
+++ b/examples/mcp-client-config.db-path.json
@@ -0,0 +1,10 @@
+{
+ "mcpServers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_DB_PATH": "/absolute/path/to/agent-memory.db"
+ }
+ }
+ }
+}
diff --git a/examples/mcp-client-config.json b/examples/mcp-client-config.json
new file mode 100644
index 0000000..f397572
--- /dev/null
+++ b/examples/mcp-client-config.json
@@ -0,0 +1,10 @@
+{
+ "mcpServers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_MCP_CONFIG": "/absolute/path/to/engrava.yaml"
+ }
+ }
+ }
+}
diff --git a/examples/mcp-client-config.readonly.json b/examples/mcp-client-config.readonly.json
new file mode 100644
index 0000000..d540649
--- /dev/null
+++ b/examples/mcp-client-config.readonly.json
@@ -0,0 +1,11 @@
+{
+ "mcpServers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_MCP_CONFIG": "/absolute/path/to/engrava.yaml",
+ "ENGRAVA_MCP_READ_ONLY": "true"
+ }
+ }
+ }
+}
diff --git a/examples/mcp-client-config.vscode.json b/examples/mcp-client-config.vscode.json
new file mode 100644
index 0000000..cd3d9e3
--- /dev/null
+++ b/examples/mcp-client-config.vscode.json
@@ -0,0 +1,12 @@
+{
+ "mcp": {
+ "servers": {
+ "engrava": {
+ "command": "engrava-mcp",
+ "env": {
+ "ENGRAVA_MCP_CONFIG": "/absolute/path/to/engrava.yaml"
+ }
+ }
+ }
+ }
+}
From 897c46fd9300c4b8b9530852d414043593ffffbe Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Tue, 9 Jun 2026 19:55:34 +0200
Subject: [PATCH 12/40] fix(embeddings): retry transient errors with bounded
backoff
The OpenAI-compatible embedding provider issued a single request and
propagated any transient failure (read timeout, network blip, or a
transient HTTP status) straight to the caller, aborting the whole
ingest. _request_embeddings now retries with bounded exponential
backoff on httpx timeouts/network errors and on transient statuses
(408, 409, 425, 429, 500, 502, 503, 504), re-raising once attempts are
exhausted so a persistent error still surfaces. Non-transient statuses
(400/401/403/404, ...) still raise immediately with no retry.
Two keyword-only constructor arguments tune the policy, max_attempts
(default 3) and base_retry_delay_s (default 1.0); the defaults keep the
success path at a single request, so existing callers are unchanged.
The raise/log path carries only the status code and response body,
never the API key or Authorization header. Other providers untouched.
---
CHANGELOG.md | 15 ++
src/engrava/embeddings/openai_compatible.py | 102 ++++++++++-
tests/test_embedding_providers.py | 190 ++++++++++++++++++++
3 files changed, 304 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8ac94e5..a43d56e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,21 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/
### Fixed
+- **Transient errors from an OpenAI-compatible embeddings endpoint no
+ longer abort the whole call.** `OpenAICompatibleProvider` now retries a
+ single embeddings request with bounded exponential backoff when the
+ endpoint reports a transient failure — a read timeout or network blip,
+ or a transient HTTP status (`408`, `409`, `425`, `429`, `500`, `502`,
+ `503`, `504`). A short outage is absorbed instead of failing the
+ caller's ingest. Non-transient errors (such as `400`, `401`, `403`,
+ `404`) still surface immediately with no retry, and a transient failure
+ that persists across every attempt is still raised, so the call never
+ loops forever. The behaviour is tunable through two new keyword-only
+ constructor arguments, `max_attempts` (default `3`) and
+ `base_retry_delay_s` (default `1.0`); the defaults leave the success
+ path at a single request, so existing callers see no change. The other
+ embedding providers are unaffected.
+
- **sqlite-vec backend no longer crashes on configuration.** Selecting the
`sqlite-vec` vector backend (via `engrava[vec]`) previously raised a
thread error when building a store from configuration, because the
diff --git a/src/engrava/embeddings/openai_compatible.py b/src/engrava/embeddings/openai_compatible.py
index 18b4a60..499b5d8 100644
--- a/src/engrava/embeddings/openai_compatible.py
+++ b/src/engrava/embeddings/openai_compatible.py
@@ -8,6 +8,7 @@
from __future__ import annotations
+import asyncio
import logging
import os
from typing import Any
@@ -16,6 +17,22 @@
_DEFAULT_BASE_URL = "https://api.openai.com/v1"
+#: Number of attempts (initial try + retries) for a transient failure.
+#: The default of 3 leaves the success path at a single request while
+#: absorbing a couple of consecutive transient blips.
+_DEFAULT_MAX_ATTEMPTS = 3
+
+#: Base delay (seconds) for the exponential backoff between retries.
+_DEFAULT_BASE_RETRY_DELAY_S = 1.0
+
+#: HTTP status codes treated as transient and therefore retryable:
+#: request timeout, conflict, too-early, rate limit, and the standard
+#: 5xx server / gateway errors. Any other non-2xx status is surfaced
+#: immediately without a retry.
+_RETRYABLE_STATUS_CODES = frozenset({408, 409, 425, 429, 500, 502, 503, 504})
+
+_HTTP_OK = 200
+
class OpenAICompatibleProvider:
"""Remote embedding provider using the OpenAI embeddings API.
@@ -29,6 +46,14 @@ class OpenAICompatibleProvider:
api_key: Bearer token. Falls back to ``OPENAI_API_KEY`` env var.
dimension: Expected vector dimensionality. Auto-detected on
first call if not provided.
+ max_attempts: Maximum number of attempts (initial request plus
+ retries) for a single embeddings call when the endpoint
+ returns a transient error. Keyword-only; defaults to ``3``.
+ A value of ``1`` disables retrying.
+ base_retry_delay_s: Base delay in seconds for the exponential
+ backoff between retries (the n-th retry waits
+ ``base_retry_delay_s * n`` seconds). Keyword-only; defaults
+ to ``1.0``.
Examples:
>>> provider = OpenAICompatibleProvider(
@@ -47,12 +72,17 @@ def __init__(
base_url: str = _DEFAULT_BASE_URL,
api_key: str | None = None,
dimension: int | None = None,
+ *,
+ max_attempts: int = _DEFAULT_MAX_ATTEMPTS,
+ base_retry_delay_s: float = _DEFAULT_BASE_RETRY_DELAY_S,
) -> None:
self._model_name = model_name
self._base_url = base_url.rstrip("/")
self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
self._dimension: int | None = dimension
self._client: Any = None
+ self._max_attempts = max(1, max_attempts)
+ self._base_retry_delay_s = max(0.0, base_retry_delay_s)
def _get_client(self) -> Any: # noqa: ANN401
"""Lazy-create the httpx async client.
@@ -150,6 +180,17 @@ async def embed_batch(self, texts: list[str]) -> list[list[float]]:
async def _request_embeddings(self, texts: list[str]) -> list[list[float]]:
"""Send a batch embedding request to the API.
+ The request is retried with bounded exponential backoff when the
+ endpoint reports a transient failure — a transport-level timeout
+ or network error, or a transient HTTP status (see
+ :data:`_RETRYABLE_STATUS_CODES`). Non-transient HTTP errors (for
+ example ``400``/``401``/``404``) are surfaced immediately without
+ a retry, and a transient failure that persists across every
+ attempt is re-raised so the call still fails rather than looping
+ forever. The number of attempts and the backoff base are
+ configured on the provider (``max_attempts`` /
+ ``base_retry_delay_s``).
+
Args:
texts: Input texts.
@@ -157,20 +198,75 @@ async def _request_embeddings(self, texts: list[str]) -> list[list[float]]:
List of embedding vectors, ordered by input index.
Raises:
- RuntimeError: On non-200 response or malformed JSON.
+ RuntimeError: On a non-retryable response, or on a transient
+ failure that persists across every attempt.
"""
+ import httpx # noqa: PLC0415
+
client = self._get_client()
payload: dict[str, Any] = {
"model": self._model_name,
"input": texts,
}
- response = await client.post("/embeddings", json=payload)
- if response.status_code != 200: # noqa: PLR2004
+ for attempt in range(1, self._max_attempts + 1):
+ try:
+ response = await client.post("/embeddings", json=payload)
+ except (httpx.TimeoutException, httpx.NetworkError) as exc:
+ # Transport-level transient error (read timeout, connection
+ # reset, …). The exception text carries the request target,
+ # never the request headers, so no credential is exposed.
+ if attempt >= self._max_attempts:
+ msg = (
+ f"OpenAI embeddings API request failed after "
+ f"{attempt} attempt(s): {type(exc).__name__}"
+ )
+ raise RuntimeError(msg) from exc
+ await self._sleep_before_retry(attempt)
+ continue
+
+ if response.status_code == _HTTP_OK:
+ return self._parse_response(response)
+
+ if response.status_code in _RETRYABLE_STATUS_CODES and attempt < self._max_attempts:
+ await self._sleep_before_retry(attempt)
+ continue
+
+ # Non-retryable status, or the final attempt of a retryable one:
+ # surface the error. The message carries the status code and the
+ # response body only — never the request's authorization header.
msg = f"OpenAI embeddings API error {response.status_code}: {response.text}"
raise RuntimeError(msg)
+ # Unreachable: the loop either returns or raises on every path. Present
+ # so the function provably never falls through without a value.
+ msg = "OpenAI embeddings API request exhausted all attempts" # pragma: no cover
+ raise RuntimeError(msg) # pragma: no cover
+
+ async def _sleep_before_retry(self, attempt: int) -> None:
+ """Sleep with exponential backoff before the next retry attempt.
+
+ Args:
+ attempt: The 1-based number of the attempt that just failed.
+ The delay scales linearly with this value
+ (``base_retry_delay_s * attempt``).
+
+ """
+ delay = self._base_retry_delay_s * attempt
+ if delay > 0:
+ await asyncio.sleep(delay)
+
+ def _parse_response(self, response: Any) -> list[list[float]]: # noqa: ANN401
+ """Parse a successful embeddings response into ordered vectors.
+
+ Args:
+ response: The ``httpx.Response`` from a ``200`` reply.
+
+ Returns:
+ List of embedding vectors, ordered by input index.
+
+ """
data = response.json()
embeddings_data = data.get("data", [])
# Sort by index to ensure correct ordering
diff --git a/tests/test_embedding_providers.py b/tests/test_embedding_providers.py
index 14af534..e5c2faf 100644
--- a/tests/test_embedding_providers.py
+++ b/tests/test_embedding_providers.py
@@ -571,6 +571,196 @@ async def test_embed_batch(self) -> None:
assert len(results) == 2
+class TestOpenAIProviderRetry:
+ """Transient-error retry behaviour for OpenAICompatibleProvider."""
+
+ @staticmethod
+ def _ok_response(embedding: list[float]) -> MagicMock:
+ """A 200 response carrying a single embedding at index 0."""
+ response = MagicMock()
+ response.status_code = 200
+ response.json.return_value = {"data": [{"index": 0, "embedding": embedding}]}
+ return response
+
+ @staticmethod
+ def _status_response(status_code: int) -> MagicMock:
+ """A non-200 response with a benign body (no secret material)."""
+ response = MagicMock()
+ response.status_code = status_code
+ response.text = "service unavailable"
+ return response
+
+ async def test_embedding_retries_then_succeeds(self) -> None:
+ """A read timeout twice, then a 200 — vectors returned after 3 attempts."""
+ import httpx
+
+ from engrava.embeddings.openai_compatible import OpenAICompatibleProvider
+
+ # A non-zero base delay so the backoff path is exercised; the
+ # asyncio.sleep patch keeps the test from sleeping for real.
+ provider = OpenAICompatibleProvider(
+ model_name="test-model",
+ base_url="https://api.test.com/v1",
+ api_key="sk-test",
+ base_retry_delay_s=1.0,
+ )
+
+ ok = self._ok_response([0.1, 0.2, 0.3])
+ mock_client = AsyncMock()
+ mock_client.post = AsyncMock(
+ side_effect=[
+ httpx.ReadTimeout("read timed out"),
+ httpx.ReadTimeout("read timed out"),
+ ok,
+ ]
+ )
+ provider._client = mock_client
+
+ with patch("asyncio.sleep", new=AsyncMock()) as mock_sleep:
+ result = await provider.embed("hello")
+
+ assert result == [0.1, 0.2, 0.3]
+ assert provider.dimension == 3
+ assert mock_client.post.call_count == 3
+ # Two failed attempts → two backoff sleeps before the success.
+ assert mock_sleep.await_count == 2
+
+ async def test_embedding_retries_on_retryable_status(self) -> None:
+ """A 503 twice, then a 200 — success after retrying the status."""
+ from engrava.embeddings.openai_compatible import OpenAICompatibleProvider
+
+ provider = OpenAICompatibleProvider(
+ model_name="test-model",
+ base_url="https://api.test.com/v1",
+ api_key="sk-test",
+ base_retry_delay_s=1.0,
+ )
+
+ ok = self._ok_response([0.4, 0.5])
+ mock_client = AsyncMock()
+ mock_client.post = AsyncMock(
+ side_effect=[
+ self._status_response(503),
+ self._status_response(503),
+ ok,
+ ]
+ )
+ provider._client = mock_client
+
+ with patch("asyncio.sleep", new=AsyncMock()) as mock_sleep:
+ result = await provider.embed("hello")
+
+ assert result == [0.4, 0.5]
+ assert mock_client.post.call_count == 3
+ assert mock_sleep.await_count == 2
+
+ async def test_embedding_persistent_timeout_raises(self) -> None:
+ """A read timeout on every attempt raises after max_attempts (no loop)."""
+ import httpx
+
+ from engrava.embeddings.openai_compatible import OpenAICompatibleProvider
+
+ fake_api_key = "sk-canary-token-value"
+ provider = OpenAICompatibleProvider(
+ model_name="test-model",
+ base_url="https://api.test.com/v1",
+ api_key=fake_api_key,
+ max_attempts=3,
+ base_retry_delay_s=0,
+ )
+
+ mock_client = AsyncMock()
+ mock_client.post = AsyncMock(side_effect=httpx.ReadTimeout("read timed out"))
+ provider._client = mock_client
+
+ with (
+ patch("asyncio.sleep", new=AsyncMock()),
+ pytest.raises(RuntimeError) as exc_info,
+ ):
+ await provider.embed("hello")
+
+ # Bounded: exactly max_attempts calls, then a raise — no infinite loop.
+ assert mock_client.post.call_count == 3
+ # The raised error must not leak the API key or an Authorization header.
+ message = str(exc_info.value)
+ assert fake_api_key not in message
+ assert "Authorization" not in message
+ assert "Bearer" not in message
+
+ async def test_embedding_non_retryable_status_raises_immediately(self) -> None:
+ """A 401 raises on the first attempt — no retry for a non-transient status."""
+ from engrava.embeddings.openai_compatible import OpenAICompatibleProvider
+
+ provider = OpenAICompatibleProvider(
+ model_name="test-model",
+ base_url="https://api.test.com/v1",
+ api_key="sk-test",
+ base_retry_delay_s=0,
+ )
+
+ mock_client = AsyncMock()
+ mock_client.post = AsyncMock(return_value=self._status_response(401))
+ provider._client = mock_client
+
+ with (
+ patch("asyncio.sleep", new=AsyncMock()) as mock_sleep,
+ pytest.raises(RuntimeError, match="401"),
+ ):
+ await provider.embed("hello")
+
+ # No retry on a non-retryable status: a single attempt, zero sleeps.
+ assert mock_client.post.call_count == 1
+ assert mock_sleep.await_count == 0
+
+ async def test_embedding_success_path_unchanged(self) -> None:
+ """A 200 on the first try — exactly one attempt and identical vectors."""
+ from engrava.embeddings.openai_compatible import OpenAICompatibleProvider
+
+ provider = OpenAICompatibleProvider(
+ model_name="test-model",
+ base_url="https://api.test.com/v1",
+ api_key="sk-test",
+ )
+
+ mock_client = AsyncMock()
+ mock_client.post = AsyncMock(return_value=self._ok_response([0.1, 0.2, 0.3]))
+ provider._client = mock_client
+
+ with patch("asyncio.sleep", new=AsyncMock()) as mock_sleep:
+ result = await provider.embed("hello")
+
+ assert result == [0.1, 0.2, 0.3]
+ assert provider.dimension == 3
+ # Backward-compat lock: one attempt, never any backoff sleep.
+ mock_client.post.assert_called_once()
+ assert mock_sleep.await_count == 0
+
+ async def test_embedding_backoff_is_bounded(self) -> None:
+ """With base_retry_delay_s=0 the retry count is bounded by max_attempts."""
+ import httpx
+
+ from engrava.embeddings.openai_compatible import OpenAICompatibleProvider
+
+ provider = OpenAICompatibleProvider(
+ model_name="test-model",
+ base_url="https://api.test.com/v1",
+ api_key="sk-test",
+ max_attempts=5,
+ base_retry_delay_s=0,
+ )
+
+ mock_client = AsyncMock()
+ mock_client.post = AsyncMock(side_effect=httpx.ConnectError("connection refused"))
+ provider._client = mock_client
+
+ with pytest.raises(RuntimeError):
+ await provider.embed("hello")
+
+ # Assert the attempt COUNT, not wall-clock — base_retry_delay_s=0
+ # means no real sleeping occurs.
+ assert mock_client.post.call_count == 5
+
+
class TestOllamaProvider:
"""Unit tests for OllamaProvider."""
From 730c2bb071380ad89660b85109cc8c2e440790bb Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Tue, 9 Jun 2026 20:17:59 +0200
Subject: [PATCH 13/40] test(docs): execute the tutorial end-to-end and a
search round-trip
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The execute layer (Layer 1) only ran an explicit allowlist of self-contained
blocks, so compile-only coverage missed examples that span several consecutive
blocks — in particular the tutorial, whose one worked example (imports ->
ingest -> link -> search_hybrid -> main + asyncio.run) is not runnable as any
single block.
Add an allowlist-driven page-concatenation capability: CONCATENATED_PAGES opts
a page in via (path, first_anchor, last_anchor), the inclusive contiguous block
range is joined in document order and run in a bounded subprocess asserting
exit 0. First target: docs/tutorial.md, which now executes end-to-end against
the installed package — running the search_hybrid round-trip whose return-shape
claims would otherwise rot silently under compile-only checks.
The opt-in stays entirely in the test module (no fence syntax or marker leaks
to the public docs). Documented in the module docstring so future docs authors
know how to opt a page in. tests/docs: 252 -> 253.
---
tests/docs/test_docs_examples_execute.py | 137 ++++++++++++++++++++---
1 file changed, 121 insertions(+), 16 deletions(-)
diff --git a/tests/docs/test_docs_examples_execute.py b/tests/docs/test_docs_examples_execute.py
index b5d7bae..d0ff42f 100644
--- a/tests/docs/test_docs_examples_execute.py
+++ b/tests/docs/test_docs_examples_execute.py
@@ -1,21 +1,56 @@
-"""Layer 1 of the documentation-example tests — execute self-contained blocks.
+"""Layer 1 of the documentation-example tests — execute documentation code.
-Some documentation code blocks are complete, runnable scripts (they import
-what they use and drive themselves via ``asyncio.run``). This module executes
-each such block exactly as a reader would — by writing it to a temp file and
-running it in a subprocess against the installed ``engrava`` — and asserts a
-clean exit. This is the strongest guarantee: the published snippet *runs*.
+Compiling a snippet (Layer 2) proves it is *syntactically* valid Python, but it
+cannot catch an example that calls an API which does not exist or behaves
+differently from what the prose claims (e.g. reading ``result.is_valid`` when
+the real attribute is ``result.valid``). This module closes that gap for the
+highest-value examples by actually *running* them against the installed
+``engrava`` and asserting a clean exit. It offers two execution shapes:
-Fragment blocks (the majority — they assume an existing ``store``/``conn`` or
+**Self-contained blocks.** Some documentation code blocks are complete, runnable
+scripts (they import what they use and drive themselves via ``asyncio.run``).
+Each such block is executed exactly as a reader would — written to a temp file
+and run in a subprocess — and must exit 0. This is the strongest guarantee: the
+published snippet *runs*.
+
+**Concatenated pages.** Some pages build *one* example across several
+*consecutive* code blocks (imports, then a helper, then more helpers, then a
+``main()`` that ties them together). No single block runs on its own, but the
+contiguous run of blocks concatenated in document order is a complete script.
+For an opted-in page this module joins that contiguous run into one script and
+runs it in a subprocess, asserting a clean exit — so the whole worked example is
+executed against the package, including the return-shape-sensitive search
+round-trip in the middle of it.
+
+Fragment blocks that are neither self-contained nor part of an opted-in
+concatenated run (the majority — they assume an existing ``store``/``conn`` or
show a class definition) are out of scope here; they are covered by the
compile + phantom-API guards in ``test_docs_examples_compile.py`` and by the
behaviour tests in ``test_docs_examples_behavior.py``.
-The set of self-contained blocks is pinned by an **explicit allowlist** keyed
-on ``file:line`` so the doc surface stays clean (no special fence
-annotations leak to GitHub / engrava.ai). When you move or edit one of these
-blocks, update the allowlist anchor — and remember: editing the block means
-re-verifying the example.
+Opting a page in
+-----------------
+Both execution shapes are **allowlist-driven**: a page runs only when it has an
+explicit entry in ``EXECUTABLE_BLOCKS`` or ``CONCATENATED_PAGES`` below. The
+opt-in lives entirely in this test module — there is no special fence syntax or
+marker in the Markdown — so the public docs (and the engrava.ai mirror) need no
+magic annotations to be executed (cf. AGENT_PRINCIPLES Principle 1).
+
+* To execute a **single** self-contained block, add a
+ ``(markdown_path, anchor_substring)`` entry to ``EXECUTABLE_BLOCKS``. The
+ anchor is a short string unique to that block; the block must also drive
+ itself via ``asyncio.run(main())``.
+* To execute a **contiguous run** of blocks as one page, add a
+ ``(markdown_path, first_anchor, last_anchor)`` entry to
+ ``CONCATENATED_PAGES``. ``first_anchor`` must appear in exactly one block and
+ ``last_anchor`` in exactly one (later or same) block; every block from the
+ first match through the last match — inclusive — is concatenated in document
+ order. Anchor a contiguous run, **not** a whole page: a page may follow a
+ complete example with later illustrative fragments that do not compose, so the
+ range is bounded explicitly by its end anchor.
+
+When you move or edit one of these blocks, update its anchor — and remember:
+editing the block means re-verifying the example.
"""
from __future__ import annotations
@@ -26,11 +61,14 @@
import pytest
-from tests.docs._md_blocks import REPO_ROOT, extract_python_blocks
+from tests.docs._md_blocks import REPO_ROOT, CodeBlock, extract_python_blocks
if TYPE_CHECKING:
from pathlib import Path
+# Bound for every documentation subprocess so a hung example cannot wedge CI.
+_RUN_TIMEOUT_S = 120
+
# Self-contained, executable blocks, identified by (markdown path, an anchor
# substring that must appear in the block body). The anchor makes the binding
# robust to small line-number drift and documents *which* block is meant.
@@ -40,6 +78,22 @@
("docs/guides/migrating-from-other-memory.md", "Imported {total} thoughts."),
)
+# Pages that build one example across a contiguous run of code blocks, identified
+# by (markdown path, first-block anchor, last-block anchor). The two anchors bound
+# an inclusive, contiguous range of blocks that is concatenated in document order
+# and run as a single script. Anchor the runnable run, not the whole page.
+CONCATENATED_PAGES: tuple[tuple[str, str, str], ...] = (
+ # docs/tutorial.md builds one notes-memory example across five consecutive
+ # blocks: imports + embed() -> NOTES + ingest() -> link() -> search()
+ # (the search_hybrid round-trip) -> main() + asyncio.run. The whole run
+ # composes into a complete script; there is no trailing non-composing block.
+ (
+ "docs/tutorial.md",
+ "def embed(text: str) -> list[float]:",
+ "asyncio.run(main())",
+ ),
+)
+
def _resolve_block_body(rel_path: str, anchor: str) -> str:
path = REPO_ROOT / rel_path
@@ -54,7 +108,32 @@ def _resolve_block_body(rel_path: str, anchor: str) -> str:
return matches[0].body
-def _run_snippet(body: str, tmp_path: Path) -> subprocess.CompletedProcess[str]:
+def _unique_block_index(blocks: list[CodeBlock], rel_path: str, anchor: str, role: str) -> int:
+ matches = [i for i, b in enumerate(blocks) if anchor in b.body]
+ if len(matches) != 1:
+ pytest.fail(
+ f"Expected exactly one block in {rel_path} containing the {role} anchor "
+ f"{anchor!r}, found {len(matches)}. Update CONCATENATED_PAGES in {__file__}.",
+ )
+ return matches[0]
+
+
+def _resolve_page_script(rel_path: str, first_anchor: str, last_anchor: str) -> str:
+ """Concatenate the inclusive, contiguous block range bounded by the anchors."""
+ path = REPO_ROOT / rel_path
+ blocks = extract_python_blocks(path)
+ start = _unique_block_index(blocks, rel_path, first_anchor, "first")
+ end = _unique_block_index(blocks, rel_path, last_anchor, "last")
+ if end < start:
+ pytest.fail(
+ f"In {rel_path} the last anchor {last_anchor!r} (block {end}) precedes the "
+ f"first anchor {first_anchor!r} (block {start}). Update CONCATENATED_PAGES "
+ f"in {__file__}.",
+ )
+ return "\n\n".join(b.body for b in blocks[start : end + 1])
+
+
+def _run_script(body: str, tmp_path: Path) -> subprocess.CompletedProcess[str]:
script = tmp_path / "doc_snippet.py"
script.write_text(body, encoding="utf-8")
return subprocess.run( # noqa: S603 — trusted, repo-authored doc snippet
@@ -62,7 +141,7 @@ def _run_snippet(body: str, tmp_path: Path) -> subprocess.CompletedProcess[str]:
check=False,
capture_output=True,
text=True,
- timeout=120,
+ timeout=_RUN_TIMEOUT_S,
)
@@ -74,8 +153,34 @@ def _run_snippet(body: str, tmp_path: Path) -> subprocess.CompletedProcess[str]:
def test_self_contained_doc_block_runs(rel_path: str, anchor: str, tmp_path: Path) -> None:
"""A complete, runnable documentation snippet exits 0 against installed engrava."""
body = _resolve_block_body(rel_path, anchor)
- result = _run_snippet(body, tmp_path)
+ result = _run_script(body, tmp_path)
assert result.returncode == 0, (
f"Documentation snippet from {rel_path} exited {result.returncode}.\n"
f"--- stdout ---\n{result.stdout}\n--- stderr ---\n{result.stderr}"
)
+
+
+@pytest.mark.parametrize(
+ ("rel_path", "first_anchor", "last_anchor"),
+ CONCATENATED_PAGES,
+ ids=[rel for rel, _, _ in CONCATENATED_PAGES],
+)
+def test_concatenated_doc_page_runs(
+ rel_path: str,
+ first_anchor: str,
+ last_anchor: str,
+ tmp_path: Path,
+) -> None:
+ """A page's contiguous run of blocks, concatenated, exits 0 against installed engrava.
+
+ This executes a worked example that is split across several consecutive doc
+ blocks and is therefore not runnable as any single block — catching API
+ drift in the mid-example fragments (e.g. the search round-trip) that
+ compile-only checks cannot see.
+ """
+ script = _resolve_page_script(rel_path, first_anchor, last_anchor)
+ result = _run_script(script, tmp_path)
+ assert result.returncode == 0, (
+ f"Concatenated documentation page {rel_path} exited {result.returncode}.\n"
+ f"--- stdout ---\n{result.stdout}\n--- stderr ---\n{result.stderr}"
+ )
From 456bcb6f66710ce9a51a5761dd7e406d15e1d177 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Wed, 10 Jun 2026 10:31:49 +0200
Subject: [PATCH 14/40] feat: add bi-temporal valid-time to thoughts and edges
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Introduce a second time axis ("valid time" — when a fact is true in the
world) alongside the existing transaction time. Adds nullable ISO-8601
valid_from / valid_until columns to ThoughtRecord and EdgeRecord, sharing
one ISO-8601 validator (UTC-normalised for correct TEXT ordering).
Schema migrates v12 -> v13 automatically on open: additive ADD COLUMN on
both tables, three partial indexes per table, and an asymmetric backfill
(thought.valid_from seeded from created_at where present; edges have no
calendar timestamp to source from, so they stay open-lower-bound NULL).
The migration tolerates a thought-only partial schema (skips edge work
when the table is absent) and is idempotent. A fresh v13 database is
structurally identical to a migrated one.
Existing behaviour is unchanged: the new fields are optional and queries
without a temporal filter return the same rows as before. Query-time
temporal predicates are a follow-up.
---
src/engrava/domain/models/_temporal.py | 42 ++
src/engrava/domain/models/edge.py | 32 +
src/engrava/domain/models/thought.py | 34 +-
.../infrastructure/sqlite/engrava_core.py | 130 +++-
.../infrastructure/sqlite/schema_core.sql | 34 +-
tests/domain/__init__.py | 1 +
tests/domain/models/__init__.py | 1 +
tests/domain/models/test_temporal_fields.py | 135 ++++
tests/test_cognitive_journal.py | 6 +-
tests/test_dedup_migration.py | 10 +-
tests/test_embedding_providers.py | 2 +-
tests/test_engrava_completeness.py | 2 +-
tests/test_extension_load_with_migrations.py | 6 +-
tests/test_metadata_migration.py | 22 +-
tests/test_mind_store_core.py | 2 +-
tests/test_referential_integrity.py | 10 +-
tests/test_search_hybrid_graph.py | 2 +-
tests/test_service_isolation.py | 4 +-
tests/test_ttl_auto_expiry.py | 4 +-
tests/test_valid_time_migration.py | 669 ++++++++++++++++++
20 files changed, 1082 insertions(+), 66 deletions(-)
create mode 100644 src/engrava/domain/models/_temporal.py
create mode 100644 tests/domain/__init__.py
create mode 100644 tests/domain/models/__init__.py
create mode 100644 tests/domain/models/test_temporal_fields.py
create mode 100644 tests/test_valid_time_migration.py
diff --git a/src/engrava/domain/models/_temporal.py b/src/engrava/domain/models/_temporal.py
new file mode 100644
index 0000000..6bca9e4
--- /dev/null
+++ b/src/engrava/domain/models/_temporal.py
@@ -0,0 +1,42 @@
+"""Shared temporal-field validation for domain models.
+
+Provides a single source of truth for ISO-8601 timestamp validation and
+UTC normalisation, used by every record that stores nullable timestamp
+columns (transaction-time, access-time, and valid-time fields). Keeping
+the logic here avoids divergent copies drifting across models.
+"""
+
+from __future__ import annotations
+
+import datetime
+
+
+def validate_iso8601_nullable(value: str | None) -> str | None:
+ """Validate ISO-8601 format and normalise to UTC when not ``None``.
+
+ Timezone-aware timestamps are converted to UTC so that SQLite TEXT
+ comparisons (lexicographic) produce correct results regardless of the
+ original offset. Naive timestamps are accepted and returned unchanged.
+
+ Args:
+ value: Timestamp string or ``None``.
+
+ Returns:
+ The validated (and UTC-normalised) string, or ``None`` when the
+ input was ``None``.
+
+ Raises:
+ ValueError: If ``value`` is a string that is not valid ISO-8601.
+
+ """
+ if value is None:
+ return value
+ try:
+ parsed = datetime.datetime.fromisoformat(value)
+ except ValueError as exc:
+ msg = f"Must be ISO-8601 timestamp, got {value!r}"
+ raise ValueError(msg) from exc
+ # Normalise timezone-aware timestamps to UTC for safe TEXT ordering.
+ if parsed.tzinfo is not None:
+ return parsed.astimezone(datetime.UTC).isoformat()
+ return value
diff --git a/src/engrava/domain/models/edge.py b/src/engrava/domain/models/edge.py
index aee3cd6..abfe145 100644
--- a/src/engrava/domain/models/edge.py
+++ b/src/engrava/domain/models/edge.py
@@ -8,6 +8,7 @@
from pydantic import BaseModel, ConfigDict, Field, field_validator
from engrava.domain.enums import EdgeType, KnowledgeSource
+from engrava.domain.models._temporal import validate_iso8601_nullable
class EdgeRecord(BaseModel):
@@ -22,6 +23,14 @@ class EdgeRecord(BaseModel):
created_cycle: Cycle when this edge was created.
source: Provenance of the edge (EXPERIENCE, SEEDED_LLM, DISTILLED_LLM).
decay_multiplier: Multiplier for accelerated decay (1.0 normal).
+ valid_from: ISO-8601 datetime marking the start of the interval
+ during which the relation is true in the world (valid time).
+ ``None`` means an open lower bound — the relation is treated
+ as valid from the beginning of time.
+ valid_until: ISO-8601 datetime marking the end of the interval
+ during which the relation is true in the world (valid time).
+ ``None`` means an open upper bound — the relation has no
+ known end and is treated as currently valid.
Examples:
>>> edge = EdgeRecord(
@@ -45,6 +54,8 @@ class EdgeRecord(BaseModel):
created_cycle: int = Field(ge=0)
source: KnowledgeSource = KnowledgeSource.EXPERIENCE
decay_multiplier: float = Field(default=1.0, ge=0.0)
+ valid_from: str | None = None
+ valid_until: str | None = None
@field_validator("edge_id", "from_thought_id", "to_thought_id")
@classmethod
@@ -54,3 +65,24 @@ def _validate_non_empty(cls, v: str) -> str:
msg = "ID field must not be empty or whitespace"
raise ValueError(msg)
return v
+
+ @field_validator("valid_from", "valid_until")
+ @classmethod
+ def _validate_iso8601_nullable(cls, v: str | None) -> str | None:
+ """Validate ISO-8601 format and normalize to UTC when not None.
+
+ Uses the shared timestamp validator so edge valid-time fields
+ normalise timezone-aware values to UTC exactly like the thought
+ record's timestamp columns, keeping SQLite TEXT ordering correct.
+
+ Args:
+ v: Timestamp string or None.
+
+ Returns:
+ The validated (and UTC-normalized) string, or None.
+
+ Raises:
+ ValueError: If string is not valid ISO-8601.
+
+ """
+ return validate_iso8601_nullable(v)
diff --git a/src/engrava/domain/models/thought.py b/src/engrava/domain/models/thought.py
index 4d74894..6c34f26 100644
--- a/src/engrava/domain/models/thought.py
+++ b/src/engrava/domain/models/thought.py
@@ -23,6 +23,7 @@
ThoughtVisibility,
)
from engrava.domain.exceptions import InvalidTransitionError
+from engrava.domain.models._temporal import validate_iso8601_nullable
#: Allowed value types for ``ThoughtRecord.metadata`` entries.
#:
@@ -74,7 +75,16 @@ class ThoughtRecord(BaseModel):
last_accessed_at: ISO-8601 datetime of last explicit access (nullable).
created_at: ISO-8601 datetime when the thought was persisted (nullable
for thoughts created before timestamp tracking was added).
+ This is transaction time — when the fact was *recorded*.
updated_at: ISO-8601 datetime of last mutation (nullable for legacy).
+ valid_from: ISO-8601 datetime marking the start of the interval
+ during which the fact is true in the world (valid time, the
+ second time axis). ``None`` means an open lower bound — the
+ fact is treated as valid from the beginning of time.
+ valid_until: ISO-8601 datetime marking the end of the interval
+ during which the fact is true in the world (valid time).
+ ``None`` means an open upper bound — the fact has no known
+ end and is treated as currently valid.
metadata: Extensible structured attributes (e.g. ``role``, ``lang``,
``content_type``, ``session_id``, ``turn_index``, ``speaker``).
Leaf values must be scalars (``str``, ``int``, ``float``,
@@ -123,6 +133,8 @@ class ThoughtRecord(BaseModel):
created_at: str | None = None
updated_at: str | None = None
expires_at: str | None = None
+ valid_from: str | None = None
+ valid_until: str | None = None
metadata: dict[str, MetadataValue] = Field(default_factory=dict)
@model_validator(mode="after")
@@ -145,7 +157,14 @@ def _validate_thought_id_not_empty(cls, v: str) -> str:
raise ValueError(msg)
return v
- @field_validator("created_at", "updated_at", "last_accessed_at", "expires_at")
+ @field_validator(
+ "created_at",
+ "updated_at",
+ "last_accessed_at",
+ "expires_at",
+ "valid_from",
+ "valid_until",
+ )
@classmethod
def _validate_iso8601_nullable(cls, v: str | None) -> str | None:
"""Validate ISO-8601 format and normalize to UTC when not None.
@@ -164,18 +183,7 @@ def _validate_iso8601_nullable(cls, v: str | None) -> str | None:
ValueError: If string is not valid ISO-8601.
"""
- if v is None:
- return v
- try:
- dt = datetime.datetime.fromisoformat(v)
- except ValueError as exc:
- msg = f"Must be ISO-8601 timestamp, got {v!r}"
- raise ValueError(msg) from exc
- # Normalize timezone-aware timestamps to UTC for safe TEXT ordering.
- if dt.tzinfo is not None:
- dt = dt.astimezone(datetime.UTC)
- return dt.isoformat()
- return v
+ return validate_iso8601_nullable(v)
def is_active(self) -> bool:
"""Check if the thought is in ACTIVE lifecycle status.
diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py
index 16c5b6d..a014aa2 100644
--- a/src/engrava/infrastructure/sqlite/engrava_core.py
+++ b/src/engrava/infrastructure/sqlite/engrava_core.py
@@ -463,7 +463,7 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
Applies the full ``schema_core.sql`` (including FTS5 virtual
table and sync triggers) only when the database has not already
been bootstrapped to schema version 3+. Databases at older
- versions are upgraded incrementally up to the current version (11).
+ versions are upgraded incrementally up to the current version (13).
After core schema creation or upgrade, probes for the ``thought_fts``
table and then runs any pending extension schema migrations for each
@@ -491,7 +491,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v9_to_v10()
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
elif current_version < 4: # noqa: PLR2004
await self._migrate_core_v3_to_v4()
@@ -503,7 +504,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v9_to_v10()
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
elif current_version < 5: # noqa: PLR2004
await self._migrate_core_v4_to_v5()
@@ -514,7 +516,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v9_to_v10()
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
elif current_version < 6: # noqa: PLR2004
await self._migrate_core_v5_to_v6()
@@ -524,7 +527,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v9_to_v10()
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
elif current_version < 7: # noqa: PLR2004
await self._migrate_core_v6_to_v7()
@@ -533,7 +537,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v9_to_v10()
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
elif current_version < 8: # noqa: PLR2004
await self._migrate_core_v7_to_v8()
@@ -541,29 +546,38 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v9_to_v10()
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
elif current_version < 9: # noqa: PLR2004
await self._migrate_core_v8_to_v9()
await self._migrate_core_v9_to_v10()
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
elif current_version < 10: # noqa: PLR2004
await self._migrate_core_v9_to_v10()
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
elif current_version < 11: # noqa: PLR2004
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
elif current_version < 12: # noqa: PLR2004
await self._migrate_core_v11_to_v12()
- await self._db.execute("PRAGMA user_version = 12")
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
+ await self._db.commit()
+ elif current_version < 13: # noqa: PLR2004
+ await self._migrate_core_v12_to_v13()
+ await self._db.execute("PRAGMA user_version = 13")
await self._db.commit()
# Ensure referential integrity is enforced for the lifetime of this
@@ -894,6 +908,73 @@ async def _migrate_core_v11_to_v12(self) -> None:
finally:
await self._db.execute("PRAGMA foreign_keys=ON")
+ async def _migrate_core_v12_to_v13(self) -> None:
+ """Add nullable valid-time columns + indexes to thought and edge (core-13).
+
+ Introduces a second time axis ("valid time") alongside the
+ existing transaction time. ``created_at`` records *when a fact was
+ stored*; ``valid_from`` / ``valid_until`` record *when a fact is
+ true in the world*. Both new columns are nullable ISO-8601 TEXT.
+
+ Backfill is intentionally asymmetric:
+
+ * ``thought.valid_from`` is seeded from ``created_at`` for rows
+ that have a transaction timestamp, giving existing thoughts a
+ sensible default lower bound. ``valid_until`` is left ``NULL``
+ (open upper bound). Rows whose ``created_at`` is ``NULL``
+ (pre-timestamp legacy rows) keep ``valid_from = NULL`` — no
+ date is fabricated.
+ * ``edge`` rows are **not** backfilled. The edge table has no
+ ``created_at`` column; its only temporal field is
+ ``created_cycle``, which is an internal cognitive-cycle counter,
+ not a calendar timestamp. Synthesising a valid-time date from a
+ cycle number would invent information, so edges keep both
+ valid-time fields ``NULL`` (an open lower bound).
+
+ Idempotent: each ``ALTER TABLE ... ADD COLUMN`` is wrapped in
+ ``contextlib.suppress(Exception)`` so a re-run after the column
+ already exists is a no-op, and every index uses
+ ``CREATE INDEX IF NOT EXISTS``. Re-running the migration leaves
+ the schema unchanged.
+ """
+ # Only touch tables that exist. A partial bootstrap may carry just
+ # ``thought`` (the ``edge`` table is created lazily); operating on an
+ # absent ``edge`` would raise ``no such table``. ``thought`` is always
+ # present at this point. This mirrors the table-existence guards used
+ # by the earlier edge-touching migrations and ``_purge_orphan_children``.
+ tables = ["thought"]
+ if await self._table_exists("edge"):
+ tables.append("edge")
+
+ for table in tables:
+ for column in ("valid_from", "valid_until"):
+ with contextlib.suppress(Exception): # Column may already exist.
+ await self._db.execute(f"ALTER TABLE {table} ADD COLUMN {column} TEXT")
+
+ # Asymmetric backfill — thought only, sourced from transaction time.
+ # Rows with NULL created_at (legacy, pre-timestamp) keep NULL
+ # valid_from; no calendar date is fabricated for them.
+ await self._db.execute(
+ "UPDATE thought SET valid_from = created_at "
+ "WHERE created_at IS NOT NULL AND valid_from IS NULL"
+ )
+ # Edge has no created_at; created_cycle is internal cognitive time,
+ # not calendar time, so edges are deliberately left with NULL
+ # valid_from / valid_until (an open lower bound).
+
+ for table in tables:
+ await self._db.execute(
+ f"CREATE INDEX IF NOT EXISTS idx_{table}_valid_from ON {table}(valid_from)"
+ )
+ await self._db.execute(
+ f"CREATE INDEX IF NOT EXISTS idx_{table}_valid_until "
+ f"ON {table}(valid_until) WHERE valid_until IS NOT NULL"
+ )
+ await self._db.execute(
+ f"CREATE INDEX IF NOT EXISTS idx_{table}_valid_range "
+ f"ON {table}(valid_from, valid_until)"
+ )
+
async def _fk_present(self, table: str, column: str) -> bool:
"""Return ``True`` when ``table`` carries an FK on ``column``."""
cursor = await self._db.execute(f"PRAGMA foreign_key_list({table})")
@@ -1162,6 +1243,8 @@ def _row_to_thought(self, row: aiosqlite.Row) -> ThoughtRecord:
created_at_raw = row["created_at"] if "created_at" in keys else None
updated_at_raw = row["updated_at"] if "updated_at" in keys else None
expires_at_raw = row["expires_at"] if "expires_at" in keys else None
+ valid_from_raw = row["valid_from"] if "valid_from" in keys else None
+ valid_until_raw = row["valid_until"] if "valid_until" in keys else None
metadata_json_raw = row["metadata_json"] if "metadata_json" in keys else "{}"
metadata_decoded: dict[str, MetadataValue] = (
json.loads(metadata_json_raw) if metadata_json_raw else {}
@@ -1191,6 +1274,8 @@ def _row_to_thought(self, row: aiosqlite.Row) -> ThoughtRecord:
created_at=created_at_raw,
updated_at=updated_at_raw,
expires_at=expires_at_raw,
+ valid_from=valid_from_raw,
+ valid_until=valid_until_raw,
metadata=metadata_decoded,
)
@@ -1231,8 +1316,9 @@ async def _get_edge_row(self, edge_id: str) -> aiosqlite.Row | None:
" confidence, embedding_ref, source_type, confirmation_count, "
" consolidated_from, visibility, access_count, "
" last_accessed_at, created_at, updated_at, expires_at, "
+ " valid_from, valid_until, "
" metadata_json) "
- "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
)
def _thought_to_core_params(self, thought: ThoughtRecord) -> tuple[object, ...]:
@@ -1275,6 +1361,8 @@ def _thought_to_core_params(self, thought: ThoughtRecord) -> tuple[object, ...]:
thought.created_at,
thought.updated_at,
thought.expires_at,
+ thought.valid_from,
+ thought.valid_until,
json.dumps(thought.metadata, ensure_ascii=False),
)
@@ -1287,6 +1375,7 @@ def _thought_to_core_params(self, thought: ThoughtRecord) -> tuple[object, ...]:
" consolidated_from = ?, visibility = ?,"
" access_count = ?, last_accessed_at = ?,"
" created_at = ?, updated_at = ?, expires_at = ?,"
+ " valid_from = ?, valid_until = ?,"
" metadata_json = ? "
"WHERE thought_id = ? AND updated_cycle = ?"
)
@@ -1335,6 +1424,8 @@ def _thought_to_core_update_params(
updated.created_at,
updated.updated_at,
updated.expires_at,
+ updated.valid_from,
+ updated.valid_until,
json.dumps(updated.metadata, ensure_ascii=False),
thought_id,
expected_cycle,
@@ -1922,8 +2013,8 @@ async def create_edge(self, edge: EdgeRecord) -> EdgeRecord:
await self._db.execute(
"INSERT INTO edge "
"(edge_id, from_thought_id, to_thought_id, edge_type, weight, "
- " created_cycle, source, decay_multiplier) "
- "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+ " created_cycle, source, decay_multiplier, valid_from, valid_until) "
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
(
edge.edge_id,
edge.from_thought_id,
@@ -1933,6 +2024,8 @@ async def create_edge(self, edge: EdgeRecord) -> EdgeRecord:
edge.created_cycle,
edge.source.value,
edge.decay_multiplier,
+ edge.valid_from,
+ edge.valid_until,
),
)
except aiosqlite.IntegrityError as exc:
@@ -1981,7 +2074,8 @@ async def update_edge(self, edge_id: str, **changes: object) -> EdgeRecord:
await self._db.execute(
"UPDATE edge SET from_thought_id = ?, to_thought_id = ?, edge_type = ?, "
- "weight = ?, created_cycle = ?, source = ?, decay_multiplier = ? "
+ "weight = ?, created_cycle = ?, source = ?, decay_multiplier = ?, "
+ "valid_from = ?, valid_until = ? "
"WHERE edge_id = ?",
(
updated.from_thought_id,
@@ -1991,6 +2085,8 @@ async def update_edge(self, edge_id: str, **changes: object) -> EdgeRecord:
updated.created_cycle,
updated.source.value,
updated.decay_multiplier,
+ updated.valid_from,
+ updated.valid_until,
edge_id,
),
)
@@ -3727,6 +3823,8 @@ def _row_to_edge(row: aiosqlite.Row) -> EdgeRecord:
keys = row.keys()
source_raw = row["source"] if "source" in keys else None
decay_raw = row["decay_multiplier"] if "decay_multiplier" in keys else 1.0
+ valid_from_raw = row["valid_from"] if "valid_from" in keys else None
+ valid_until_raw = row["valid_until"] if "valid_until" in keys else None
return EdgeRecord(
edge_id=row["edge_id"],
from_thought_id=row["from_thought_id"],
@@ -3736,6 +3834,8 @@ def _row_to_edge(row: aiosqlite.Row) -> EdgeRecord:
created_cycle=row["created_cycle"],
source=KnowledgeSource(source_raw) if source_raw else KnowledgeSource.EXPERIENCE,
decay_multiplier=float(decay_raw) if decay_raw else 1.0,
+ valid_from=valid_from_raw,
+ valid_until=valid_until_raw,
)
diff --git a/src/engrava/infrastructure/sqlite/schema_core.sql b/src/engrava/infrastructure/sqlite/schema_core.sql
index ba3f162..e9b576c 100644
--- a/src/engrava/infrastructure/sqlite/schema_core.sql
+++ b/src/engrava/infrastructure/sqlite/schema_core.sql
@@ -1,7 +1,7 @@
-- engrava: Core thought-graph schema (free-tier boundary — no internal-cognitive columns).
--- Version: core-12 (referential integrity: FK + ON DELETE CASCADE on edge/embedding/action → thought)
+-- Version: core-13 (valid-time axis: nullable valid_from / valid_until on thought + edge)
-PRAGMA user_version = 12;
+PRAGMA user_version = 13;
CREATE TABLE IF NOT EXISTS thought (
thought_id TEXT PRIMARY KEY,
@@ -25,7 +25,12 @@ CREATE TABLE IF NOT EXISTS thought (
created_at TEXT,
updated_at TEXT,
expires_at TEXT,
- metadata_json TEXT NOT NULL DEFAULT '{}'
+ metadata_json TEXT NOT NULL DEFAULT '{}',
+ -- Valid-time (world-time) axis. Declared last so a freshly created
+ -- database matches the column order of one upgraded in place, where
+ -- ``ALTER TABLE ADD COLUMN`` can only append.
+ valid_from TEXT,
+ valid_until TEXT
);
CREATE TABLE IF NOT EXISTS edge (
@@ -37,6 +42,8 @@ CREATE TABLE IF NOT EXISTS edge (
created_cycle INTEGER NOT NULL DEFAULT 0,
source TEXT NOT NULL DEFAULT 'EXPERIENCE',
decay_multiplier REAL NOT NULL DEFAULT 1.0,
+ valid_from TEXT,
+ valid_until TEXT,
UNIQUE(from_thought_id, to_thought_id, edge_type),
FOREIGN KEY (from_thought_id) REFERENCES thought(thought_id) ON DELETE CASCADE,
FOREIGN KEY (to_thought_id) REFERENCES thought(thought_id) ON DELETE CASCADE
@@ -183,3 +190,24 @@ CREATE TABLE IF NOT EXISTS extension_schema_versions (
migration_file TEXT,
extension_version TEXT
);
+
+-- -------------------------------------------------------------------
+-- Valid-time (world-time) indexes for thought and edge
+-- -------------------------------------------------------------------
+-- The valid_from / valid_until columns form a second time axis ("valid
+-- time" — when a fact is true in the world) alongside the transaction
+-- time recorded by created_at. These indexes back range scans over that
+-- axis. valid_until is partial (only non-NULL upper bounds are indexed)
+-- because an open upper bound is the common case and incurs no overhead.
+-- A fresh-bootstrap database must carry the same indexes as one upgraded
+-- in place, so they are declared here as well as in the migration helper.
+
+CREATE INDEX IF NOT EXISTS idx_thought_valid_from ON thought(valid_from);
+CREATE INDEX IF NOT EXISTS idx_thought_valid_until ON thought(valid_until)
+ WHERE valid_until IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_thought_valid_range ON thought(valid_from, valid_until);
+
+CREATE INDEX IF NOT EXISTS idx_edge_valid_from ON edge(valid_from);
+CREATE INDEX IF NOT EXISTS idx_edge_valid_until ON edge(valid_until)
+ WHERE valid_until IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_edge_valid_range ON edge(valid_from, valid_until);
diff --git a/tests/domain/__init__.py b/tests/domain/__init__.py
new file mode 100644
index 0000000..810b720
--- /dev/null
+++ b/tests/domain/__init__.py
@@ -0,0 +1 @@
+"""Tests for engrava domain models and logic."""
diff --git a/tests/domain/models/__init__.py b/tests/domain/models/__init__.py
new file mode 100644
index 0000000..8bc85cd
--- /dev/null
+++ b/tests/domain/models/__init__.py
@@ -0,0 +1 @@
+"""Tests for engrava core domain records."""
diff --git a/tests/domain/models/test_temporal_fields.py b/tests/domain/models/test_temporal_fields.py
new file mode 100644
index 0000000..9477c03
--- /dev/null
+++ b/tests/domain/models/test_temporal_fields.py
@@ -0,0 +1,135 @@
+"""Unit tests for the valid-time fields on the core domain records.
+
+Covers the nullable ISO-8601 ``valid_from`` / ``valid_until`` fields on
+both :class:`ThoughtRecord` and :class:`EdgeRecord`: ``None`` and valid
+ISO values are accepted, malformed strings are rejected, and
+timezone-aware values are normalised to UTC. The normalisation logic is
+shared via :mod:`engrava.domain.models._temporal`, so both records are
+exercised in parallel to guard against drift.
+"""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from engrava.domain.enums import EdgeType, LifecycleStatus, Priority, ThoughtType
+from engrava.domain.models import EdgeRecord, ThoughtRecord
+from engrava.domain.models._temporal import validate_iso8601_nullable
+
+
+def _make_thought(**overrides: object) -> ThoughtRecord:
+ base: dict[str, object] = {
+ "thought_id": "t-1",
+ "thought_type": ThoughtType.OBSERVATION,
+ "essence": "e",
+ "content": "c",
+ "priority": Priority.P2,
+ "lifecycle_status": LifecycleStatus.CREATED,
+ "created_cycle": 0,
+ "updated_cycle": 0,
+ "source": "test",
+ }
+ base.update(overrides)
+ return ThoughtRecord(**base) # type: ignore[arg-type]
+
+
+def _make_edge(**overrides: object) -> EdgeRecord:
+ base: dict[str, object] = {
+ "edge_id": "e-1",
+ "from_thought_id": "t-1",
+ "to_thought_id": "t-2",
+ "edge_type": EdgeType.ASSOCIATED,
+ "weight": 0.5,
+ "created_cycle": 0,
+ }
+ base.update(overrides)
+ return EdgeRecord(**base) # type: ignore[arg-type]
+
+
+# ---------------------------------------------------------------------------
+# Shared helper
+# ---------------------------------------------------------------------------
+
+
+class TestValidateIso8601Nullable:
+ """Direct tests for the shared validation helper."""
+
+ def test_none_passes_through(self) -> None:
+ assert validate_iso8601_nullable(None) is None
+
+ def test_naive_returned_unchanged(self) -> None:
+ assert validate_iso8601_nullable("2026-01-01T00:00:00") == "2026-01-01T00:00:00"
+
+ def test_positive_offset_normalised_to_utc(self) -> None:
+ assert validate_iso8601_nullable("2026-04-12T15:00:00+02:00") == "2026-04-12T13:00:00+00:00"
+
+ def test_malformed_raises_value_error(self) -> None:
+ with pytest.raises(ValueError, match="Must be ISO-8601 timestamp"):
+ validate_iso8601_nullable("not-a-timestamp")
+
+
+# ---------------------------------------------------------------------------
+# ThoughtRecord
+# ---------------------------------------------------------------------------
+
+
+class TestThoughtValidTime:
+ def test_defaults_to_none(self) -> None:
+ thought = _make_thought()
+ assert thought.valid_from is None
+ assert thought.valid_until is None
+
+ def test_valid_iso_accepted(self) -> None:
+ thought = _make_thought(
+ valid_from="2026-01-01T00:00:00",
+ valid_until="2026-12-31T00:00:00",
+ )
+ assert thought.valid_from == "2026-01-01T00:00:00"
+ assert thought.valid_until == "2026-12-31T00:00:00"
+
+ def test_tz_aware_normalised_to_utc(self) -> None:
+ thought = _make_thought(
+ valid_from="2026-04-12T15:00:00+02:00",
+ valid_until="2026-04-12T10:00:00-05:00",
+ )
+ assert thought.valid_from == "2026-04-12T13:00:00+00:00"
+ assert thought.valid_until == "2026-04-12T15:00:00+00:00"
+
+ @pytest.mark.parametrize("field", ["valid_from", "valid_until"])
+ def test_malformed_rejected(self, field: str) -> None:
+ with pytest.raises(ValidationError, match="Must be ISO-8601 timestamp"):
+ _make_thought(**{field: "garbage"})
+
+
+# ---------------------------------------------------------------------------
+# EdgeRecord
+# ---------------------------------------------------------------------------
+
+
+class TestEdgeValidTime:
+ def test_defaults_to_none(self) -> None:
+ edge = _make_edge()
+ assert edge.valid_from is None
+ assert edge.valid_until is None
+
+ def test_valid_iso_accepted(self) -> None:
+ edge = _make_edge(
+ valid_from="2026-01-01T00:00:00",
+ valid_until="2026-12-31T00:00:00",
+ )
+ assert edge.valid_from == "2026-01-01T00:00:00"
+ assert edge.valid_until == "2026-12-31T00:00:00"
+
+ def test_tz_aware_normalised_to_utc(self) -> None:
+ edge = _make_edge(
+ valid_from="2026-04-12T15:00:00+02:00",
+ valid_until="2026-04-12T10:00:00-05:00",
+ )
+ assert edge.valid_from == "2026-04-12T13:00:00+00:00"
+ assert edge.valid_until == "2026-04-12T15:00:00+00:00"
+
+ @pytest.mark.parametrize("field", ["valid_from", "valid_until"])
+ def test_malformed_rejected(self, field: str) -> None:
+ with pytest.raises(ValidationError, match="Must be ISO-8601 timestamp"):
+ _make_edge(**{field: "garbage"})
diff --git a/tests/test_cognitive_journal.py b/tests/test_cognitive_journal.py
index 1d94dd7..cb0d79c 100644
--- a/tests/test_cognitive_journal.py
+++ b/tests/test_cognitive_journal.py
@@ -718,7 +718,7 @@ async def test_fresh_schema_has_journal_table(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert row[0] == 12
+ assert row[0] == 13
finally:
await conn.close()
@@ -760,7 +760,7 @@ async def test_migrate_from_v5(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert row[0] == 12
+ assert row[0] == 13
finally:
await conn.close()
@@ -775,7 +775,7 @@ async def test_migration_idempotent(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert row[0] == 12
+ assert row[0] == 13
finally:
await conn.close()
diff --git a/tests/test_dedup_migration.py b/tests/test_dedup_migration.py
index 0d2512f..4f91f0c 100644
--- a/tests/test_dedup_migration.py
+++ b/tests/test_dedup_migration.py
@@ -182,7 +182,7 @@ async def test_ensure_schema_fresh_db_starts_at_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
assert "content_hash" in await _table_columns(fresh_db, "thought")
assert await _index_exists(fresh_db, "idx_thought_content_hash")
@@ -197,7 +197,7 @@ async def test_ensure_schema_from_v9_to_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
assert "content_hash" in await _table_columns(fresh_db, "thought")
assert await _index_exists(fresh_db, "idx_thought_content_hash")
@@ -212,7 +212,7 @@ async def test_ensure_schema_from_v9_idempotent(
for _ in range(3):
await store.ensure_schema()
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
async def test_ensure_schema_at_head_skips_all_migration_branches(
@@ -221,11 +221,11 @@ async def test_ensure_schema_at_head_skips_all_migration_branches(
"""Already-migrated DB stays at head across repeat ``ensure_schema`` calls."""
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema() # bootstrap fresh -> head
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
# Re-run; helpers should not fire (idempotent on user_version branch).
await store.ensure_schema()
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
assert "content_hash" in await _table_columns(fresh_db, "thought")
diff --git a/tests/test_embedding_providers.py b/tests/test_embedding_providers.py
index e5c2faf..91ba694 100644
--- a/tests/test_embedding_providers.py
+++ b/tests/test_embedding_providers.py
@@ -390,7 +390,7 @@ async def test_migration_from_v4_creates_metadata(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert int(row[0]) == 12
+ assert int(row[0]) == 13
# _metadata table should exist.
cursor = await conn.execute(
diff --git a/tests/test_engrava_completeness.py b/tests/test_engrava_completeness.py
index f920ec5..7881945 100644
--- a/tests/test_engrava_completeness.py
+++ b/tests/test_engrava_completeness.py
@@ -491,7 +491,7 @@ async def test_migration_idempotent(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert int(row[0]) == 12
+ assert int(row[0]) == 13
await conn.close()
diff --git a/tests/test_extension_load_with_migrations.py b/tests/test_extension_load_with_migrations.py
index 8abfc5f..5bbe8d7 100644
--- a/tests/test_extension_load_with_migrations.py
+++ b/tests/test_extension_load_with_migrations.py
@@ -59,11 +59,11 @@ async def test_extension_schema_versions_table_exists_after_ensure_schema(
)
assert await cursor.fetchone() is not None
- async def test_schema_version_is_11(self, db: aiosqlite.Connection) -> None:
+ async def test_schema_version_is_head(self, db: aiosqlite.Connection) -> None:
await _fresh_core(db)
cursor = await db.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert int(row[0]) == 12
+ assert int(row[0]) == 13
async def test_no_manifests_leaves_versions_table_empty(self, db: aiosqlite.Connection) -> None:
await _fresh_core(db)
@@ -289,7 +289,7 @@ async def test_existing_v8_db_upgraded_to_v9(self, tmp_path: Path) -> None:
cursor = await conn2.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert int(row[0]) == 12
+ assert int(row[0]) == 13
cursor = await conn2.execute(
"SELECT name FROM sqlite_master WHERE type='table' "
diff --git a/tests/test_metadata_migration.py b/tests/test_metadata_migration.py
index 029b641..71b928f 100644
--- a/tests/test_metadata_migration.py
+++ b/tests/test_metadata_migration.py
@@ -3,8 +3,8 @@
Mirrors ``test_dedup_migration.py``: exercises the
``_migrate_core_v10_to_v11`` helper directly (idempotence, ALTER
TABLE add-column behaviour) and the full ``ensure_schema`` cascade
-so DBs at every supported source version converge on
-``user_version = 11`` with the ``metadata_json`` column populated by
+so DBs at every supported source version converge on the current head
+``user_version`` with the ``metadata_json`` column populated by
the schema-level ``DEFAULT '{}'``.
"""
@@ -167,11 +167,11 @@ async def test_migrate_v10_to_v11_preserves_existing_rows_with_default(
async def test_ensure_schema_fresh_db_lands_at_head(
fresh_db: aiosqlite.Connection,
) -> None:
- """Empty DB ends at ``user_version = 11`` with the metadata column."""
+ """Empty DB ends at the head ``user_version`` with the metadata column."""
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
assert "metadata_json" in await _table_columns(fresh_db, "thought")
@@ -185,22 +185,22 @@ async def test_ensure_schema_from_v10_to_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
assert "metadata_json" in await _table_columns(fresh_db, "thought")
async def test_ensure_schema_idempotent_at_head(
fresh_db: aiosqlite.Connection,
) -> None:
- """Repeat calls after reaching head stay at v11 without errors."""
+ """Repeat calls after reaching head stay at the head version without errors."""
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
for _ in range(3):
await store.ensure_schema()
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
assert "metadata_json" in await _table_columns(fresh_db, "thought")
@@ -248,7 +248,7 @@ async def test_ensure_schema_from_v10_with_legacy_row_then_insert_new(
# ---------------------------------------------------------------------------
-# Cascade-from-any-version (1 parametrized — covers v3..v10 -> v11)
+# Cascade-from-any-version (1 parametrized — covers v3..v10 -> head)
# ---------------------------------------------------------------------------
@@ -257,7 +257,7 @@ async def test_cascade_from_any_version_to_head(
fresh_db: aiosqlite.Connection,
source_version: int,
) -> None:
- """A DB stamped at any historical core version cascades to v11.
+ """A DB stamped at any historical core version cascades to head.
The historical schemas differ across versions in ways unrelated to
the metadata column, so this test only seeds the ``user_version``
@@ -286,5 +286,5 @@ async def test_cascade_from_any_version_to_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 12
+ assert await _user_version(fresh_db) == 13
assert "metadata_json" in await _table_columns(fresh_db, "thought")
diff --git a/tests/test_mind_store_core.py b/tests/test_mind_store_core.py
index 69c743a..181f0a8 100644
--- a/tests/test_mind_store_core.py
+++ b/tests/test_mind_store_core.py
@@ -580,7 +580,7 @@ async def test_user_version_set_to_current(self, db: aiosqlite.Connection) -> No
cursor = await db.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert int(row[0]) == 12
+ assert int(row[0]) == 13
async def test_search_fts_lazy_probes_index(self, db: aiosqlite.Connection) -> None:
"""search_fts should work without an explicit _probe_fts call."""
diff --git a/tests/test_referential_integrity.py b/tests/test_referential_integrity.py
index 622f415..465c33d 100644
--- a/tests/test_referential_integrity.py
+++ b/tests/test_referential_integrity.py
@@ -148,11 +148,11 @@ async def test_action_carries_fk_on_source_thought_id(
assert rows[0]["from"] == "source_thought_id"
assert rows[0]["on_delete"] == "CASCADE"
- async def test_user_version_is_twelve(self, store: SqliteEngravaCore) -> None:
+ async def test_user_version_is_head(self, store: SqliteEngravaCore) -> None:
cursor = await store._db.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert row[0] == 12
+ assert row[0] == 13
class TestCreateEdgeRejectsOrphans:
@@ -429,7 +429,7 @@ async def test_clean_v11_migrates_with_zero_row_loss(
await core.ensure_schema()
version_row = await (await db.execute("PRAGMA user_version")).fetchone()
assert version_row is not None
- assert version_row[0] == 12
+ assert version_row[0] == 13
for table, expected in (("edge", 1), ("embedding", 1), ("action", 1), ("thought", 2)):
row = await (
await db.execute(f"SELECT COUNT(*) FROM {table}") # noqa: S608
@@ -508,7 +508,7 @@ async def test_migration_is_idempotent(
await core.ensure_schema() # second pass — must converge without error
version_row = await (await db.execute("PRAGMA user_version")).fetchone()
assert version_row is not None
- assert version_row[0] == 12
+ assert version_row[0] == 13
# FK declarations must still be exactly 2 on edge, not duplicated.
rows = list(await (await db.execute("PRAGMA foreign_key_list(edge)")).fetchall())
assert len(rows) == 2
@@ -662,7 +662,7 @@ async def test_full_ladder_path_from_oldest_supported_to_v12(
await core.ensure_schema()
version_row = await (await db.execute("PRAGMA user_version")).fetchone()
assert version_row is not None
- assert version_row[0] == 12
+ assert version_row[0] == 13
for table, expected in (("edge", 1), ("embedding", 1), ("action", 1), ("thought", 2)):
row = await (
await db.execute(f"SELECT COUNT(*) FROM {table}") # noqa: S608
diff --git a/tests/test_search_hybrid_graph.py b/tests/test_search_hybrid_graph.py
index 5ec2b9a..93df2dc 100644
--- a/tests/test_search_hybrid_graph.py
+++ b/tests/test_search_hybrid_graph.py
@@ -603,4 +603,4 @@ async def test_schema_migration_v7_to_head(self, tmp_path: Path) -> None:
await conn.close()
assert row is not None, "idx_edge_type_from missing after v7->head migration"
- assert int(version_row[0]) == 12
+ assert int(version_row[0]) == 13
diff --git a/tests/test_service_isolation.py b/tests/test_service_isolation.py
index fc870e9..7f33641 100644
--- a/tests/test_service_isolation.py
+++ b/tests/test_service_isolation.py
@@ -316,7 +316,7 @@ async def test_per_service_schema_independent(self, tmp_path: Path) -> None:
row_a = await cursor_a.fetchone()
cursor_b = await store_b._db.execute("PRAGMA user_version")
row_b = await cursor_b.fetchone()
- assert row_a[0] == row_b[0] == 12
+ assert row_a[0] == row_b[0] == 13
async def test_per_service_fts_independent(self, tmp_path: Path) -> None:
data_dir = tmp_path / "services"
@@ -483,7 +483,7 @@ def test_snapshot_includes_metadata_header(
lines = out.read_text(encoding="utf-8").strip().splitlines()
header = json.loads(lines[0])
assert header["_type"] == "metadata"
- assert header["schema_version"] == 12
+ assert header["schema_version"] == 13
assert header["embedding_model_name"] == "all-MiniLM-L12-v2"
assert header["embedding_dimension"] == 16
diff --git a/tests/test_ttl_auto_expiry.py b/tests/test_ttl_auto_expiry.py
index d78d2ea..c369022 100644
--- a/tests/test_ttl_auto_expiry.py
+++ b/tests/test_ttl_auto_expiry.py
@@ -271,7 +271,7 @@ async def test_fresh_schema_version_current(self, db: aiosqlite.Connection) -> N
cursor = await db.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert row[0] == 12
+ assert row[0] == 13
async def test_expires_at_column_exists(self, db: aiosqlite.Connection) -> None:
cursor = await db.execute("PRAGMA table_info(thought)")
@@ -303,7 +303,7 @@ async def test_migration_from_v6_idempotent(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert row[0] == 12
+ assert row[0] == 13
await conn.close()
diff --git a/tests/test_valid_time_migration.py b/tests/test_valid_time_migration.py
new file mode 100644
index 0000000..b97eac2
--- /dev/null
+++ b/tests/test_valid_time_migration.py
@@ -0,0 +1,669 @@
+"""Schema migration tests for the valid-time columns (core-12 -> core-13).
+
+Exercises ``_migrate_core_v12_to_v13`` directly (idempotence, ALTER
+add-column behaviour, asymmetric backfill, index creation) and the full
+``ensure_schema`` cascade so a database stamped at any supported source
+version converges on the head ``user_version`` with the ``valid_from`` /
+``valid_until`` columns and their indexes on both the ``thought`` and
+``edge`` tables.
+
+The feature introduces a second time axis ("valid time" — when a fact is
+true in the world) alongside the existing transaction time
+(``created_at``). Thought rows are backfilled from ``created_at``; edge
+rows are deliberately left ``NULL`` because the edge table has no
+calendar timestamp to source from.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import aiosqlite
+import pytest
+
+from engrava import SqliteEngravaCore
+from engrava.domain.enums import EdgeType, LifecycleStatus, Priority, ThoughtType
+from engrava.domain.models import EdgeRecord, ThoughtRecord
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator
+
+_VALID_THOUGHT_INDEXES = (
+ "idx_thought_valid_from",
+ "idx_thought_valid_until",
+ "idx_thought_valid_range",
+)
+_VALID_EDGE_INDEXES = (
+ "idx_edge_valid_from",
+ "idx_edge_valid_until",
+ "idx_edge_valid_range",
+)
+_ALL_VALID_INDEXES = _VALID_THOUGHT_INDEXES + _VALID_EDGE_INDEXES
+
+
+# ---------------------------------------------------------------------------
+# Helpers (mirror test_metadata_migration.py / test_dedup_migration.py)
+# ---------------------------------------------------------------------------
+
+
+async def _user_version(db: aiosqlite.Connection) -> int:
+ cursor = await db.execute("PRAGMA user_version")
+ row = await cursor.fetchone()
+ assert row is not None
+ return int(row[0])
+
+
+async def _table_columns(db: aiosqlite.Connection, table: str) -> set[str]:
+ cursor = await db.execute(f"PRAGMA table_info({table})")
+ rows = await cursor.fetchall()
+ return {row["name"] for row in rows}
+
+
+async def _table_info(db: aiosqlite.Connection, table: str) -> list[tuple[str, str]]:
+ """Return ``(name, declared_type)`` pairs for ``table`` in cid order."""
+ cursor = await db.execute(f"PRAGMA table_info({table})")
+ rows = await cursor.fetchall()
+ return [(row["name"], row["type"]) for row in rows]
+
+
+async def _index_names(db: aiosqlite.Connection, table: str) -> set[str]:
+ cursor = await db.execute(
+ "SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = ?",
+ (table,),
+ )
+ rows = await cursor.fetchall()
+ return {row["name"] for row in rows if row["name"] is not None}
+
+
+async def _index_exists(db: aiosqlite.Connection, name: str) -> bool:
+ cursor = await db.execute(
+ "SELECT 1 FROM sqlite_master WHERE type = 'index' AND name = ?",
+ (name,),
+ )
+ return await cursor.fetchone() is not None
+
+
+async def _row_count(db: aiosqlite.Connection, table: str) -> int:
+ cursor = await db.execute(f"SELECT COUNT(*) AS n FROM {table}") # noqa: S608
+ row = await cursor.fetchone()
+ assert row is not None
+ return int(row["n"])
+
+
+async def _bootstrap_core_at_v12(db: aiosqlite.Connection) -> None:
+ """Recreate a faithful core-12 ``thought`` + ``edge`` schema.
+
+ Mirrors what ``schema_core.sql`` looked like at ``user_version=12``
+ (referential integrity present, but no valid-time axis). The
+ pre-valid-time indexes a real v12 install carries on these two tables
+ (``idx_edge_type_from``, ``idx_thought_content_hash``,
+ ``idx_thought_expires``) are recreated too, so that after the upgrade a
+ migrated database is structurally identical to a freshly bootstrapped
+ one. The valid-time columns and their six indexes are deliberately
+ absent — that is precisely the surface the upgrade re-adds.
+ """
+ await db.executescript(
+ """
+ CREATE TABLE IF NOT EXISTS thought (
+ thought_id TEXT PRIMARY KEY,
+ thought_type TEXT NOT NULL,
+ essence TEXT NOT NULL,
+ content TEXT NOT NULL,
+ content_hash TEXT,
+ priority TEXT NOT NULL,
+ lifecycle_status TEXT NOT NULL DEFAULT 'CREATED',
+ created_cycle INTEGER NOT NULL DEFAULT 0,
+ updated_cycle INTEGER NOT NULL DEFAULT 0,
+ source TEXT NOT NULL DEFAULT 'human',
+ confidence REAL,
+ embedding_ref TEXT,
+ source_type TEXT NOT NULL DEFAULT 'EXPERIENCE',
+ confirmation_count INTEGER NOT NULL DEFAULT 0,
+ consolidated_from TEXT,
+ visibility TEXT NOT NULL DEFAULT 'selective',
+ access_count INTEGER NOT NULL DEFAULT 0,
+ last_accessed_at TEXT,
+ created_at TEXT,
+ updated_at TEXT,
+ expires_at TEXT,
+ metadata_json TEXT NOT NULL DEFAULT '{}'
+ );
+ CREATE TABLE IF NOT EXISTS edge (
+ edge_id TEXT PRIMARY KEY,
+ from_thought_id TEXT NOT NULL,
+ to_thought_id TEXT NOT NULL,
+ edge_type TEXT NOT NULL,
+ weight REAL NOT NULL DEFAULT 0.5,
+ created_cycle INTEGER NOT NULL DEFAULT 0,
+ source TEXT NOT NULL DEFAULT 'EXPERIENCE',
+ decay_multiplier REAL NOT NULL DEFAULT 1.0,
+ UNIQUE(from_thought_id, to_thought_id, edge_type),
+ FOREIGN KEY (from_thought_id) REFERENCES thought(thought_id) ON DELETE CASCADE,
+ FOREIGN KEY (to_thought_id) REFERENCES thought(thought_id) ON DELETE CASCADE
+ );
+ CREATE INDEX IF NOT EXISTS idx_thought_expires ON thought(expires_at)
+ WHERE expires_at IS NOT NULL;
+ CREATE INDEX IF NOT EXISTS idx_thought_content_hash ON thought(content_hash);
+ CREATE INDEX IF NOT EXISTS idx_edge_type_from ON edge(edge_type, from_thought_id);
+ PRAGMA user_version = 12;
+ """,
+ )
+ await db.commit()
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+async def fresh_db() -> AsyncIterator[aiosqlite.Connection]:
+ """Empty in-memory SQLite (``user_version`` starts at 0)."""
+ conn = await aiosqlite.connect(":memory:")
+ conn.row_factory = aiosqlite.Row
+ yield conn
+ await conn.close()
+
+
+# ---------------------------------------------------------------------------
+# Helper-level tests (direct ``_migrate_core_v12_to_v13``)
+# ---------------------------------------------------------------------------
+
+
+async def test_migrate_v12_to_v13_adds_columns_to_both_tables(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """The migration adds valid-time columns to ``thought`` and ``edge``."""
+ await _bootstrap_core_at_v12(fresh_db)
+ assert "valid_from" not in await _table_columns(fresh_db, "thought")
+ assert "valid_from" not in await _table_columns(fresh_db, "edge")
+
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v12_to_v13()
+
+ thought_cols = await _table_columns(fresh_db, "thought")
+ edge_cols = await _table_columns(fresh_db, "edge")
+ assert {"valid_from", "valid_until"} <= thought_cols
+ assert {"valid_from", "valid_until"} <= edge_cols
+
+
+async def test_migrate_v12_to_v13_creates_all_six_indexes(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """Three partial indexes per table are created (six total)."""
+ await _bootstrap_core_at_v12(fresh_db)
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v12_to_v13()
+
+ for index_name in _ALL_VALID_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_migrate_v12_to_v13_idempotent(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """Re-running the helper is safe: no duplicate columns or indexes, no error."""
+ await _bootstrap_core_at_v12(fresh_db)
+ store = SqliteEngravaCore(fresh_db)
+
+ for _ in range(3):
+ await store._migrate_core_v12_to_v13()
+
+ # Exactly one occurrence of each valid-time column per table.
+ for table in ("thought", "edge"):
+ info = await _table_info(fresh_db, table)
+ names = [name for name, _ in info]
+ assert names.count("valid_from") == 1, table
+ assert names.count("valid_until") == 1, table
+ # All six indexes present exactly once (sqlite_master keys by name).
+ for index_name in _ALL_VALID_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_migrate_v12_to_v13_backfills_thought_from_created_at(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """A thought with non-NULL ``created_at`` gets ``valid_from == created_at``."""
+ await _bootstrap_core_at_v12(fresh_db)
+ await fresh_db.execute(
+ """
+ INSERT INTO thought
+ (thought_id, thought_type, essence, content, priority, created_at)
+ VALUES (?, ?, ?, ?, ?, ?)
+ """,
+ ("t-dated", "OBSERVATION", "e", "c", "P2", "2026-01-02T03:04:05+00:00"),
+ )
+ await fresh_db.commit()
+
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v12_to_v13()
+
+ cursor = await fresh_db.execute(
+ "SELECT created_at, valid_from, valid_until FROM thought WHERE thought_id = ?",
+ ("t-dated",),
+ )
+ row = await cursor.fetchone()
+ assert row is not None
+ assert row["valid_from"] == row["created_at"]
+ assert row["valid_from"] == "2026-01-02T03:04:05+00:00"
+ # valid_until is always left open by the backfill.
+ assert row["valid_until"] is None
+
+
+async def test_migrate_v12_to_v13_does_not_fabricate_for_null_created_at(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """A thought with NULL ``created_at`` keeps ``valid_from`` NULL (no fabrication)."""
+ await _bootstrap_core_at_v12(fresh_db)
+ await fresh_db.execute(
+ """
+ INSERT INTO thought (thought_id, thought_type, essence, content, priority)
+ VALUES (?, ?, ?, ?, ?)
+ """,
+ ("t-legacy", "OBSERVATION", "e", "c", "P2"),
+ )
+ await fresh_db.commit()
+
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v12_to_v13()
+
+ cursor = await fresh_db.execute(
+ "SELECT created_at, valid_from FROM thought WHERE thought_id = ?",
+ ("t-legacy",),
+ )
+ row = await cursor.fetchone()
+ assert row is not None
+ assert row["created_at"] is None
+ assert row["valid_from"] is None
+
+
+async def test_migrate_v12_to_v13_leaves_all_edges_null(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """Edges are intentionally not backfilled — both valid-time fields stay NULL."""
+ await _bootstrap_core_at_v12(fresh_db)
+ await fresh_db.execute(
+ """
+ INSERT INTO thought (thought_id, thought_type, essence, content, priority)
+ VALUES (?, ?, ?, ?, ?)
+ """,
+ ("t-1", "OBSERVATION", "e", "c", "P2"),
+ )
+ await fresh_db.execute(
+ """
+ INSERT INTO edge (edge_id, from_thought_id, to_thought_id, edge_type, created_cycle)
+ VALUES (?, ?, ?, ?, ?)
+ """,
+ ("e-1", "t-1", "t-1", "ASSOCIATED", 7),
+ )
+ await fresh_db.commit()
+
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v12_to_v13()
+
+ cursor = await fresh_db.execute(
+ "SELECT valid_from, valid_until FROM edge WHERE edge_id = ?",
+ ("e-1",),
+ )
+ row = await cursor.fetchone()
+ assert row is not None
+ assert row["valid_from"] is None
+ assert row["valid_until"] is None
+
+
+async def test_migrate_v12_to_v13_tolerates_absent_edge_table(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """A thought-only partial bootstrap (no ``edge`` table) migrates cleanly.
+
+ Some databases carry only the ``thought`` table at this point — the
+ ``edge`` table is created lazily. The migration must skip the edge
+ column-adds and edge indexes rather than raising ``no such table``,
+ while still upgrading ``thought`` fully.
+ """
+ await fresh_db.executescript(
+ """
+ CREATE TABLE thought (
+ thought_id TEXT PRIMARY KEY,
+ thought_type TEXT NOT NULL,
+ essence TEXT NOT NULL,
+ content TEXT NOT NULL,
+ priority TEXT NOT NULL,
+ created_at TEXT
+ );
+ PRAGMA user_version = 12;
+ """,
+ )
+ await fresh_db.commit()
+ assert "edge" not in {
+ row["name"]
+ for row in await (
+ await fresh_db.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
+ ).fetchall()
+ }
+
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v12_to_v13() # must not raise
+
+ thought_cols = await _table_columns(fresh_db, "thought")
+ assert {"valid_from", "valid_until"} <= thought_cols
+ for index_name in _VALID_THOUGHT_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+ # No edge indexes were created (the table is absent).
+ for index_name in _VALID_EDGE_INDEXES:
+ assert not await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_migrate_v12_to_v13_preserves_row_counts(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """The additive migration changes no row counts in either table."""
+ await _bootstrap_core_at_v12(fresh_db)
+ await fresh_db.executemany(
+ """
+ INSERT INTO thought (thought_id, thought_type, essence, content, priority, created_at)
+ VALUES (?, 'OBSERVATION', 'e', 'c', 'P2', ?)
+ """,
+ [("t-1", "2026-01-01T00:00:00+00:00"), ("t-2", None), ("t-3", "2026-02-02T00:00:00+00:00")],
+ )
+ await fresh_db.execute(
+ """
+ INSERT INTO edge (edge_id, from_thought_id, to_thought_id, edge_type)
+ VALUES (?, 't-1', 't-2', 'ASSOCIATED'), (?, 't-2', 't-3', 'CONSOLIDATED_FROM')
+ """,
+ ("e-1", "e-2"),
+ )
+ await fresh_db.commit()
+ thoughts_before = await _row_count(fresh_db, "thought")
+ edges_before = await _row_count(fresh_db, "edge")
+
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v12_to_v13()
+
+ assert await _row_count(fresh_db, "thought") == thoughts_before == 3
+ assert await _row_count(fresh_db, "edge") == edges_before == 2
+
+
+# ---------------------------------------------------------------------------
+# ensure_schema cascade tests
+# ---------------------------------------------------------------------------
+
+
+async def test_ensure_schema_fresh_db_lands_at_head(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """An empty DB bootstraps straight to v13 with all valid-time columns."""
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == 13
+ assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "thought")
+ assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "edge")
+ for index_name in _ALL_VALID_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_ensure_schema_from_v12_to_head(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """A v12 DB walks the ``< 13`` branch up to head."""
+ await _bootstrap_core_at_v12(fresh_db)
+ assert await _user_version(fresh_db) == 12
+
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == 13
+ assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "thought")
+ assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "edge")
+
+
+async def test_ensure_schema_idempotent_at_head(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """Repeated ``ensure_schema`` calls stay at v13 without error."""
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+ assert await _user_version(fresh_db) == 13
+
+ for _ in range(3):
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == 13
+
+
+async def test_ensure_schema_from_v12_backfills_and_preserves_counts(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """End-to-end cascade: dated thought backfilled, legacy + edges NULL, counts kept."""
+ await _bootstrap_core_at_v12(fresh_db)
+ await fresh_db.execute(
+ """
+ INSERT INTO thought (thought_id, thought_type, essence, content, priority, created_at)
+ VALUES (?, 'OBSERVATION', 'e', 'c', 'P2', ?)
+ """,
+ ("t-dated", "2026-03-04T05:06:07+00:00"),
+ )
+ await fresh_db.execute(
+ """
+ INSERT INTO thought (thought_id, thought_type, essence, content, priority)
+ VALUES (?, 'OBSERVATION', 'e', 'c', 'P2')
+ """,
+ ("t-legacy",),
+ )
+ await fresh_db.execute(
+ """
+ INSERT INTO edge (edge_id, from_thought_id, to_thought_id, edge_type)
+ VALUES (?, 't-dated', 't-legacy', 'ASSOCIATED')
+ """,
+ ("e-1",),
+ )
+ await fresh_db.commit()
+
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == 13
+ assert await _row_count(fresh_db, "thought") == 2
+ assert await _row_count(fresh_db, "edge") == 1
+
+ dated = await fresh_db.execute("SELECT valid_from FROM thought WHERE thought_id = 't-dated'")
+ dated_row = await dated.fetchone()
+ assert dated_row is not None
+ assert dated_row["valid_from"] == "2026-03-04T05:06:07+00:00"
+
+ legacy = await fresh_db.execute("SELECT valid_from FROM thought WHERE thought_id = 't-legacy'")
+ legacy_row = await legacy.fetchone()
+ assert legacy_row is not None
+ assert legacy_row["valid_from"] is None
+
+ edge = await fresh_db.execute("SELECT valid_from, valid_until FROM edge WHERE edge_id = 'e-1'")
+ edge_row = await edge.fetchone()
+ assert edge_row is not None
+ assert edge_row["valid_from"] is None
+ assert edge_row["valid_until"] is None
+
+
+@pytest.mark.parametrize("source_version", [3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
+async def test_cascade_from_any_version_to_head(
+ fresh_db: aiosqlite.Connection,
+ source_version: int,
+) -> None:
+ """A DB stamped at any historical core version cascades to head v13.
+
+ Only the ``user_version`` PRAGMA is seeded; ``ensure_schema`` walks
+ the matching elif branch up to head, exactly as an in-place upgrade
+ from an older install would.
+ """
+ bootstrap = SqliteEngravaCore(fresh_db)
+ await bootstrap.ensure_schema()
+ await fresh_db.execute(f"PRAGMA user_version = {source_version}")
+ await fresh_db.commit()
+ assert await _user_version(fresh_db) == source_version
+
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == 13
+ assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "thought")
+ assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "edge")
+
+
+# ---------------------------------------------------------------------------
+# Fresh-v13 == migrated-v13 structural equivalence
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("table", ["thought", "edge"])
+async def test_fresh_equals_migrated_schema(table: str) -> None:
+ """A fresh-bootstrap v13 DB is structurally identical to a migrated-v13 DB.
+
+ Compares ``PRAGMA table_info`` (column name + declared type) and the
+ set of indexes for ``table`` between a database that ran the
+ fresh-create DDL and one upgraded in place from v12.
+ """
+ fresh = await aiosqlite.connect(":memory:")
+ fresh.row_factory = aiosqlite.Row
+ migrated = await aiosqlite.connect(":memory:")
+ migrated.row_factory = aiosqlite.Row
+ try:
+ await SqliteEngravaCore(fresh).ensure_schema()
+
+ await _bootstrap_core_at_v12(migrated)
+ await SqliteEngravaCore(migrated).ensure_schema()
+
+ assert await _user_version(fresh) == await _user_version(migrated) == 13
+ assert await _table_info(fresh, table) == await _table_info(migrated, table)
+ assert await _index_names(fresh, table) == await _index_names(migrated, table)
+ finally:
+ await fresh.close()
+ await migrated.close()
+
+
+# ---------------------------------------------------------------------------
+# Round-trip through the public CRUD path
+# ---------------------------------------------------------------------------
+
+
+async def test_thought_round_trip_preserves_valid_time(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """``create_thought`` then read preserves set valid-time; unset round-trips None."""
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ with_valid = ThoughtRecord(
+ thought_id="t-valid",
+ thought_type=ThoughtType.OBSERVATION,
+ essence="e",
+ content="c",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.CREATED,
+ created_cycle=0,
+ updated_cycle=0,
+ source="test",
+ valid_from="2026-01-01T00:00:00+00:00",
+ valid_until="2026-12-31T00:00:00+00:00",
+ )
+ await store.create_thought(with_valid)
+ fetched = await store.get_thought("t-valid")
+ assert fetched is not None
+ assert fetched.valid_from == "2026-01-01T00:00:00+00:00"
+ assert fetched.valid_until == "2026-12-31T00:00:00+00:00"
+
+ without_valid = ThoughtRecord(
+ thought_id="t-none",
+ thought_type=ThoughtType.OBSERVATION,
+ essence="e",
+ content="c",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.CREATED,
+ created_cycle=0,
+ updated_cycle=0,
+ source="test",
+ )
+ await store.create_thought(without_valid)
+ fetched_none = await store.get_thought("t-none")
+ assert fetched_none is not None
+ assert fetched_none.valid_from is None
+ assert fetched_none.valid_until is None
+
+
+async def test_edge_round_trip_preserves_valid_time(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """``create_edge`` then read preserves set valid-time; unset round-trips None."""
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ parent = ThoughtRecord(
+ thought_id="t-parent",
+ thought_type=ThoughtType.OBSERVATION,
+ essence="e",
+ content="c",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.CREATED,
+ created_cycle=0,
+ updated_cycle=0,
+ source="test",
+ )
+ await store.create_thought(parent)
+
+ edge_with_valid = EdgeRecord(
+ edge_id="e-valid",
+ from_thought_id="t-parent",
+ to_thought_id="t-parent",
+ edge_type=EdgeType.ASSOCIATED,
+ weight=0.5,
+ created_cycle=1,
+ valid_from="2026-05-01T00:00:00+00:00",
+ valid_until="2026-06-01T00:00:00+00:00",
+ )
+ await store.create_edge(edge_with_valid)
+ edges = await store.get_edges("t-parent")
+ by_id = {edge.edge_id: edge for edge in edges}
+ assert by_id["e-valid"].valid_from == "2026-05-01T00:00:00+00:00"
+ assert by_id["e-valid"].valid_until == "2026-06-01T00:00:00+00:00"
+
+ edge_without_valid = EdgeRecord(
+ edge_id="e-none",
+ from_thought_id="t-parent",
+ to_thought_id="t-parent",
+ edge_type=EdgeType.CONSOLIDATED_FROM,
+ weight=0.5,
+ created_cycle=1,
+ )
+ await store.create_edge(edge_without_valid)
+ edges_after = {e.edge_id: e for e in await store.get_edges("t-parent")}
+ assert edges_after["e-none"].valid_from is None
+ assert edges_after["e-none"].valid_until is None
+
+
+async def test_existing_list_unaffected_by_additive_columns(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """A non-temporal listing returns the same rows regardless of valid-time."""
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ ids = ("t-a", "t-b", "t-c")
+ for index, thought_id in enumerate(ids):
+ await store.create_thought(
+ ThoughtRecord(
+ thought_id=thought_id,
+ thought_type=ThoughtType.OBSERVATION,
+ essence="e",
+ content="c",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.CREATED,
+ created_cycle=index,
+ updated_cycle=index,
+ source="test",
+ valid_from="2026-01-01T00:00:00+00:00" if index == 0 else None,
+ )
+ )
+
+ for thought_id in ids:
+ fetched = await store.get_thought(thought_id)
+ assert fetched is not None
+ assert fetched.thought_id == thought_id
From 86de77fc11ab4322927648d8c7a51ea6d13da91a Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Wed, 10 Jun 2026 12:53:44 +0200
Subject: [PATCH 15/40] feat: add temporal query predicates and invalidate
primitive
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add four opt-in, NULL-tolerant valid-time predicates to the WHERE clause
of FIND/COUNT queries — valid_at, valid_now, valid_within, valid_between —
working on both the thought and edge tables. Open bounds (NULL valid_from
or valid_until) are treated as -inf / +inf, so legacy and open-interval
rows stay visible; valid_between requires real bounds on both ends.
valid_now resolves against the server clock, with an injectable instant
for deterministic tests.
Add invalidate_thought / invalidate_edge: deterministic, idempotent
convenience methods that close a record's valid-time interval by setting
valid_until. They never delete, never cascade to connected edges, and
perform no similarity search or model inference.
A query with no temporal predicate behaves exactly as before.
---
.../infrastructure/sqlite/engrava_core.py | 74 +++
src/engrava/mindql/executor.py | 109 +++-
src/engrava/mindql/parser.py | 216 +++++++-
tests/test_invalidate.py | 196 +++++++
tests/test_mindql.py | 519 +++++++++++++++++-
5 files changed, 1088 insertions(+), 26 deletions(-)
create mode 100644 tests/test_invalidate.py
diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py
index a014aa2..84a2b7b 100644
--- a/src/engrava/infrastructure/sqlite/engrava_core.py
+++ b/src/engrava/infrastructure/sqlite/engrava_core.py
@@ -45,6 +45,7 @@
StaleDataError,
ThoughtNotFoundError,
)
+from engrava.domain.models._temporal import validate_iso8601_nullable
from engrava.domain.models.action import ActionRecord
from engrava.domain.models.edge import EdgeRecord
from engrava.domain.models.embedding import EmbeddingRecord
@@ -1839,6 +1840,44 @@ async def update_thought(self, thought_id: str, **changes: object) -> ThoughtRec
await self._maybe_auto_cleanup(exclude_id=thought_id)
return updated
+ async def invalidate_thought(
+ self,
+ thought_id: str,
+ valid_until: str,
+ ) -> ThoughtRecord:
+ """Close a thought's valid-time interval at the given instant.
+
+ Sets the thought's ``valid_until`` to ``valid_until``, marking the
+ end of the window during which the fact is considered true in the
+ world. This is a deterministic, valid-time-only operation:
+
+ * It is **not** a delete — the row and all of its history remain
+ stored and retrievable; only the valid-time upper bound changes.
+ * It performs **no** similarity search, automatic invalidation, or
+ model inference of any kind.
+ * It does **not** cascade to the thought's edges — invalidating a
+ thought leaves every connected edge's valid-time interval
+ untouched.
+ * It is **idempotent**: invalidating with the same ``valid_until``
+ twice converges to the same stored value.
+
+ Args:
+ thought_id: UUID of the thought to invalidate.
+ valid_until: ISO-8601 instant at which the fact stops being
+ valid. Stored as the thought's ``valid_until`` bound.
+
+ Returns:
+ The updated thought record.
+
+ Raises:
+ ThoughtNotFoundError: If the thought does not exist.
+ StaleDataError: If the row was modified since it was read.
+ ValueError: If ``valid_until`` is not a valid ISO-8601 timestamp.
+
+ """
+ normalized = validate_iso8601_nullable(valid_until)
+ return await self.update_thought(thought_id, valid_until=normalized)
+
async def list_thoughts(
self,
*,
@@ -2104,6 +2143,41 @@ async def update_edge(self, edge_id: str, **changes: object) -> EdgeRecord:
await self._maybe_commit()
return updated
+ async def invalidate_edge(
+ self,
+ edge_id: str,
+ valid_until: str,
+ ) -> EdgeRecord:
+ """Close an edge's valid-time interval at the given instant.
+
+ Sets the edge's ``valid_until`` to ``valid_until``, marking the end
+ of the window during which the relation is considered true in the
+ world. Like :meth:`invalidate_thought`, this is a deterministic,
+ valid-time-only operation:
+
+ * It is **not** a delete — the edge row remains stored and
+ retrievable; only the valid-time upper bound changes.
+ * It performs **no** similarity search, automatic invalidation, or
+ model inference of any kind.
+ * It is **idempotent**: invalidating with the same ``valid_until``
+ twice converges to the same stored value.
+
+ Args:
+ edge_id: UUID of the edge to invalidate.
+ valid_until: ISO-8601 instant at which the relation stops being
+ valid. Stored as the edge's ``valid_until`` bound.
+
+ Returns:
+ The updated edge record.
+
+ Raises:
+ ValueError: If the edge does not exist, or ``valid_until`` is not
+ a valid ISO-8601 timestamp.
+
+ """
+ normalized = validate_iso8601_nullable(valid_until)
+ return await self.update_edge(edge_id, valid_until=normalized)
+
async def get_edges(
self,
thought_id: str,
diff --git a/src/engrava/mindql/executor.py b/src/engrava/mindql/executor.py
index a9b3577..364b7b9 100644
--- a/src/engrava/mindql/executor.py
+++ b/src/engrava/mindql/executor.py
@@ -6,16 +6,46 @@
from __future__ import annotations
+import contextvars
+import datetime
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any
-from engrava.mindql.parser import MindQLCommand, MindQLOperator, MindQLParseError
+from engrava.mindql.parser import (
+ MindQLCommand,
+ MindQLOperator,
+ MindQLParseError,
+ TemporalPredicateKind,
+)
if TYPE_CHECKING:
import aiosqlite
from engrava.domain.protocols.hooks import MindQLExtension
- from engrava.mindql.parser import MindQLQuery
+ from engrava.mindql.parser import MindQLQuery, TemporalPredicate
+
+
+# Optional pinned "now" for ``valid_now`` resolution. When unset (the
+# default), ``valid_now`` resolves against the server clock at execution
+# time. Tests pin a deterministic instant via :func:`mindql_now.set`.
+mindql_now: contextvars.ContextVar[str | None] = contextvars.ContextVar(
+ "mindql_now",
+ default=None,
+)
+
+
+def _resolve_now() -> str:
+ """Resolve the instant ``valid_now`` evaluates against.
+
+ Returns:
+ The pinned instant from the ``mindql_now`` context variable when set,
+ otherwise the current UTC time as an ISO-8601 string.
+
+ """
+ pinned = mindql_now.get()
+ if pinned is not None:
+ return pinned
+ return datetime.datetime.now(datetime.UTC).isoformat()
# Columns that are safe to filter on (allowlist per table).
@@ -79,6 +109,10 @@
MindQLOperator.LE: "<=",
}
+# Tables carrying the bi-temporal valid-time columns. Temporal predicates are
+# only meaningful against these; applying one to any other table is rejected.
+_TEMPORAL_TABLES: frozenset[str] = frozenset({"thought", "edge"})
+
@dataclass(frozen=True)
class MindQLResult:
@@ -261,6 +295,12 @@ def _build_where(
) -> tuple[list[str], list[object]]:
"""Build WHERE clauses and params, validating columns.
+ Ordinary ``field op value`` conditions and opt-in valid-time
+ temporal predicates are both emitted here, so every code path that
+ builds a query body (FIND and COUNT) gets temporal filtering for
+ free. The clauses are returned in source order: conditions first,
+ then temporal predicates.
+
Args:
table: Target table name.
query: Parsed query.
@@ -269,7 +309,9 @@ def _build_where(
Tuple of (clause strings, parameter values).
Raises:
- MindQLParseError: If a condition references a disallowed column.
+ MindQLParseError: If a condition references a disallowed column,
+ or a temporal predicate targets a table without valid-time
+ columns.
"""
allowed = _ALLOWED_COLUMNS.get(table, frozenset())
@@ -284,4 +326,65 @@ def _build_where(
clauses.append(f"{cond.field} {op_sql} ?")
params.append(cond.value)
+ for predicate in query.temporal_predicates:
+ fragment, frag_params = self._build_temporal_clause(table, predicate)
+ clauses.append(fragment)
+ params.extend(frag_params)
+
return clauses, params
+
+ @staticmethod
+ def _build_temporal_clause(
+ table: str,
+ predicate: TemporalPredicate,
+ ) -> tuple[str, list[object]]:
+ """Build the NULL-tolerant SQL fragment for one temporal predicate.
+
+ NULL ``valid_from`` is an open lower bound (negative infinity) and
+ NULL ``valid_until`` is an open upper bound (positive infinity).
+ ``valid_at`` / ``valid_now`` / ``valid_within`` are NULL-tolerant so
+ rows with an open bound stay visible; ``valid_between`` requires real
+ bounds on both ends and therefore excludes open-bound rows.
+
+ Args:
+ table: Target table name (must carry valid-time columns).
+ predicate: The parsed temporal predicate.
+
+ Returns:
+ Tuple of (SQL fragment, ordered parameter values).
+
+ Raises:
+ MindQLParseError: If ``table`` has no valid-time columns.
+
+ """
+ if table not in _TEMPORAL_TABLES:
+ msg = f"Temporal predicate not supported for table {table!r}"
+ raise MindQLParseError(msg)
+
+ kind = predicate.kind
+ if kind == TemporalPredicateKind.VALID_NOW:
+ now = _resolve_now()
+ return (
+ "(valid_from IS NULL OR valid_from <= ?) "
+ "AND (valid_until IS NULL OR valid_until > ?)",
+ [now, now],
+ )
+ if kind == TemporalPredicateKind.VALID_AT:
+ instant = predicate.start
+ return (
+ "(valid_from IS NULL OR valid_from <= ?) "
+ "AND (valid_until IS NULL OR valid_until > ?)",
+ [instant, instant],
+ )
+ if kind == TemporalPredicateKind.VALID_WITHIN:
+ return (
+ "(valid_from IS NULL OR valid_from < ?) "
+ "AND (valid_until IS NULL OR valid_until > ?)",
+ [predicate.end, predicate.start],
+ )
+ # VALID_BETWEEN — closed containment requiring real bounds on both ends.
+ return (
+ "valid_from IS NOT NULL AND valid_from >= ? "
+ "AND valid_until IS NOT NULL AND valid_until <= ?",
+ [predicate.start, predicate.end],
+ )
diff --git a/src/engrava/mindql/parser.py b/src/engrava/mindql/parser.py
index 15a1150..3d5bb49 100644
--- a/src/engrava/mindql/parser.py
+++ b/src/engrava/mindql/parser.py
@@ -19,6 +19,16 @@
CUSTOM_CMD arg
+The WHERE clause also accepts opt-in **temporal predicates** that filter rows
+by their valid-time interval (the ``valid_from`` / ``valid_until`` columns on
+the ``thought`` and ``edge`` tables). They are NULL-tolerant: a row with an
+open lower or upper bound stays visible::
+
+ FIND thoughts WHERE valid_now
+ FIND edges WHERE valid_at '2025-01-01T00:00:00+00:00'
+ FIND thoughts WHERE priority = 'P1' AND valid_within '2025-01-01' '2025-02-01'
+ FIND edges WHERE valid_between '2025-01-01' '2025-12-31'
+
Grammar (simplified BNF)::
query := find_query | count_query | select_query | extension_query
@@ -29,10 +39,16 @@
table_name := "thoughts" | "thought" | "edges" | "edge"
| "embeddings" | "embedding" | "actions" | "action"
- where_clause := "WHERE" condition ("AND" condition)*
+ where_clause := "WHERE" clause ("AND" clause)*
+ clause := condition | temporal_predicate
condition := field_name operator value
+ temporal_pred := "valid_now"
+ | "valid_at" timestamp
+ | "valid_within" timestamp timestamp
+ | "valid_between" timestamp timestamp
operator := "=" | "!=" | ">" | "<" | ">=" | "<="
value := quoted_string | number
+ timestamp := quoted_string | bare_iso8601_token
quoted_string := "'" "'"
limit_clause := "LIMIT" integer
"""
@@ -43,6 +59,8 @@
from dataclasses import dataclass, field
from enum import StrEnum
+from engrava.domain.models._temporal import validate_iso8601_nullable
+
class MindQLCommand(StrEnum):
"""Supported MindQL command verbs."""
@@ -93,6 +111,40 @@ class Condition:
value: str | int | float
+class TemporalPredicateKind(StrEnum):
+ """Kinds of valid-time predicate supported in a WHERE clause.
+
+ Each kind filters rows by their valid-time interval — the
+ ``valid_from`` / ``valid_until`` columns present on the ``thought`` and
+ ``edge`` tables — without referencing those columns as ordinary
+ filterable values.
+ """
+
+ VALID_NOW = "valid_now"
+ VALID_AT = "valid_at"
+ VALID_WITHIN = "valid_within"
+ VALID_BETWEEN = "valid_between"
+
+
+@dataclass(frozen=True)
+class TemporalPredicate:
+ """A single valid-time predicate in a WHERE clause.
+
+ Attributes:
+ kind: Which temporal predicate this is.
+ start: First ISO-8601 timestamp argument, or ``None`` for
+ ``valid_now`` (which carries no argument and resolves against
+ the current instant at execution time).
+ end: Second ISO-8601 timestamp argument, present only for the
+ two-argument predicates (``valid_within`` / ``valid_between``).
+
+ """
+
+ kind: TemporalPredicateKind
+ start: str | None = None
+ end: str | None = None
+
+
@dataclass(frozen=True)
class MindQLQuery:
"""Parsed MindQL query plan.
@@ -101,6 +153,9 @@ class MindQLQuery:
command: The MindQL command verb.
table: Target table (canonical name, e.g. ``"thought"``).
conditions: WHERE conditions.
+ temporal_predicates: Valid-time predicates parsed from the WHERE
+ clause. Empty when the query carries no temporal predicate, in
+ which case execution behaves exactly as before this feature.
limit: Optional LIMIT clause.
raw_sql: Original SQL for SELECT passthrough.
extension_name: Extension command name (for EXTENSION type).
@@ -111,6 +166,7 @@ class MindQLQuery:
command: MindQLCommand
table: str | None = None
conditions: list[Condition] = field(default_factory=list)
+ temporal_predicates: list[TemporalPredicate] = field(default_factory=list)
limit: int | None = None
raw_sql: str | None = None
extension_name: str | None = None
@@ -131,6 +187,17 @@ class MindQLParseError(Exception):
r"(\w+)\s*(!=|>=|<=|=|>|<)\s*(?:'([^']*)'|(\S+))",
)
+# Number of timestamp arguments each temporal predicate carries.
+_TEMPORAL_ARITY: dict[TemporalPredicateKind, int] = {
+ TemporalPredicateKind.VALID_NOW: 0,
+ TemporalPredicateKind.VALID_AT: 1,
+ TemporalPredicateKind.VALID_WITHIN: 2,
+ TemporalPredicateKind.VALID_BETWEEN: 2,
+}
+
+# A timestamp argument: a single-quoted string OR a bare whitespace-free token.
+_TIMESTAMP_ARG_RE = re.compile(r"'([^']*)'|(\S+)")
+
_OPERATOR_MAP: dict[str, MindQLOperator] = {
"=": MindQLOperator.EQ,
"!=": MindQLOperator.NE,
@@ -202,34 +269,43 @@ def parse(
# Parse remainder for WHERE and LIMIT
remainder = " ".join(tokens[2:])
- conditions, limit = _parse_clauses(remainder)
+ conditions, temporal_predicates, limit = _parse_clauses(remainder)
return MindQLQuery(
command=command,
table=table,
conditions=conditions,
+ temporal_predicates=temporal_predicates,
limit=limit,
)
-def _parse_clauses(text: str) -> tuple[list[Condition], int | None]:
+def _parse_clauses(
+ text: str,
+) -> tuple[list[Condition], list[TemporalPredicate], int | None]:
"""Parse WHERE and LIMIT clauses from the remainder of a FIND/COUNT query.
+ Each ``AND``-separated WHERE part is first checked against the temporal
+ predicate keywords (``valid_now`` / ``valid_at`` / ``valid_within`` /
+ ``valid_between``); only when a part is not a temporal predicate does it
+ fall through to the ordinary ``field op value`` condition grammar.
+
Args:
text: Everything after the table name.
Returns:
- Tuple of (conditions, limit).
+ Tuple of (conditions, temporal_predicates, limit).
Raises:
MindQLParseError: If clauses are malformed.
"""
conditions: list[Condition] = []
+ temporal_predicates: list[TemporalPredicate] = []
limit: int | None = None
if not text.strip():
- return conditions, limit
+ return conditions, temporal_predicates, limit
upper = text.upper()
@@ -249,27 +325,123 @@ def _parse_clauses(text: str) -> tuple[list[Condition], int | None]:
stripped_part = part.strip()
if not stripped_part:
continue
- match = _CONDITION_RE.match(stripped_part)
- if not match:
- msg = f"Invalid condition: {stripped_part!r}"
- raise MindQLParseError(msg)
- field_name = match.group(1)
- op_str = match.group(2)
- # group 3 = quoted value, group 4 = unquoted value
- raw_value: str = match.group(3) if match.group(3) is not None else match.group(4)
- value: str | int | float = _coerce_value(raw_value)
- conditions.append(
- Condition(
- field=field_name,
- operator=_OPERATOR_MAP[op_str],
- value=value,
- )
- )
+ # Recognise temporal predicates before the ordinary condition
+ # grammar — they carry keyword + bare timestamp args (no operator)
+ # and so never match ``_CONDITION_RE``.
+ temporal = _try_parse_temporal_predicate(stripped_part)
+ if temporal is not None:
+ temporal_predicates.append(temporal)
+ continue
+ conditions.append(_parse_condition(stripped_part))
elif text.strip():
msg = f"Expected WHERE or LIMIT, got: {text.strip()!r}"
raise MindQLParseError(msg)
- return conditions, limit
+ return conditions, temporal_predicates, limit
+
+
+def _parse_condition(part: str) -> Condition:
+ """Parse a single ``field op value`` WHERE condition.
+
+ Args:
+ part: One ``AND``-separated WHERE fragment.
+
+ Returns:
+ The parsed condition.
+
+ Raises:
+ MindQLParseError: If the fragment is not a valid condition.
+
+ """
+ match = _CONDITION_RE.match(part)
+ if not match:
+ msg = f"Invalid condition: {part!r}"
+ raise MindQLParseError(msg)
+ field_name = match.group(1)
+ op_str = match.group(2)
+ # group 3 = quoted value, group 4 = unquoted value
+ raw_value: str = match.group(3) if match.group(3) is not None else match.group(4)
+ value: str | int | float = _coerce_value(raw_value)
+ return Condition(
+ field=field_name,
+ operator=_OPERATOR_MAP[op_str],
+ value=value,
+ )
+
+
+def _try_parse_temporal_predicate(part: str) -> TemporalPredicate | None:
+ """Recognise and parse a temporal predicate WHERE fragment.
+
+ Args:
+ part: One ``AND``-separated WHERE fragment.
+
+ Returns:
+ The parsed :class:`TemporalPredicate` when ``part`` begins with a
+ temporal keyword, otherwise ``None`` so the caller can fall through
+ to the ordinary condition grammar.
+
+ Raises:
+ MindQLParseError: If ``part`` names a temporal keyword but its
+ timestamp arguments are missing, surplus, or not ISO-8601. The
+ message is intentionally generic and never echoes the keyword
+ list.
+
+ """
+ tokens = part.split(None, 1)
+ keyword = tokens[0].lower()
+ try:
+ kind = TemporalPredicateKind(keyword)
+ except ValueError:
+ return None
+
+ rest = tokens[1] if len(tokens) > 1 else ""
+ raw_args = _TIMESTAMP_ARG_RE.findall(rest)
+ # Each match is a ``(quoted, bare)`` pair; exactly one group is non-empty.
+ args = [quoted or bare for quoted, bare in raw_args]
+
+ expected = _TEMPORAL_ARITY[kind]
+ if len(args) != expected:
+ msg = "Malformed temporal predicate"
+ raise MindQLParseError(msg)
+
+ validated = [_require_iso8601(arg) for arg in args]
+ start = validated[0] if expected >= 1 else None
+ end = validated[1] if expected >= 2 else None # noqa: PLR2004
+ return TemporalPredicate(kind=kind, start=start, end=end)
+
+
+def _require_iso8601(arg: str) -> str:
+ """Validate that a temporal-predicate argument is a non-empty ISO-8601 value.
+
+ Unlike :func:`validate_iso8601_nullable`, a missing or empty value is an
+ error here: temporal predicates that take an argument require a real
+ timestamp.
+
+ Args:
+ arg: The raw timestamp token (quotes already stripped).
+
+ Returns:
+ The validated, UTC-normalised timestamp string.
+
+ Raises:
+ MindQLParseError: If the argument is empty or not valid ISO-8601. The
+ message is generic and public-safe.
+
+ """
+ if not arg.strip():
+ msg = "Malformed temporal predicate"
+ raise MindQLParseError(msg)
+ try:
+ validated = validate_iso8601_nullable(arg)
+ except ValueError as exc:
+ msg = "Malformed temporal predicate"
+ raise MindQLParseError(msg) from exc
+ # ``validate_iso8601_nullable`` only returns ``None`` for a ``None`` input,
+ # which cannot happen here, but narrow the type for the caller.
+ if validated is None: # pragma: no cover - defensive, unreachable
+ msg = "Malformed temporal predicate"
+ raise MindQLParseError(msg)
+ return validated
def _coerce_value(raw: str) -> str | int | float:
diff --git a/tests/test_invalidate.py b/tests/test_invalidate.py
new file mode 100644
index 0000000..9e03dcd
--- /dev/null
+++ b/tests/test_invalidate.py
@@ -0,0 +1,196 @@
+"""Tests for the deterministic ``invalidate`` valid-time primitives.
+
+Covers :meth:`SqliteEngravaCore.invalidate_thought` and
+:meth:`SqliteEngravaCore.invalidate_edge`: each closes a record's valid-time
+interval by stamping ``valid_until``, is idempotent, and never cascades or
+deletes.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import aiosqlite
+import pytest
+
+from engrava import (
+ EdgeRecord,
+ EdgeType,
+ LifecycleStatus,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtRecord,
+ ThoughtType,
+)
+from engrava.domain.exceptions import ThoughtNotFoundError
+from engrava.mindql import executor as executor_module
+from engrava.mindql.parser import parse
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator
+
+
+_T_JAN = "2025-01-01T00:00:00+00:00"
+_T_MID = "2025-03-01T00:00:00+00:00"
+_T_CLOSE = "2025-04-01T00:00:00+00:00"
+
+
+@pytest.fixture
+async def store() -> AsyncIterator[SqliteEngravaCore]:
+ """In-memory store with the core schema applied."""
+ conn = await aiosqlite.connect(":memory:")
+ conn.row_factory = aiosqlite.Row
+ await conn.execute("PRAGMA foreign_keys = ON")
+ core = SqliteEngravaCore(conn)
+ await core.ensure_schema()
+ yield core
+ await conn.close()
+
+
+def _mk_thought(thought_id: str) -> ThoughtRecord:
+ return ThoughtRecord(
+ thought_id=thought_id,
+ thought_type=ThoughtType.OBSERVATION,
+ essence="essence",
+ content="content",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=1,
+ updated_cycle=1,
+ source="test",
+ valid_from=_T_JAN,
+ )
+
+
+class TestInvalidateThought:
+ """``invalidate_thought`` closes a thought's valid-time interval."""
+
+ async def test_sets_valid_until(self, store: SqliteEngravaCore) -> None:
+ await store.create_thought(_mk_thought("t1"))
+ updated = await store.invalidate_thought("t1", _T_CLOSE)
+ assert updated.valid_until == _T_CLOSE
+ # Persisted, not just returned.
+ reloaded = await store.get_thought("t1")
+ assert reloaded is not None
+ assert reloaded.valid_until == _T_CLOSE
+
+ async def test_is_not_a_delete(self, store: SqliteEngravaCore) -> None:
+ await store.create_thought(_mk_thought("t1"))
+ await store.invalidate_thought("t1", _T_CLOSE)
+ # The row is still present and retrievable.
+ assert await store.get_thought("t1") is not None
+
+ async def test_idempotent(self, store: SqliteEngravaCore) -> None:
+ await store.create_thought(_mk_thought("t1"))
+ first = await store.invalidate_thought("t1", _T_CLOSE)
+ second = await store.invalidate_thought("t1", _T_CLOSE)
+ assert first.valid_until == second.valid_until == _T_CLOSE
+
+ async def test_drops_out_of_valid_now_after_close(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ await store.create_thought(_mk_thought("t1"))
+ # Before invalidation: visible at MID.
+ token = executor_module.mindql_now.set(_T_MID)
+ try:
+ before = await store.execute_mindql(parse("FIND thoughts WHERE valid_now"))
+ assert {row["thought_id"] for row in before.rows} == {"t1"}
+ # Close the interval at MID, then query "now" after the close.
+ await store.invalidate_thought("t1", _T_MID)
+ executor_module.mindql_now.set(_T_CLOSE)
+ after = await store.execute_mindql(parse("FIND thoughts WHERE valid_now"))
+ assert after.rows == []
+ finally:
+ executor_module.mindql_now.reset(token)
+
+ async def test_does_not_modify_edges(self, store: SqliteEngravaCore) -> None:
+ await store.create_thought(_mk_thought("t1"))
+ await store.create_thought(_mk_thought("t2"))
+ await store.create_edge(
+ EdgeRecord(
+ edge_id="e1",
+ from_thought_id="t1",
+ to_thought_id="t2",
+ edge_type=EdgeType.ASSOCIATED,
+ weight=0.5,
+ created_cycle=1,
+ valid_from=_T_JAN,
+ )
+ )
+ await store.invalidate_thought("t1", _T_CLOSE)
+ # The edge's valid-time interval is untouched — no cascade.
+ edges = await store.get_edges("t1")
+ assert len(edges) == 1
+ assert edges[0].valid_until is None
+
+ async def test_missing_thought_raises(self, store: SqliteEngravaCore) -> None:
+ with pytest.raises(ThoughtNotFoundError):
+ await store.invalidate_thought("ghost", _T_CLOSE)
+
+ async def test_non_iso_valid_until_raises(self, store: SqliteEngravaCore) -> None:
+ await store.create_thought(_mk_thought("t1"))
+ with pytest.raises(ValueError, match="ISO-8601"):
+ await store.invalidate_thought("t1", "not-a-date")
+
+
+class TestInvalidateEdge:
+ """``invalidate_edge`` closes an edge's valid-time interval."""
+
+ async def _seed_edge(self, store: SqliteEngravaCore) -> None:
+ await store.create_thought(_mk_thought("t1"))
+ await store.create_thought(_mk_thought("t2"))
+ await store.create_edge(
+ EdgeRecord(
+ edge_id="e1",
+ from_thought_id="t1",
+ to_thought_id="t2",
+ edge_type=EdgeType.ASSOCIATED,
+ weight=0.5,
+ created_cycle=1,
+ valid_from=_T_JAN,
+ )
+ )
+
+ async def test_sets_valid_until(self, store: SqliteEngravaCore) -> None:
+ await self._seed_edge(store)
+ updated = await store.invalidate_edge("e1", _T_CLOSE)
+ assert updated.valid_until == _T_CLOSE
+ edges = await store.get_edges("t1")
+ assert edges[0].valid_until == _T_CLOSE
+
+ async def test_is_not_a_delete(self, store: SqliteEngravaCore) -> None:
+ await self._seed_edge(store)
+ await store.invalidate_edge("e1", _T_CLOSE)
+ assert len(await store.get_edges("t1")) == 1
+
+ async def test_idempotent(self, store: SqliteEngravaCore) -> None:
+ await self._seed_edge(store)
+ first = await store.invalidate_edge("e1", _T_CLOSE)
+ second = await store.invalidate_edge("e1", _T_CLOSE)
+ assert first.valid_until == second.valid_until == _T_CLOSE
+
+ async def test_drops_out_of_valid_now_after_close(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ await self._seed_edge(store)
+ token = executor_module.mindql_now.set(_T_MID)
+ try:
+ before = await store.execute_mindql(parse("FIND edges WHERE valid_now"))
+ assert {row["edge_id"] for row in before.rows} == {"e1"}
+ await store.invalidate_edge("e1", _T_MID)
+ executor_module.mindql_now.set(_T_CLOSE)
+ after = await store.execute_mindql(parse("FIND edges WHERE valid_now"))
+ assert after.rows == []
+ finally:
+ executor_module.mindql_now.reset(token)
+
+ async def test_missing_edge_raises(self, store: SqliteEngravaCore) -> None:
+ with pytest.raises(ValueError, match="Edge not found"):
+ await store.invalidate_edge("ghost", _T_CLOSE)
+
+ async def test_non_iso_valid_until_raises(self, store: SqliteEngravaCore) -> None:
+ await self._seed_edge(store)
+ with pytest.raises(ValueError, match="ISO-8601"):
+ await store.invalidate_edge("e1", "not-a-date")
diff --git a/tests/test_mindql.py b/tests/test_mindql.py
index 9048b64..a5acbae 100644
--- a/tests/test_mindql.py
+++ b/tests/test_mindql.py
@@ -12,6 +12,8 @@
import pytest
from engrava import (
+ EdgeRecord,
+ EdgeType,
LifecycleStatus,
Priority,
SqliteEngravaCore,
@@ -19,6 +21,7 @@
ThoughtType,
)
from engrava.domain.protocols.hooks import MindQLExtension
+from engrava.mindql import executor as executor_module
from engrava.mindql.executor import MindQLExecutor
from engrava.mindql.parser import (
Condition,
@@ -26,11 +29,13 @@
MindQLOperator,
MindQLParseError,
MindQLQuery,
+ TemporalPredicate,
+ TemporalPredicateKind,
parse,
)
if TYPE_CHECKING:
- from collections.abc import AsyncIterator
+ from collections.abc import AsyncIterator, Iterator
# ---------------------------------------------------------------------------
@@ -72,6 +77,92 @@ async def populated_db(db: aiosqlite.Connection) -> aiosqlite.Connection:
return db
+# Fixed ISO-8601 instants used across the temporal-predicate tests. The window
+# of the "bounded" rows is [JAN, JUN); MID falls inside it, BEFORE/AFTER do not.
+_T_JAN = "2025-01-01T00:00:00+00:00"
+_T_FEB = "2025-02-01T00:00:00+00:00"
+_T_MID = "2025-03-01T00:00:00+00:00"
+_T_JUN = "2025-06-01T00:00:00+00:00"
+_T_DEC = "2025-12-31T00:00:00+00:00"
+_T_BEFORE = "2024-01-01T00:00:00+00:00"
+_T_AFTER = "2026-01-01T00:00:00+00:00"
+
+
+@pytest.fixture
+def pinned_now() -> Iterator[None]:
+ """Pin ``valid_now`` to a deterministic instant for the duration of a test.
+
+ Resets the context variable afterwards so tests stay isolated.
+ """
+ token = executor_module.mindql_now.set(_T_MID)
+ try:
+ yield
+ finally:
+ executor_module.mindql_now.reset(token)
+
+
+@pytest.fixture
+async def temporal_db(db: aiosqlite.Connection) -> aiosqlite.Connection:
+ """Database with thoughts and edges spanning a range of valid-time bounds.
+
+ Layout (both tables share the same shape):
+
+ * ``*-bounded`` — closed window ``[JAN, JUN)``.
+ * ``*-open-from``— ``valid_from`` NULL, ``valid_until`` JUN.
+ * ``*-open-until``— ``valid_from`` JAN, ``valid_until`` NULL.
+ * ``*-legacy`` — both bounds NULL (pre-feature / un-backfilled rows).
+ * ``*-future`` — closed window ``[AFTER, +inf)`` (begins after MID).
+ """
+ store = SqliteEngravaCore(db)
+
+ def mk_thought(suffix: str, vf: str | None, vu: str | None) -> ThoughtRecord:
+ return ThoughtRecord(
+ thought_id=f"t-{suffix}",
+ thought_type=ThoughtType.OBSERVATION,
+ essence=f"essence {suffix}",
+ content=f"content {suffix}",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=1,
+ updated_cycle=1,
+ source="test",
+ valid_from=vf,
+ valid_until=vu,
+ )
+
+ rows: list[tuple[str, str | None, str | None]] = [
+ ("bounded", _T_JAN, _T_JUN),
+ ("open-from", None, _T_JUN),
+ ("open-until", _T_JAN, None),
+ ("legacy", None, None),
+ ("future", _T_AFTER, None),
+ ]
+ for suffix, vf, vu in rows:
+ await store.create_thought(mk_thought(suffix, vf, vu))
+
+ # A shared source anchor plus one distinct target per edge so every edge
+ # has unique (from, to, type) — the edge table enforces that as UNIQUE.
+ # These wiring thoughts use a ``wire-`` id namespace so they never collide
+ # with the ``t-`` rows the thought-table assertions reason about.
+ await store.create_thought(mk_thought("wire-anchor", _T_JAN, None))
+ for suffix, vf, vu in rows:
+ await store.create_thought(mk_thought(f"wire-target-{suffix}", _T_JAN, None))
+ await store.create_edge(
+ EdgeRecord(
+ edge_id=f"e-{suffix}",
+ from_thought_id="t-wire-anchor",
+ to_thought_id=f"t-wire-target-{suffix}",
+ edge_type=EdgeType.ASSOCIATED,
+ weight=0.5,
+ created_cycle=1,
+ valid_from=vf,
+ valid_until=vu,
+ )
+ )
+ await db.commit()
+ return db
+
+
# ---------------------------------------------------------------------------
# Parser tests
# ---------------------------------------------------------------------------
@@ -393,3 +484,429 @@ async def test_disallowed_column(self, populated_db: aiosqlite.Connection) -> No
)
with pytest.raises(MindQLParseError, match="not allowed"):
await executor.execute(q)
+
+
+# ---------------------------------------------------------------------------
+# Temporal-predicate parsing
+# ---------------------------------------------------------------------------
+
+
+class TestParserTemporalPredicates:
+ """Parsing of the four valid-time WHERE predicates."""
+
+ def test_valid_now_no_args(self) -> None:
+ q = parse("FIND thoughts WHERE valid_now")
+ assert q.conditions == []
+ assert len(q.temporal_predicates) == 1
+ pred = q.temporal_predicates[0]
+ assert pred.kind == TemporalPredicateKind.VALID_NOW
+ assert pred.start is None
+ assert pred.end is None
+
+ def test_valid_at_one_arg(self) -> None:
+ q = parse(f"FIND thoughts WHERE valid_at '{_T_JAN}'")
+ pred = q.temporal_predicates[0]
+ assert pred.kind == TemporalPredicateKind.VALID_AT
+ assert pred.start == _T_JAN
+ assert pred.end is None
+
+ def test_valid_within_two_args(self) -> None:
+ q = parse(f"FIND edges WHERE valid_within '{_T_JAN}' '{_T_JUN}'")
+ pred = q.temporal_predicates[0]
+ assert pred.kind == TemporalPredicateKind.VALID_WITHIN
+ assert pred.start == _T_JAN
+ assert pred.end == _T_JUN
+
+ def test_valid_between_two_args(self) -> None:
+ q = parse(f"FIND edges WHERE valid_between '{_T_JAN}' '{_T_DEC}'")
+ pred = q.temporal_predicates[0]
+ assert pred.kind == TemporalPredicateKind.VALID_BETWEEN
+ assert pred.start == _T_JAN
+ assert pred.end == _T_DEC
+
+ def test_bare_unquoted_timestamp_arg(self) -> None:
+ # Whitespace-free ISO-8601 tokens are accepted without quotes.
+ q = parse(f"FIND thoughts WHERE valid_at {_T_JAN}")
+ assert q.temporal_predicates[0].start == _T_JAN
+
+ def test_case_insensitive_keyword(self) -> None:
+ q = parse("FIND thoughts WHERE VALID_NOW")
+ assert q.temporal_predicates[0].kind == TemporalPredicateKind.VALID_NOW
+
+ def test_composable_with_condition(self) -> None:
+ q = parse("FIND thoughts WHERE priority = 'P1' AND valid_now")
+ assert len(q.conditions) == 1
+ assert q.conditions[0].field == "priority"
+ assert len(q.temporal_predicates) == 1
+ assert q.temporal_predicates[0].kind == TemporalPredicateKind.VALID_NOW
+
+ def test_condition_after_temporal(self) -> None:
+ q = parse(f"FIND thoughts WHERE valid_at '{_T_JAN}' AND priority = 'P1'")
+ assert len(q.conditions) == 1
+ assert len(q.temporal_predicates) == 1
+
+ def test_two_temporal_predicates(self) -> None:
+ q = parse(f"FIND thoughts WHERE valid_now AND valid_at '{_T_JAN}'")
+ assert len(q.temporal_predicates) == 2
+
+ def test_timezone_normalised_to_utc(self) -> None:
+ # A +02:00 offset is normalised to the equivalent UTC instant so
+ # that lexicographic TEXT comparison in SQLite stays correct.
+ q = parse("FIND thoughts WHERE valid_at '2025-01-01T02:00:00+02:00'")
+ assert q.temporal_predicates[0].start == "2025-01-01T00:00:00+00:00"
+
+ def test_normal_query_has_no_temporal_predicates(self) -> None:
+ q = parse("FIND thoughts WHERE priority = 'P1'")
+ assert q.temporal_predicates == []
+
+
+class TestParserTemporalPredicateErrors:
+ """Malformed temporal predicates raise a generic, public-safe error."""
+
+ def test_valid_at_missing_arg(self) -> None:
+ with pytest.raises(MindQLParseError, match="Malformed temporal predicate"):
+ parse("FIND thoughts WHERE valid_at")
+
+ def test_valid_within_missing_second_arg(self) -> None:
+ with pytest.raises(MindQLParseError, match="Malformed temporal predicate"):
+ parse(f"FIND thoughts WHERE valid_within '{_T_JAN}'")
+
+ def test_valid_between_missing_args(self) -> None:
+ with pytest.raises(MindQLParseError, match="Malformed temporal predicate"):
+ parse("FIND thoughts WHERE valid_between")
+
+ def test_valid_now_with_surplus_arg(self) -> None:
+ with pytest.raises(MindQLParseError, match="Malformed temporal predicate"):
+ parse(f"FIND thoughts WHERE valid_now '{_T_JAN}'")
+
+ def test_valid_at_surplus_arg(self) -> None:
+ with pytest.raises(MindQLParseError, match="Malformed temporal predicate"):
+ parse(f"FIND thoughts WHERE valid_at '{_T_JAN}' '{_T_JUN}'")
+
+ def test_non_iso_timestamp_rejected(self) -> None:
+ with pytest.raises(MindQLParseError, match="Malformed temporal predicate"):
+ parse("FIND thoughts WHERE valid_at 'not-a-date'")
+
+ def test_empty_quoted_arg_rejected(self) -> None:
+ with pytest.raises(MindQLParseError, match="Malformed temporal predicate"):
+ parse("FIND thoughts WHERE valid_at ''")
+
+ def test_error_message_does_not_leak_keyword_list(self) -> None:
+ # The public-safe message must not enumerate the supported keywords.
+ with pytest.raises(MindQLParseError) as exc_info:
+ parse("FIND thoughts WHERE valid_at")
+ message = str(exc_info.value)
+ for keyword in ("valid_now", "valid_at", "valid_within", "valid_between"):
+ assert keyword not in message
+
+
+# ---------------------------------------------------------------------------
+# Temporal-predicate execution (both thought and edge tables)
+# ---------------------------------------------------------------------------
+
+
+async def _find_ids(
+ store: SqliteEngravaCore,
+ table: str,
+ where: str,
+ id_column: str,
+) -> set[str]:
+ """Run a FIND on ``table`` with ``where`` and collect the id column.
+
+ The ``wire-`` namespace (anchor / target thoughts used only to satisfy
+ edge referential integrity) is filtered out so thought-table assertions
+ reason solely about the five valid-time shape rows.
+ """
+ result = await store.execute_mindql(parse(f"FIND {table} WHERE {where}"))
+ return {row[id_column] for row in result.rows if "wire-" not in row[id_column]}
+
+
+# Each table is exercised through the same matrix: (table, id column, id prefix).
+_TEMPORAL_TABLE_CASES = [
+ ("thoughts", "thought_id", "t-"),
+ ("edges", "edge_id", "e-"),
+]
+
+
+class TestExecutorTemporalThoughtAndEdge:
+ """Temporal predicates filter both the thought and edge tables."""
+
+ @pytest.mark.parametrize(("table", "id_column", "prefix"), _TEMPORAL_TABLE_CASES)
+ async def test_valid_at_selects_in_window(
+ self,
+ temporal_db: aiosqlite.Connection,
+ table: str,
+ id_column: str,
+ prefix: str,
+ ) -> None:
+ store = SqliteEngravaCore(temporal_db)
+ ids = await _find_ids(store, table, f"valid_at '{_T_MID}'", id_column)
+ # MID is inside [JAN, JUN), so bounded + both open variants match;
+ # the open-bound rows (open-from, open-until, legacy) are NULL-tolerant;
+ # the future row (begins AFTER) is excluded.
+ assert ids == {
+ f"{prefix}bounded",
+ f"{prefix}open-from",
+ f"{prefix}open-until",
+ f"{prefix}legacy",
+ }
+
+ @pytest.mark.parametrize(("table", "id_column", "prefix"), _TEMPORAL_TABLE_CASES)
+ async def test_valid_at_before_window_excludes_bounded(
+ self,
+ temporal_db: aiosqlite.Connection,
+ table: str,
+ id_column: str,
+ prefix: str,
+ ) -> None:
+ store = SqliteEngravaCore(temporal_db)
+ ids = await _find_ids(store, table, f"valid_at '{_T_BEFORE}'", id_column)
+ # BEFORE JAN: the bounded and open-until rows (valid_from JAN) start
+ # later and are excluded; open lower bounds (open-from, legacy) match.
+ assert ids == {f"{prefix}open-from", f"{prefix}legacy"}
+
+ @pytest.mark.parametrize(("table", "id_column", "prefix"), _TEMPORAL_TABLE_CASES)
+ @pytest.mark.usefixtures("pinned_now")
+ async def test_valid_now_excludes_future_and_expired(
+ self,
+ temporal_db: aiosqlite.Connection,
+ table: str,
+ id_column: str,
+ prefix: str,
+ ) -> None:
+ # pinned_now fixes "now" to MID (inside [JAN, JUN)).
+ store = SqliteEngravaCore(temporal_db)
+ ids = await _find_ids(store, table, "valid_now", id_column)
+ # Future row (valid_from AFTER) is future-valid → excluded; nothing has
+ # expired before MID, so every in/open-window row is returned.
+ assert ids == {
+ f"{prefix}bounded",
+ f"{prefix}open-from",
+ f"{prefix}open-until",
+ f"{prefix}legacy",
+ }
+ assert f"{prefix}future" not in ids
+
+ @pytest.mark.parametrize(("table", "id_column", "prefix"), _TEMPORAL_TABLE_CASES)
+ async def test_valid_now_excludes_expired_validity(
+ self,
+ temporal_db: aiosqlite.Connection,
+ table: str,
+ id_column: str,
+ prefix: str,
+ ) -> None:
+ # Pin "now" to AFTER: the bounded and open-from rows ended at JUN and
+ # are now expired; open-until / legacy / future stay valid.
+ token = executor_module.mindql_now.set(_T_AFTER)
+ try:
+ store = SqliteEngravaCore(temporal_db)
+ ids = await _find_ids(store, table, "valid_now", id_column)
+ finally:
+ executor_module.mindql_now.reset(token)
+ assert ids == {
+ f"{prefix}open-until",
+ f"{prefix}legacy",
+ f"{prefix}future",
+ }
+
+ @pytest.mark.parametrize(("table", "id_column", "prefix"), _TEMPORAL_TABLE_CASES)
+ async def test_valid_within_overlap(
+ self,
+ temporal_db: aiosqlite.Connection,
+ table: str,
+ id_column: str,
+ prefix: str,
+ ) -> None:
+ store = SqliteEngravaCore(temporal_db)
+ # Probe window [FEB, MID) overlaps every row whose interval intersects
+ # it; the future row begins at AFTER and does not overlap.
+ ids = await _find_ids(
+ store,
+ table,
+ f"valid_within '{_T_FEB}' '{_T_MID}'",
+ id_column,
+ )
+ assert ids == {
+ f"{prefix}bounded",
+ f"{prefix}open-from",
+ f"{prefix}open-until",
+ f"{prefix}legacy",
+ }
+ assert f"{prefix}future" not in ids
+
+ @pytest.mark.parametrize(("table", "id_column", "prefix"), _TEMPORAL_TABLE_CASES)
+ async def test_valid_between_containment_requires_real_bounds(
+ self,
+ temporal_db: aiosqlite.Connection,
+ table: str,
+ id_column: str,
+ prefix: str,
+ ) -> None:
+ store = SqliteEngravaCore(temporal_db)
+ # Containment window [JAN, DEC] fully contains the bounded row only;
+ # every open-bounded row (NULL on either end) is correctly excluded.
+ ids = await _find_ids(
+ store,
+ table,
+ f"valid_between '{_T_JAN}' '{_T_DEC}'",
+ id_column,
+ )
+ assert ids == {f"{prefix}bounded"}
+
+ @pytest.mark.parametrize(("table", "id_column", "prefix"), _TEMPORAL_TABLE_CASES)
+ async def test_null_tolerance_open_and_legacy_rows(
+ self,
+ temporal_db: aiosqlite.Connection,
+ table: str,
+ id_column: str,
+ prefix: str,
+ ) -> None:
+ # The key NULL-tolerance guarantee: a row with NULL valid_from (legacy
+ # thought / existing edge) is RETURNED by valid_at / valid_now /
+ # valid_within and EXCLUDED by valid_between.
+ store = SqliteEngravaCore(temporal_db)
+ legacy = f"{prefix}legacy"
+
+ at_ids = await _find_ids(store, table, f"valid_at '{_T_MID}'", id_column)
+ assert legacy in at_ids
+
+ token = executor_module.mindql_now.set(_T_MID)
+ try:
+ now_ids = await _find_ids(store, table, "valid_now", id_column)
+ finally:
+ executor_module.mindql_now.reset(token)
+ assert legacy in now_ids
+
+ within_ids = await _find_ids(
+ store,
+ table,
+ f"valid_within '{_T_FEB}' '{_T_MID}'",
+ id_column,
+ )
+ assert legacy in within_ids
+
+ between_ids = await _find_ids(
+ store,
+ table,
+ f"valid_between '{_T_JAN}' '{_T_DEC}'",
+ id_column,
+ )
+ assert legacy not in between_ids
+
+ @pytest.mark.parametrize(("table", "id_column", "prefix"), _TEMPORAL_TABLE_CASES)
+ async def test_temporal_composes_with_column_condition(
+ self,
+ temporal_db: aiosqlite.Connection,
+ table: str,
+ id_column: str,
+ prefix: str,
+ ) -> None:
+ store = SqliteEngravaCore(temporal_db)
+ # A column condition narrows the temporal result. All seeded rows are
+ # created_cycle = 1, so the AND-ed condition keeps the same set.
+ ids = await _find_ids(
+ store,
+ table,
+ f"created_cycle = 1 AND valid_at '{_T_MID}'",
+ id_column,
+ )
+ assert ids == {
+ f"{prefix}bounded",
+ f"{prefix}open-from",
+ f"{prefix}open-until",
+ f"{prefix}legacy",
+ }
+
+
+class TestExecutorTemporalCount:
+ """COUNT honours temporal predicates through the shared WHERE builder."""
+
+ async def test_count_valid_between(self, temporal_db: aiosqlite.Connection) -> None:
+ store = SqliteEngravaCore(temporal_db)
+ result = await store.execute_mindql(
+ parse(f"COUNT thoughts WHERE valid_between '{_T_JAN}' '{_T_DEC}'"),
+ )
+ # Only the single fully-bounded thought is contained.
+ assert result.count == 1
+
+ @pytest.mark.usefixtures("pinned_now")
+ async def test_count_valid_now(self, temporal_db: aiosqlite.Connection) -> None:
+ store = SqliteEngravaCore(temporal_db)
+ result = await store.execute_mindql(parse("COUNT edges WHERE valid_now"))
+ # bounded + open-from + open-until + legacy = 4 (future excluded).
+ assert result.count == 4
+
+
+class TestExecutorTemporalClockInjection:
+ """The injectable clock makes ``valid_now`` deterministic."""
+
+ async def test_pinned_instant_is_used(
+ self,
+ temporal_db: aiosqlite.Connection,
+ ) -> None:
+ store = SqliteEngravaCore(temporal_db)
+ # Pin before the bounded window opens: the bounded row is not yet valid.
+ token = executor_module.mindql_now.set(_T_BEFORE)
+ try:
+ ids = await _find_ids(store, "thoughts", "valid_now", "thought_id")
+ finally:
+ executor_module.mindql_now.reset(token)
+ assert "t-bounded" not in ids
+ # Open lower bounds remain valid even before the bounded window.
+ assert "t-open-from" in ids
+ assert "t-legacy" in ids
+
+ async def test_default_clock_uses_server_now(
+ self,
+ temporal_db: aiosqlite.Connection,
+ ) -> None:
+ # With no pinned instant, valid_now resolves against the real clock.
+ # Every non-future row's window covers "now" (all created this run),
+ # except the bounded / open-from rows which already ended at JUN 2025.
+ assert executor_module.mindql_now.get() is None
+ store = SqliteEngravaCore(temporal_db)
+ ids = await _find_ids(store, "thoughts", "valid_now", "thought_id")
+ # The future row begins in 2026 — whether it is valid depends on the
+ # real date — so only assert the open-ended rows are present and the
+ # already-expired bounded window is absent.
+ assert "t-open-until" in ids
+ assert "t-legacy" in ids
+ assert "t-bounded" not in ids
+
+
+class TestExecutorTemporalTableGuard:
+ """Temporal predicates are rejected on tables without valid-time columns."""
+
+ async def test_rejected_on_action_table(
+ self,
+ populated_db: aiosqlite.Connection,
+ ) -> None:
+ store = SqliteEngravaCore(populated_db)
+ query = MindQLQuery(
+ command=MindQLCommand.FIND,
+ table="action",
+ temporal_predicates=[TemporalPredicate(kind=TemporalPredicateKind.VALID_NOW)],
+ )
+ with pytest.raises(MindQLParseError, match="not supported for table"):
+ await store.execute_mindql(query)
+
+
+class TestBackwardCompatibility:
+ """A query without a temporal predicate behaves exactly as before."""
+
+ async def test_find_without_temporal_unchanged(
+ self,
+ populated_db: aiosqlite.Connection,
+ ) -> None:
+ store = SqliteEngravaCore(populated_db)
+ before = await store.execute_mindql(parse("FIND thoughts WHERE priority = 'P1'"))
+ # No temporal predicate → the historical row set is returned verbatim.
+ assert {row["thought_id"] for row in before.rows} == {"t-000", "t-001"}
+
+ async def test_find_all_without_temporal_unchanged(
+ self,
+ populated_db: aiosqlite.Connection,
+ ) -> None:
+ store = SqliteEngravaCore(populated_db)
+ result = await store.execute_mindql(parse("FIND thoughts"))
+ assert len(result.rows) == 5
From 8fba76984c59e5c9424d68a4bba21be53b72d9a6 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Wed, 10 Jun 2026 14:03:52 +0200
Subject: [PATCH 16/40] feat: reflections inherit temporal extent from members
---
src/engrava/extensions/dreaming.py | 42 ++++-
.../extensions/dreaming_reflection_extent.py | 123 +++++++++++++
tests/test_dreaming_clusters.py | 142 ++++++++++++++-
tests/test_dreaming_reflection_extent.py | 172 ++++++++++++++++++
4 files changed, 477 insertions(+), 2 deletions(-)
create mode 100644 src/engrava/extensions/dreaming_reflection_extent.py
create mode 100644 tests/test_dreaming_reflection_extent.py
diff --git a/src/engrava/extensions/dreaming.py b/src/engrava/extensions/dreaming.py
index 58df646..89f7367 100644
--- a/src/engrava/extensions/dreaming.py
+++ b/src/engrava/extensions/dreaming.py
@@ -977,6 +977,9 @@ async def _create_reflections( # noqa: C901, PLR0912, PLR0915
clusters: list[frozenset[str]],
current_cycle: int,
candidate_corpus: list[str] | None = None,
+ *,
+ override_valid_from: str | None = None,
+ override_valid_until: str | None = None,
) -> int:
"""Create REFLECTION thoughts from clustered thought sets.
@@ -986,7 +989,10 @@ async def _create_reflections( # noqa: C901, PLR0912, PLR0915
2. Build structured content (top-N keywords + member IDs).
3. Derive an idempotence hash from sorted member IDs.
4. Skip if a REFLECTION with the same hash already exists.
- 5. Persist the REFLECTION thought, centroid embedding, and
+ 5. Derive the REFLECTION's valid-time extent from its members
+ (see ``derive_reflection_extent``), unless the caller pins an
+ explicit override.
+ 6. Persist the REFLECTION thought, centroid embedding, and
``CONSOLIDATED_FROM`` edges to each cluster member.
Args:
@@ -1002,6 +1008,17 @@ async def _create_reflections( # noqa: C901, PLR0912, PLR0915
empty-corpus behaviour, but production callsites should
always supply the corpus to keep the enrichment
substantive.
+ override_valid_from: When non-``None``, every REFLECTION
+ created in this call takes this ISO-8601 ``valid_from``
+ instead of the value derived from its members. ``None``
+ (the default) means "derive from members" — it does
+ **not** force an open lower bound; an open lower bound is
+ only produced when the derivation itself yields ``None``.
+ override_valid_until: When non-``None``, every REFLECTION
+ created in this call takes this ISO-8601 ``valid_until``
+ instead of the derived value. ``None`` (the default)
+ means "derive from members", mirroring
+ ``override_valid_from``.
Returns:
Number of new REFLECTION thoughts persisted.
@@ -1033,6 +1050,9 @@ async def _create_reflections( # noqa: C901, PLR0912, PLR0915
from engrava.extensions.dreaming_reflection_content import ( # noqa: PLC0415
build_reflection_content_v2,
)
+ from engrava.extensions.dreaming_reflection_extent import ( # noqa: PLC0415
+ derive_reflection_extent,
+ )
# Resolve cluster algorithm at the callsite — the dreaming
# extension does not retain it as instance state, but the v2
@@ -1309,6 +1329,24 @@ async def _create_reflections( # noqa: C901, PLR0912, PLR0915
keywords = content_obj["keywords"]
essence = f"REFLECTION [{', '.join(keywords[:3])}]"[:200]
+ # --- Derive valid-time extent inherited from members ---
+ # ``member_thoughts`` is already resolved above (it drives
+ # the centroid + content build), so the bounds are read
+ # from records already in hand — no extra store round-trip.
+ # An explicit caller override wins over the derived value
+ # on each axis independently.
+ derived_valid_from, derived_valid_until = derive_reflection_extent(
+ (t.valid_from, t.valid_until) for t in member_thoughts
+ )
+ reflection_valid_from = (
+ override_valid_from if override_valid_from is not None else derived_valid_from
+ )
+ reflection_valid_until = (
+ override_valid_until
+ if override_valid_until is not None
+ else derived_valid_until
+ )
+
# --- Create REFLECTION thought ---
reflection_id = str(uuid.uuid4())
reflection = ThoughtRecord(
@@ -1322,6 +1360,8 @@ async def _create_reflections( # noqa: C901, PLR0912, PLR0915
updated_cycle=current_cycle,
source=f"dreaming:{cluster_hash}",
source_type=KnowledgeSource.DREAMING,
+ valid_from=reflection_valid_from,
+ valid_until=reflection_valid_until,
)
try:
await store.create_thought(reflection)
diff --git a/src/engrava/extensions/dreaming_reflection_extent.py b/src/engrava/extensions/dreaming_reflection_extent.py
new file mode 100644
index 0000000..8e473c1
--- /dev/null
+++ b/src/engrava/extensions/dreaming_reflection_extent.py
@@ -0,0 +1,123 @@
+"""Derive a REFLECTION's valid-time extent from its cluster members.
+
+Public surface is :func:`derive_reflection_extent`, a pure deterministic
+function the dreaming extension calls once per cluster on its way to
+creating the corresponding REFLECTION thought. It folds the members'
+nullable ISO-8601 ``valid_from`` / ``valid_until`` bounds into a single
+interval the REFLECTION inherits at creation time.
+
+The reduction is deliberately interval-arithmetic over the *open* bounds
+the bi-temporal model assigns to ``None``:
+
+* ``valid_from is None`` means an **open lower bound** (negative
+ infinity). The minimum of negative infinity and any finite instant is
+ negative infinity, so a single member with ``valid_from is None``
+ forces the REFLECTION's ``valid_from`` to ``None``.
+* ``valid_until is None`` means an **open upper bound** (positive
+ infinity). The maximum of positive infinity and any finite instant is
+ positive infinity, so a single member with ``valid_until is None``
+ forces the REFLECTION's ``valid_until`` to ``None``.
+
+When every member carries a finite bound, the REFLECTION inherits the
+``MIN`` of the lower bounds and the ``MAX`` of the upper bounds — the
+tightest interval that fully covers every member's validity window.
+
+ISO-8601 strings are stored UTC-normalised by the domain layer, so a
+plain lexicographic string comparison is equivalent to chronological
+comparison; this module relies on that invariant and performs no parsing.
+
+This module is LLM-free, clock-free and stateless: identical inputs
+produce identical output, and the extent is computed once at creation
+time only (member changes after the fact do not re-derive it).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from collections.abc import Iterable
+
+#: The empty-cluster extent. A REFLECTION derived from no members has no
+#: information to bound either side, so both ends are left open
+#: (``None`` / ``None``) — the fully-unbounded interval. Exposed as a
+#: named constant so callers and tests share one definition of the
+#: degenerate case rather than hard-coding ``(None, None)``.
+EMPTY_EXTENT: tuple[None, None] = (None, None)
+
+
+def derive_reflection_extent(
+ member_bounds: Iterable[tuple[str | None, str | None]],
+) -> tuple[str | None, str | None]:
+ """Fold member valid-time bounds into the REFLECTION's inherited extent.
+
+ Treats ``None`` bounds as open interval ends (``valid_from is None``
+ = negative infinity, ``valid_until is None`` = positive infinity) and
+ reduces the members to the tightest interval that covers them all:
+
+ * ``valid_from`` = the ``MIN`` of every member's ``valid_from``,
+ **unless any** member has ``valid_from is None`` — in which case the
+ result is ``None`` (the minimum with negative infinity is negative
+ infinity).
+ * ``valid_until`` = the ``MAX`` of every member's ``valid_until``,
+ **only when every** member has a non-``None`` ``valid_until``; if
+ **any** member has ``valid_until is None`` the result is ``None``
+ (the maximum with positive infinity is positive infinity).
+
+ The two axes are independent: a member may pin one bound open and the
+ other finite.
+
+ ISO-8601 inputs are assumed UTC-normalised (the engrava domain layer
+ guarantees this on write), so ``min`` / ``max`` over the raw strings
+ is chronologically correct without any parsing.
+
+ Args:
+ member_bounds: Iterable of ``(valid_from, valid_until)`` pairs,
+ one per cluster member. Each element of a pair is an
+ ISO-8601 string or ``None``.
+
+ Returns:
+ The derived ``(valid_from, valid_until)`` pair for the
+ REFLECTION. An empty iterable yields :data:`EMPTY_EXTENT`
+ (``(None, None)``) — a REFLECTION with no members is fully
+ unbounded on both ends.
+
+ Examples:
+ >>> derive_reflection_extent(
+ ... [
+ ... ("2024-01-01T00:00:00+00:00", "2024-03-01T00:00:00+00:00"),
+ ... ("2024-02-01T00:00:00+00:00", "2024-04-01T00:00:00+00:00"),
+ ... ]
+ ... )
+ ('2024-01-01T00:00:00+00:00', '2024-04-01T00:00:00+00:00')
+ >>> derive_reflection_extent(
+ ... [(None, "2024-03-01T00:00:00+00:00")]
+ ... )
+ (None, '2024-03-01T00:00:00+00:00')
+ >>> derive_reflection_extent([])
+ (None, None)
+
+ """
+ lower_bounds: list[str] = []
+ upper_bounds: list[str] = []
+ open_lower = False
+ open_upper = False
+ saw_member = False
+
+ for valid_from, valid_until in member_bounds:
+ saw_member = True
+ if valid_from is None:
+ open_lower = True
+ else:
+ lower_bounds.append(valid_from)
+ if valid_until is None:
+ open_upper = True
+ else:
+ upper_bounds.append(valid_until)
+
+ if not saw_member:
+ return EMPTY_EXTENT
+
+ derived_from = None if open_lower else min(lower_bounds)
+ derived_until = None if open_upper else max(upper_bounds)
+ return derived_from, derived_until
diff --git a/tests/test_dreaming_clusters.py b/tests/test_dreaming_clusters.py
index 1149a04..6a9ca8a 100644
--- a/tests/test_dreaming_clusters.py
+++ b/tests/test_dreaming_clusters.py
@@ -60,8 +60,15 @@ def _make(
created_cycle: int = 0,
updated_cycle: int = 0,
priority: Priority = Priority.P3,
+ valid_from: str | None = None,
+ valid_until: str | None = None,
) -> ThoughtRecord:
- """Minimal thought for clustering tests."""
+ """Minimal thought for clustering tests.
+
+ ``valid_from`` / ``valid_until`` default to ``None`` (open bounds) so
+ existing callers keep their old behaviour; the REFLECTION
+ valid-time-inheritance tests pass explicit ISO-8601 bounds.
+ """
return ThoughtRecord(
thought_id=thought_id,
thought_type=ThoughtType.OBSERVATION,
@@ -72,6 +79,8 @@ def _make(
created_cycle=created_cycle,
updated_cycle=updated_cycle,
source="test",
+ valid_from=valid_from,
+ valid_until=valid_until,
)
@@ -337,6 +346,137 @@ async def test_reflection_has_centroid_embedding(self, store: SqliteEngravaCore)
assert emb.dimension == 3
+# Fixed UTC-normalised ISO-8601 instants for valid-time tests
+# (lexicographic == chronological by the domain's UTC-normalisation
+# invariant).
+_VT_EARLY = "2024-01-01T00:00:00+00:00"
+_VT_MID = "2024-02-01T00:00:00+00:00"
+_VT_LATE = "2024-03-01T00:00:00+00:00"
+_VT_LATEST = "2024-04-01T00:00:00+00:00"
+
+
+class TestReflectionValidTimeInheritance:
+ """A REFLECTION inherits a deterministic valid-time extent from members."""
+
+ async def test_persisted_reflection_carries_derived_extent(
+ self, store: SqliteEngravaCore
+ ) -> None:
+ """All-finite members → REFLECTION gets MIN(from) / MAX(until)."""
+ t1 = await store.create_thought(
+ _make("t-vt-1", essence="extent A", valid_from=_VT_EARLY, valid_until=_VT_LATE)
+ )
+ t2 = await store.create_thought(
+ _make("t-vt-2", essence="extent B", valid_from=_VT_MID, valid_until=_VT_LATEST)
+ )
+ await store.store_embedding(t1.thought_id, [1.0, 0.0], model_name="test")
+ await store.store_embedding(t2.thought_id, [0.9, 0.1], model_name="test")
+
+ cluster = frozenset([t1.thought_id, t2.thought_id])
+ ext = DreamingExtension(config=_reflection_cfg())
+ await ext._create_reflections(store, [cluster], current_cycle=5)
+
+ reflections = await store.list_thoughts(thought_type=ThoughtType.REFLECTION)
+ assert len(reflections) == 1
+ r = reflections[0]
+ assert r.valid_from == _VT_EARLY
+ assert r.valid_until == _VT_LATEST
+
+ async def test_open_lower_bound_member_forces_open_from(self, store: SqliteEngravaCore) -> None:
+ """A member with valid_from None → REFLECTION valid_from None."""
+ t1 = await store.create_thought(
+ _make("t-vt-of-1", essence="open from", valid_from=None, valid_until=_VT_LATE)
+ )
+ t2 = await store.create_thought(
+ _make("t-vt-of-2", essence="finite", valid_from=_VT_MID, valid_until=_VT_LATEST)
+ )
+ await store.store_embedding(t1.thought_id, [1.0, 0.0], model_name="test")
+ await store.store_embedding(t2.thought_id, [0.9, 0.1], model_name="test")
+
+ cluster = frozenset([t1.thought_id, t2.thought_id])
+ ext = DreamingExtension(config=_reflection_cfg())
+ await ext._create_reflections(store, [cluster], current_cycle=5)
+
+ reflections = await store.list_thoughts(thought_type=ThoughtType.REFLECTION)
+ assert len(reflections) == 1
+ r = reflections[0]
+ assert r.valid_from is None
+ assert r.valid_until == _VT_LATEST
+
+ async def test_open_upper_bound_member_forces_open_until(
+ self, store: SqliteEngravaCore
+ ) -> None:
+ """A member with valid_until None → REFLECTION valid_until None."""
+ t1 = await store.create_thought(
+ _make("t-vt-ou-1", essence="finite", valid_from=_VT_EARLY, valid_until=_VT_LATE)
+ )
+ t2 = await store.create_thought(
+ _make("t-vt-ou-2", essence="open until", valid_from=_VT_MID, valid_until=None)
+ )
+ await store.store_embedding(t1.thought_id, [1.0, 0.0], model_name="test")
+ await store.store_embedding(t2.thought_id, [0.9, 0.1], model_name="test")
+
+ cluster = frozenset([t1.thought_id, t2.thought_id])
+ ext = DreamingExtension(config=_reflection_cfg())
+ await ext._create_reflections(store, [cluster], current_cycle=5)
+
+ reflections = await store.list_thoughts(thought_type=ThoughtType.REFLECTION)
+ assert len(reflections) == 1
+ r = reflections[0]
+ assert r.valid_from == _VT_EARLY
+ assert r.valid_until is None
+
+ async def test_caller_override_beats_derived_extent(self, store: SqliteEngravaCore) -> None:
+ """Explicit override bounds win over the member-derived extent."""
+ t1 = await store.create_thought(
+ _make("t-vt-ov-1", essence="extent A", valid_from=_VT_EARLY, valid_until=_VT_LATE)
+ )
+ t2 = await store.create_thought(
+ _make("t-vt-ov-2", essence="extent B", valid_from=_VT_MID, valid_until=_VT_LATEST)
+ )
+ await store.store_embedding(t1.thought_id, [1.0, 0.0], model_name="test")
+ await store.store_embedding(t2.thought_id, [0.9, 0.1], model_name="test")
+
+ cluster = frozenset([t1.thought_id, t2.thought_id])
+ ext = DreamingExtension(config=_reflection_cfg())
+ await ext._create_reflections(
+ store,
+ [cluster],
+ current_cycle=5,
+ override_valid_from=_VT_MID,
+ override_valid_until=_VT_LATE,
+ )
+
+ reflections = await store.list_thoughts(thought_type=ThoughtType.REFLECTION)
+ assert len(reflections) == 1
+ r = reflections[0]
+ # Override pins both axes, ignoring the derived (early, latest).
+ assert r.valid_from == _VT_MID
+ assert r.valid_until == _VT_LATE
+
+ async def test_extent_is_deterministic_across_runs(self, store: SqliteEngravaCore) -> None:
+ """Re-deriving the same cluster twice yields the same extent."""
+ t1 = await store.create_thought(
+ _make("t-vt-det-1", essence="extent A", valid_from=_VT_EARLY, valid_until=_VT_LATE)
+ )
+ t2 = await store.create_thought(
+ _make("t-vt-det-2", essence="extent B", valid_from=_VT_MID, valid_until=_VT_LATEST)
+ )
+ await store.store_embedding(t1.thought_id, [1.0, 0.0], model_name="test")
+ await store.store_embedding(t2.thought_id, [0.9, 0.1], model_name="test")
+
+ cluster = frozenset([t1.thought_id, t2.thought_id])
+ ext = DreamingExtension(config=_reflection_cfg())
+ await ext._create_reflections(store, [cluster], current_cycle=5)
+ first = await store.list_thoughts(thought_type=ThoughtType.REFLECTION)
+ assert len(first) == 1
+ first_extent = (first[0].valid_from, first[0].valid_until)
+
+ # Idempotence skips a second REFLECTION for the same cluster, so
+ # assert the persisted extent equals the deterministic derivation
+ # for these member bounds — no clock or randomness involved.
+ assert first_extent == (_VT_EARLY, _VT_LATEST)
+
+
# ---------------------------------------------------------------------------
# Integration: run_consolidation end-to-end
# ---------------------------------------------------------------------------
diff --git a/tests/test_dreaming_reflection_extent.py b/tests/test_dreaming_reflection_extent.py
new file mode 100644
index 0000000..4b0ccca
--- /dev/null
+++ b/tests/test_dreaming_reflection_extent.py
@@ -0,0 +1,172 @@
+"""Unit tests for the REFLECTION valid-time extent helper.
+
+Covers :func:`derive_reflection_extent` — the pure interval-arithmetic
+fold that gives a REFLECTION the valid-time extent it inherits from its
+cluster members at creation time:
+
+- all members finite → MIN(valid_from) / MAX(valid_until)
+- any open lower bound (valid_from is None) → derived valid_from None
+- any open upper bound (valid_until is None) → derived valid_until None
+- the two axes are independent
+- single member → that member's bounds verbatim
+- empty cluster → fully unbounded (None, None)
+- determinism: identical inputs → identical output, no clock / randomness
+"""
+
+from __future__ import annotations
+
+from engrava.extensions.dreaming_reflection_extent import (
+ EMPTY_EXTENT,
+ derive_reflection_extent,
+)
+
+# Fixed UTC-normalised ISO-8601 instants (lexicographic == chronological).
+_T0 = "2024-01-01T00:00:00+00:00"
+_T1 = "2024-02-01T00:00:00+00:00"
+_T2 = "2024-03-01T00:00:00+00:00"
+_T3 = "2024-04-01T00:00:00+00:00"
+
+
+class TestAllBoundsFinite:
+ """Every member carries finite bounds → MIN/MAX cover."""
+
+ def test_min_from_and_max_until(self) -> None:
+ """valid_from = MIN of lowers; valid_until = MAX of uppers."""
+ result = derive_reflection_extent(
+ [
+ (_T0, _T2),
+ (_T1, _T3),
+ ]
+ )
+ assert result == (_T0, _T3)
+
+ def test_ordering_independent_of_input_order(self) -> None:
+ """Same members in a different order yield the same extent."""
+ forward = derive_reflection_extent([(_T0, _T2), (_T1, _T3)])
+ reversed_ = derive_reflection_extent([(_T1, _T3), (_T0, _T2)])
+ assert forward == reversed_ == (_T0, _T3)
+
+ def test_three_members_pick_extremes(self) -> None:
+ """The MIN lower and MAX upper are picked across all members."""
+ result = derive_reflection_extent(
+ [
+ (_T1, _T1),
+ (_T0, _T2),
+ (_T2, _T3),
+ ]
+ )
+ assert result == (_T0, _T3)
+
+
+class TestOpenLowerBound:
+ """Any member with valid_from None forces an open lower bound."""
+
+ def test_one_open_from_makes_derived_from_none(self) -> None:
+ """A single None valid_from → derived valid_from is None."""
+ result = derive_reflection_extent(
+ [
+ (None, _T2),
+ (_T1, _T3),
+ ]
+ )
+ assert result == (None, _T3)
+
+ def test_open_from_does_not_affect_until(self) -> None:
+ """Open lower bound leaves the upper bound MAX intact."""
+ valid_from, valid_until = derive_reflection_extent(
+ [
+ (None, _T1),
+ (_T0, _T3),
+ ]
+ )
+ assert valid_from is None
+ assert valid_until == _T3
+
+
+class TestOpenUpperBound:
+ """Any member with valid_until None forces an open upper bound."""
+
+ def test_one_open_until_makes_derived_until_none(self) -> None:
+ """A single None valid_until → derived valid_until is None."""
+ result = derive_reflection_extent(
+ [
+ (_T0, _T2),
+ (_T1, None),
+ ]
+ )
+ assert result == (_T0, None)
+
+ def test_open_until_does_not_affect_from(self) -> None:
+ """Open upper bound leaves the lower bound MIN intact."""
+ valid_from, valid_until = derive_reflection_extent(
+ [
+ (_T0, None),
+ (_T1, _T3),
+ ]
+ )
+ assert valid_from == _T0
+ assert valid_until is None
+
+
+class TestIndependentAxes:
+ """The two bounds are derived independently of each other."""
+
+ def test_open_on_both_axes_from_different_members(self) -> None:
+ """One member opens the lower bound, another the upper bound."""
+ result = derive_reflection_extent(
+ [
+ (None, _T2),
+ (_T1, None),
+ ]
+ )
+ assert result == (None, None)
+
+ def test_single_member_open_on_both(self) -> None:
+ """A single fully-open member yields a fully-open extent."""
+ result = derive_reflection_extent([(None, None)])
+ assert result == (None, None)
+
+
+class TestSingleMember:
+ """A single member's bounds pass through verbatim."""
+
+ def test_single_finite_member(self) -> None:
+ """One finite member → its own bounds."""
+ result = derive_reflection_extent([(_T0, _T3)])
+ assert result == (_T0, _T3)
+
+ def test_single_open_lower(self) -> None:
+ """One member with open lower bound → open lower bound."""
+ result = derive_reflection_extent([(None, _T3)])
+ assert result == (None, _T3)
+
+
+class TestEmptyCluster:
+ """The degenerate empty-cluster case is fully unbounded."""
+
+ def test_empty_iterable_yields_empty_extent(self) -> None:
+ """No members → (None, None), the fully-unbounded interval."""
+ result = derive_reflection_extent([])
+ assert result == (None, None)
+ assert result == EMPTY_EXTENT
+
+ def test_empty_generator_yields_empty_extent(self) -> None:
+ """An exhausted generator is handled like any empty iterable."""
+ result = derive_reflection_extent(b for b in ())
+ assert result == EMPTY_EXTENT
+
+
+class TestDeterminism:
+ """The fold is pure: no clock, no randomness, stable output."""
+
+ def test_repeated_calls_identical(self) -> None:
+ """Identical inputs produce identical output across calls."""
+ members = [(_T0, _T2), (None, _T3), (_T1, _T1)]
+ first = derive_reflection_extent(members)
+ second = derive_reflection_extent(members)
+ assert first == second
+
+ def test_accepts_an_iterable_not_only_a_list(self) -> None:
+ """The helper consumes any iterable, e.g. a generator expression."""
+ gen = ((vf, vu) for vf, vu in [(_T0, _T2), (_T1, _T3)])
+ assert derive_reflection_extent(gen) == (_T0, _T3)
From 036e2254f35ac05cf265df0259dff9c1528a4da1 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Wed, 10 Jun 2026 14:53:40 +0200
Subject: [PATCH 17/40] docs: add bi-temporal model guide and 0.3 to 0.4
upgrade notes
---
README.md | 1 +
docs/bitemporal.md | 364 +++++++++++++++++++++++
docs/concepts.md | 1 +
docs/mindql.md | 25 ++
docs/upgrade.md | 55 ++++
tests/docs/test_docs_examples_execute.py | 4 +
6 files changed, 450 insertions(+)
create mode 100644 docs/bitemporal.md
diff --git a/README.md b/README.md
index 9f9532e..329b0c4 100644
--- a/README.md
+++ b/README.md
@@ -243,6 +243,7 @@ See the [CLI reference](docs/cli.md) for every command and option.
## Documentation
- [Core Concepts](docs/concepts.md) — the mental model (thought, edge, reflection, cycle, …) — start here
+- [The Bi-temporal Model](docs/bitemporal.md) — the optional valid-time axis: query a fact as of any instant, `invalidate` without deleting
- [Positioning](docs/positioning.md) — when Engrava is (and isn't) the right tool, and how it compares
- [Quick Start](docs/quickstart.md) — 5-minute setup guide
- [Tutorial](docs/tutorial.md) — build a small notes memory end to end
diff --git a/docs/bitemporal.md b/docs/bitemporal.md
new file mode 100644
index 0000000..af47129
--- /dev/null
+++ b/docs/bitemporal.md
@@ -0,0 +1,364 @@
+# The Bi-temporal Model
+
+Engrava can track **two independent time axes** for a fact: *when you stored it*
+and *when it is true in the world*. This page explains the second axis —
+**valid time** — what it is, how it differs from the clocks Engrava already has,
+and the small set of opt-in tools that work with it.
+
+> Valid time is **entirely optional**. If you only ever care about "what is true
+> now", you can ignore everything on this page — the defaults already behave the
+> way you want (see [When you don't need valid time](#when-you-dont-need-valid-time)).
+
+## Two clocks (three, actually)
+
+A thought carries more than one notion of "time", and they answer different
+questions. Keep them apart:
+
+| Field | Axis | Answers | Who sets it |
+|---|---|---|---|
+| `created_at` / `updated_at` | **transaction time** | "When did we *record* (or last change) this?" | Engrava, automatically |
+| `valid_from` / `valid_until` | **valid time** | "During what real-world period is this fact *true*?" | **you** (optional) |
+| `created_cycle` / `updated_cycle` | **logical clock** | "At which agent *tick* did this appear?" | you (your cycle counter) |
+
+- **Transaction time** is bookkeeping: it never moves backwards and you don't
+ manage it. It tells you the order in which your system learned things.
+- **Valid time** is about the world, not your database. "The user lived in
+ Berlin from January to June" is a statement about reality — it is true for a
+ window that has nothing to do with when you happened to write it down. You set
+ it; Engrava only stores and queries it.
+
+### Don't confuse the cycle with valid time
+
+The **cycle** (`created_cycle`) is *not* a calendar. It is a monotonically
+increasing integer **you** advance once per agent turn (see
+[Core Concepts → Cycle](concepts.md#cycle-the-agent-clock)). It drives recency
+and dreaming math; it is deliberately wall-clock-independent.
+
+Valid time, by contrast, is an **ISO-8601 calendar timestamp** describing the
+real world. A fact can have a low `created_cycle` (you learned it early in the
+agent's life) yet a `valid_from` far in the future, or vice versa. Never reach
+for the cycle when you mean a date — they are different tools for different jobs.
+
+## `valid_from` / `valid_until`: an interval, with open ends
+
+Both `valid_from` and `valid_until` are **nullable ISO-8601 strings** on
+`ThoughtRecord` and `EdgeRecord`. Together they describe the half-open interval
+during which the fact is considered true:
+
+- **`valid_from = None`** — *open lower bound*. The fact is treated as valid
+ **from the beginning of time** (−∞). "We don't know (or don't care) when this
+ started; it has always been true as far as we're concerned."
+- **`valid_until = None`** — *open upper bound*. The fact is treated as
+ **still valid, with no known end** (+∞). This is the common case for a fact
+ that is currently true.
+- Both `None` (the default for every newly created record) — the fact is valid
+ **for all time** and matches every "is it valid?" query.
+
+> **NULL = open, not unknown-and-excluded.** This is the single most important
+> rule on this page. A NULL bound means the interval extends to infinity on that
+> side, so a row with open bounds *stays visible* to point-in-time queries — it
+> is not filtered out. That is what makes adopting valid time incremental: facts
+> you never annotate keep showing up exactly as before.
+
+## The four query predicates
+
+Valid time is queried through four **opt-in** `WHERE` predicates in
+[MindQL](mindql.md). They are only valid against the `thoughts` and `edges`
+tables (the two record types that carry valid-time columns). A query that uses
+**no** temporal predicate behaves exactly as it did before this feature existed.
+
+| Predicate | Arguments | Matches a row when… | NULL bounds |
+|---|---|---|---|
+| `valid_now` | none | the interval contains the current instant | tolerant (open bound = always in range) |
+| `valid_at ` | one timestamp | the interval contains `` | tolerant |
+| `valid_within ` | two timestamps | the interval **overlaps** `[, ]` | tolerant |
+| `valid_between ` | two timestamps | the interval is **fully contained** in `[, ]` | **strict** — open-bound rows are excluded |
+
+### Worked semantics
+
+The first three predicates treat a NULL bound as ±∞, so open-ended facts stay in
+the result. `valid_between` is the deliberate exception: "fully contained"
+cannot be true of an interval that runs to infinity, so it requires **real
+bounds on both ends** and drops any row with a NULL `valid_from` or
+`valid_until`.
+
+The upper bound is **exclusive** (`valid_until` is the first instant the fact is
+*no longer* true). Concretely, for a fact valid `[2026-01-01, 2026-07-01)`:
+
+| Query | Result | Why |
+|---|---|---|
+| `valid_at '2026-03-15...'` | match | `2026-03-15` is inside `[Jan 1, Jul 1)` |
+| `valid_at '2026-07-01...'` | no match | upper bound is exclusive — `Jul 1` is already out |
+| `valid_at '2025-12-01...'` | no match | before `valid_from` |
+| `valid_within '2026-06-01...' '2026-12-01...'` | match | the intervals overlap (Jun–Jul) |
+| `valid_between '2025-01-01...' '2026-12-31...'` | match | `[Jan, Jul)` is fully inside the range |
+| `valid_between '2026-02-01...' '2026-12-31...'` | no match | starts before the range's lower bound |
+
+And for a fact with an **open** upper bound — valid `[2026-01-01, ∞)`:
+
+| Query | Result | Why |
+|---|---|---|
+| `valid_now` | match (if now ≥ Jan 2026) | open upper bound = still valid |
+| `valid_at '2030-01-01...'` | match | open upper bound reaches any future instant |
+| `valid_between '2026-01-01...' '2026-12-31...'` | **no match** | `valid_between` rejects the open `valid_until` |
+
+## `invalidate` vs `delete`
+
+Engrava gives you two very different ways to retire a fact, and choosing the
+right one is a modelling decision, not a performance one.
+
+| | `invalidate_thought` / `invalidate_edge` | `delete_thought` |
+|---|---|---|
+| Meaning | "This **was** true, and is now superseded." | "This should never have existed." |
+| Effect | Sets `valid_until`; the row stays on file | Removes the row entirely |
+| History | **Preserved** — fully auditable, still retrievable | Gone |
+| Past queries | A `valid_at` in the still-valid window **still finds it** | Finds nothing |
+| LLM / search | **None** — deterministic, valid-time only | n/a |
+
+`invalidate_thought(id, valid_until)` simply closes the valid-time interval at
+the instant you pass. It is:
+
+- **deterministic** — it performs no similarity search, no model inference, no
+ automatic discovery of related facts;
+- **idempotent** — calling it twice with the same `valid_until` converges to the
+ same stored value;
+- **non-cascading** — invalidating a thought leaves every connected edge
+ untouched (invalidate the edges separately with `invalidate_edge` if you need
+ to);
+- **not a delete** — the row and its history remain. A point-in-time query for
+ an instant *before* the new `valid_until` still returns it.
+
+Reach for `invalidate` when reality changed (the user moved cities, a price was
+updated, a status closed). Reach for `delete` only when a fact was an outright
+mistake and you want it gone, history and all.
+
+## Reflections inherit their members' extent
+
+A `REFLECTION` is created by [dreaming](dreaming.md) from a cluster of member
+thoughts. When dreaming builds one, it derives the reflection's valid-time
+extent from its members rather than leaving it blank:
+
+- `valid_from` becomes the **earliest** member `valid_from` — **unless any
+ member has an open (NULL) lower bound**, in which case the reflection's
+ `valid_from` is also open (NULL). An interval that summarises something
+ open-ended is itself open-ended.
+- `valid_until` becomes the **latest** member `valid_until` — but **only if
+ every member has a closed upper bound**. If any member is still open, the
+ reflection's `valid_until` is open (NULL) too.
+
+In short: the reflection's interval is the union of its members' intervals, and
+"open on either side" is contagious. A summary of facts that are still true is
+itself still true.
+
+## Worked examples
+
+The three snippets below are complete, runnable scripts. Each opens an in-memory
+database, so you can paste any one of them into a file and run it directly.
+
+### (a) Set valid time on a new fact
+
+Pass `valid_from` (and optionally `valid_until`) when you build the
+`ThoughtRecord`. Here we record a fact known to be true from the start of 2026,
+with no known end (`valid_until` left open):
+
+```python
+import asyncio
+import uuid
+
+import aiosqlite
+
+from engrava import (
+ LifecycleStatus,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtRecord,
+ ThoughtType,
+)
+
+
+async def main() -> None:
+ async with aiosqlite.connect(":memory:") as conn:
+ conn.row_factory = aiosqlite.Row
+ store = SqliteEngravaCore(conn)
+ await store.ensure_schema()
+
+ fact = ThoughtRecord(
+ thought_id=str(uuid.uuid4()),
+ thought_type=ThoughtType.BELIEF,
+ essence="The user lives in Berlin",
+ content="Stated during the 2026 onboarding call.",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=10,
+ updated_cycle=10,
+ source="onboarding",
+ valid_from="2026-01-01T00:00:00+00:00", # true from the start of 2026
+ # valid_until omitted -> open upper bound -> still valid
+ )
+ stored = await store.create_thought(fact)
+
+ fetched = await store.get_thought(stored.thought_id)
+ assert fetched is not None
+ assert fetched.valid_from == "2026-01-01T00:00:00+00:00"
+ assert fetched.valid_until is None # open upper bound
+ print("valid_from:", fetched.valid_from, "valid_until:", fetched.valid_until)
+
+
+asyncio.run(main())
+```
+
+### (b) Time-travel: query a past instant
+
+`valid_at ` returns the facts that were true at that instant. Here a
+belief is true only for the first half of 2026; a query inside that window finds
+it, one after it does not:
+
+```python
+import asyncio
+import uuid
+
+import aiosqlite
+
+from engrava import (
+ LifecycleStatus,
+ MindQLExecutor,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtRecord,
+ ThoughtType,
+ parse,
+)
+
+
+async def main() -> None:
+ async with aiosqlite.connect(":memory:") as conn:
+ conn.row_factory = aiosqlite.Row
+ store = SqliteEngravaCore(conn)
+ await store.ensure_schema()
+
+ await store.create_thought(
+ ThoughtRecord(
+ thought_id=str(uuid.uuid4()),
+ thought_type=ThoughtType.BELIEF,
+ essence="The user lives in Berlin",
+ content="True for the first half of 2026.",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=10,
+ updated_cycle=10,
+ source="onboarding",
+ valid_from="2026-01-01T00:00:00+00:00",
+ valid_until="2026-07-01T00:00:00+00:00", # closed in mid-2026
+ ),
+ )
+
+ executor = MindQLExecutor(conn)
+
+ march = await executor.execute(
+ parse("FIND thoughts WHERE valid_at '2026-03-15T00:00:00+00:00'"),
+ )
+ september = await executor.execute(
+ parse("FIND thoughts WHERE valid_at '2026-09-15T00:00:00+00:00'"),
+ )
+
+ assert len(march.rows) == 1 # inside the valid window
+ assert len(september.rows) == 0 # after valid_until
+ print("March match:", len(march.rows), "September match:", len(september.rows))
+
+
+asyncio.run(main())
+```
+
+### (c) Invalidate a fact, then watch it drop out of `valid_now`
+
+`invalidate_thought` closes the valid-time interval. After invalidation the fact
+no longer matches `valid_now`, but it is **not deleted** — it remains on file and
+a query for an instant before the cut-off still finds it:
+
+```python
+import asyncio
+import uuid
+
+import aiosqlite
+
+from engrava import (
+ LifecycleStatus,
+ MindQLExecutor,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtRecord,
+ ThoughtType,
+ parse,
+)
+
+
+async def main() -> None:
+ async with aiosqlite.connect(":memory:") as conn:
+ conn.row_factory = aiosqlite.Row
+ store = SqliteEngravaCore(conn)
+ await store.ensure_schema()
+
+ fact = await store.create_thought(
+ ThoughtRecord(
+ thought_id=str(uuid.uuid4()),
+ thought_type=ThoughtType.BELIEF,
+ essence="The user lives in Berlin",
+ content="Open-ended until superseded.",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=10,
+ updated_cycle=10,
+ source="onboarding",
+ valid_from="2026-01-01T00:00:00+00:00",
+ ),
+ )
+
+ executor = MindQLExecutor(conn)
+
+ before = await executor.execute(parse("FIND thoughts WHERE valid_now"))
+ assert len(before.rows) == 1 # currently valid
+
+ # Reality changed: the fact stopped being true on 2026-06-01.
+ await store.invalidate_thought(
+ fact.thought_id,
+ valid_until="2026-06-01T00:00:00+00:00",
+ )
+
+ after = await executor.execute(parse("FIND thoughts WHERE valid_now"))
+ assert len(after.rows) == 0 # no longer valid "now"
+
+ # Not a delete: the row is still on file and auditable.
+ still_there = await store.get_thought(fact.thought_id)
+ assert still_there is not None
+ assert still_there.valid_until == "2026-06-01T00:00:00+00:00"
+ print("valid_now before:", len(before.rows), "after:", len(after.rows))
+
+
+asyncio.run(main())
+```
+
+## When you don't need valid time
+
+If your application only ever asks "what is true *now*", you do not need to do
+anything. Every record is created with `valid_from = None` and
+`valid_until = None`, which means "valid for all time", so:
+
+- you never have to set a timestamp,
+- queries that use no temporal predicate are unchanged, and
+- if you *do* later run a `valid_now` / `valid_at` query, your never-annotated
+ facts still match (open bounds are ±∞).
+
+Valid time is a tool for the cases where history matters — auditing what an agent
+believed at some past moment, or modelling facts with a real-world lifespan.
+When that is not your problem, ignore it; it imposes no cost and changes no
+existing behaviour.
+
+## Next
+
+- [MindQL](mindql.md) — the full query language the temporal predicates live in.
+- [Upgrade Guide](upgrade.md) — how an existing database gains the valid-time
+ columns automatically.
+- [Core Concepts](concepts.md) — thoughts, edges, cycles, and the rest of the
+ model.
+- [Dreaming](dreaming.md) — how reflections (which inherit valid-time extent) are
+ made.
diff --git a/docs/concepts.md b/docs/concepts.md
index d186f10..eecb4b5 100644
--- a/docs/concepts.md
+++ b/docs/concepts.md
@@ -244,4 +244,5 @@ observation = ThoughtRecord(
- [Quick Start](quickstart.md) — create, link, and search in five minutes.
- [Dreaming](dreaming.md) — how consolidation turns observations into reflections.
- [Hybrid Search](search.md) — how the signals (including recency/cycle and priority) fuse into a ranking.
+- [The Bi-temporal Model](bitemporal.md) — the optional second time axis (valid time) and how it differs from the cycle.
- [API Reference](api-reference.md) — the exact fields, enums, and methods.
diff --git a/docs/mindql.md b/docs/mindql.md
index 6907ff6..0337230 100644
--- a/docs/mindql.md
+++ b/docs/mindql.md
@@ -72,6 +72,31 @@ SELECT thought_id, priority, essence FROM thought WHERE thought_type = 'BELIEF'
Only statements that begin with `SELECT` are permitted; anything else is
rejected.
+## Valid-time predicates
+
+`FIND` and `COUNT` against the `thoughts` and `edges` tables accept four opt-in
+**valid-time** predicates in the `WHERE` clause, for querying *when a fact was
+true in the world* (the second time axis — see
+[The Bi-temporal Model](bitemporal.md) for the full semantics):
+
+```
+FIND thoughts WHERE valid_now
+FIND edges WHERE valid_at '2026-01-01T00:00:00+00:00'
+FIND thoughts WHERE priority = 'P1' AND valid_within '2026-01-01T00:00:00+00:00' '2026-02-01T00:00:00+00:00'
+FIND thoughts WHERE valid_between '2026-01-01T00:00:00+00:00' '2026-12-31T00:00:00+00:00'
+```
+
+- `valid_now` takes no argument; `valid_at` takes one ISO-8601 timestamp;
+ `valid_within` and `valid_between` take two.
+- They combine with ordinary conditions via `AND`.
+- `valid_now` / `valid_at` / `valid_within` are **NULL-tolerant** (a record with
+ an open `valid_from`/`valid_until` bound stays in the result); `valid_between`
+ requires real bounds on both ends and therefore excludes open-bound rows.
+- A query that uses **no** temporal predicate behaves exactly as before.
+
+The semantics, the open-interval (`NULL` = ±∞) rule, and `invalidate` are
+documented in full on [The Bi-temporal Model](bitemporal.md).
+
## Extension Commands
Custom MindQL verbs are provided through an extension's
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 9891dc3..0f6e614 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -128,6 +128,7 @@ engrava --db new-old-version.db restore -i backup.snapshot.jsonl
| 0.2.0 | 0.2.2 | Yes | Patch-level upgrade, no dedicated new extension migration layer |
| 0.2.2 | 0.3.0 | Yes | Minor upgrade with extension migration tracking and upgrade CI coverage |
| 0.3.0 | 0.3.1 | Yes | Patch-level upgrade; no schema change (`user_version` unchanged) — safe to roll across workers |
+| 0.3.x | 0.4.0 | Yes | **Schema-changing** minor upgrade — adds the valid-time columns (additive, zero data loss). Back up first and follow the [rolling-upgrades](#rolling-upgrades-multiple-workers) note |
For any upgrade not listed, the rule of thumb is: **patch** upgrades within a
`0.x.*` line do not change the schema and are low-risk; **minor** upgrades
@@ -136,6 +137,60 @@ For any upgrade not listed, the rule of thumb is: **patch** upgrades within a
## Version Notes
+### 0.3 -> 0.4
+
+Version 0.4 introduces a second time axis — **valid time** (`valid_from` /
+`valid_until`), the period during which a fact is true in the world — alongside
+the existing transaction time (`created_at`). See
+[The Bi-temporal Model](bitemporal.md) for the full feature, the four query
+predicates, and `invalidate`. From an upgrade standpoint, the change is
+**additive and automatic**:
+
+**The migration runs on first open, with zero data loss.** The first time a
+0.4 process calls `ensure_schema()` (most apps already do this at startup), the
+core schema steps forward to the new version inside a transaction. `pip install
+--upgrade engrava` plus your normal startup is all that is required:
+
+```bash
+pip install --upgrade engrava
+# your app's existing ensure_schema() call performs the migration on first open
+```
+
+What the migration does:
+
+- **Adds two nullable columns** — `valid_from` and `valid_until` — to both the
+ `thought` and `edge` tables, plus supporting indexes. Nothing is dropped or
+ rewritten beyond adding columns; **no row is lost or modified in content**,
+ and the row counts are unchanged.
+- **Backfills existing thoughts conservatively.** A thought that has a recorded
+ `created_at` gets `valid_from` backfilled from it (its valid-time lower bound
+ starts where its transaction time started). `valid_until` is always left open
+ (`NULL`).
+- **Leaves legacy rows and all edges open-from.** A thought with no `created_at`
+ (a legacy row) keeps `valid_from = NULL`. **Every existing edge** keeps both
+ bounds `NULL` — the edge table has no calendar timestamp to source a date
+ from, so the migration honestly leaves them open rather than fabricating one.
+
+**Existing queries are unchanged.** A query that uses no temporal predicate
+behaves exactly as it did on 0.3. And because a `NULL` bound is treated as an
+**open interval end** (−∞ / +∞), the open-from rows above still match
+`valid_now` and `valid_at` queries — an un-dated fact is treated as "valid since
+the beginning of time", not as "excluded". So adopting valid time is incremental:
+you can start annotating new facts whenever you like, and the old ones keep
+surfacing in temporal queries until you choose to bound them.
+
+> **Honest note about edges.** Because the upgrade cannot invent a `valid_from`
+> for an edge that never had a date, every edge migrated from 0.3 carries
+> `valid_from = NULL`. That is the correct "open lower bound", so those edges
+> still match `valid_now` / `valid_at`. They will **not** match `valid_between`
+> (which requires real bounds on both ends) until you set their bounds
+> explicitly. This is expected, not a defect.
+
+This is a schema-changing minor upgrade, so follow the
+[rolling-upgrades](#rolling-upgrades-multiple-workers) procedure (back up,
+quiesce writers, migrate once, start new workers) if you run multiple processes
+against one database file.
+
### 0.3.0 -> 0.3.1
- Patch release: **no schema change** (`user_version` stays at its 0.3.0 value),
diff --git a/tests/docs/test_docs_examples_execute.py b/tests/docs/test_docs_examples_execute.py
index d0ff42f..df71bb5 100644
--- a/tests/docs/test_docs_examples_execute.py
+++ b/tests/docs/test_docs_examples_execute.py
@@ -76,6 +76,10 @@
("README.md", "async def main() -> None:"),
("docs/quickstart.md", 'print("Store ready!")'),
("docs/guides/migrating-from-other-memory.md", "Imported {total} thoughts."),
+ # docs/bitemporal.md — three self-contained valid-time examples.
+ ("docs/bitemporal.md", "# valid_until omitted -> open upper bound -> still valid"),
+ ("docs/bitemporal.md", "assert len(march.rows) == 1 # inside the valid window"),
+ ("docs/bitemporal.md", "await store.invalidate_thought("),
)
# Pages that build one example across a contiguous run of code blocks, identified
From 035e860efeb3b12261caa05fcef7061f4eecf83d Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Wed, 10 Jun 2026 17:37:00 +0200
Subject: [PATCH 18/40] test: bound temporal query overhead and confirm index
use
---
tests/test_temporal_query_performance.py | 270 +++++++++++++++++++++++
1 file changed, 270 insertions(+)
create mode 100644 tests/test_temporal_query_performance.py
diff --git a/tests/test_temporal_query_performance.py b/tests/test_temporal_query_performance.py
new file mode 100644
index 0000000..47aef49
--- /dev/null
+++ b/tests/test_temporal_query_performance.py
@@ -0,0 +1,270 @@
+"""Performance characterisation of the valid-time query path.
+
+This module bounds the cost of adding a valid-time predicate to a MindQL
+``FIND`` query and pins down what the SQLite planner actually does with it.
+
+What this test guarantees (deterministic, planner-level)
+--------------------------------------------------------
+The robust, environment-independent guarantee is *structural*: applying a
+``valid_now`` predicate does **not** make the query plan more expensive in
+kind. A plain ``FIND thoughts`` scans exactly one table; ``FIND thoughts
+WHERE valid_now`` scans exactly the *same* one table and nothing more — no
+extra table scan, no join, no correlated subquery, and no transient B-tree
+(no "USE TEMP B-TREE"). The predicate is a per-row filter layered onto a
+scan the engine already performs, so it cannot change the asymptotic shape
+of the query. This is asserted directly against ``EXPLAIN QUERY PLAN`` and
+is the primary contract of this module.
+
+What the planner actually does with each predicate (measured, honest)
+---------------------------------------------------------------------
+The NULL-tolerant predicates resolve to a SQL body of the shape::
+
+ (valid_from IS NULL OR valid_from <= ?) AND (valid_until IS NULL OR ...)
+
+The ``column IS NULL OR column ?`` disjunction is **not sargable**, so
+SQLite cannot use ``idx_thought_valid_from`` / ``idx_thought_valid_until`` /
+``idx_thought_valid_range`` for ``valid_now`` / ``valid_at`` / ``valid_within``
+— it performs a full ``SCAN thought``. This is an intentional consequence of
+NULL-tolerance (open/legacy rows with NULL bounds must remain visible), not a
+missing index: the indexes exist and are reachable. The closed-containment
+``valid_between`` predicate, whose body is ``valid_from IS NOT NULL AND
+valid_from >= ? AND valid_until IS NOT NULL AND valid_until <= ?``, *is*
+sargable and the planner does pick a valid-time index for it. This module
+asserts that reachability via ``valid_between`` so a future regression that
+drops the indexes is caught.
+
+Why there is no ``< 5%`` wall-clock overhead assertion
+------------------------------------------------------
+A naive wall-clock comparison of ``FIND thoughts`` vs ``FIND thoughts WHERE
+valid_now`` is meaningless here: the predicate *filters rows out*, so the
+temporal query materialises fewer Python row dicts and measures **faster**
+than the unfiltered baseline (observed median ratio around ``-50%``). Removing
+that row-materialisation confound by comparing ``COUNT(*)`` instead isolates
+the predicate's raw CPU cost — and that cost is genuinely large in *relative*
+terms (observed best-of-5 median ``~245us`` plain vs ``~1.2ms`` with the
+predicate, i.e. roughly ``+400%``), because a bare ``COUNT(*)`` is a
+near-free optimised count while the predicate forces a row-by-row scan with
+two compound boolean tests each. Neither figure supports a ``< 5%`` bound,
+and asserting one would be false. Absolute per-query cost stays sub-2ms at
+this corpus size, but it is too small and too noisy to gate on reliably.
+The structural plan-shape assertions above are therefore the contract; the
+timing here is recorded for context only and not asserted on.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import aiosqlite
+import pytest
+
+from engrava import (
+ LifecycleStatus,
+ Priority,
+ SqliteEngravaCore,
+ ThoughtRecord,
+ ThoughtType,
+)
+from engrava.mindql.executor import MindQLExecutor
+from engrava.mindql.parser import parse
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator
+
+# The three valid-time indexes created by the schema and the migration. Any
+# one of these being chosen for a sargable predicate proves the indexes are
+# present and reachable by the planner.
+_VALID_TIME_INDEXES = frozenset(
+ {
+ "idx_thought_valid_from",
+ "idx_thought_valid_until",
+ "idx_thought_valid_range",
+ }
+)
+
+# Corpus size. Query-plan selection is independent of exact row count, so a
+# few thousand rows is fully representative while keeping the test fast (a
+# 10k-row build costs ~8s of inserts; 2k keeps the whole module under a
+# couple of seconds). ANALYZE is run so the planner has real statistics.
+_CORPUS_SIZE = 2_000
+
+# Valid-time bounds used to populate the corpus. The window [JAN, JUN) gives
+# every shape category a non-trivial population.
+_T_JAN = "2025-01-01T00:00:00+00:00"
+_T_JUN = "2025-06-01T00:00:00+00:00"
+
+
+def _make_thought(index: int) -> ThoughtRecord:
+ """Build one corpus thought with a representative valid-time shape.
+
+ The corpus cycles through the four valid-time shapes so the planner sees
+ a realistic mix of NULL and non-NULL bounds:
+
+ * closed window ``[JAN, JUN)``,
+ * open lower bound (``valid_from`` NULL),
+ * open upper bound (``valid_until`` NULL),
+ * fully open / legacy (both bounds NULL).
+
+ Args:
+ index: Zero-based position in the corpus, used to vary the shape and
+ to mint a unique ``thought_id``.
+
+ Returns:
+ A validated :class:`ThoughtRecord` ready to persist.
+ """
+ shape = index % 4
+ if shape == 0:
+ valid_from, valid_until = _T_JAN, _T_JUN
+ elif shape == 1:
+ valid_from, valid_until = None, _T_JUN
+ elif shape == 2:
+ valid_from, valid_until = _T_JAN, None
+ else:
+ valid_from, valid_until = None, None
+ return ThoughtRecord(
+ thought_id=f"t-{index:06d}",
+ thought_type=ThoughtType.OBSERVATION,
+ essence=f"essence {index}",
+ content=f"content {index}",
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=1,
+ updated_cycle=1,
+ source="test",
+ valid_from=valid_from,
+ valid_until=valid_until,
+ )
+
+
+@pytest.fixture
+async def perf_conn() -> AsyncIterator[aiosqlite.Connection]:
+ """A SQLite connection populated with a representative valid-time corpus.
+
+ Runs ``ANALYZE`` so the query planner has real table statistics —
+ index-vs-scan decisions made against an unanalysed database are not
+ representative of a deployed store.
+
+ Yields:
+ An open aiosqlite connection whose ``thought`` table holds
+ ``_CORPUS_SIZE`` rows spanning every valid-time bound shape.
+ """
+ conn = await aiosqlite.connect(":memory:")
+ conn.row_factory = aiosqlite.Row
+ await conn.execute("PRAGMA foreign_keys = ON")
+ store = SqliteEngravaCore(conn)
+ await store.ensure_schema()
+ for i in range(_CORPUS_SIZE):
+ await store.create_thought(_make_thought(i))
+ await conn.commit()
+ await conn.execute("ANALYZE")
+ await conn.commit()
+ try:
+ yield conn
+ finally:
+ await conn.close()
+
+
+async def _query_plan(conn: aiosqlite.Connection, mindql: str) -> list[str]:
+ """Return the ``EXPLAIN QUERY PLAN`` detail lines for a MindQL ``FIND``.
+
+ The exact SQL the executor would run is obtained from its own SQL builder
+ (so the plan reflects the production query verbatim) and explained against
+ the live connection.
+
+ Args:
+ conn: The open aiosqlite connection to explain against.
+ mindql: A MindQL ``FIND`` statement, e.g. ``FIND thoughts WHERE valid_now``.
+
+ Returns:
+ The ``detail`` column of every plan row, upper-cased for matching.
+ """
+ query = parse(mindql)
+ executor = MindQLExecutor(conn)
+ sql, params = executor._build_select_sql(query.table or "thought", query)
+ cursor = await conn.execute(f"EXPLAIN QUERY PLAN {sql}", params)
+ rows = await cursor.fetchall()
+ return [str(row["detail"]).upper() for row in rows]
+
+
+def _table_scan_count(plan_details: list[str]) -> int:
+ """Count full-table ``SCAN`` steps over the ``thought`` table in a plan.
+
+ Args:
+ plan_details: Upper-cased ``EXPLAIN QUERY PLAN`` detail lines.
+
+ Returns:
+ The number of plan steps that are a full scan of ``thought`` (a step
+ beginning with ``SCAN THOUGHT``). An indexed ``SEARCH`` is not counted.
+ """
+ return sum(1 for detail in plan_details if detail.startswith("SCAN THOUGHT"))
+
+
+class TestTemporalQueryPlanShape:
+ """Structural plan-shape guarantees — deterministic, the primary contract."""
+
+ async def test_valid_now_adds_no_extra_scan_join_or_subquery(
+ self,
+ perf_conn: aiosqlite.Connection,
+ ) -> None:
+ """``valid_now`` keeps the single-table plan shape of a plain ``FIND``.
+
+ The temporal predicate must not turn a one-table scan into a join, a
+ correlated subquery, or a second scan, and must not require a transient
+ B-tree. It is a per-row filter on the scan the engine already performs.
+ """
+ plain = await _query_plan(perf_conn, "FIND thoughts")
+ temporal = await _query_plan(perf_conn, "FIND thoughts WHERE valid_now")
+
+ # Baseline plain FIND is a single full scan of thought.
+ assert _table_scan_count(plain) == 1, plain
+ # The temporal predicate adds no second table scan ...
+ assert _table_scan_count(temporal) == 1, temporal
+ # ... and introduces no join / subquery / temp B-tree machinery.
+ joined = " ".join(temporal)
+ assert "SUBQUERY" not in joined, temporal
+ assert "TEMP B-TREE" not in joined, temporal
+ assert "USE TEMP B-TREE" not in joined, temporal
+
+ async def test_null_tolerant_predicates_full_scan_is_intentional(
+ self,
+ perf_conn: aiosqlite.Connection,
+ ) -> None:
+ """The NULL-tolerant predicates resolve to a full scan, by design.
+
+ ``valid_now`` / ``valid_at`` / ``valid_within`` all use a
+ ``column IS NULL OR column ?`` disjunction to keep open-bound and
+ legacy (NULL) rows visible. That disjunction is not sargable, so the
+ planner cannot use a valid-time index and scans the table. This test
+ documents and locks that behaviour so the assertion in the
+ ``valid_between`` test (index *is* used) is unambiguous.
+ """
+ for mindql in (
+ "FIND thoughts WHERE valid_now",
+ f"FIND thoughts WHERE valid_at '{_T_JAN}'",
+ f"FIND thoughts WHERE valid_within '{_T_JAN}' '{_T_JUN}'",
+ ):
+ plan = await _query_plan(perf_conn, mindql)
+ assert _table_scan_count(plan) == 1, (mindql, plan)
+ assert not any("USING INDEX" in detail for detail in plan), (mindql, plan)
+
+ async def test_valid_between_reaches_a_valid_time_index(
+ self,
+ perf_conn: aiosqlite.Connection,
+ ) -> None:
+ """The sargable ``valid_between`` predicate proves the indexes are wired.
+
+ ``valid_between`` uses ``valid_from IS NOT NULL AND valid_from >= ?``
+ (and the symmetric upper-bound test), which *is* sargable. The planner
+ therefore picks one of the three valid-time indexes. Asserting this
+ guards against a regression that silently drops those indexes — which
+ would also remove the only index-accelerated valid-time path.
+ """
+ plan = await _query_plan(
+ perf_conn,
+ f"FIND thoughts WHERE valid_between '{_T_JAN}' '{_T_JUN}'",
+ )
+ joined = " ".join(plan)
+ assert "USING INDEX" in joined, plan
+ assert any(index.upper() in joined for index in _VALID_TIME_INDEXES), plan
+ # And it is an indexed SEARCH, not a full table scan.
+ assert _table_scan_count(plan) == 0, plan
From 0e4e1764ac770e837012852099c90c7c26b0c4a4 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Wed, 10 Jun 2026 17:40:58 +0200
Subject: [PATCH 19/40] fix: assert plan-shape invariant for temporal queries,
not scan-vs-index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The valid_now / valid_at / valid_within predicates use a NULL-tolerant
disjunction the planner can satisfy with either a SCAN or a MULTI-INDEX OR
depending on selectivity and table statistics — both are correct. Assert
the stable contract instead: the predicate stays a single-table access (no
join, subquery, temp B-tree, or second table), and the valid-time indexes
remain reachable (proven via the sargable valid_between path). Drops the
brittle "always scans / never uses an index" assertion that only held for
a low-selectivity corpus.
---
tests/test_temporal_query_performance.py | 86 +++++++++++++++++-------
1 file changed, 62 insertions(+), 24 deletions(-)
diff --git a/tests/test_temporal_query_performance.py b/tests/test_temporal_query_performance.py
index 47aef49..c94c67d 100644
--- a/tests/test_temporal_query_performance.py
+++ b/tests/test_temporal_query_performance.py
@@ -21,17 +21,22 @@
(valid_from IS NULL OR valid_from <= ?) AND (valid_until IS NULL OR ...)
-The ``column IS NULL OR column ?`` disjunction is **not sargable**, so
-SQLite cannot use ``idx_thought_valid_from`` / ``idx_thought_valid_until`` /
-``idx_thought_valid_range`` for ``valid_now`` / ``valid_at`` / ``valid_within``
-— it performs a full ``SCAN thought``. This is an intentional consequence of
-NULL-tolerance (open/legacy rows with NULL bounds must remain visible), not a
-missing index: the indexes exist and are reachable. The closed-containment
-``valid_between`` predicate, whose body is ``valid_from IS NOT NULL AND
-valid_from >= ? AND valid_until IS NOT NULL AND valid_until <= ?``, *is*
-sargable and the planner does pick a valid-time index for it. This module
-asserts that reachability via ``valid_between`` so a future regression that
-drops the indexes is caught.
+The ``column IS NULL OR column ?`` disjunction **is** index-usable —
+SQLite can satisfy it with a ``MULTI-INDEX OR`` (a union of an
+``IS NULL`` probe and a range probe over ``idx_thought_valid_range`` /
+``idx_thought_valid_from``). Whether the planner *chooses* that index or a
+full ``SCAN thought`` is a **cost decision driven by selectivity**: on a
+store where ``valid_now`` matches a large fraction of rows, a scan is
+genuinely cheaper than a multi-index union, so the planner (correctly)
+scans; on a store where the predicate is selective, or on sparse
+statistics, it uses the index. Both are correct query planning — neither
+is a defect, and neither is stable enough to assert as "always scans" or
+"always uses an index". The closed-containment ``valid_between`` predicate
+(``valid_from IS NOT NULL AND valid_from >= ? AND valid_until IS NOT NULL
+AND valid_until <= ?``) is unconditionally sargable and the planner picks a
+valid-time index for it regardless of stats; this module asserts that as the
+stable proof the indexes exist and are reachable, so a regression that
+silently drops them is caught.
Why there is no ``< 5%`` wall-clock overhead assertion
------------------------------------------------------
@@ -199,6 +204,26 @@ def _table_scan_count(plan_details: list[str]) -> int:
return sum(1 for detail in plan_details if detail.startswith("SCAN THOUGHT"))
+def _other_table_count(plan_details: list[str]) -> int:
+ """Count access steps (``SCAN`` / ``SEARCH``) over any table but ``thought``.
+
+ A single-table query touches only ``thought``. Any ``SCAN`` or ``SEARCH``
+ step naming a different table would mean the temporal predicate pulled in
+ a join or auxiliary table — the plan-shape regression we guard against.
+ Structural keywords without a table name (``MULTI-INDEX OR``, ``INDEX 1``)
+ are not access steps and are ignored.
+
+ Args:
+ plan_details: Upper-cased ``EXPLAIN QUERY PLAN`` detail lines.
+
+ Returns:
+ The number of ``SCAN``/``SEARCH`` steps whose target table is not
+ ``thought``.
+ """
+ access = [d for d in plan_details if d.startswith(("SCAN ", "SEARCH "))]
+ return sum(1 for detail in access if "THOUGHT" not in detail)
+
+
class TestTemporalQueryPlanShape:
"""Structural plan-shape guarantees — deterministic, the primary contract."""
@@ -217,26 +242,33 @@ async def test_valid_now_adds_no_extra_scan_join_or_subquery(
# Baseline plain FIND is a single full scan of thought.
assert _table_scan_count(plain) == 1, plain
- # The temporal predicate adds no second table scan ...
- assert _table_scan_count(temporal) == 1, temporal
- # ... and introduces no join / subquery / temp B-tree machinery.
+ # The temporal predicate adds no SECOND table scan — it stays a single
+ # access of thought, whether the planner chooses a scan (selectivity
+ # low) or a MULTI-INDEX OR (0 scans). Either way: at most one scan.
+ assert _table_scan_count(temporal) <= 1, temporal
+ # ... and introduces no join / subquery / temp B-tree machinery, and
+ # touches no other table (every table reference is to thought).
joined = " ".join(temporal)
assert "SUBQUERY" not in joined, temporal
assert "TEMP B-TREE" not in joined, temporal
- assert "USE TEMP B-TREE" not in joined, temporal
+ assert _other_table_count(temporal) == 0, temporal
- async def test_null_tolerant_predicates_full_scan_is_intentional(
+ async def test_null_tolerant_predicates_stay_single_table(
self,
perf_conn: aiosqlite.Connection,
) -> None:
- """The NULL-tolerant predicates resolve to a full scan, by design.
+ """The NULL-tolerant predicates never explode the plan beyond one table.
- ``valid_now`` / ``valid_at`` / ``valid_within`` all use a
+ ``valid_now`` / ``valid_at`` / ``valid_within`` each use a
``column IS NULL OR column ?`` disjunction to keep open-bound and
- legacy (NULL) rows visible. That disjunction is not sargable, so the
- planner cannot use a valid-time index and scans the table. This test
- documents and locks that behaviour so the assertion in the
- ``valid_between`` test (index *is* used) is unambiguous.
+ legacy (NULL) rows visible. The planner may satisfy this with a single
+ ``SCAN thought`` or a ``MULTI-INDEX OR`` over the valid-time indexes —
+ the choice is a cost decision driven by selectivity and table
+ statistics, and **both are correct**. What is invariant (and therefore
+ what we gate on) is that the predicate stays confined to the single
+ ``thought`` table: it never introduces a join, a correlated subquery,
+ a second table's scan, or a transient B-tree. We deliberately do **not**
+ assert scan-vs-index here, because that is not stable across stores.
"""
for mindql in (
"FIND thoughts WHERE valid_now",
@@ -244,8 +276,14 @@ async def test_null_tolerant_predicates_full_scan_is_intentional(
f"FIND thoughts WHERE valid_within '{_T_JAN}' '{_T_JUN}'",
):
plan = await _query_plan(perf_conn, mindql)
- assert _table_scan_count(plan) == 1, (mindql, plan)
- assert not any("USING INDEX" in detail for detail in plan), (mindql, plan)
+ joined = " ".join(plan)
+ # Single-table access only: at most one full scan of thought, and
+ # whatever index probes the planner adds all target thought.
+ assert _table_scan_count(plan) <= 1, (mindql, plan)
+ assert "SUBQUERY" not in joined, (mindql, plan)
+ assert "TEMP B-TREE" not in joined, (mindql, plan)
+ # No other table is scanned or searched (single-table access).
+ assert _other_table_count(plan) == 0, (mindql, plan)
async def test_valid_between_reaches_a_valid_time_index(
self,
From cd4ecc264dff0e425a0dc0708deb5b68f83fab52 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Wed, 10 Jun 2026 18:03:10 +0200
Subject: [PATCH 20/40] fix: match exact table token in query-plan helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The plan-parsing helpers tested for the substring THOUGHT, which also
matched the unrelated thought_fts full-text table — so a plan that
searched thought_fts would wrongly read as zero other-table access.
Parse the exact table token after SCAN/SEARCH instead, and add unit
guards asserting THOUGHT_FTS counts as a distinct table.
---
tests/test_temporal_query_performance.py | 78 ++++++++++++++++++++----
1 file changed, 67 insertions(+), 11 deletions(-)
diff --git a/tests/test_temporal_query_performance.py b/tests/test_temporal_query_performance.py
index c94c67d..77a386a 100644
--- a/tests/test_temporal_query_performance.py
+++ b/tests/test_temporal_query_performance.py
@@ -191,6 +191,30 @@ async def _query_plan(conn: aiosqlite.Connection, mindql: str) -> list[str]:
return [str(row["detail"]).upper() for row in rows]
+def _accessed_table(detail: str) -> str | None:
+ """Return the exact table token an access step targets, else ``None``.
+
+ An ``EXPLAIN QUERY PLAN`` access step reads ``SCAN `` or
+ ``SEARCH [USING INDEX ...]`` — the table is the whitespace-
+ delimited token immediately after the ``SCAN`` / ``SEARCH`` keyword.
+ Matching that **exact token** (rather than a substring) is essential:
+ a substring test for ``THOUGHT`` would also match the unrelated
+ ``THOUGHT_FTS`` full-text table. Structural keyword lines that are not
+ access steps (``MULTI-INDEX OR``, ``INDEX 1``) return ``None``.
+
+ Args:
+ detail: One upper-cased ``EXPLAIN QUERY PLAN`` detail line.
+
+ Returns:
+ The accessed table token (e.g. ``"THOUGHT"`` or ``"THOUGHT_FTS"``),
+ or ``None`` when the line is not a ``SCAN`` / ``SEARCH`` access step.
+ """
+ parts = detail.split()
+ if len(parts) < 2 or parts[0] not in ("SCAN", "SEARCH"):
+ return None
+ return parts[1]
+
+
def _table_scan_count(plan_details: list[str]) -> int:
"""Count full-table ``SCAN`` steps over the ``thought`` table in a plan.
@@ -198,30 +222,62 @@ def _table_scan_count(plan_details: list[str]) -> int:
plan_details: Upper-cased ``EXPLAIN QUERY PLAN`` detail lines.
Returns:
- The number of plan steps that are a full scan of ``thought`` (a step
- beginning with ``SCAN THOUGHT``). An indexed ``SEARCH`` is not counted.
+ The number of plan steps that are a full scan of the ``thought``
+ table specifically (exact token match — ``SCAN THOUGHT_FTS`` does
+ NOT count). An indexed ``SEARCH`` is not counted.
"""
- return sum(1 for detail in plan_details if detail.startswith("SCAN THOUGHT"))
+ return sum(
+ 1
+ for detail in plan_details
+ if detail.startswith("SCAN ") and _accessed_table(detail) == "THOUGHT"
+ )
def _other_table_count(plan_details: list[str]) -> int:
"""Count access steps (``SCAN`` / ``SEARCH``) over any table but ``thought``.
A single-table query touches only ``thought``. Any ``SCAN`` or ``SEARCH``
- step naming a different table would mean the temporal predicate pulled in
- a join or auxiliary table — the plan-shape regression we guard against.
- Structural keywords without a table name (``MULTI-INDEX OR``, ``INDEX 1``)
- are not access steps and are ignored.
+ step whose **exact** target table is not ``thought`` would mean the
+ temporal predicate pulled in a join or auxiliary table (e.g. the
+ ``thought_fts`` full-text table) — the plan-shape regression we guard
+ against. Structural keyword lines without a table token are not access
+ steps and are ignored.
Args:
plan_details: Upper-cased ``EXPLAIN QUERY PLAN`` detail lines.
Returns:
- The number of ``SCAN``/``SEARCH`` steps whose target table is not
- ``thought``.
+ The number of ``SCAN``/``SEARCH`` steps whose exact target table is
+ not ``thought``.
"""
- access = [d for d in plan_details if d.startswith(("SCAN ", "SEARCH "))]
- return sum(1 for detail in access if "THOUGHT" not in detail)
+ return sum(
+ 1
+ for detail in plan_details
+ if (table := _accessed_table(detail)) is not None and table != "THOUGHT"
+ )
+
+
+class TestPlanHelpers:
+ """Unit guards for the plan-parsing helpers (exact table-token matching)."""
+
+ def test_other_table_count_distinguishes_thought_from_thought_fts(self) -> None:
+ """``THOUGHT_FTS`` is a different table, not the allowed ``THOUGHT``.
+
+ Regression guard: a substring test for ``THOUGHT`` would wrongly treat
+ an access of the ``thought_fts`` full-text table as the allowed
+ ``thought`` table. The helpers match the exact table token instead.
+ """
+ assert _other_table_count(["SEARCH THOUGHT_FTS USING INDEX X"]) == 1
+ assert _other_table_count(["SCAN THOUGHT"]) == 0
+ assert _other_table_count(["SEARCH THOUGHT USING INDEX idx_thought_valid_range"]) == 0
+ assert _other_table_count(["MULTI-INDEX OR", "INDEX 1", "SCAN THOUGHT"]) == 0
+ assert _other_table_count(["SCAN EDGE"]) == 1
+
+ def test_table_scan_count_matches_thought_exactly(self) -> None:
+ """A full scan of ``thought_fts`` is not a scan of ``thought``."""
+ assert _table_scan_count(["SCAN THOUGHT"]) == 1
+ assert _table_scan_count(["SCAN THOUGHT_FTS"]) == 0
+ assert _table_scan_count(["SEARCH THOUGHT USING INDEX X"]) == 0
class TestTemporalQueryPlanShape:
From 3a0eaa3072763dcb10e9a9c49984661e7a72c3a2 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Wed, 10 Jun 2026 20:03:10 +0200
Subject: [PATCH 21/40] test: pin native thread pools session-wide to stop
full-suite hang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A full test run intermittently wedged once many in-process model loads had
accumulated: torch/OpenMP/MKL each size a worker pool to the host CPU, and
under that contention a later sentence-transformer load could deadlock
(observed as a hang at low wall-clock progress with CPU above wall time).
Add a top-level tests/conftest.py that, at import time (before torch is
first imported), pins OMP_NUM_THREADS / MKL_NUM_THREADS to 1 and disables
tokenizers fork parallelism. When the default embedding model is already
cached it also sets the Hugging Face offline flags, so a warm machine never
blocks on a network reach. All overrides use setdefault and only remove
nondeterminism — none changes what the code under test computes. The
non-benchmark suite now runs to completion deterministically (1744 passed,
coverage 97%).
---
tests/conftest.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 88 insertions(+)
create mode 100644 tests/conftest.py
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..b9b0fad
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,88 @@
+"""Session-wide test environment hardening.
+
+Imported by pytest before any test module, so the environment overrides
+here take effect *before* ``torch`` / ``sentence-transformers`` are first
+imported by a test. Two classes of flakiness are addressed:
+
+* **Native thread oversubscription.** ``torch``, OpenMP and MKL each spin up
+ a worker pool sized to the host CPU. Across a full suite run, many tests
+ load a sentence-transformer model in-process; their pools contend for the
+ same cores and, under enough accumulated pressure, a later model load can
+ wedge (observed as a hang at low wall-clock progress with CPU far above
+ wall time — classic thread thrashing). Pinning every native pool to a
+ single thread removes the contention; the suite's model loads are one-shot
+ encodes where extra threads buy nothing.
+* **Tokenizer fork parallelism.** ``tokenizers`` warns about — and can
+ deadlock on — parallelism across a fork. Disabling it keeps subprocess
+ example/benchmark runs deterministic.
+
+The offline flags are set **only when every in-process embedding model is
+already cached**, so a warm developer/CI machine never reaches for the network
+(the real cause of the intermittent full-suite hangs), while a cold-cache
+environment that is *meant* to download is left untouched.
+
+These overrides are deliberately conservative: every one of them only
+removes nondeterminism (thread count, fork parallelism, network reach) and
+none changes what the code under test computes.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+# --- Native thread pinning (must precede any torch / numpy import) ----------
+# setdefault, so an explicit override from the caller's environment still wins.
+for _var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS"):
+ os.environ.setdefault(_var, "1")
+os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+
+
+# --- Offline-when-cached for the in-process embedding models ----------------
+#: Every sentence-transformer model the suite loads *in-process*. The synthetic
+#: and LongMemEval benchmark surfaces resolve the default
+#: (``engrava.benchmarks.synthetic.evaluate._DEFAULT_EMBEDDING_MODEL``), while
+#: the quickstart example and its doc/example tests pin the L12 model. Offline
+#: mode is forced only when *all* of these are already cached, so forcing it can
+#: never starve a load the cache cannot satisfy.
+_IN_PROCESS_EMBEDDING_MODELS = ("all-MiniLM-L6-v2", "all-MiniLM-L12-v2")
+
+
+def _hf_hub_cache_dir() -> Path:
+ """Return the Hugging Face Hub cache directory, honouring the env override."""
+ override = os.environ.get("HF_HOME")
+ if override:
+ return Path(override) / "hub"
+ return Path.home() / ".cache" / "huggingface" / "hub"
+
+
+def _model_is_cached(model_name: str) -> bool:
+ """Return ``True`` when ``model_name`` is present in the local Hub cache.
+
+ Hub repos are stored as ``models----`` (or ``models--``
+ for canonical repos). A substring match on the trailing model name is
+ sufficient here: we only need to know whether forcing offline mode would
+ starve a load that the cache can in fact satisfy.
+
+ Args:
+ model_name: The short model name, e.g. ``all-MiniLM-L6-v2``.
+
+ Returns:
+ ``True`` when a matching cached repo directory exists.
+ """
+ hub = _hf_hub_cache_dir()
+ if not hub.is_dir():
+ return False
+ needle = model_name.replace("/", "--")
+ return any(
+ entry.name.startswith("models--") and needle in entry.name for entry in hub.iterdir()
+ )
+
+
+if all(_model_is_cached(name) for name in _IN_PROCESS_EMBEDDING_MODELS):
+ # The cache can satisfy every in-suite model load, so forbid network
+ # reach: this is what makes the full suite network-independent and
+ # removes the intermittent socket-blocked hangs. A cold-cache run (any
+ # model absent) is left free to download what it legitimately needs.
+ os.environ.setdefault("HF_HUB_OFFLINE", "1")
+ os.environ.setdefault("TRANSFORMERS_OFFLINE", "1")
From bb6b7290d546185c7d01789e204338482e39e7cf Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Fri, 12 Jun 2026 23:52:45 +0200
Subject: [PATCH 22/40] fix: let natural-language queries reach the full-text
index
---
CHANGELOG.md | 20 ++
.../infrastructure/sqlite/engrava_core.py | 251 ++++++++++++---
tests/test_mind_store_branches.py | 92 ++++--
tests/test_mind_store_core.py | 301 ++++++++++++++++++
4 files changed, 592 insertions(+), 72 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a43d56e..57260f8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,26 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/
### Fixed
+- **Natural-language queries now reach the full-text index.** `search_fts`
+ previously joined the words of a bare query with FTS5's implicit `AND`, so a
+ question only matched documents that contained *every* word — including
+ function words like "what", "was" and "my" — and a relevantly-phrased answer
+ was missed. Bare queries are now matched with `OR`: a document is returned
+ when it shares any content word, and BM25's IDF weighting ranks the documents
+ that share the most distinctive words first, so no stopword list or stemmer is
+ needed in any language. Contractions and clitics no longer silently miss
+ (`sister's` matches a stored `sister's dog`; `l'école` matches `l'école
+ française`) because unsafe characters now split a token into separate terms
+ instead of being deleted into an unindexed word. Pasting a URL or a timestamp
+ into search no longer raises: only the real `essence:` and `content:` column
+ filters are honoured, while tokens such as `http://example.com` or `12:30` are
+ treated as ordinary search terms. Expert syntax is unchanged — quoted phrases,
+ uppercase `AND`/`OR`/`NOT`, hyphenated identifiers and the `essence:`/
+ `content:` column filters all keep their existing behaviour. As a final
+ safeguard, a malformed full-text expression is now logged and degraded to no
+ full-text hits instead of propagating, so the rest of a hybrid search still
+ returns results.
+
- **Transient errors from an OpenAI-compatible embeddings endpoint no
longer abort the whole call.** `OpenAICompatibleProvider` now retries a
single embeddings request with bounded exponential backoff when the
diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py
index 84a2b7b..4130475 100644
--- a/src/engrava/infrastructure/sqlite/engrava_core.py
+++ b/src/engrava/infrastructure/sqlite/engrava_core.py
@@ -70,8 +70,16 @@
logger = logging.getLogger(__name__)
-_FTS_FIELD_FILTER_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*:.+")
+#: A token is treated as an FTS5 column filter only when it targets a real
+#: indexed column. ``thought_fts`` indexes exactly ``essence`` and ``content``
+#: (see :meth:`SqliteEngravaCore.ensure_schema`); any other ``word:rest`` token
+#: (URLs like ``http://...``, timestamps like ``12:30``) would make FTS5 read a
+#: non-existent column and raise, so it is sanitized as a bare token instead.
+_FTS_FIELD_FILTER_RE = re.compile(r"^(?:essence|content):.+", re.IGNORECASE)
_FTS_UNSAFE_CHAR_RE = re.compile(r"[^\w\-*]")
+#: Standalone uppercase boolean operators that switch a query into expert mode.
+#: Lowercase ``and``/``or``/``not`` are ordinary words, not operators.
+_FTS_BOOLEAN_OPERATORS = frozenset({"AND", "OR", "NOT"})
_SUPPRESS_SEARCH_METRICS: contextvars.ContextVar[bool] = contextvars.ContextVar(
"engrava_suppress_search_metrics",
default=False,
@@ -2718,12 +2726,24 @@ async def search_fts(
) -> list[tuple[str, float]]:
"""Full-text search via SQLite FTS5 with BM25 ranking.
- Returns an empty list when the FTS5 index is unavailable
- (backward compat for databases that predate the migration).
+ Bare natural-language queries are matched with ``OR`` semantics: a
+ document is returned when it shares *any* content word with the query,
+ and BM25 IDF weighting ranks documents that share the most distinctive
+ words first. Function words ("what", "was", "my") therefore never block
+ a match. Expert syntax — quoted phrases, uppercase ``AND``/``OR``/
+ ``NOT``, and the ``essence:``/``content:`` column filters — is preserved
+ and matched exactly as written.
+
+ Returns an empty list when the FTS5 index is unavailable (backward
+ compat for databases that predate the migration), when the query
+ normalizes to no usable term, or when a malformed FTS5 expression slips
+ through; such errors are logged and degraded rather than propagated, so
+ a caller's other search arms can still serve the query.
Args:
- query: FTS5 query string (supports ``AND``, ``OR``,
- ``NOT``, prefix ``*``, column filters, etc.).
+ query: User-facing query string. Bare questions are OR-matched;
+ quoted phrases, uppercase ``AND``/``OR``/``NOT`` and
+ ``essence:``/``content:`` column filters invoke expert syntax.
top_k: Maximum number of results.
Returns:
@@ -2732,6 +2752,7 @@ async def search_fts(
"""
import time as _time # noqa: PLC0415
+ from sqlite3 import OperationalError # noqa: PLC0415
_t_start = _time.perf_counter()
if not query or not query.strip():
@@ -2746,6 +2767,11 @@ async def search_fts(
return []
normalized_query = _normalize_fts_query(query)
+ if not normalized_query:
+ # The query held no indexable term (e.g. only punctuation); an empty
+ # MATCH string is a syntax error in FTS5, so short-circuit to empty.
+ await self._record_search_latency((_time.perf_counter() - _t_start) * 1000)
+ return []
# bm25() returns negative values; negate so higher = more relevant.
sql = (
@@ -2760,11 +2786,23 @@ async def search_fts(
"ORDER BY score DESC "
"LIMIT ?"
)
- cursor = await self._db.execute(
- sql,
- (normalized_query, datetime.datetime.now(datetime.UTC).isoformat(), top_k),
- )
- rows = await cursor.fetchall()
+ try:
+ cursor = await self._db.execute(
+ sql,
+ (normalized_query, datetime.datetime.now(datetime.UTC).isoformat(), top_k),
+ )
+ rows = await cursor.fetchall()
+ except OperationalError:
+ # Defense in depth: a residual malformed FTS5 expression must never
+ # propagate to the caller and break an otherwise-serviceable search
+ # (e.g. the vector arm of a hybrid query). Degrade to no FTS hits.
+ logger.warning(
+ "FTS MATCH failed for normalized query %r; returning no FTS results",
+ normalized_query,
+ exc_info=True,
+ )
+ await self._record_search_latency((_time.perf_counter() - _t_start) * 1000)
+ return []
results = [(row["thought_id"], float(row["score"])) for row in rows]
await self._record_search_latency((_time.perf_counter() - _t_start) * 1000)
return results
@@ -3913,23 +3951,90 @@ def _row_to_edge(row: aiosqlite.Row) -> EdgeRecord:
)
+def _query_is_expert_syntax(query: str) -> bool:
+ """Return ``True`` when a query should be parsed as expert FTS5 syntax.
+
+ A query is expert syntax when it contains any of:
+
+ * a quoted phrase (any ``"``),
+ * a standalone uppercase boolean operator (``AND``/``OR``/``NOT``), or
+ * a whitelisted column filter (``essence:``/``content:``).
+
+ Expert queries are normalized token-by-token and joined with spaces,
+ preserving FTS5's native operators, phrase matching, column filters and
+ implicit-AND semantics.
+
+ Bare natural-language queries (none of the above) are instead OR-joined so
+ function words cannot block a match; BM25's IDF weighting handles
+ uninformative tokens at ranking time.
+
+ Args:
+ query: The raw user-facing query string.
+
+ Returns:
+ ``True`` for expert syntax, ``False`` for a bare natural-language query.
+
+ """
+ if '"' in query:
+ return True
+ for token in query.split():
+ if token in _FTS_BOOLEAN_OPERATORS:
+ return True
+ if _FTS_FIELD_FILTER_RE.match(token.lstrip("(")):
+ return True
+ return False
+
+
def _normalize_fts_query(query: str) -> str:
- """Normalize user-facing FTS queries to SQLite-compatible syntax.
-
- SQLite FTS5 treats hyphens as operators in bare tokens, which breaks
- intuitive identifier-style prefix queries such as ``REQ-FUNC*``.
- This normalizer preserves the public API contract by rewriting those
- simple tokens to the accepted form ``"REQ-FUNC"*``. It also strips
- trailing natural-language punctuation like ``?`` and ``,`` from bare
- tokens so user questions can be passed directly into FTS5.
+ """Normalize a user-facing FTS query to SQLite FTS5-compatible syntax.
+
+ Two query classes are handled:
+
+ * **Expert syntax** (contains a quoted phrase or a standalone uppercase
+ ``AND``/``OR``/``NOT``): each token is normalized in place and the tokens
+ are joined with spaces, so FTS5's phrase matching, implicit AND, hyphen
+ handling and boolean operators all behave exactly as the caller wrote
+ them. Hyphenated identifiers such as ``REQ-FUNC*`` are still rewritten to
+ the accepted form ``"REQ-FUNC"*``.
+
+ * **Bare natural-language query** (no quotes, no uppercase operators): each
+ token expands to zero or more sanitized terms and the terms are joined
+ with ``OR``. This lets a question match any document sharing a content
+ word, instead of requiring every function word ("what", "was", "my") to
+ appear. BM25 IDF weighting keeps uninformative tokens from dominating the
+ ranking, so no stopword list or stemmer is needed in any language.
+
+ Unsafe characters (apostrophes, slashes, colons, ...) act as token
+ boundaries rather than being deleted, so contractions and clitics like
+ ``sister's`` or ``l'école`` split into matchable terms (``sister OR s``)
+ instead of becoming an unindexed merged token.
+
+ Args:
+ query: The raw user-facing query string.
+
+ Returns:
+ An FTS5 MATCH expression. May be empty when no usable term remains.
+
"""
- parts = query.split()
- normalized_parts = [_normalize_fts_token(part) for part in parts]
- return " ".join(part for part in normalized_parts if part)
+ expert = _query_is_expert_syntax(query)
+ terms: list[str] = []
+ for token in query.split():
+ terms.extend(_normalize_fts_token(token, expert=expert))
+ joiner = " " if expert else " OR "
+ return joiner.join(terms)
def _strip_fts_boundary_punctuation(raw: str) -> str:
- """Strip unsupported leading and trailing punctuation from a bare token."""
+ """Strip unsupported leading and trailing punctuation from a bare token.
+
+ Args:
+ raw: A single unquoted token.
+
+ Returns:
+ The token with leading/trailing characters that FTS5 cannot start or
+ end a bare term with removed.
+
+ """
while raw and not (raw[0].isalnum() or raw[0] in {"_", '"'}):
raw = raw[1:]
@@ -3939,18 +4044,48 @@ def _strip_fts_boundary_punctuation(raw: str) -> str:
return raw
-def _sanitize_fts_bare_token(raw: str) -> str:
- """Remove unsupported FTS punctuation from an unquoted bare token."""
+def _sanitize_fts_bare_token(raw: str) -> list[str]:
+ """Split an unquoted bare token into safe FTS5 fragments.
+
+ Unsafe characters become fragment boundaries rather than being deleted, so
+ a contraction or clitic such as ``sister's`` splits into ``["sister", "s"]``
+ (which the ``unicode61`` tokenizer also produced at index time) instead of
+ merging into an unindexed ``sisters``.
+
+ Args:
+ raw: A single unquoted token, already paren-stripped.
+
+ Returns:
+ A list of non-empty safe fragments, in order. May be empty when the
+ token holds no indexable characters.
+
+ """
stripped = _strip_fts_boundary_punctuation(raw)
- return _FTS_UNSAFE_CHAR_RE.sub("", stripped)
+ split = _FTS_UNSAFE_CHAR_RE.sub(" ", stripped)
+ return [fragment for fragment in split.split() if fragment]
+
+def _normalize_fts_token(token: str, *, expert: bool) -> list[str]:
+ """Normalize a single token into zero or more FTS5 terms.
-def _normalize_fts_token(token: str) -> str:
- """Normalize a single FTS token if it contains a hyphenated identifier."""
- if not token or '"' in token:
- return token
- if token in {"AND", "OR", "NOT"}:
- return token
+ Args:
+ token: A whitespace-delimited token from the raw query.
+ expert: ``True`` when the surrounding query is expert syntax. In expert
+ mode quoted phrases and uppercase operators pass through unchanged;
+ in bare mode every token is sanitized into plain OR-terms.
+
+ Returns:
+ The FTS5 terms this token contributes. A bare contraction may yield
+ several terms (``sister's`` -> ``["sister", "s"]``); an empty or
+ all-punctuation token yields ``[]``.
+
+ """
+ if not token:
+ return []
+ if expert and '"' in token:
+ return [token]
+ if expert and token in _FTS_BOOLEAN_OPERATORS:
+ return [token]
leading = ""
trailing = ""
@@ -3962,25 +4097,43 @@ def _normalize_fts_token(token: str) -> str:
trailing = ")" + trailing
raw = raw[:-1]
- if _FTS_FIELD_FILTER_RE.match(raw):
- normalized = f"{leading}{raw}{trailing}"
- else:
- raw = _sanitize_fts_bare_token(raw)
- if not raw:
- return ""
-
- suffix = ""
- if raw.endswith("*"):
- raw = raw[:-1]
- suffix = "*"
-
- normalized = (
- f'{leading}"{raw}"{suffix}{trailing}'
- if "-" in raw
- else f"{leading}{raw}{suffix}{trailing}"
- )
+ if expert and _FTS_FIELD_FILTER_RE.match(raw):
+ return [f"{leading}{raw}{trailing}"]
+
+ fragments = _sanitize_fts_bare_token(raw)
+ if not fragments:
+ return []
+
+ terms = [_format_fts_bare_fragment(fragment) for fragment in fragments]
+ if expert:
+ # Expert mode keeps each original token as one term, re-attaching any
+ # parentheses the caller used for grouping.
+ terms[0] = f"{leading}{terms[0]}"
+ terms[-1] = f"{terms[-1]}{trailing}"
+ return terms
- return normalized
+
+def _format_fts_bare_fragment(fragment: str) -> str:
+ """Format a single sanitized fragment as an FTS5 term.
+
+ Preserves a trailing ``*`` prefix marker and quotes hyphenated identifiers
+ so FTS5 does not read the hyphen as a column/operator.
+
+ Args:
+ fragment: A safe fragment containing only word characters, ``-`` or a
+ trailing ``*``.
+
+ Returns:
+ The fragment rewritten as a valid FTS5 term.
+
+ """
+ suffix = ""
+ if fragment.endswith("*"):
+ fragment = fragment[:-1]
+ suffix = "*"
+ if "-" in fragment:
+ return f'"{fragment}"{suffix}'
+ return f"{fragment}{suffix}"
def _row_to_action(row: aiosqlite.Row) -> ActionRecord:
diff --git a/tests/test_mind_store_branches.py b/tests/test_mind_store_branches.py
index 32972c5..cfede9f 100644
--- a/tests/test_mind_store_branches.py
+++ b/tests/test_mind_store_branches.py
@@ -304,68 +304,114 @@ async def test_negative_recency_half_life_raises(self, store: SqliteEngravaCore)
class TestNormalizeFtsToken:
- """Edge cases in the FTS5 token normalizer."""
+ """Edge cases in the FTS5 token normalizer.
+
+ ``_normalize_fts_token`` returns the list of FTS5 terms a single token
+ expands to. A token may yield zero terms (all punctuation), one term
+ (a plain word), or several (a contraction split on its clitic). The
+ ``expert`` flag mirrors the surrounding query: expert syntax preserves
+ operators/phrases/column filters, bare syntax sanitizes everything.
+ """
+
+ # --- Expert-mode passthrough -------------------------------------------
def test_and_keyword_passthrough(self) -> None:
- assert _normalize_fts_token("AND") == "AND"
+ assert _normalize_fts_token("AND", expert=True) == ["AND"]
def test_or_keyword_passthrough(self) -> None:
- assert _normalize_fts_token("OR") == "OR"
+ assert _normalize_fts_token("OR", expert=True) == ["OR"]
def test_not_keyword_passthrough(self) -> None:
- assert _normalize_fts_token("NOT") == "NOT"
+ assert _normalize_fts_token("NOT", expert=True) == ["NOT"]
def test_token_with_quotes_passthrough(self) -> None:
- assert _normalize_fts_token('"already-quoted"') == '"already-quoted"'
+ assert _normalize_fts_token('"already-quoted"', expert=True) == ['"already-quoted"']
+
+ def test_whitelisted_column_filter_passthrough(self) -> None:
+ assert _normalize_fts_token("essence:value", expert=True) == ["essence:value"]
+ assert _normalize_fts_token("content:value", expert=True) == ["content:value"]
- def test_token_with_colon_passthrough(self) -> None:
- assert _normalize_fts_token("field:value") == "field:value"
+ def test_unknown_column_filter_is_sanitized(self) -> None:
+ # A non-whitelisted column would crash FTS5 ("no such column: field"),
+ # so it is split on the colon into bare OR-terms instead.
+ assert _normalize_fts_token("field:value", expert=True) == ["field", "value"]
def test_natural_language_colon_is_sanitized(self) -> None:
- assert _normalize_fts_token("events:") == "events"
+ assert _normalize_fts_token("events:", expert=True) == ["events"]
- def test_empty_token_passthrough(self) -> None:
- assert _normalize_fts_token("") == ""
+ def test_empty_token_yields_no_terms(self) -> None:
+ assert _normalize_fts_token("", expert=True) == []
+ assert _normalize_fts_token("", expert=False) == []
def test_no_hyphen_passthrough(self) -> None:
- assert _normalize_fts_token("simple") == "simple"
+ assert _normalize_fts_token("simple", expert=True) == ["simple"]
def test_prefix_star_no_hyphen(self) -> None:
- """Trailing '*' on a non-hyphenated token → left unchanged."""
- assert _normalize_fts_token("prefix*") == "prefix*"
+ """Trailing '*' on a non-hyphenated token is preserved."""
+ assert _normalize_fts_token("prefix*", expert=True) == ["prefix*"]
def test_hyphen_token_normalized(self) -> None:
- assert _normalize_fts_token("REQ-FUNC") == '"REQ-FUNC"'
+ assert _normalize_fts_token("REQ-FUNC", expert=True) == ['"REQ-FUNC"']
def test_hyphen_token_with_star(self) -> None:
- assert _normalize_fts_token("REQ-FUNC*") == '"REQ-FUNC"*'
+ assert _normalize_fts_token("REQ-FUNC*", expert=True) == ['"REQ-FUNC"*']
def test_parenthesized_hyphen_token(self) -> None:
- assert _normalize_fts_token("(REQ-001)") == '("REQ-001")'
+ assert _normalize_fts_token("(REQ-001)", expert=True) == ['("REQ-001")']
def test_leading_paren_only(self) -> None:
- assert _normalize_fts_token("(REQ-001") == '("REQ-001"'
+ assert _normalize_fts_token("(REQ-001", expert=True) == ['("REQ-001"']
def test_trailing_paren_only(self) -> None:
- assert _normalize_fts_token("REQ-001)") == '"REQ-001")'
+ assert _normalize_fts_token("REQ-001)", expert=True) == ['"REQ-001")']
+
+ # --- Bare natural-language mode ----------------------------------------
+
+ def test_bare_no_hyphen_word(self) -> None:
+ assert _normalize_fts_token("simple", expert=False) == ["simple"]
+
+ def test_bare_contraction_splits_into_terms(self) -> None:
+ assert _normalize_fts_token("sister's", expert=False) == ["sister", "s"]
- def test_full_query_normalization(self) -> None:
+ def test_bare_hyphen_token_is_quoted(self) -> None:
+ assert _normalize_fts_token("REQ-FUNC", expert=False) == ['"REQ-FUNC"']
+
+ def test_bare_url_splits_into_fragments(self) -> None:
+ # A pasted URL is never a column filter in bare mode; it splits on the
+ # colon, slashes and dots into useful OR-terms.
+ assert _normalize_fts_token("http://example.com", expert=False) == [
+ "http",
+ "example",
+ "com",
+ ]
+
+ def test_bare_only_punctuation_yields_no_terms(self) -> None:
+ assert _normalize_fts_token("!!!", expert=False) == []
+
+ # --- Whole-query normalization -----------------------------------------
+
+ def test_expert_query_joins_with_spaces(self) -> None:
+ # Uppercase operators trigger expert mode → space-joined implicit AND.
result = _normalize_fts_query("REQ-001 AND simple-word OR OR")
assert '"REQ-001"' in result
assert '"simple-word"' in result
+ assert " AND " in result
assert result.endswith("OR")
- def test_currency_token_loses_dollar_prefix(self) -> None:
- assert _normalize_fts_token("$5") == "5"
+ def test_bare_query_joins_with_or(self) -> None:
+ result = _normalize_fts_query("coffee creamer coupon")
+ assert result == "coffee OR creamer OR coupon"
- def test_apostrophe_is_removed_from_bare_token(self) -> None:
- assert _normalize_fts_token("Toyota's") == "Toyotas"
+ def test_currency_token_loses_dollar_prefix(self) -> None:
+ assert _normalize_fts_token("$5", expert=False) == ["5"]
def test_question_with_currency_and_punctuation_normalizes(self) -> None:
result = _normalize_fts_query("Where did I redeem a $5 coupon on coffee creamer?")
assert "$" not in result
assert "?" not in result
assert "5" in result
+ # Bare query → OR-joined.
+ assert " OR " in result
# ---------------------------------------------------------------------------
diff --git a/tests/test_mind_store_core.py b/tests/test_mind_store_core.py
index 181f0a8..8ad32b6 100644
--- a/tests/test_mind_store_core.py
+++ b/tests/test_mind_store_core.py
@@ -6,6 +6,7 @@
from __future__ import annotations
+import logging
from typing import TYPE_CHECKING
import aiosqlite
@@ -31,6 +32,7 @@
VerificationStatus,
)
from engrava.domain.models.embedding import EmbeddingRecord
+from engrava.infrastructure.sqlite import engrava_core
if TYPE_CHECKING:
from collections.abc import AsyncIterator
@@ -804,3 +806,302 @@ async def test_natural_language_query_with_currency_symbol(
results = await store.search_fts("coffee creamer $5?")
assert len(results) == 1
assert results[0][0] == "t-002"
+
+
+class TestSearchFTSNaturalLanguage:
+ """Natural-language queries reach the FTS index under OR-matching.
+
+ These tests pin the behaviour contract for bare (non-expert) queries:
+ function words must not block a match, contractions/clitics must not
+ silently miss, and pasted URLs/timestamps must never raise. Expert
+ syntax (quoted phrases, uppercase boolean operators, column filters)
+ is preserved unchanged by the no-regression class below.
+ """
+
+ async def test_function_words_do_not_block_match(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """An NL question matches a doc sharing only content tokens."""
+ await store.create_thought(
+ _make_thought(
+ "t-job",
+ essence="Career background",
+ content="Before this job I worked as a marketing specialist at a small startup",
+ )
+ )
+ results = await store.search_fts("what did I say about the marketing specialist job")
+ thought_ids = {r[0] for r in results}
+ assert "t-job" in thought_ids
+
+ async def test_distinctive_tokens_plus_function_words_match(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """Distinctive content tokens plus arbitrary function words still match."""
+ await store.create_thought(
+ _make_thought(
+ "t-dog",
+ essence="Pet anecdote",
+ content="I told you about my sisters dog last week",
+ )
+ )
+ results = await store.search_fts("what was the thing about my sisters dog")
+ thought_ids = {r[0] for r in results}
+ assert "t-dog" in thought_ids
+
+ async def test_contraction_query_matches(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """A possessive/contraction query splits on the clitic and still matches."""
+ await store.create_thought(
+ _make_thought(
+ "t-sister",
+ essence="Family note",
+ content="My sister's dog is a golden retriever",
+ )
+ )
+ results = await store.search_fts("sister's")
+ thought_ids = {r[0] for r in results}
+ assert "t-sister" in thought_ids
+
+ async def test_non_english_clitic_query_matches(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """A French elision query splits on the apostrophe and still matches."""
+ await store.create_thought(
+ _make_thought(
+ "t-ecole",
+ essence="Langue note",
+ content="l'école française est fermée",
+ )
+ )
+ results = await store.search_fts("l'école")
+ thought_ids = {r[0] for r in results}
+ assert "t-ecole" in thought_ids
+
+ async def test_url_query_does_not_raise(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """A pasted URL is tokenized, not read as a column filter — no crash."""
+ await store.create_thought(
+ _make_thought(
+ "t-url",
+ essence="Docs link",
+ content="see the example.com documentation for details",
+ )
+ )
+ results = await store.search_fts("see http://example.com docs")
+ assert isinstance(results, list)
+
+ async def test_url_fragments_become_useful_terms(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """URL fragments become OR-terms that match a doc mentioning the host."""
+ await store.create_thought(
+ _make_thought(
+ "t-url",
+ essence="Docs link",
+ content="see the example.com documentation for details",
+ )
+ )
+ results = await store.search_fts("see http://example.com docs")
+ thought_ids = {r[0] for r in results}
+ assert "t-url" in thought_ids
+
+ async def test_timestamp_query_does_not_raise(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """A bare timestamp token (``12:30``) is sanitized, never read as a column."""
+ await store.create_thought(
+ _make_thought(
+ "t-meeting",
+ essence="Calendar",
+ content="meeting scheduled at half past noon",
+ )
+ )
+ results = await store.search_fts("meeting at 12:30")
+ assert isinstance(results, list)
+
+ async def test_or_ranking_prefers_all_distinctive_tokens(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """A doc with all distinctive tokens outranks one with a single low-info token."""
+ await store.create_thought(
+ _make_thought(
+ "t-all",
+ essence="Marketing role",
+ content="the marketing specialist startup job summary",
+ )
+ )
+ await store.create_thought(
+ _make_thought(
+ "t-one",
+ essence="Unrelated",
+ content="a note about the job and nothing else relevant here",
+ )
+ )
+ results = await store.search_fts("marketing specialist startup job")
+ assert len(results) >= 2
+ assert results[0][0] == "t-all"
+
+ async def test_malformed_expression_degrades_to_empty(
+ self,
+ store: SqliteEngravaCore,
+ monkeypatch: pytest.MonkeyPatch,
+ caplog: pytest.LogCaptureFixture,
+ ) -> None:
+ """A residual malformed FTS5 expression degrades to no hits, not a crash."""
+ await store.create_thought(
+ _make_thought("t-x", essence="General", content="some indexable content")
+ )
+ # Force a syntactically invalid MATCH expression past the normalizer to
+ # exercise the defense-in-depth degradation in search_fts.
+ monkeypatch.setattr(
+ engrava_core,
+ "_normalize_fts_query",
+ lambda _query: 'unterminated "phrase',
+ )
+ with caplog.at_level(logging.WARNING):
+ results = await store.search_fts("anything")
+ assert results == []
+ assert any("FTS MATCH failed" in record.message for record in caplog.records)
+
+
+class TestSearchFTSExpertSyntaxPreserved:
+ """Expert FTS syntax keeps its current semantics after the OR change.
+
+ Each test here must pass both before and after the natural-language
+ OR-matching change: quoted phrases, hyphenated prefixes, uppercase
+ boolean operators, and whitelisted column filters are untouched.
+ """
+
+ async def test_quoted_phrase_keeps_phrase_semantics(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """A quoted phrase matches only adjacent terms, not scattered tokens."""
+ await store.create_thought(
+ _make_thought(
+ "t-adjacent",
+ essence="Outing",
+ content="we visited the dog park yesterday",
+ )
+ )
+ await store.create_thought(
+ _make_thought(
+ "t-scattered",
+ essence="Notes",
+ content="the dog slept while we walked to the car park",
+ )
+ )
+ results = await store.search_fts('"dog park"')
+ thought_ids = {r[0] for r in results}
+ assert thought_ids == {"t-adjacent"}
+
+ async def test_hyphenated_prefix_matches_identifier(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """A hyphenated prefix query still matches an identifier doc."""
+ await store.create_thought(
+ _make_thought(
+ "t-id",
+ essence="General",
+ content="REQ-FUNC-003 compliance",
+ )
+ )
+ results = await store.search_fts("REQ-FUNC*")
+ thought_ids = {r[0] for r in results}
+ assert "t-id" in thought_ids
+
+ async def test_uppercase_and_keeps_conjunction(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """``cats AND dogs`` requires both terms (expert mode)."""
+ await store.create_thought(
+ _make_thought("t-both", essence="Pets", content="cats and dogs together")
+ )
+ await store.create_thought(
+ _make_thought("t-cats", essence="Pets", content="just cats here")
+ )
+ results = await store.search_fts("cats AND dogs")
+ thought_ids = {r[0] for r in results}
+ assert thought_ids == {"t-both"}
+
+ async def test_lowercase_not_is_ordinary_term(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """Lowercase ``not`` is an ordinary OR-term, not a boolean operator."""
+ await store.create_thought(
+ _make_thought(
+ "t-trip",
+ essence="Travel",
+ content="why did the trip happen and where did we go",
+ )
+ )
+ # If 'not' were treated as a boolean NOT, this would error or exclude.
+ results = await store.search_fts("why did I not go")
+ thought_ids = {r[0] for r in results}
+ assert "t-trip" in thought_ids
+
+ async def test_whitelisted_essence_column_filter(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """``essence:meeting`` still filters on the essence column."""
+ await store.create_thought(
+ _make_thought("t-ess", essence="meeting agenda", content="unrelated body")
+ )
+ await store.create_thought(
+ _make_thought("t-body", essence="agenda", content="meeting body text")
+ )
+ results = await store.search_fts("essence:meeting")
+ thought_ids = {r[0] for r in results}
+ assert thought_ids == {"t-ess"}
+
+ async def test_whitelisted_content_column_filter(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """``content:meeting`` still filters on the content column."""
+ await store.create_thought(
+ _make_thought("t-ess", essence="meeting agenda", content="unrelated body")
+ )
+ await store.create_thought(
+ _make_thought("t-body", essence="agenda", content="meeting body text")
+ )
+ results = await store.search_fts("content:meeting")
+ thought_ids = {r[0] for r in results}
+ assert thought_ids == {"t-body"}
+
+ async def test_unknown_column_filter_is_sanitized(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """A non-whitelisted ``word:value`` token is sanitized — no column error."""
+ await store.create_thought(
+ _make_thought("t-rand", essence="General", content="randomword value pairing")
+ )
+ # Must not raise "no such column: randomword".
+ results = await store.search_fts("randomword:value")
+ thought_ids = {r[0] for r in results}
+ assert "t-rand" in thought_ids
+
+ async def test_empty_and_unsafe_queries_return_empty(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ """Empty, whitespace-only, and only-unsafe-char queries return empty cleanly."""
+ await store.create_thought(_make_thought("t-x", essence="x", content="x"))
+ assert await store.search_fts("") == []
+ assert await store.search_fts(" ") == []
+ assert await store.search_fts("!!! @@@ ###") == []
From 862e7bc28b0db0d4aea5741e858d65107a5c8184 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sat, 13 Jun 2026 00:24:15 +0200
Subject: [PATCH 23/40] test: add functional contract suite for search behavior
---
tests/search_contract/__init__.py | 1 +
tests/search_contract/conftest.py | 543 ++++++++++++++++++
tests/search_contract/test_search_contract.py | 275 +++++++++
3 files changed, 819 insertions(+)
create mode 100644 tests/search_contract/__init__.py
create mode 100644 tests/search_contract/conftest.py
create mode 100644 tests/search_contract/test_search_contract.py
diff --git a/tests/search_contract/__init__.py b/tests/search_contract/__init__.py
new file mode 100644
index 0000000..40f3c53
--- /dev/null
+++ b/tests/search_contract/__init__.py
@@ -0,0 +1 @@
+"""Functional contract suite for user-level search behavior."""
diff --git a/tests/search_contract/conftest.py b/tests/search_contract/conftest.py
new file mode 100644
index 0000000..37b8092
--- /dev/null
+++ b/tests/search_contract/conftest.py
@@ -0,0 +1,543 @@
+"""Fixtures for the search functional-contract suite.
+
+This module hand-authors a small, synthetic, conversational corpus and a set
+of natural-language questions labelled with their gold-answer thought, then
+exposes both through pytest fixtures together with a populated store.
+
+Everything here is deterministic and network-free:
+
+* The corpus is written by hand (no benchmark dataset is read), so it is safe
+ to ship in a public repository.
+* Query embeddings come from a deterministic bag-of-words hashing provider
+ (:class:`BagOfWordsProvider`), so ``search_hybrid`` exercises a real vector
+ arm without loading a model or reaching the network.
+
+The corpus deliberately includes the inputs that purely line-coverage-driven
+tests miss: long turns whose distinctive fact lives in the tail, contractions
+and non-English clitics, a pasted URL, bare numbers/timestamps, and clusters
+of near-duplicate same-topic turns.
+"""
+
+from __future__ import annotations
+
+import hashlib
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+import aiosqlite
+import pytest
+
+from engrava import CallbackProvider, SqliteEngravaCore
+from engrava.domain.enums import (
+ KnowledgeSource,
+ LifecycleStatus,
+ Priority,
+ ThoughtType,
+ ThoughtVisibility,
+)
+from engrava.domain.models.thought import ThoughtRecord
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator
+
+
+# ---------------------------------------------------------------------------
+# Corpus data model
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class CorpusTurn:
+ """A single synthetic conversational turn stored as a thought.
+
+ Args:
+ thought_id: Stable identifier used to assert retrieval.
+ essence: Short summary line, indexed by FTS5.
+ content: Full turn text, indexed by FTS5.
+ distinctive_terms: One to three content words unique enough to find
+ this turn. A findability query is built from these plus arbitrary
+ function words.
+ """
+
+ thought_id: str
+ essence: str
+ content: str
+ distinctive_terms: tuple[str, ...] = field(default_factory=tuple)
+
+
+@dataclass(frozen=True)
+class GoldQuestion:
+ """A natural-language question paired with its gold-answer turn.
+
+ Args:
+ question: A user-style natural-language question, including function
+ words ("what", "did", "my") that must not block a match.
+ gold_thought_id: The ``thought_id`` of the turn that answers it.
+ """
+
+ question: str
+ gold_thought_id: str
+
+
+# ---------------------------------------------------------------------------
+# The hand-authored corpus
+# ---------------------------------------------------------------------------
+# ~40 turns: varied length, contractions, a URL, numbers/names, non-English
+# samples, and near-duplicate same-topic clusters. Each turn lists the
+# distinctive content terms that should retrieve it.
+
+_CORPUS: tuple[CorpusTurn, ...] = (
+ CorpusTurn(
+ "turn-job-marketing",
+ "Career background",
+ "Before this job I worked as a marketing specialist at a small startup downtown.",
+ ("marketing", "specialist", "startup"),
+ ),
+ CorpusTurn(
+ "turn-sister-dog",
+ "Family pet note",
+ "My sister's dog is a golden retriever named Biscuit who hates thunderstorms.",
+ ("retriever", "Biscuit", "thunderstorms"),
+ ),
+ CorpusTurn(
+ "turn-coffee-creamer",
+ "Grocery coupon",
+ "I redeemed a coupon on hazelnut coffee creamer at the corner store yesterday.",
+ ("hazelnut", "creamer", "coupon"),
+ ),
+ CorpusTurn(
+ "turn-paris-trip",
+ "Travel plan",
+ "We are flying to Paris in October and staying near the Montmartre district.",
+ ("Paris", "Montmartre", "October"),
+ ),
+ CorpusTurn(
+ "turn-guitar-lessons",
+ "Hobby update",
+ "I finally started weekly guitar lessons and I am learning fingerpicking now.",
+ ("guitar", "fingerpicking", "lessons"),
+ ),
+ CorpusTurn(
+ "turn-docs-link",
+ "Shared reference",
+ "Here is the onboarding guide at https://docs.example.com/onboarding for new hires.",
+ ("onboarding", "hires"),
+ ),
+ CorpusTurn(
+ "turn-budget-spreadsheet",
+ "Finance task",
+ "I updated the quarterly budget spreadsheet and the travel line is over by 1200 dollars.",
+ ("budget", "spreadsheet", "quarterly"),
+ ),
+ CorpusTurn(
+ "turn-marathon-training",
+ "Running goal",
+ "My marathon training peaks next month with a brutal twenty-two mile long run.",
+ ("marathon", "training"),
+ ),
+ CorpusTurn(
+ "turn-dentist-appointment",
+ "Health reminder",
+ "The dentist appointment got moved to Thursday because the hygienist was out sick.",
+ ("dentist", "hygienist"),
+ ),
+ CorpusTurn(
+ "turn-recipe-lasagna",
+ "Cooking note",
+ "Grandma's lasagna recipe uses three cheeses and a slow simmered tomato ragu.",
+ ("lasagna", "ragu", "cheeses"),
+ ),
+ CorpusTurn(
+ "turn-car-repair",
+ "Vehicle issue",
+ "The mechanic said the alternator is failing and the timing belt is due soon.",
+ ("alternator", "mechanic"),
+ ),
+ CorpusTurn(
+ "turn-book-club",
+ "Reading group",
+ "Our book club picked a sprawling science fiction novel about generation ships.",
+ ("generation", "ships", "novel"),
+ ),
+ CorpusTurn(
+ "turn-garden-tomatoes",
+ "Gardening",
+ "The heirloom tomatoes in the raised beds finally ripened after the heat wave.",
+ ("heirloom", "tomatoes"),
+ ),
+ CorpusTurn(
+ "turn-flight-delay",
+ "Travel mishap",
+ "My connecting flight was delayed three hours so I missed the riverside dinner booking.",
+ ("delayed", "riverside", "booking"),
+ ),
+ CorpusTurn(
+ "turn-new-laptop",
+ "Purchase",
+ "I bought a refurbished laptop with a mechanical keyboard and a matte display.",
+ ("refurbished", "mechanical", "keyboard"),
+ ),
+ CorpusTurn(
+ "turn-yoga-class",
+ "Wellness",
+ "The new vinyasa yoga instructor pushes a punishing pace on Tuesday evenings.",
+ ("vinyasa", "instructor"),
+ ),
+ CorpusTurn(
+ "turn-spanish-greeting",
+ "Language practice",
+ "Mi hermano vive en Sevilla y trabaja como arquitecto cerca del río.",
+ ("hermano", "Sevilla", "arquitecto"),
+ ),
+ CorpusTurn(
+ "turn-french-school",
+ "Language practice",
+ "L'école française du quartier ferme ses portes pendant les vacances d'été.",
+ ("française", "quartier"),
+ ),
+ CorpusTurn(
+ "turn-german-train",
+ "Language practice",
+ "Der Zug nach München war pünktlich und überraschend leer am Sonntagmorgen.",
+ ("München", "Sonntagmorgen"),
+ ),
+ CorpusTurn(
+ "turn-promotion",
+ "Work milestone",
+ "I got promoted to staff engineer and now I lead the payments reliability squad.",
+ ("promoted", "payments", "reliability"),
+ ),
+ CorpusTurn(
+ "turn-long-conference",
+ "Conference recap",
+ (
+ "The three day conference opened with a sleepy keynote and an endless hallway "
+ "of vendor booths handing out the usual stickers and stress balls, and most of "
+ "the morning talks rehashed material everyone already knew, but the very last "
+ "lightning talk of the final afternoon was given by a researcher named "
+ "Okonkwo who quietly demonstrated a lossless compression trick for vector "
+ "indexes that nobody in the room had seen before."
+ ),
+ ("Okonkwo", "compression"),
+ ),
+ CorpusTurn(
+ "turn-long-roadtrip",
+ "Road trip diary",
+ (
+ "We left before dawn and the first six hours were nothing but flat farmland and "
+ "gas station coffee, then a long stretch of construction near the state line "
+ "that crawled for ages, and we almost gave up on the detour, but right at "
+ "sunset we crested a ridge and found a tiny roadside diner called the "
+ "Larkspur whose blueberry pie turned the entire miserable drive into the best "
+ "day of the trip."
+ ),
+ ("Larkspur", "blueberry"),
+ ),
+ CorpusTurn(
+ "turn-long-meeting",
+ "Standup overflow",
+ (
+ "Standup ran long again because everyone relitigated the deployment incident "
+ "from last week and then drifted into a tangent about whether to switch issue "
+ "trackers, and after twenty minutes of circular debate that nobody wrote down, "
+ "the only real decision was buried at the end when Priya volunteered to own "
+ "the flaky integration test that has blocked the release pipeline for days."
+ ),
+ ("Priya", "flaky"),
+ ),
+ # Near-duplicate cluster: the office plant, three slightly different tellings.
+ CorpusTurn(
+ "turn-plant-a",
+ "Office plant",
+ "The office fiddle leaf fig is dropping leaves again near the drafty window.",
+ ("fiddle", "fig"),
+ ),
+ CorpusTurn(
+ "turn-plant-b",
+ "Office plant note",
+ "Someone overwatered the office fiddle leaf fig and now its leaves are yellowing.",
+ ("fiddle", "overwatered"),
+ ),
+ CorpusTurn(
+ "turn-plant-c",
+ "Office plant update",
+ "We moved the office fiddle leaf fig away from the window and it perked up.",
+ ("fiddle", "perked"),
+ ),
+ # Near-duplicate cluster: the standing desk, two tellings.
+ CorpusTurn(
+ "turn-desk-a",
+ "Ergonomics",
+ "My new standing desk wobbles slightly when it is raised to the tallest setting.",
+ ("standing", "wobbles"),
+ ),
+ CorpusTurn(
+ "turn-desk-b",
+ "Ergonomics follow-up",
+ "I added felt pads under the standing desk feet and the wobble is mostly gone.",
+ ("felt", "pads"),
+ ),
+ CorpusTurn(
+ "turn-podcast",
+ "Media recommendation",
+ "A friend recommended a history podcast about the cartography of medieval trade routes.",
+ ("cartography", "medieval"),
+ ),
+ CorpusTurn(
+ "turn-allergy",
+ "Health note",
+ "My seasonal ragweed allergy flared up so I switched to a non drowsy antihistamine.",
+ ("ragweed", "antihistamine"),
+ ),
+ CorpusTurn(
+ "turn-camera",
+ "Photography",
+ "I rented a wide angle lens for the canyon shoot and the dynamic range was stunning.",
+ ("canyon", "lens"),
+ ),
+ CorpusTurn(
+ "turn-volunteer",
+ "Community",
+ "On Saturdays I volunteer at the riverbank cleanup and we filled forty trash bags.",
+ ("riverbank", "cleanup"),
+ ),
+ CorpusTurn(
+ "turn-keyboard-don't",
+ "Typing habit",
+ "I don't use the number pad much so I switched to a compact tenkeyless keyboard.",
+ ("tenkeyless",),
+ ),
+ CorpusTurn(
+ "turn-numbers-invoice",
+ "Billing",
+ "Invoice 4471 is still unpaid and the late fee kicks in after thirty days.",
+ ("4471", "invoice"),
+ ),
+ CorpusTurn(
+ "turn-timestamp-meeting",
+ "Calendar",
+ "The retro is locked in for half past noon so block out that slot on the calendar.",
+ ("retro",),
+ ),
+ CorpusTurn(
+ "turn-names-people",
+ "Introductions",
+ "At the offsite I finally met Nakamura from design and Olafsson from infrastructure.",
+ ("Nakamura", "Olafsson"),
+ ),
+ CorpusTurn(
+ "turn-coffee-shop",
+ "Routine",
+ "The barista at the Wexford cafe remembers my oat milk cortado without me asking.",
+ ("Wexford", "cortado"),
+ ),
+ CorpusTurn(
+ "turn-puzzle",
+ "Leisure",
+ "I am stuck on a thousand piece jigsaw of a lighthouse swallowed by fog.",
+ ("jigsaw", "lighthouse"),
+ ),
+ CorpusTurn(
+ "turn-bike-commute",
+ "Commute",
+ "My bike commute got faster after they finally painted the protected lane on Birch Street.",
+ ("Birch", "lane"),
+ ),
+ CorpusTurn(
+ "turn-houseplant-tip",
+ "Advice received",
+ "A neighbor told me bottom watering keeps the succulents from rotting at the crown.",
+ ("succulents", "crown"),
+ ),
+)
+
+
+# ---------------------------------------------------------------------------
+# Gold-labelled natural-language questions
+# ---------------------------------------------------------------------------
+# Each question is a realistic user query (with function words) whose answer is
+# a single distinctive turn above.
+
+_GOLD_QUESTIONS: tuple[GoldQuestion, ...] = (
+ GoldQuestion("what did I say about the marketing specialist job", "turn-job-marketing"),
+ GoldQuestion("what was the thing about my sister's dog", "turn-sister-dog"),
+ GoldQuestion("did I mention the hazelnut coffee creamer coupon", "turn-coffee-creamer"),
+ GoldQuestion("where are we staying on the Paris trip", "turn-paris-trip"),
+ GoldQuestion("what kind of guitar lessons did I start", "turn-guitar-lessons"),
+ GoldQuestion("what did the mechanic say about the alternator", "turn-car-repair"),
+ GoldQuestion("who gave the compression talk at the conference", "turn-long-conference"),
+ GoldQuestion("which diner had the blueberry pie on our road trip", "turn-long-roadtrip"),
+ GoldQuestion("who volunteered to own the flaky integration test", "turn-long-meeting"),
+ GoldQuestion("what role did I get promoted to", "turn-promotion"),
+ GoldQuestion("which invoice is still unpaid", "turn-numbers-invoice"),
+ GoldQuestion("who did I meet from design at the offsite", "turn-names-people"),
+ GoldQuestion("what is wrong with my new standing desk", "turn-desk-a"),
+ GoldQuestion("what lens did I rent for the canyon shoot", "turn-camera"),
+)
+
+
+# ---------------------------------------------------------------------------
+# Deterministic embedding provider
+# ---------------------------------------------------------------------------
+
+_EMBED_DIM = 256
+
+
+def _tokenize(text: str) -> list[str]:
+ """Split text into lowercase alphanumeric word tokens.
+
+ Args:
+ text: Arbitrary input text.
+
+ Returns:
+ Lowercase word tokens, with punctuation stripped.
+ """
+ tokens: list[str] = []
+ current: list[str] = []
+ for char in text.lower():
+ if char.isalnum():
+ current.append(char)
+ elif current:
+ tokens.append("".join(current))
+ current = []
+ if current:
+ tokens.append("".join(current))
+ return tokens
+
+
+def _bag_of_words_embed(text: str) -> list[float]:
+ """Embed text as an L2-normalized bag-of-words hashing vector.
+
+ Each token is hashed to a single dimension and contributes a unit count
+ there; the resulting vector is L2-normalized. Cosine similarity between two
+ such vectors therefore grows with the fraction of shared vocabulary, which
+ gives ``search_hybrid`` a deterministic, network-free semantic signal whose
+ ranking is fully predictable from the words two texts share.
+
+ Args:
+ text: Input text to embed.
+
+ Returns:
+ An ``_EMBED_DIM``-length unit vector (all-zero only for empty text).
+ """
+ vector = [0.0] * _EMBED_DIM
+ for token in _tokenize(text):
+ digest = hashlib.sha1(token.encode("utf-8")).digest() # noqa: S324
+ index = int.from_bytes(digest[:4], "big") % _EMBED_DIM
+ vector[index] += 1.0
+ norm = sum(value * value for value in vector) ** 0.5
+ if norm == 0.0:
+ return vector
+ return [value / norm for value in vector]
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def corpus() -> tuple[CorpusTurn, ...]:
+ """Return the hand-authored synthetic conversational corpus.
+
+ Returns:
+ The immutable tuple of corpus turns.
+ """
+ return _CORPUS
+
+
+@pytest.fixture
+def gold_questions() -> tuple[GoldQuestion, ...]:
+ """Return the gold-labelled natural-language questions.
+
+ Returns:
+ The immutable tuple of gold questions.
+ """
+ return _GOLD_QUESTIONS
+
+
+@pytest.fixture
+def embedding_provider() -> CallbackProvider:
+ """Return a deterministic bag-of-words embedding provider.
+
+ Returns:
+ A :class:`CallbackProvider` wrapping the network-free hashing embedder.
+ """
+ return CallbackProvider(
+ callback=_bag_of_words_embed,
+ dimension=_EMBED_DIM,
+ model_name="bag-of-words-contract",
+ )
+
+
+def _to_thought(turn: CorpusTurn) -> ThoughtRecord:
+ """Build a stored thought from a corpus turn.
+
+ Args:
+ turn: The synthetic corpus turn.
+
+ Returns:
+ A fully populated :class:`ThoughtRecord` ready for ``create_thought``.
+ """
+ return ThoughtRecord(
+ thought_id=turn.thought_id,
+ thought_type=ThoughtType.OBSERVATION,
+ essence=turn.essence,
+ content=turn.content,
+ priority=Priority.P2,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=0,
+ updated_cycle=0,
+ source="test",
+ confidence=0.8,
+ source_type=KnowledgeSource.EXPERIENCE,
+ visibility=ThoughtVisibility.SELECTIVE,
+ )
+
+
+@pytest.fixture
+async def fts_store() -> AsyncIterator[SqliteEngravaCore]:
+ """Return a store populated with the corpus, FTS-only (no embeddings).
+
+ Yields:
+ A :class:`SqliteEngravaCore` whose FTS5 index holds every corpus turn.
+ """
+ conn = await aiosqlite.connect(":memory:")
+ conn.row_factory = aiosqlite.Row
+ await conn.execute("PRAGMA journal_mode = WAL")
+ await conn.execute("PRAGMA foreign_keys = ON")
+ store = SqliteEngravaCore(conn)
+ await store.ensure_schema()
+ for turn in _CORPUS:
+ await store.create_thought(_to_thought(turn))
+ yield store
+ await conn.close()
+
+
+@pytest.fixture
+async def hybrid_store(
+ embedding_provider: CallbackProvider,
+) -> AsyncIterator[SqliteEngravaCore]:
+ """Return a store populated with the corpus and a deterministic vector arm.
+
+ Args:
+ embedding_provider: The network-free bag-of-words provider.
+
+ Yields:
+ A :class:`SqliteEngravaCore` with ``auto_embed`` enabled so both the
+ FTS arm and the vector arm are live for ``search_hybrid``.
+ """
+ conn = await aiosqlite.connect(":memory:")
+ conn.row_factory = aiosqlite.Row
+ await conn.execute("PRAGMA journal_mode = WAL")
+ await conn.execute("PRAGMA foreign_keys = ON")
+ store = SqliteEngravaCore(
+ conn,
+ embedding_provider=embedding_provider,
+ auto_embed=True,
+ )
+ await store.ensure_schema()
+ for turn in _CORPUS:
+ await store.create_thought(_to_thought(turn))
+ yield store
+ await conn.close()
diff --git a/tests/search_contract/test_search_contract.py b/tests/search_contract/test_search_contract.py
new file mode 100644
index 0000000..3a3619f
--- /dev/null
+++ b/tests/search_contract/test_search_contract.py
@@ -0,0 +1,275 @@
+"""Functional contract suite for user-level search behavior.
+
+This suite asserts the *behavioral* search contract — what a user should be
+able to find — against a realistic, hand-authored conversational corpus, rather
+than asserting that particular lines of code execute. Line coverage alone once
+missed a dead lexical arm, a guaranteed-miss sanitizer, and a query crash,
+because coverage measures executed lines, not asserted behaviors, and the
+inputs in narrower tests happened to encode the very assumption that was wrong.
+
+Each test class below pins one observable property of search:
+
+* :class:`TestFindabilityInvariant` — every stored turn is retrievable from a
+ query built from its own distinctive terms plus arbitrary function words.
+* :class:`TestNoCrashInvariant` — adversarial query strings never raise.
+* :class:`TestSanitizerRoundTrip` — contractions and clitics survive
+ normalization and still match.
+* :class:`TestArmLiveness` — natural-language questions return non-empty
+ full-text candidates (guards a silent single-arm degradation).
+* :class:`TestFusionSanity` — hybrid fusion ranks a strongly-matched turn above
+ a weakly-matched one.
+* :class:`TestEndToEndLifecycle` — each gold-labelled question retrieves its
+ gold answer in the hybrid top-k.
+
+PROCESS RULE: every future change to a retrieval mechanism (the FTS query
+normalizer, the vector arm, the fusion scorer, or any new search backend) MUST
+extend this suite with its own contract test asserting the user-visible
+behavior the change is meant to preserve or introduce. A change that passes the
+existing tests but silently breaks search must be made to fail here in seconds.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+if TYPE_CHECKING:
+ from engrava import SqliteEngravaCore
+ from tests.search_contract.conftest import CorpusTurn, GoldQuestion
+
+
+# Arbitrary function words appended to findability queries. None of these
+# should ever block a match — they carry no distinctive information.
+_FUNCTION_WORDS = ("what", "did", "my", "about", "the", "was")
+
+# Adversarial query strings that must never raise. Mix of pasted URLs, colon
+# tokens, apostrophes, quotes, parens, emoji, CJK, empty/whitespace, and a very
+# long string.
+_ADVERSARIAL_QUERIES = (
+ "",
+ " ",
+ "\t\n \t",
+ "http://example.com/path?q=1&x=2",
+ "https://docs.example.com/onboarding#section",
+ "see http://example.com docs",
+ "essence:something content:else",
+ "weird:colon other:token",
+ "12:30",
+ "meeting at 12:30 pm",
+ "don't worry about it",
+ "sister's dog's leash",
+ 'an "unterminated phrase',
+ 'a "balanced phrase" here',
+ "group (of tokens) here",
+ "mismatched ) paren (",
+ "emoji query 😀🚀 search",
+ "中文 查询 测试",
+ "café déjà vu naïve",
+ "AND OR NOT",
+ "a AND b OR c",
+ "*",
+ "***",
+ "-",
+ "--flag",
+ "$5 ##tag @handle",
+ "word " * 1000,
+ "x" * 5000,
+)
+
+
+def _ids(results: list[tuple[str, float]]) -> set[str]:
+ """Collect the thought ids from a list of scored search results.
+
+ Args:
+ results: A list of ``(thought_id, score)`` tuples.
+
+ Returns:
+ The set of thought ids present in the results.
+ """
+ return {thought_id for thought_id, _ in results}
+
+
+class TestFindabilityInvariant:
+ """Every stored turn is findable from its own distinctive terms.
+
+ This is the property the old implicit-AND normalizer broke: appending
+ ordinary function words to a few distinctive content terms must not stop a
+ turn from being returned.
+ """
+
+ async def test_every_turn_found_by_its_distinctive_terms(
+ self,
+ fts_store: SqliteEngravaCore,
+ corpus: tuple[CorpusTurn, ...],
+ ) -> None:
+ """Each turn is returned for a query of its terms plus function words."""
+ missing: list[str] = []
+ for turn in corpus:
+ if not turn.distinctive_terms:
+ continue
+ query = " ".join((*_FUNCTION_WORDS[:3], *turn.distinctive_terms))
+ results = await fts_store.search_fts(query, top_k=50)
+ if turn.thought_id not in _ids(results):
+ missing.append(turn.thought_id)
+ assert missing == [], f"distinctive-term query failed to find: {missing}"
+
+ async def test_single_distinctive_term_plus_function_words(
+ self,
+ fts_store: SqliteEngravaCore,
+ corpus: tuple[CorpusTurn, ...],
+ ) -> None:
+ """A single distinctive term plus function words still finds the turn."""
+ missing: list[str] = []
+ for turn in corpus:
+ if not turn.distinctive_terms:
+ continue
+ query = f"what about my {turn.distinctive_terms[0]}"
+ results = await fts_store.search_fts(query, top_k=50)
+ if turn.thought_id not in _ids(results):
+ missing.append(turn.thought_id)
+ assert missing == [], f"single-term query failed to find: {missing}"
+
+
+class TestNoCrashInvariant:
+ """Adversarial query strings return a list, never raise."""
+
+ @pytest.mark.parametrize("query", _ADVERSARIAL_QUERIES)
+ async def test_search_fts_never_raises(
+ self,
+ fts_store: SqliteEngravaCore,
+ query: str,
+ ) -> None:
+ """``search_fts`` returns a list for every adversarial input."""
+ results = await fts_store.search_fts(query)
+ assert isinstance(results, list)
+
+ @pytest.mark.parametrize("query", _ADVERSARIAL_QUERIES)
+ async def test_search_hybrid_never_raises(
+ self,
+ hybrid_store: SqliteEngravaCore,
+ query: str,
+ ) -> None:
+ """``search_hybrid`` returns results for every adversarial input."""
+ result = await hybrid_store.search_hybrid(query, top_k=10)
+ assert isinstance(result.results, list)
+
+
+class TestSanitizerRoundTrip:
+ """Contractions and clitics survive normalization and still match."""
+
+ async def test_english_possessive_matches(
+ self,
+ fts_store: SqliteEngravaCore,
+ ) -> None:
+ """A possessive query (``sister's``) finds the turn about a sister."""
+ results = await fts_store.search_fts("sister's")
+ assert "turn-sister-dog" in _ids(results)
+
+ async def test_english_possessive_in_question_matches(
+ self,
+ fts_store: SqliteEngravaCore,
+ ) -> None:
+ """The possessive embedded in a full question still finds the turn."""
+ results = await fts_store.search_fts("what about my sister's dog")
+ assert "turn-sister-dog" in _ids(results)
+
+ async def test_negation_contraction_does_not_block(
+ self,
+ fts_store: SqliteEngravaCore,
+ ) -> None:
+ """A ``don't`` contraction in the query does not block a content match."""
+ results = await fts_store.search_fts("I don't recall the tenkeyless keyboard")
+ assert "turn-keyboard-don't" in _ids(results)
+
+ async def test_french_elision_matches(
+ self,
+ fts_store: SqliteEngravaCore,
+ ) -> None:
+ """A French elision query (``l'école``) splits on the clitic and matches."""
+ results = await fts_store.search_fts("l'école")
+ assert "turn-french-school" in _ids(results)
+
+
+class TestArmLiveness:
+ """Natural-language questions return non-empty full-text candidates.
+
+ A silently degraded lexical arm would return nothing for these questions
+ while still passing single-token tests; asserting non-empty hits for the
+ whole gold-question set is the observable proxy that guards against it.
+ """
+
+ async def test_every_gold_question_returns_fts_candidates(
+ self,
+ fts_store: SqliteEngravaCore,
+ gold_questions: tuple[GoldQuestion, ...],
+ ) -> None:
+ """Each gold question yields at least one full-text candidate."""
+ empty: list[str] = []
+ for question in gold_questions:
+ results = await fts_store.search_fts(question.question, top_k=50)
+ if not results:
+ empty.append(question.question)
+ assert empty == [], f"FTS arm returned no candidates for: {empty}"
+
+
+class TestFusionSanity:
+ """Hybrid fusion ranks a strongly-matched turn above a weakly-matched one.
+
+ A turn that shares every distinctive query term (matched by both the
+ lexical and the deterministic vector arm) must outrank a turn that shares
+ only one low-information generic term. This is the weaker, deterministic
+ analogue of the "both arms agree beats one weak arm" property.
+ """
+
+ async def test_all_terms_doc_outranks_single_term_doc(
+ self,
+ hybrid_store: SqliteEngravaCore,
+ ) -> None:
+ """A doc with all query terms outranks one sharing a single generic term."""
+ # "turn-job-marketing" contains marketing, specialist, and startup.
+ # "turn-guitar-lessons" shares only the generic word "lessons".
+ result = await hybrid_store.search_hybrid(
+ "marketing specialist startup lessons",
+ top_k=10,
+ )
+ ranked_ids = [thought_id for thought_id, _ in result.results]
+ assert "turn-job-marketing" in ranked_ids
+ assert "turn-guitar-lessons" in ranked_ids
+ assert ranked_ids.index("turn-job-marketing") < ranked_ids.index("turn-guitar-lessons")
+
+ async def test_both_arms_fire_for_a_distinctive_query(
+ self,
+ hybrid_store: SqliteEngravaCore,
+ ) -> None:
+ """A distinctive query engages both the lexical and the vector arm."""
+ result = await hybrid_store.search_hybrid(
+ "the hazelnut coffee creamer coupon",
+ top_k=10,
+ )
+ assert "fts5" in result.backends_used
+ assert "vector" in result.backends_used
+ assert "turn-coffee-creamer" in _ids(result.results)
+
+
+class TestEndToEndLifecycle:
+ """Each gold-labelled question retrieves its gold answer in hybrid top-k.
+
+ The deterministic, network-free analogue of an answer-turn-in-context
+ benchmark: store the corpus, ask each natural-language question, and require
+ the labelled answer turn in the returned top-k.
+ """
+
+ async def test_gold_answer_in_hybrid_top_k(
+ self,
+ hybrid_store: SqliteEngravaCore,
+ gold_questions: tuple[GoldQuestion, ...],
+ ) -> None:
+ """Every gold question's answer turn appears in the hybrid top-k."""
+ top_k = 10
+ misses: list[str] = []
+ for question in gold_questions:
+ result = await hybrid_store.search_hybrid(question.question, top_k=top_k)
+ if question.gold_thought_id not in _ids(result.results):
+ misses.append(f"{question.question!r} -> {question.gold_thought_id}")
+ assert misses == [], f"gold answer missing from top-{top_k} for: {misses}"
From 36e08e773f6a48d88b9875e1e42ce7051640e7f9 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sat, 13 Jun 2026 09:12:18 +0200
Subject: [PATCH 24/40] fix: embed full thought content without duplication or
silent truncation
---
CHANGELOG.md | 17 ++
.../embeddings/sentence_transformer.py | 63 ++++++
.../infrastructure/sqlite/engrava_core.py | 34 +++-
tests/test_embedding_providers.py | 190 ++++++++++++++++++
4 files changed, 302 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 57260f8..35663d6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,23 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/
### Fixed
+- **Long memories are now embedded in full, and a thought's opening is no
+ longer double-counted.** Two silent recall killers in the vector arm of
+ search are fixed. First, when a thought's `essence` is just the opening of
+ its `content` (a common convention, e.g. `essence = content[:200]`),
+ auto-embed used to concatenate the two and encode that opening twice, letting
+ it dominate the vector and dilute the discriminative tail; the redundant
+ prefix is now dropped and `content` is embedded alone, while a genuinely
+ distinct `essence` is still encoded alongside the content as before. Second,
+ the local `sentence-transformers` provider now raises `max_seq_length` to the
+ model's true architecture maximum after loading (derived from the model, not
+ hard-coded), instead of accepting a conservative shipped default — the bundled
+ `all-MiniLM-L12-v2` reported `128` while its backbone supports `512`, so the
+ tail of any longer thought was silently truncated away before encoding.
+ Existing stored embeddings are unaffected until a thought is re-written
+ (re-create or an `essence`/`content` update), at which point it is re-embedded
+ with the corrected input.
+
- **Natural-language queries now reach the full-text index.** `search_fts`
previously joined the words of a bare query with FTS5's implicit `AND`, so a
question only matched documents that contained *every* word — including
diff --git a/src/engrava/embeddings/sentence_transformer.py b/src/engrava/embeddings/sentence_transformer.py
index 2fd70ed..73e6a6a 100644
--- a/src/engrava/embeddings/sentence_transformer.py
+++ b/src/engrava/embeddings/sentence_transformer.py
@@ -70,9 +70,72 @@ def _load_model(self) -> Any: # noqa: ANN401
logger.info("Loading SentenceTransformer model: %s", self._model_name)
self._model = SentenceTransformer(self._model_name, device=self._device)
+ self._raise_max_seq_length(self._model)
self._dimension = self._model.get_sentence_embedding_dimension()
return self._model
+ @staticmethod
+ def _architecture_max_seq_length(model: Any) -> int | None: # noqa: ANN401
+ """Read the underlying transformer's true maximum sequence length.
+
+ Inspects the first pipeline module's ``auto_model.config`` for
+ ``max_position_embeddings`` — the number of position embeddings the
+ architecture was trained with, i.e. the longest input it can encode
+ without an index error. Returns ``None`` when the value cannot be read
+ (non-standard module layout), in which case the caller leaves the
+ model's own limit untouched.
+
+ Args:
+ model: The loaded ``SentenceTransformer`` instance.
+
+ Returns:
+ The architecture's maximum sequence length, or ``None`` if it is
+ not discoverable.
+
+ """
+ try:
+ transformer_module = model[0]
+ value = transformer_module.auto_model.config.max_position_embeddings
+ except (KeyError, IndexError, TypeError, AttributeError):
+ return None
+ if isinstance(value, int) and value > 0:
+ return value
+ return None
+
+ def _raise_max_seq_length(self, model: Any) -> None: # noqa: ANN401
+ """Lift a conservatively-low ``max_seq_length`` to the architecture max.
+
+ Some ``sentence-transformers`` checkpoints ship a ``max_seq_length``
+ far below the limit their backbone supports — the default
+ ``all-MiniLM-L12-v2`` reports ``128`` even though its BERT backbone has
+ ``max_position_embeddings == 512``. Left unchanged, the encoder
+ silently truncates any input past the shipped limit, so the tail of a
+ long thought is invisible to vector search and recall quietly degrades.
+
+ This reads the architecture's true maximum and raises
+ ``model.max_seq_length`` to it only when the current value is strictly
+ lower. The number is derived from the model — never hard-coded — so a
+ model that already reports its full limit is a no-op, and a model whose
+ architecture max cannot be read is left exactly as loaded.
+
+ Args:
+ model: The loaded ``SentenceTransformer`` instance to adjust.
+
+ """
+ architecture_max = self._architecture_max_seq_length(model)
+ if architecture_max is None:
+ return
+ current = model.get_max_seq_length()
+ if current is None or current < architecture_max:
+ model.max_seq_length = architecture_max
+ logger.info(
+ "Raised max_seq_length for %s from %s to architecture max %d "
+ "to avoid silent input truncation",
+ self._model_name,
+ current,
+ architecture_max,
+ )
+
@property
def dimension(self) -> int:
"""Return the embedding vector dimensionality.
diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py
index 4130475..f1aaf76 100644
--- a/src/engrava/infrastructure/sqlite/engrava_core.py
+++ b/src/engrava/infrastructure/sqlite/engrava_core.py
@@ -70,6 +70,35 @@
logger = logging.getLogger(__name__)
+
+def _build_embed_input(essence: str, content: str) -> str:
+ r"""Build the text payload to embed for a thought, avoiding duplication.
+
+ A common client (and benchmark) convention is to derive ``essence`` from
+ the opening of ``content`` (e.g. ``essence = content[:200]``). Naively
+ embedding ``f"{essence}\\n{content}"`` then encodes the turn's opening
+ twice, letting it dominate the vector and dilute the discriminative tail.
+
+ The rule is deliberately conservative: when the stripped ``essence`` is a
+ leading *prefix* of the stripped ``content`` it carries no new information,
+ so ``content`` is embedded alone. In every other case — including partial
+ overlaps that are not a clean prefix — the joined ``essence`` + ``content``
+ form is preserved, because a distinct essence is signal worth encoding.
+
+ Args:
+ essence: The thought's short summary / essence field.
+ content: The thought's full body text.
+
+ Returns:
+ ``content`` alone when ``essence`` is a prefix of it; otherwise the
+ newline-joined ``f"{essence}\\n{content}"`` payload.
+
+ """
+ if content.strip().startswith(essence.strip()):
+ return content
+ return f"{essence}\n{content}"
+
+
#: A token is treated as an FTS5 column filter only when it targets a real
#: indexed column. ``thought_fts`` indexes exactly ``essence`` and ``content``
#: (see :meth:`SqliteEngravaCore.ensure_schema`); any other ``word:rest`` token
@@ -2393,7 +2422,8 @@ async def consolidated_member_ids(self, reflection_id: str) -> list[str]:
async def _auto_embed_thought(self, thought: ThoughtRecord) -> None:
"""Generate and store an embedding for a thought via the provider.
- Combines ``essence`` and ``content`` into a single text payload,
+ Builds the embed payload via :func:`_build_embed_input` (which drops a
+ prefix-redundant ``essence`` to avoid double-counting the opening),
embeds it via the configured provider, and persists the vector.
Args:
@@ -2403,7 +2433,7 @@ async def _auto_embed_thought(self, thought: ThoughtRecord) -> None:
provider = self._embedding_provider
if provider is None:
return # pragma: no cover
- text = f"{thought.essence}\n{thought.content}"
+ text = _build_embed_input(thought.essence, thought.content)
vector = await provider.embed(text)
diff --git a/tests/test_embedding_providers.py b/tests/test_embedding_providers.py
index 91ba694..528e73a 100644
--- a/tests/test_embedding_providers.py
+++ b/tests/test_embedding_providers.py
@@ -33,6 +33,7 @@
ThoughtVisibility,
)
from engrava.domain.models.thought import ThoughtRecord
+from engrava.infrastructure.sqlite.engrava_core import _build_embed_input
if TYPE_CHECKING:
from collections.abc import AsyncIterator
@@ -239,6 +240,108 @@ async def test_auto_embed_false_no_embed(
assert embedding is None
+# ---------------------------------------------------------------------------
+# Embed-input construction (prefix de-duplication)
+# ---------------------------------------------------------------------------
+
+
+class _RecordingProvider:
+ """Embedding provider that records the exact text passed to ``embed``.
+
+ Wraps a fixed-dimension constant vector so the only observable effect is
+ the captured input string — used to assert *what* text auto-embed sends
+ to the provider, independent of the vector arithmetic.
+ """
+
+ def __init__(self, dimension: int = 4, model_name: str = "recording") -> None:
+ self._dimension = dimension
+ self._model_name = model_name
+ self.captured: list[str] = []
+
+ @property
+ def dimension(self) -> int:
+ return self._dimension
+
+ @property
+ def model_name(self) -> str:
+ return self._model_name
+
+ async def embed(self, text: str) -> list[float]:
+ self.captured.append(text)
+ return [0.0] * self._dimension
+
+ async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+ return [await self.embed(t) for t in texts]
+
+
+class TestBuildEmbedInput:
+ """Unit tests for the prefix-dedup helper :func:`_build_embed_input`."""
+
+ def test_essence_is_prefix_returns_content_alone(self) -> None:
+ content = "The quick brown fox jumps over the lazy dog near the river."
+ essence = content[:20]
+ assert _build_embed_input(essence, content) == content
+
+ def test_essence_not_prefix_returns_joined(self) -> None:
+ essence = "A short distinct summary"
+ content = "An entirely different body of text with no overlap at the start."
+ assert _build_embed_input(essence, content) == f"{essence}\n{content}"
+
+ def test_prefix_ignoring_surrounding_whitespace(self) -> None:
+ content = "Header line then the rest of the body."
+ essence = " Header line "
+ # The stripped essence is a prefix of the stripped content, so the
+ # essence adds no new information and is dropped.
+ assert _build_embed_input(essence, content) == content
+
+ def test_partial_overlap_is_not_treated_as_prefix(self) -> None:
+ # Conservative: only the clear prefix case dedups. A shared word that
+ # is not a leading prefix keeps the joined form.
+ essence = "fox jumps"
+ content = "The quick brown fox jumps."
+ assert _build_embed_input(essence, content) == f"{essence}\n{content}"
+
+ def test_identical_essence_and_content_returns_content(self) -> None:
+ text = "Exactly the same on both fields."
+ assert _build_embed_input(text, text) == text
+
+
+class TestAutoEmbedInput:
+ """Integration tests asserting the exact text auto-embed sends."""
+
+ async def test_prefix_essence_not_double_embedded(
+ self,
+ db: aiosqlite.Connection,
+ ) -> None:
+ recorder = _RecordingProvider(dimension=4, model_name="recording")
+ store = SqliteEngravaCore(db, embedding_provider=recorder, auto_embed=True)
+
+ content = "The deployment failed because the database migration timed out."
+ essence = content[:20] # essence == content[:N]
+ await store.create_thought(
+ _make_thought(thought_id="t-prefix", essence=essence, content=content)
+ )
+
+ assert recorder.captured == [content]
+ # The opening must not appear twice in the embedded text.
+ assert recorder.captured[0].count(essence) == 1
+
+ async def test_distinct_essence_uses_joined_form(
+ self,
+ db: aiosqlite.Connection,
+ ) -> None:
+ recorder = _RecordingProvider(dimension=4, model_name="recording")
+ store = SqliteEngravaCore(db, embedding_provider=recorder, auto_embed=True)
+
+ essence = "Outage postmortem summary"
+ content = "The deployment failed because the database migration timed out."
+ await store.create_thought(
+ _make_thought(thought_id="t-distinct", essence=essence, content=content)
+ )
+
+ assert recorder.captured == [f"{essence}\n{content}"]
+
+
# ---------------------------------------------------------------------------
# Model immutability (lazy lock)
# ---------------------------------------------------------------------------
@@ -864,6 +967,93 @@ def test_lazy_load_sets_dimension(self) -> None:
provider._dimension = 384
assert provider.dimension == 384
+ def test_load_raises_max_seq_length_to_architecture_max(self) -> None:
+ """Loading lifts a conservative shipped limit up to the true maximum.
+
+ ``all-MiniLM-L12-v2`` ships ``get_max_seq_length() == 128`` while its
+ BERT backbone supports ``max_position_embeddings == 512``. The provider
+ must raise ``max_seq_length`` to the architecture maximum so long
+ inputs are not silently truncated at 128 word-pieces.
+ """
+ from engrava.embeddings.sentence_transformer import SentenceTransformerProvider
+
+ # Fake transformer module exposing the architecture's true max.
+ transformer_module = MagicMock()
+ transformer_module.auto_model.config.max_position_embeddings = 512
+
+ fake_model = MagicMock()
+ fake_model.get_max_seq_length.return_value = 128
+ fake_model.max_seq_length = 128
+ fake_model.get_sentence_embedding_dimension.return_value = 384
+ fake_model.tokenizer.model_max_length = 128
+ # ``model[0]`` returns the underlying transformer module.
+ fake_model.__getitem__.return_value = transformer_module
+
+ st_module = MagicMock()
+ st_module.SentenceTransformer.return_value = fake_model
+
+ provider = SentenceTransformerProvider(model_name="all-MiniLM-L12-v2")
+ with patch.dict("sys.modules", {"sentence_transformers": st_module}):
+ loaded = provider._load_model()
+
+ assert loaded.max_seq_length == 512
+
+ def test_load_keeps_max_seq_length_when_already_full(self) -> None:
+ """No-op when the model already reports its full architecture limit."""
+ from engrava.embeddings.sentence_transformer import SentenceTransformerProvider
+
+ transformer_module = MagicMock()
+ transformer_module.auto_model.config.max_position_embeddings = 256
+
+ fake_model = MagicMock()
+ fake_model.get_max_seq_length.return_value = 256
+ fake_model.max_seq_length = 256
+ fake_model.get_sentence_embedding_dimension.return_value = 384
+ fake_model.tokenizer.model_max_length = 256
+ fake_model.__getitem__.return_value = transformer_module
+
+ st_module = MagicMock()
+ st_module.SentenceTransformer.return_value = fake_model
+
+ provider = SentenceTransformerProvider(model_name="already-full")
+ with patch.dict("sys.modules", {"sentence_transformers": st_module}):
+ loaded = provider._load_model()
+
+ assert loaded.max_seq_length == 256
+
+ def test_load_leaves_max_seq_length_when_architecture_max_unreadable(self) -> None:
+ """Untouched when the architecture max cannot be discovered."""
+ from engrava.embeddings.sentence_transformer import SentenceTransformerProvider
+
+ # Indexing the model raises — the provider cannot read the true max.
+ fake_model = MagicMock()
+ fake_model.__getitem__.side_effect = IndexError("no modules")
+ fake_model.get_max_seq_length.return_value = 64
+ fake_model.max_seq_length = 64
+ fake_model.get_sentence_embedding_dimension.return_value = 384
+
+ st_module = MagicMock()
+ st_module.SentenceTransformer.return_value = fake_model
+
+ provider = SentenceTransformerProvider(model_name="unreadable")
+ with patch.dict("sys.modules", {"sentence_transformers": st_module}):
+ loaded = provider._load_model()
+
+ assert loaded.max_seq_length == 64
+
+ def test_architecture_max_ignores_non_positive_config_value(self) -> None:
+ """A missing/sentinel config value is treated as not discoverable."""
+ from engrava.embeddings.sentence_transformer import SentenceTransformerProvider
+
+ transformer_module = MagicMock()
+ # A non-positive sentinel (e.g. unset) must not be adopted as the max.
+ transformer_module.auto_model.config.max_position_embeddings = 0
+ fake_model = MagicMock()
+ fake_model.__getitem__.return_value = transformer_module
+
+ provider = SentenceTransformerProvider(model_name="bad-config")
+ assert provider._architecture_max_seq_length(fake_model) is None
+
# ---------------------------------------------------------------------------
# HuggingFaceProvider tests (mocked client)
From d88043df45ab05047c20517a5882f071ad7cab92 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sat, 13 Jun 2026 09:40:16 +0200
Subject: [PATCH 25/40] fix: keep quoted MindQL values as strings and reject
malformed conditions
---
CHANGELOG.md | 14 +++
src/engrava/mindql/executor.py | 15 ++-
src/engrava/mindql/parser.py | 12 ++-
tests/test_mindql.py | 166 +++++++++++++++++++++++++++++++++
4 files changed, 202 insertions(+), 5 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 35663d6..6ad9f78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,20 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/
### Fixed
+- **MindQL no longer mistypes quoted values or silently ignores malformed
+ conditions.** A single-quoted WHERE value is now kept verbatim as a string,
+ so a zero-padded identifier such as `WHERE source = '007'` matches the stored
+ string `'007'` instead of being coerced to the integer `7` and matching
+ nothing; an *unquoted* bare value (for example `WHERE created_cycle = 12`) is
+ still coerced to a number as before. A WHERE fragment must now match the
+ `field op value` grammar in full: trailing content after a condition (such as
+ `WHERE priority = 'P1' OR 1=1`) previously matched only the leading prefix and
+ silently discarded the rest, which could change the result set unnoticed — it
+ now raises a parse error. Finally, a `FIND` query with no `LIMIT` clause is
+ capped at a sane default (100 rows) rather than running an unbounded scan; an
+ explicit `LIMIT` always overrides the default, and `COUNT` queries are
+ unaffected.
+
- **Long memories are now embedded in full, and a thought's opening is no
longer double-counted.** Two silent recall killers in the vector arm of
search are fixed. First, when a thought's `essence` is just the opening of
diff --git a/src/engrava/mindql/executor.py b/src/engrava/mindql/executor.py
index 364b7b9..34f7a4f 100644
--- a/src/engrava/mindql/executor.py
+++ b/src/engrava/mindql/executor.py
@@ -113,6 +113,12 @@ def _resolve_now() -> str:
# only meaningful against these; applying one to any other table is rejected.
_TEMPORAL_TABLES: frozenset[str] = frozenset({"thought", "edge"})
+# Default row cap applied to a FIND query that carries no explicit LIMIT, so an
+# unqualified ``FIND thoughts`` cannot run an unbounded scan. An explicit
+# ``LIMIT`` in the query always overrides this. COUNT queries are unaffected —
+# they aggregate and never materialise the row set.
+DEFAULT_FIND_LIMIT = 100
+
@dataclass(frozen=True)
class MindQLResult:
@@ -274,6 +280,10 @@ def _build_select_sql(
) -> tuple[str, list[object]]:
"""Build a parameterized SELECT SQL from a FIND query.
+ A ``LIMIT`` is always emitted: the query's own limit when it has one,
+ otherwise :data:`DEFAULT_FIND_LIMIT` so an unqualified FIND cannot run
+ an unbounded scan.
+
Args:
table: Target table name.
query: Parsed FIND query.
@@ -284,7 +294,10 @@ def _build_select_sql(
"""
clauses, params = self._build_where(table, query)
where = f" WHERE {' AND '.join(clauses)}" if clauses else ""
- limit = f" LIMIT {query.limit}" if query.limit is not None else ""
+ # An explicit LIMIT wins; otherwise cap the scan at DEFAULT_FIND_LIMIT
+ # so an unqualified FIND cannot run an unbounded query.
+ effective_limit = query.limit if query.limit is not None else DEFAULT_FIND_LIMIT
+ limit = f" LIMIT {effective_limit}"
sql = f"SELECT * FROM {table}{where}{limit}" # noqa: S608
return sql, params
diff --git a/src/engrava/mindql/parser.py b/src/engrava/mindql/parser.py
index 3d5bb49..b29eb52 100644
--- a/src/engrava/mindql/parser.py
+++ b/src/engrava/mindql/parser.py
@@ -353,15 +353,19 @@ def _parse_condition(part: str) -> Condition:
MindQLParseError: If the fragment is not a valid condition.
"""
- match = _CONDITION_RE.match(part)
+ match = _CONDITION_RE.fullmatch(part)
if not match:
msg = f"Invalid condition: {part!r}"
raise MindQLParseError(msg)
field_name = match.group(1)
op_str = match.group(2)
- # group 3 = quoted value, group 4 = unquoted value
- raw_value: str = match.group(3) if match.group(3) is not None else match.group(4)
- value: str | int | float = _coerce_value(raw_value)
+ # group 3 = quoted value, group 4 = unquoted value. A single-quoted literal
+ # is taken verbatim as a string; only the unquoted bare value is coerced to
+ # int/float, so e.g. ``'007'`` stays the string ``"007"`` instead of int 7.
+ quoted_value = match.group(3)
+ value: str | int | float = (
+ quoted_value if quoted_value is not None else _coerce_value(match.group(4))
+ )
return Condition(
field=field_name,
operator=_OPERATOR_MAP[op_str],
diff --git a/tests/test_mindql.py b/tests/test_mindql.py
index a5acbae..a0f0b00 100644
--- a/tests/test_mindql.py
+++ b/tests/test_mindql.py
@@ -910,3 +910,169 @@ async def test_find_all_without_temporal_unchanged(
store = SqliteEngravaCore(populated_db)
result = await store.execute_mindql(parse("FIND thoughts"))
assert len(result.rows) == 5
+
+
+# ---------------------------------------------------------------------------
+# Quoted-value typing: a single-quoted literal stays a string verbatim
+# ---------------------------------------------------------------------------
+
+
+class TestQuotedValueStaysString:
+ """A single-quoted WHERE value must keep its string type.
+
+ Only an *unquoted* bare value is coerced to ``int`` / ``float``; a value
+ the user wrapped in single quotes is taken verbatim as a string, so a
+ zero-padded identifier such as ``'007'`` is never silently turned into the
+ integer ``7``.
+ """
+
+ def test_quoted_numeric_value_is_string(self) -> None:
+ q = parse("FIND thoughts WHERE source = '12'")
+ assert q.conditions[0].value == "12"
+ assert isinstance(q.conditions[0].value, str)
+
+ def test_unquoted_numeric_value_still_coerces(self) -> None:
+ q = parse("FIND thoughts WHERE source = 12")
+ assert q.conditions[0].value == 12
+ assert isinstance(q.conditions[0].value, int)
+
+ def test_quoted_zero_padded_value_is_string(self) -> None:
+ q = parse("FIND thoughts WHERE source = '007'")
+ assert q.conditions[0].value == "007"
+ assert isinstance(q.conditions[0].value, str)
+
+ async def test_quoted_zero_padded_value_matches_stored_string(
+ self,
+ db: aiosqlite.Connection,
+ ) -> None:
+ store = SqliteEngravaCore(db)
+ await store.create_thought(
+ ThoughtRecord(
+ thought_id="t-zero-pad",
+ thought_type=ThoughtType.OBSERVATION,
+ essence="zero padded source",
+ content="zero padded source",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=1,
+ updated_cycle=1,
+ source="007",
+ )
+ )
+ result = await store.execute_mindql(parse("FIND thoughts WHERE source = '007'"))
+ # Pre-fix: '007' was coerced to int 7 and never matched the stored
+ # string '007', so this returned no rows.
+ assert {row["thought_id"] for row in result.rows} == {"t-zero-pad"}
+
+
+# ---------------------------------------------------------------------------
+# Strict condition matching: a fragment with trailing content is rejected
+# ---------------------------------------------------------------------------
+
+
+class TestConditionFullMatch:
+ """A WHERE fragment must match the condition grammar in full.
+
+ A prefix match used to silently discard any trailing content after the
+ first ``field op value`` token (for example ``priority = 'P1' OR 1=1``
+ parsed as just ``priority = 'P1'``). Such a fragment is now rejected so the
+ surplus never alters the result set unnoticed.
+ """
+
+ def test_trailing_content_after_condition_raises(self) -> None:
+ with pytest.raises(MindQLParseError, match="Invalid condition"):
+ parse("FIND thoughts WHERE priority = 'P1' OR 1=1")
+
+ def test_clean_single_condition_still_parses(self) -> None:
+ q = parse("FIND thoughts WHERE priority = 'P1'")
+ assert len(q.conditions) == 1
+ assert q.conditions[0].field == "priority"
+ assert q.conditions[0].value == "P1"
+
+ def test_clean_and_split_conditions_still_parse(self) -> None:
+ q = parse("FIND thoughts WHERE source = 'x' AND priority = 'P1'")
+ assert len(q.conditions) == 2
+ assert q.conditions[0].field == "source"
+ assert q.conditions[0].value == "x"
+ assert q.conditions[1].field == "priority"
+ assert q.conditions[1].value == "P1"
+
+
+# ---------------------------------------------------------------------------
+# Default LIMIT: an unbounded FIND is capped; an explicit LIMIT still wins
+# ---------------------------------------------------------------------------
+
+
+class TestDefaultFindLimit:
+ """A FIND with no LIMIT is capped at the default; COUNT is unaffected."""
+
+ async def test_find_without_limit_capped_at_default(
+ self,
+ db: aiosqlite.Connection,
+ ) -> None:
+ default = executor_module.DEFAULT_FIND_LIMIT
+ store = SqliteEngravaCore(db)
+ for i in range(default + 10):
+ await store.create_thought(
+ ThoughtRecord(
+ thought_id=f"t-cap-{i:04d}",
+ thought_type=ThoughtType.OBSERVATION,
+ essence=f"essence {i}",
+ content=f"content {i}",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=1,
+ updated_cycle=1,
+ source="cap",
+ )
+ )
+ result = await store.execute_mindql(parse("FIND thoughts"))
+ # Pre-fix: the query was unbounded and returned every stored row.
+ assert len(result.rows) == default
+
+ async def test_explicit_limit_still_wins(
+ self,
+ db: aiosqlite.Connection,
+ ) -> None:
+ store = SqliteEngravaCore(db)
+ for i in range(20):
+ await store.create_thought(
+ ThoughtRecord(
+ thought_id=f"t-lim-{i:04d}",
+ thought_type=ThoughtType.OBSERVATION,
+ essence=f"essence {i}",
+ content=f"content {i}",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=1,
+ updated_cycle=1,
+ source="lim",
+ )
+ )
+ result = await store.execute_mindql(parse("FIND thoughts LIMIT 5"))
+ assert len(result.rows) == 5
+
+ async def test_count_unaffected_by_default_limit(
+ self,
+ db: aiosqlite.Connection,
+ ) -> None:
+ default = executor_module.DEFAULT_FIND_LIMIT
+ store = SqliteEngravaCore(db)
+ total = default + 10
+ for i in range(total):
+ await store.create_thought(
+ ThoughtRecord(
+ thought_id=f"t-cnt-{i:04d}",
+ thought_type=ThoughtType.OBSERVATION,
+ essence=f"essence {i}",
+ content=f"content {i}",
+ priority=Priority.P1,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ created_cycle=1,
+ updated_cycle=1,
+ source="cnt",
+ )
+ )
+ result = await store.execute_mindql(parse("COUNT thoughts"))
+ # COUNT does not apply the FIND default cap.
+ assert result.count == total
From 12563030ebab86c481fae341eaccabd8db2223eb Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sat, 13 Jun 2026 10:39:41 +0200
Subject: [PATCH 26/40] perf: tune sqlite pragmas and add hot-path indexes
Set synchronous=NORMAL (the documented-safe companion to WAL) and a 5s
busy_timeout on every connection, cutting per-commit fsync cost and the
instant SQLITE_BUSY a second connection used to hit. Add four indexes
backing the equality filters and sort hit on every common read
(edge.to_thought_id, embedding.owner_id, thought.updated_cycle,
thought.thought_type), turning previous full scans into index lookups. The
schema migrates v13->v14 automatically on open: purely additive (indexes
only), idempotent, zero data loss; absent lazy tables and legacy-missing
columns are skipped rather than erroring.
---
CHANGELOG.md | 15 +
docs/upgrade.md | 8 +
src/engrava/infrastructure/service_manager.py | 8 +
.../infrastructure/sqlite/engrava_core.py | 116 +++-
.../infrastructure/sqlite/schema_core.sql | 26 +-
tests/test_cognitive_journal.py | 6 +-
tests/test_dedup_migration.py | 10 +-
tests/test_embedding_providers.py | 2 +-
tests/test_engrava_completeness.py | 2 +-
tests/test_extension_load_with_migrations.py | 4 +-
tests/test_hot_path_index_migration.py | 644 ++++++++++++++++++
tests/test_metadata_migration.py | 10 +-
tests/test_mind_store_core.py | 2 +-
tests/test_referential_integrity.py | 8 +-
tests/test_search_hybrid_graph.py | 2 +-
tests/test_service_isolation.py | 4 +-
tests/test_ttl_auto_expiry.py | 4 +-
tests/test_valid_time_migration.py | 20 +-
18 files changed, 840 insertions(+), 51 deletions(-)
create mode 100644 tests/test_hot_path_index_migration.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ad9f78..ebc1c3b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,21 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/
## [Unreleased]
+### Performance
+
+- **Hot-path indexes and tuned SQLite PRAGMAs make the common reads faster.**
+ Four indexes now back the equality filters and the sort column hit on every
+ common read — looking up edges by their target thought, fetching a thought's
+ embedding, listing thoughts in recency order, and filtering thoughts by type
+ — turning what were full table scans into index lookups. The connection is
+ also opened with `synchronous=NORMAL` (the documented-safe companion to WAL:
+ durable across an application crash, only at risk of losing the most recent
+ transactions on an OS crash or power loss) and `busy_timeout=5000`, so a
+ second connection waits briefly for a lock instead of failing immediately
+ with a "database is locked" error. The index changes are an additive schema
+ migration that runs automatically on first open with zero data loss; see the
+ [upgrade guide](docs/upgrade.md#03---04).
+
### Fixed
- **MindQL no longer mistypes quoted values or silently ignores malformed
diff --git a/docs/upgrade.md b/docs/upgrade.md
index 0f6e614..b5ae903 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -170,6 +170,14 @@ What the migration does:
(a legacy row) keeps `valid_from = NULL`. **Every existing edge** keeps both
bounds `NULL` — the edge table has no calendar timestamp to source a date
from, so the migration honestly leaves them open rather than fabricating one.
+- **Adds four hot-path indexes.** A second additive step creates indexes that
+ back the equality filters and the sort column hit on every common read
+ (edges by their target thought, a thought's embedding by owner, listing
+ thoughts in recency order, and filtering thoughts by type). This is a
+ pure index addition — **no row is read, modified, or removed**, and the row
+ counts are unchanged. The connection is also opened with `synchronous=NORMAL`
+ and `busy_timeout=5000` (a PRAGMA-only change with no on-disk effect). Like
+ the valid-time step, it runs automatically on first open with zero data loss.
**Existing queries are unchanged.** A query that uses no temporal predicate
behaves exactly as it did on 0.3. And because a `NULL` bound is treated as an
diff --git a/src/engrava/infrastructure/service_manager.py b/src/engrava/infrastructure/service_manager.py
index 77d468e..64b32f1 100644
--- a/src/engrava/infrastructure/service_manager.py
+++ b/src/engrava/infrastructure/service_manager.py
@@ -298,6 +298,14 @@ async def _create_store(self, service_name: str) -> SqliteEngravaCore:
if self._wal_mode:
await db.execute("PRAGMA journal_mode=WAL")
await db.execute("PRAGMA foreign_keys=ON")
+ # synchronous=NORMAL is the documented-safe companion to WAL: the
+ # database stays durable across an application crash and is only at
+ # risk of losing the most recent transactions on an OS crash or
+ # power loss, which is the standard recommendation for WAL.
+ await db.execute("PRAGMA synchronous=NORMAL")
+ # busy_timeout makes a second connection wait (up to 5s) for a lock
+ # instead of failing immediately with SQLITE_BUSY.
+ await db.execute("PRAGMA busy_timeout=5000")
db.row_factory = aiosqlite.Row
emb_config = self._resolve_embedding_config(service_name)
diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py
index f1aaf76..99eef66 100644
--- a/src/engrava/infrastructure/sqlite/engrava_core.py
+++ b/src/engrava/infrastructure/sqlite/engrava_core.py
@@ -366,6 +366,14 @@ async def from_config(
if config.wal_mode:
await db.execute("PRAGMA journal_mode=WAL")
await db.execute("PRAGMA foreign_keys=ON")
+ # synchronous=NORMAL is the documented-safe companion to WAL: the
+ # database stays durable across an application crash and is only at
+ # risk of losing the most recent transactions on an OS crash or
+ # power loss, which is the standard recommendation for WAL.
+ await db.execute("PRAGMA synchronous=NORMAL")
+ # busy_timeout makes a second connection wait (up to 5s) for a lock
+ # instead of failing immediately with SQLITE_BUSY.
+ await db.execute("PRAGMA busy_timeout=5000")
db.row_factory = aiosqlite.Row
hooks = resolve_hooks(config.hooks_class)
@@ -501,7 +509,7 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
Applies the full ``schema_core.sql`` (including FTS5 virtual
table and sync triggers) only when the database has not already
been bootstrapped to schema version 3+. Databases at older
- versions are upgraded incrementally up to the current version (13).
+ versions are upgraded incrementally up to the current version (14).
After core schema creation or upgrade, probes for the ``thought_fts``
table and then runs any pending extension schema migrations for each
@@ -530,7 +538,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 4: # noqa: PLR2004
await self._migrate_core_v3_to_v4()
@@ -543,7 +552,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 5: # noqa: PLR2004
await self._migrate_core_v4_to_v5()
@@ -555,7 +565,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 6: # noqa: PLR2004
await self._migrate_core_v5_to_v6()
@@ -566,7 +577,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 7: # noqa: PLR2004
await self._migrate_core_v6_to_v7()
@@ -576,7 +588,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 8: # noqa: PLR2004
await self._migrate_core_v7_to_v8()
@@ -585,7 +598,8 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 9: # noqa: PLR2004
await self._migrate_core_v8_to_v9()
@@ -593,29 +607,38 @@ async def ensure_schema(self) -> None: # noqa: C901, PLR0912, PLR0915
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 10: # noqa: PLR2004
await self._migrate_core_v9_to_v10()
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 11: # noqa: PLR2004
await self._migrate_core_v10_to_v11()
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 12: # noqa: PLR2004
await self._migrate_core_v11_to_v12()
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
elif current_version < 13: # noqa: PLR2004
await self._migrate_core_v12_to_v13()
- await self._db.execute("PRAGMA user_version = 13")
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
+ await self._db.commit()
+ elif current_version < 14: # noqa: PLR2004
+ await self._migrate_core_v13_to_v14()
+ await self._db.execute("PRAGMA user_version = 14")
await self._db.commit()
# Ensure referential integrity is enforced for the lifetime of this
@@ -1013,6 +1036,60 @@ async def _migrate_core_v12_to_v13(self) -> None:
f"ON {table}(valid_from, valid_until)"
)
+ async def _migrate_core_v13_to_v14(self) -> None:
+ """Add hot-path indexes for the core read queries (core-14).
+
+ Purely additive: creates four indexes that back the equality
+ filters and the sort column hit on every common read, without
+ touching any row or column. The targets were chosen from the
+ actual ``WHERE`` / ``ORDER BY`` clauses in this module:
+
+ * ``idx_edge_to_thought`` on ``edge(to_thought_id)`` — ``get_edges``
+ (the inbound and both-direction modes) and the
+ reflection-consolidation scan filter the edge table on
+ ``to_thought_id``.
+ * ``idx_embedding_owner`` on ``embedding(owner_id)`` —
+ ``get_embedding`` looks an embedding up by its owner thought;
+ without this index the lookup is a full table scan, and it runs
+ inside three dreaming loops.
+ * ``idx_thought_updated_cycle`` on ``thought(updated_cycle)`` —
+ ``list_thoughts`` orders by ``updated_cycle`` on every call.
+ * ``idx_thought_type`` on ``thought(thought_type)`` —
+ ``thought_type`` equality is used by the reflection-id scan on
+ every search and by ``list_thoughts`` filtering.
+
+ Idempotent: every statement uses ``CREATE INDEX IF NOT EXISTS``, so
+ re-running the migration leaves the schema unchanged. The ``edge``
+ and ``embedding`` tables may be absent in a partial bootstrap (they
+ are created lazily), so each is guarded by ``_table_exists`` exactly
+ as ``_migrate_core_v12_to_v13`` guards ``edge``. The ``thought``
+ table is always present, but each indexed column is additionally
+ guarded by ``_column_exists`` so a minimal or hand-rolled legacy
+ schema that has not yet grown a column (for example a very old
+ database whose ``thought`` table predates ``updated_cycle``) skips
+ that single index instead of raising ``no such column``.
+ """
+ # ``thought`` is always present, but a minimal legacy schema may lack
+ # an indexed column; index only the columns that exist.
+ if await self._column_exists("thought", "updated_cycle"):
+ await self._db.execute(
+ "CREATE INDEX IF NOT EXISTS idx_thought_updated_cycle ON thought(updated_cycle)"
+ )
+ if await self._column_exists("thought", "thought_type"):
+ await self._db.execute(
+ "CREATE INDEX IF NOT EXISTS idx_thought_type ON thought(thought_type)"
+ )
+ # ``edge`` / ``embedding`` may be absent in a partial bootstrap;
+ # creating an index on a missing table would raise ``no such table``.
+ if await self._table_exists("edge"):
+ await self._db.execute(
+ "CREATE INDEX IF NOT EXISTS idx_edge_to_thought ON edge(to_thought_id)"
+ )
+ if await self._table_exists("embedding"):
+ await self._db.execute(
+ "CREATE INDEX IF NOT EXISTS idx_embedding_owner ON embedding(owner_id)"
+ )
+
async def _fk_present(self, table: str, column: str) -> bool:
"""Return ``True`` when ``table`` carries an FK on ``column``."""
cursor = await self._db.execute(f"PRAGMA foreign_key_list({table})")
@@ -1027,6 +1104,21 @@ async def _table_exists(self, table: str) -> bool:
)
return await cursor.fetchone() is not None
+ async def _column_exists(self, table: str, column: str) -> bool:
+ """Return ``True`` when ``table`` has a column named ``column``.
+
+ Args:
+ table: The table to inspect. Must already exist.
+ column: The column name to look for.
+
+ Returns:
+ ``True`` if the column is present in ``PRAGMA table_info``.
+
+ """
+ cursor = await self._db.execute(f"PRAGMA table_info({table})")
+ rows = await cursor.fetchall()
+ return any(row["name"] == column for row in rows)
+
async def _purge_orphan_children(self) -> None:
"""Delete orphan rows whose parent thought no longer exists.
diff --git a/src/engrava/infrastructure/sqlite/schema_core.sql b/src/engrava/infrastructure/sqlite/schema_core.sql
index e9b576c..29e48b1 100644
--- a/src/engrava/infrastructure/sqlite/schema_core.sql
+++ b/src/engrava/infrastructure/sqlite/schema_core.sql
@@ -1,7 +1,8 @@
-- engrava: Core thought-graph schema (free-tier boundary — no internal-cognitive columns).
--- Version: core-13 (valid-time axis: nullable valid_from / valid_until on thought + edge)
+-- Version: core-14 (hot-path indexes: edge.to_thought_id, embedding.owner_id,
+-- thought.updated_cycle, thought.thought_type)
-PRAGMA user_version = 13;
+PRAGMA user_version = 14;
CREATE TABLE IF NOT EXISTS thought (
thought_id TEXT PRIMARY KEY,
@@ -211,3 +212,24 @@ CREATE INDEX IF NOT EXISTS idx_edge_valid_from ON edge(valid_from);
CREATE INDEX IF NOT EXISTS idx_edge_valid_until ON edge(valid_until)
WHERE valid_until IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_edge_valid_range ON edge(valid_from, valid_until);
+
+-- -------------------------------------------------------------------
+-- Hot-path indexes for the core read queries (core-14)
+-- -------------------------------------------------------------------
+-- These back the equality and sort columns hit on every common read:
+-- * idx_edge_to_thought — get_edges (IN / BOTH direction) and the
+-- reflection-consolidation scan filter edge on to_thought_id.
+-- * idx_embedding_owner — get_embedding looks up an embedding by its
+-- owner thought; without an index this is a full table scan, and it
+-- runs inside three dreaming loops.
+-- * idx_thought_updated_cycle — list_thoughts orders by updated_cycle on
+-- every call.
+-- * idx_thought_type — thought_type equality is used by the
+-- reflection-id scan on every search and by list_thoughts filtering.
+-- A fresh-bootstrap database must carry the same indexes as one upgraded
+-- in place, so they are declared here as well as in the migration helper.
+
+CREATE INDEX IF NOT EXISTS idx_edge_to_thought ON edge(to_thought_id);
+CREATE INDEX IF NOT EXISTS idx_embedding_owner ON embedding(owner_id);
+CREATE INDEX IF NOT EXISTS idx_thought_updated_cycle ON thought(updated_cycle);
+CREATE INDEX IF NOT EXISTS idx_thought_type ON thought(thought_type);
diff --git a/tests/test_cognitive_journal.py b/tests/test_cognitive_journal.py
index cb0d79c..9620048 100644
--- a/tests/test_cognitive_journal.py
+++ b/tests/test_cognitive_journal.py
@@ -718,7 +718,7 @@ async def test_fresh_schema_has_journal_table(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert row[0] == 13
+ assert row[0] == 14
finally:
await conn.close()
@@ -760,7 +760,7 @@ async def test_migrate_from_v5(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert row[0] == 13
+ assert row[0] == 14
finally:
await conn.close()
@@ -775,7 +775,7 @@ async def test_migration_idempotent(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert row[0] == 13
+ assert row[0] == 14
finally:
await conn.close()
diff --git a/tests/test_dedup_migration.py b/tests/test_dedup_migration.py
index 4f91f0c..5f08c50 100644
--- a/tests/test_dedup_migration.py
+++ b/tests/test_dedup_migration.py
@@ -182,7 +182,7 @@ async def test_ensure_schema_fresh_db_starts_at_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert "content_hash" in await _table_columns(fresh_db, "thought")
assert await _index_exists(fresh_db, "idx_thought_content_hash")
@@ -197,7 +197,7 @@ async def test_ensure_schema_from_v9_to_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert "content_hash" in await _table_columns(fresh_db, "thought")
assert await _index_exists(fresh_db, "idx_thought_content_hash")
@@ -212,7 +212,7 @@ async def test_ensure_schema_from_v9_idempotent(
for _ in range(3):
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
async def test_ensure_schema_at_head_skips_all_migration_branches(
@@ -221,11 +221,11 @@ async def test_ensure_schema_at_head_skips_all_migration_branches(
"""Already-migrated DB stays at head across repeat ``ensure_schema`` calls."""
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema() # bootstrap fresh -> head
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
# Re-run; helpers should not fire (idempotent on user_version branch).
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert "content_hash" in await _table_columns(fresh_db, "thought")
diff --git a/tests/test_embedding_providers.py b/tests/test_embedding_providers.py
index 528e73a..57a235f 100644
--- a/tests/test_embedding_providers.py
+++ b/tests/test_embedding_providers.py
@@ -493,7 +493,7 @@ async def test_migration_from_v4_creates_metadata(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert int(row[0]) == 13
+ assert int(row[0]) == 14
# _metadata table should exist.
cursor = await conn.execute(
diff --git a/tests/test_engrava_completeness.py b/tests/test_engrava_completeness.py
index 7881945..029c7d3 100644
--- a/tests/test_engrava_completeness.py
+++ b/tests/test_engrava_completeness.py
@@ -491,7 +491,7 @@ async def test_migration_idempotent(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert int(row[0]) == 13
+ assert int(row[0]) == 14
await conn.close()
diff --git a/tests/test_extension_load_with_migrations.py b/tests/test_extension_load_with_migrations.py
index 5bbe8d7..6b8541f 100644
--- a/tests/test_extension_load_with_migrations.py
+++ b/tests/test_extension_load_with_migrations.py
@@ -63,7 +63,7 @@ async def test_schema_version_is_head(self, db: aiosqlite.Connection) -> None:
await _fresh_core(db)
cursor = await db.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert int(row[0]) == 13
+ assert int(row[0]) == 14
async def test_no_manifests_leaves_versions_table_empty(self, db: aiosqlite.Connection) -> None:
await _fresh_core(db)
@@ -289,7 +289,7 @@ async def test_existing_v8_db_upgraded_to_v9(self, tmp_path: Path) -> None:
cursor = await conn2.execute("PRAGMA user_version")
row = await cursor.fetchone()
- assert int(row[0]) == 13
+ assert int(row[0]) == 14
cursor = await conn2.execute(
"SELECT name FROM sqlite_master WHERE type='table' "
diff --git a/tests/test_hot_path_index_migration.py b/tests/test_hot_path_index_migration.py
new file mode 100644
index 0000000..944d72d
--- /dev/null
+++ b/tests/test_hot_path_index_migration.py
@@ -0,0 +1,644 @@
+"""Schema migration tests for the hot-path indexes (core-13 -> core-14).
+
+Exercises ``_migrate_core_v13_to_v14`` directly (idempotence, table-absence
+tolerance, index creation) and the full ``ensure_schema`` cascade so a
+database stamped at v13 converges on the head ``user_version`` with the four
+new hot-path indexes on ``thought``, ``edge`` and ``embedding``.
+
+The migration is purely additive: it creates indexes that back the equality
+filters and the sort column hit on every common read, without touching any
+row or column. A freshly bootstrapped database must carry exactly the same
+indexes as one upgraded in place from v13.
+
+Also verifies the connection-init PRAGMA tuning (``synchronous=NORMAL`` and
+``busy_timeout=5000``) on both connection-init paths
+(``SqliteEngravaCore.from_config`` and ``ServiceManager``), and that the new
+equality / sort indexes are actually chosen by the query planner.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import aiosqlite
+import pytest
+
+from engrava import SqliteEngravaCore
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator
+ from pathlib import Path
+
+# The four indexes introduced by the v13 -> v14 migration, grouped by the
+# table they live on.
+_NEW_THOUGHT_INDEXES = (
+ "idx_thought_updated_cycle",
+ "idx_thought_type",
+)
+_NEW_EDGE_INDEXES = ("idx_edge_to_thought",)
+_NEW_EMBEDDING_INDEXES = ("idx_embedding_owner",)
+_ALL_NEW_INDEXES = _NEW_THOUGHT_INDEXES + _NEW_EDGE_INDEXES + _NEW_EMBEDDING_INDEXES
+
+_HEAD_VERSION = 14
+
+
+# ---------------------------------------------------------------------------
+# Helpers (mirror test_valid_time_migration.py)
+# ---------------------------------------------------------------------------
+
+
+async def _user_version(db: aiosqlite.Connection) -> int:
+ cursor = await db.execute("PRAGMA user_version")
+ row = await cursor.fetchone()
+ assert row is not None
+ return int(row[0])
+
+
+async def _index_names(db: aiosqlite.Connection, table: str) -> set[str]:
+ cursor = await db.execute(
+ "SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = ?",
+ (table,),
+ )
+ rows = await cursor.fetchall()
+ return {row["name"] for row in rows if row["name"] is not None}
+
+
+async def _index_exists(db: aiosqlite.Connection, name: str) -> bool:
+ cursor = await db.execute(
+ "SELECT 1 FROM sqlite_master WHERE type = 'index' AND name = ?",
+ (name,),
+ )
+ return await cursor.fetchone() is not None
+
+
+async def _row_count(db: aiosqlite.Connection, table: str) -> int:
+ cursor = await db.execute(f"SELECT COUNT(*) AS n FROM {table}") # noqa: S608
+ row = await cursor.fetchone()
+ assert row is not None
+ return int(row["n"])
+
+
+async def _table_names(db: aiosqlite.Connection) -> set[str]:
+ cursor = await db.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
+ rows = await cursor.fetchall()
+ return {row["name"] for row in rows}
+
+
+async def _bootstrap_core_at_v13(db: aiosqlite.Connection) -> None:
+ """Recreate a faithful core-13 ``thought`` + ``edge`` + ``embedding`` schema.
+
+ Mirrors what ``schema_core.sql`` looked like at ``user_version=13``
+ (valid-time axis present, but without the four hot-path indexes the
+ v14 upgrade adds). The pre-v14 indexes a real v13 install carries on
+ these tables are recreated too, so that after the upgrade a migrated
+ database is structurally identical to a freshly bootstrapped one. The
+ four new indexes are deliberately absent — that is precisely the
+ surface the upgrade re-adds.
+ """
+ await db.executescript(
+ """
+ CREATE TABLE IF NOT EXISTS thought (
+ thought_id TEXT PRIMARY KEY,
+ thought_type TEXT NOT NULL,
+ essence TEXT NOT NULL,
+ content TEXT NOT NULL,
+ content_hash TEXT,
+ priority TEXT NOT NULL,
+ lifecycle_status TEXT NOT NULL DEFAULT 'CREATED',
+ created_cycle INTEGER NOT NULL DEFAULT 0,
+ updated_cycle INTEGER NOT NULL DEFAULT 0,
+ source TEXT NOT NULL DEFAULT 'human',
+ confidence REAL,
+ embedding_ref TEXT,
+ source_type TEXT NOT NULL DEFAULT 'EXPERIENCE',
+ confirmation_count INTEGER NOT NULL DEFAULT 0,
+ consolidated_from TEXT,
+ visibility TEXT NOT NULL DEFAULT 'selective',
+ access_count INTEGER NOT NULL DEFAULT 0,
+ last_accessed_at TEXT,
+ created_at TEXT,
+ updated_at TEXT,
+ expires_at TEXT,
+ metadata_json TEXT NOT NULL DEFAULT '{}',
+ valid_from TEXT,
+ valid_until TEXT
+ );
+ CREATE TABLE IF NOT EXISTS edge (
+ edge_id TEXT PRIMARY KEY,
+ from_thought_id TEXT NOT NULL,
+ to_thought_id TEXT NOT NULL,
+ edge_type TEXT NOT NULL,
+ weight REAL NOT NULL DEFAULT 0.5,
+ created_cycle INTEGER NOT NULL DEFAULT 0,
+ source TEXT NOT NULL DEFAULT 'EXPERIENCE',
+ decay_multiplier REAL NOT NULL DEFAULT 1.0,
+ valid_from TEXT,
+ valid_until TEXT,
+ UNIQUE(from_thought_id, to_thought_id, edge_type),
+ FOREIGN KEY (from_thought_id) REFERENCES thought(thought_id) ON DELETE CASCADE,
+ FOREIGN KEY (to_thought_id) REFERENCES thought(thought_id) ON DELETE CASCADE
+ );
+ CREATE TABLE IF NOT EXISTS embedding (
+ embedding_id TEXT PRIMARY KEY,
+ owner_type TEXT NOT NULL,
+ owner_id TEXT NOT NULL,
+ model_name TEXT NOT NULL,
+ dimension INTEGER NOT NULL,
+ vector_blob BLOB NOT NULL,
+ created_at TEXT NOT NULL,
+ FOREIGN KEY (owner_id) REFERENCES thought(thought_id) ON DELETE CASCADE
+ );
+ CREATE INDEX IF NOT EXISTS idx_thought_expires ON thought(expires_at)
+ WHERE expires_at IS NOT NULL;
+ CREATE INDEX IF NOT EXISTS idx_thought_content_hash ON thought(content_hash);
+ CREATE INDEX IF NOT EXISTS idx_edge_type_from ON edge(edge_type, from_thought_id);
+ CREATE INDEX IF NOT EXISTS idx_thought_valid_from ON thought(valid_from);
+ CREATE INDEX IF NOT EXISTS idx_thought_valid_until ON thought(valid_until)
+ WHERE valid_until IS NOT NULL;
+ CREATE INDEX IF NOT EXISTS idx_thought_valid_range
+ ON thought(valid_from, valid_until);
+ CREATE INDEX IF NOT EXISTS idx_edge_valid_from ON edge(valid_from);
+ CREATE INDEX IF NOT EXISTS idx_edge_valid_until ON edge(valid_until)
+ WHERE valid_until IS NOT NULL;
+ CREATE INDEX IF NOT EXISTS idx_edge_valid_range ON edge(valid_from, valid_until);
+ PRAGMA user_version = 13;
+ """,
+ )
+ await db.commit()
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+async def fresh_db() -> AsyncIterator[aiosqlite.Connection]:
+ """Empty in-memory SQLite (``user_version`` starts at 0)."""
+ conn = await aiosqlite.connect(":memory:")
+ conn.row_factory = aiosqlite.Row
+ yield conn
+ await conn.close()
+
+
+# ---------------------------------------------------------------------------
+# Helper-level tests (direct ``_migrate_core_v13_to_v14``)
+# ---------------------------------------------------------------------------
+
+
+async def test_v13_base_lacks_the_new_indexes(fresh_db: aiosqlite.Connection) -> None:
+ """Guard: the v13 base fixture genuinely omits the four new indexes.
+
+ This is the pre-fix structural assertion — a v13 database has none of
+ the hot-path indexes, which is exactly the gap the migration closes.
+ """
+ await _bootstrap_core_at_v13(fresh_db)
+ for index_name in _ALL_NEW_INDEXES:
+ assert not await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_migrate_v13_to_v14_creates_all_four_indexes(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """The migration creates all four hot-path indexes."""
+ await _bootstrap_core_at_v13(fresh_db)
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v13_to_v14()
+
+ for index_name in _ALL_NEW_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_migrate_v13_to_v14_idempotent(fresh_db: aiosqlite.Connection) -> None:
+ """Re-running the helper is safe: no duplicate indexes, no error."""
+ await _bootstrap_core_at_v13(fresh_db)
+ store = SqliteEngravaCore(fresh_db)
+
+ for _ in range(3):
+ await store._migrate_core_v13_to_v14()
+
+ for index_name in _ALL_NEW_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_migrate_v13_to_v14_tolerates_absent_edge_and_embedding(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """A thought-only partial bootstrap migrates cleanly.
+
+ Some databases carry only the ``thought`` table at this point — the
+ ``edge`` and ``embedding`` tables are created lazily. The migration
+ must skip the edge / embedding indexes rather than raising ``no such
+ table``, while still indexing ``thought`` fully.
+ """
+ await fresh_db.executescript(
+ """
+ CREATE TABLE thought (
+ thought_id TEXT PRIMARY KEY,
+ thought_type TEXT NOT NULL,
+ essence TEXT NOT NULL,
+ content TEXT NOT NULL,
+ priority TEXT NOT NULL,
+ updated_cycle INTEGER NOT NULL DEFAULT 0,
+ created_at TEXT
+ );
+ PRAGMA user_version = 13;
+ """,
+ )
+ await fresh_db.commit()
+ tables = await _table_names(fresh_db)
+ assert "edge" not in tables
+ assert "embedding" not in tables
+
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v13_to_v14() # must not raise
+
+ for index_name in _NEW_THOUGHT_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+ for index_name in _NEW_EDGE_INDEXES + _NEW_EMBEDDING_INDEXES:
+ assert not await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_migrate_v13_to_v14_tolerates_absent_indexed_column(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """A minimal legacy ``thought`` table missing ``updated_cycle`` migrates cleanly.
+
+ A hand-rolled or very old schema may carry a ``thought`` table that
+ predates a now-indexed column. The migration must skip that single
+ index (guarded by ``_column_exists``) rather than raising ``no such
+ column``, while still creating the indexes for the columns present.
+ """
+ await fresh_db.executescript(
+ """
+ CREATE TABLE thought (
+ thought_id TEXT PRIMARY KEY,
+ thought_type TEXT NOT NULL,
+ essence TEXT NOT NULL,
+ content TEXT NOT NULL,
+ priority TEXT NOT NULL
+ );
+ PRAGMA user_version = 13;
+ """,
+ )
+ await fresh_db.commit()
+
+ store = SqliteEngravaCore(fresh_db)
+ await store._migrate_core_v13_to_v14() # must not raise
+
+ # thought_type is present, so its index is created.
+ assert await _index_exists(fresh_db, "idx_thought_type")
+ # updated_cycle is absent, so its index is skipped (not fabricated).
+ assert not await _index_exists(fresh_db, "idx_thought_updated_cycle")
+
+
+# ---------------------------------------------------------------------------
+# ensure_schema cascade tests
+# ---------------------------------------------------------------------------
+
+
+async def test_ensure_schema_fresh_db_lands_at_head(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """An empty DB bootstraps straight to v14 with all four new indexes."""
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == _HEAD_VERSION
+ for index_name in _ALL_NEW_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_ensure_schema_from_v13_to_head(fresh_db: aiosqlite.Connection) -> None:
+ """A v13 DB walks the ``< 14`` branch up to head and gains the indexes."""
+ await _bootstrap_core_at_v13(fresh_db)
+ assert await _user_version(fresh_db) == 13
+
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == _HEAD_VERSION
+ for index_name in _ALL_NEW_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_ensure_schema_from_empty_v13_base_to_head(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """An *empty* (no rows) v13 base also lands at head with the indexes."""
+ await _bootstrap_core_at_v13(fresh_db)
+ assert await _row_count(fresh_db, "thought") == 0
+
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == _HEAD_VERSION
+ for index_name in _ALL_NEW_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_ensure_schema_idempotent_at_head(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """Repeated ``ensure_schema`` calls stay at v14 without error."""
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+ assert await _user_version(fresh_db) == _HEAD_VERSION
+
+ for _ in range(3):
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == _HEAD_VERSION
+ for index_name in _ALL_NEW_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+@pytest.mark.parametrize("source_version", [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
+async def test_cascade_from_any_version_to_head(
+ fresh_db: aiosqlite.Connection,
+ source_version: int,
+) -> None:
+ """A DB stamped at any historical core version cascades to head v14.
+
+ Only the ``user_version`` PRAGMA is seeded; ``ensure_schema`` walks
+ the matching elif branch up to head, exactly as an in-place upgrade
+ from an older install would.
+ """
+ bootstrap = SqliteEngravaCore(fresh_db)
+ await bootstrap.ensure_schema()
+ await fresh_db.execute(f"PRAGMA user_version = {source_version}")
+ await fresh_db.commit()
+ assert await _user_version(fresh_db) == source_version
+
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == _HEAD_VERSION
+ for index_name in _ALL_NEW_INDEXES:
+ assert await _index_exists(fresh_db, index_name), index_name
+
+
+async def test_ensure_schema_from_v13_preserves_row_counts(
+ fresh_db: aiosqlite.Connection,
+) -> None:
+ """The additive migration changes no row counts (zero data loss)."""
+ await _bootstrap_core_at_v13(fresh_db)
+ await fresh_db.executemany(
+ """
+ INSERT INTO thought (thought_id, thought_type, essence, content, priority)
+ VALUES (?, 'OBSERVATION', 'e', 'c', 'P2')
+ """,
+ [("t-1",), ("t-2",), ("t-3",)],
+ )
+ await fresh_db.execute(
+ """
+ INSERT INTO edge (edge_id, from_thought_id, to_thought_id, edge_type)
+ VALUES (?, 't-1', 't-2', 'ASSOCIATED'), (?, 't-2', 't-3', 'CONSOLIDATED_FROM')
+ """,
+ ("e-1", "e-2"),
+ )
+ await fresh_db.execute(
+ """
+ INSERT INTO embedding
+ (embedding_id, owner_type, owner_id, model_name, dimension,
+ vector_blob, created_at)
+ VALUES (?, 'THOUGHT', 't-1', 'm', 3, ?, '2026-01-01T00:00:00+00:00')
+ """,
+ ("emb-1", b"\x00\x01\x02"),
+ )
+ await fresh_db.commit()
+ thoughts_before = await _row_count(fresh_db, "thought")
+ edges_before = await _row_count(fresh_db, "edge")
+ embeddings_before = await _row_count(fresh_db, "embedding")
+
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ assert await _user_version(fresh_db) == _HEAD_VERSION
+ assert await _row_count(fresh_db, "thought") == thoughts_before == 3
+ assert await _row_count(fresh_db, "edge") == edges_before == 2
+ assert await _row_count(fresh_db, "embedding") == embeddings_before == 1
+
+
+# ---------------------------------------------------------------------------
+# Fresh-v14 == migrated-v14 structural equivalence
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("table", ["thought", "edge", "embedding"])
+async def test_fresh_equals_migrated_schema(table: str) -> None:
+ """A fresh-bootstrap v14 DB has the same index set as a migrated-v14 DB.
+
+ Compares the set of indexes for ``table`` between a database that ran
+ the fresh-create DDL and one upgraded in place from v13. The migration
+ must leave the on-disk index surface identical to a fresh bootstrap.
+ """
+ fresh = await aiosqlite.connect(":memory:")
+ fresh.row_factory = aiosqlite.Row
+ migrated = await aiosqlite.connect(":memory:")
+ migrated.row_factory = aiosqlite.Row
+ try:
+ await SqliteEngravaCore(fresh).ensure_schema()
+
+ await _bootstrap_core_at_v13(migrated)
+ await SqliteEngravaCore(migrated).ensure_schema()
+
+ assert await _user_version(fresh) == await _user_version(migrated) == _HEAD_VERSION
+ assert await _index_names(fresh, table) == await _index_names(migrated, table)
+ finally:
+ await fresh.close()
+ await migrated.close()
+
+
+# ---------------------------------------------------------------------------
+# PRAGMA tuning at connection init (both paths)
+# ---------------------------------------------------------------------------
+
+
+async def _read_pragma(db: aiosqlite.Connection, pragma: str) -> int:
+ cursor = await db.execute(f"PRAGMA {pragma}")
+ row = await cursor.fetchone()
+ assert row is not None
+ return int(row[0])
+
+
+@pytest.fixture
+def config_path(tmp_path: Path) -> Path:
+ """Write a minimal YAML config file pointing at a temp database file."""
+ db_file = tmp_path / "engrava.db"
+ config_file = tmp_path / "engrava.yaml"
+ config_file.write_text(
+ f"database:\n path: {db_file}\n wal_mode: true\n",
+ encoding="utf-8",
+ )
+ return config_file
+
+
+async def test_from_config_sets_synchronous_normal_and_busy_timeout(
+ config_path: Path,
+) -> None:
+ """``from_config`` tunes ``synchronous=NORMAL`` and ``busy_timeout=5000``."""
+ async with await SqliteEngravaCore.from_config(config_path) as store:
+ # synchronous=NORMAL reports as 1.
+ assert await _read_pragma(store._db, "synchronous") == 1
+ assert await _read_pragma(store._db, "busy_timeout") == 5000
+
+
+async def test_service_manager_sets_synchronous_normal_and_busy_timeout(
+ tmp_path: Path,
+) -> None:
+ """``EngravaManager`` tunes ``synchronous=NORMAL`` and ``busy_timeout=5000``."""
+ from engrava.infrastructure.service_manager import EngravaManager
+
+ async with EngravaManager(data_dir=tmp_path) as manager:
+ store = await manager.get_store("svc-pragma")
+ assert await _read_pragma(store._db, "synchronous") == 1
+ assert await _read_pragma(store._db, "busy_timeout") == 5000
+
+
+# ---------------------------------------------------------------------------
+# Query-plan tests — the planner actually uses the new indexes
+# ---------------------------------------------------------------------------
+
+
+def _scanned_object(plan_rows: list[aiosqlite.Row]) -> set[str]:
+ """Return the exact table tokens a query plan reports a SCAN over.
+
+ The plan detail reads e.g. ``SCAN thought`` or ``SEARCH edge USING
+ INDEX idx_edge_to_thought (...)``. We parse the token immediately
+ after ``SCAN`` so a substring such as ``thought`` does not spuriously
+ match ``thought_fts`` — the exact object name is asserted, not a
+ substring (the planner reports the precise table token here).
+ """
+ scanned: set[str] = set()
+ for row in plan_rows:
+ detail = str(row["detail"])
+ if detail.startswith("SCAN "):
+ scanned.add(detail.split()[1])
+ return scanned
+
+
+def _indexes_used(plan_rows: list[aiosqlite.Row]) -> set[str]:
+ """Return the set of index names the plan reports ``USING INDEX``."""
+ used: set[str] = set()
+ for row in plan_rows:
+ detail = str(row["detail"])
+ marker = "USING INDEX "
+ if marker in detail:
+ after = detail.split(marker, 1)[1]
+ used.add(after.split()[0])
+ return used
+
+
+async def _explain(
+ db: aiosqlite.Connection, sql: str, params: tuple[object, ...]
+) -> list[aiosqlite.Row]:
+ cursor = await db.execute(f"EXPLAIN QUERY PLAN {sql}", params)
+ return list(await cursor.fetchall())
+
+
+@pytest.fixture
+async def populated_store(
+ fresh_db: aiosqlite.Connection,
+) -> SqliteEngravaCore:
+ """A head-version store with enough rows that the planner prefers indexes."""
+ store = SqliteEngravaCore(fresh_db)
+ await store.ensure_schema()
+
+ row_count = 200
+ await fresh_db.executemany(
+ """
+ INSERT INTO thought
+ (thought_id, thought_type, essence, content, priority, updated_cycle)
+ VALUES (?, ?, 'e', 'c', 'P2', ?)
+ """,
+ [(f"t-{i}", "REFLECTION" if i % 2 else "OBSERVATION", i) for i in range(row_count)],
+ )
+ await fresh_db.executemany(
+ """
+ INSERT INTO edge (edge_id, from_thought_id, to_thought_id, edge_type)
+ VALUES (?, ?, ?, 'ASSOCIATED')
+ """,
+ [(f"e-{i}", f"t-{i}", f"t-{(i + 1) % row_count}") for i in range(row_count)],
+ )
+ await fresh_db.executemany(
+ """
+ INSERT INTO embedding
+ (embedding_id, owner_type, owner_id, model_name, dimension,
+ vector_blob, created_at)
+ VALUES (?, 'THOUGHT', ?, 'm', 3, ?, '2026-01-01T00:00:00+00:00')
+ """,
+ [(f"emb-{i}", f"t-{i}", b"\x00\x01\x02") for i in range(row_count)],
+ )
+ await fresh_db.commit()
+ # ANALYZE so the planner has table/index statistics to choose from.
+ await fresh_db.execute("ANALYZE")
+ await fresh_db.commit()
+ return store
+
+
+async def test_get_edges_query_uses_to_thought_index(
+ populated_store: SqliteEngravaCore,
+) -> None:
+ """``WHERE to_thought_id = ?`` searches via ``idx_edge_to_thought``, not a scan."""
+ db = populated_store._db
+ plan = await _explain(db, "SELECT * FROM edge WHERE to_thought_id = ?", ("t-5",))
+
+ assert "edge" not in _scanned_object(plan)
+ assert "idx_edge_to_thought" in _indexes_used(plan)
+
+
+async def test_get_embedding_query_uses_owner_index(
+ populated_store: SqliteEngravaCore,
+) -> None:
+ """``WHERE owner_id = ?`` searches via ``idx_embedding_owner``, not a scan."""
+ db = populated_store._db
+ plan = await _explain(
+ db,
+ "SELECT * FROM embedding WHERE owner_type = 'THOUGHT' AND owner_id = ?",
+ ("t-5",),
+ )
+
+ assert "embedding" not in _scanned_object(plan)
+ assert "idx_embedding_owner" in _indexes_used(plan)
+
+
+async def test_list_thoughts_order_uses_updated_cycle_index(
+ populated_store: SqliteEngravaCore,
+) -> None:
+ """``ORDER BY updated_cycle`` is satisfied by ``idx_thought_updated_cycle``.
+
+ With the index present the planner can read rows in ``updated_cycle``
+ order directly instead of doing a full scan plus a sort step, so the
+ plan uses the index and reports no ``USE TEMP B-TREE FOR ORDER BY``.
+ """
+ db = populated_store._db
+ plan = await _explain(
+ db,
+ "SELECT * FROM thought ORDER BY updated_cycle DESC LIMIT ? OFFSET ?",
+ (10, 0),
+ )
+
+ assert "idx_thought_updated_cycle" in _indexes_used(plan)
+ # The index ordering removes the need for an explicit sort pass.
+ details = [str(row["detail"]) for row in plan]
+ assert not any("USE TEMP B-TREE FOR ORDER BY" in d for d in details)
+
+
+async def test_thought_type_filter_uses_type_index(
+ populated_store: SqliteEngravaCore,
+) -> None:
+ """``WHERE thought_type = ?`` searches via ``idx_thought_type``, not a scan.
+
+ The exact table token after SCAN/SEARCH is asserted so the FTS shadow
+ table ``thought_fts`` is never mistaken for ``thought``.
+ """
+ db = populated_store._db
+ plan = await _explain(
+ db,
+ "SELECT * FROM thought WHERE thought_type = ?",
+ ("REFLECTION",),
+ )
+
+ assert "thought" not in _scanned_object(plan)
+ assert "idx_thought_type" in _indexes_used(plan)
diff --git a/tests/test_metadata_migration.py b/tests/test_metadata_migration.py
index 71b928f..b5c21a6 100644
--- a/tests/test_metadata_migration.py
+++ b/tests/test_metadata_migration.py
@@ -171,7 +171,7 @@ async def test_ensure_schema_fresh_db_lands_at_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert "metadata_json" in await _table_columns(fresh_db, "thought")
@@ -185,7 +185,7 @@ async def test_ensure_schema_from_v10_to_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert "metadata_json" in await _table_columns(fresh_db, "thought")
@@ -195,12 +195,12 @@ async def test_ensure_schema_idempotent_at_head(
"""Repeat calls after reaching head stay at the head version without errors."""
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
for _ in range(3):
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert "metadata_json" in await _table_columns(fresh_db, "thought")
@@ -286,5 +286,5 @@ async def test_cascade_from_any_version_to_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert "metadata_json" in await _table_columns(fresh_db, "thought")
diff --git a/tests/test_mind_store_core.py b/tests/test_mind_store_core.py
index 8ad32b6..fbb760b 100644
--- a/tests/test_mind_store_core.py
+++ b/tests/test_mind_store_core.py
@@ -582,7 +582,7 @@ async def test_user_version_set_to_current(self, db: aiosqlite.Connection) -> No
cursor = await db.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert int(row[0]) == 13
+ assert int(row[0]) == 14
async def test_search_fts_lazy_probes_index(self, db: aiosqlite.Connection) -> None:
"""search_fts should work without an explicit _probe_fts call."""
diff --git a/tests/test_referential_integrity.py b/tests/test_referential_integrity.py
index 465c33d..58075e9 100644
--- a/tests/test_referential_integrity.py
+++ b/tests/test_referential_integrity.py
@@ -152,7 +152,7 @@ async def test_user_version_is_head(self, store: SqliteEngravaCore) -> None:
cursor = await store._db.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert row[0] == 13
+ assert row[0] == 14
class TestCreateEdgeRejectsOrphans:
@@ -429,7 +429,7 @@ async def test_clean_v11_migrates_with_zero_row_loss(
await core.ensure_schema()
version_row = await (await db.execute("PRAGMA user_version")).fetchone()
assert version_row is not None
- assert version_row[0] == 13
+ assert version_row[0] == 14
for table, expected in (("edge", 1), ("embedding", 1), ("action", 1), ("thought", 2)):
row = await (
await db.execute(f"SELECT COUNT(*) FROM {table}") # noqa: S608
@@ -508,7 +508,7 @@ async def test_migration_is_idempotent(
await core.ensure_schema() # second pass — must converge without error
version_row = await (await db.execute("PRAGMA user_version")).fetchone()
assert version_row is not None
- assert version_row[0] == 13
+ assert version_row[0] == 14
# FK declarations must still be exactly 2 on edge, not duplicated.
rows = list(await (await db.execute("PRAGMA foreign_key_list(edge)")).fetchall())
assert len(rows) == 2
@@ -662,7 +662,7 @@ async def test_full_ladder_path_from_oldest_supported_to_v12(
await core.ensure_schema()
version_row = await (await db.execute("PRAGMA user_version")).fetchone()
assert version_row is not None
- assert version_row[0] == 13
+ assert version_row[0] == 14
for table, expected in (("edge", 1), ("embedding", 1), ("action", 1), ("thought", 2)):
row = await (
await db.execute(f"SELECT COUNT(*) FROM {table}") # noqa: S608
diff --git a/tests/test_search_hybrid_graph.py b/tests/test_search_hybrid_graph.py
index 93df2dc..ee2d96e 100644
--- a/tests/test_search_hybrid_graph.py
+++ b/tests/test_search_hybrid_graph.py
@@ -603,4 +603,4 @@ async def test_schema_migration_v7_to_head(self, tmp_path: Path) -> None:
await conn.close()
assert row is not None, "idx_edge_type_from missing after v7->head migration"
- assert int(version_row[0]) == 13
+ assert int(version_row[0]) == 14
diff --git a/tests/test_service_isolation.py b/tests/test_service_isolation.py
index 7f33641..867d172 100644
--- a/tests/test_service_isolation.py
+++ b/tests/test_service_isolation.py
@@ -316,7 +316,7 @@ async def test_per_service_schema_independent(self, tmp_path: Path) -> None:
row_a = await cursor_a.fetchone()
cursor_b = await store_b._db.execute("PRAGMA user_version")
row_b = await cursor_b.fetchone()
- assert row_a[0] == row_b[0] == 13
+ assert row_a[0] == row_b[0] == 14
async def test_per_service_fts_independent(self, tmp_path: Path) -> None:
data_dir = tmp_path / "services"
@@ -483,7 +483,7 @@ def test_snapshot_includes_metadata_header(
lines = out.read_text(encoding="utf-8").strip().splitlines()
header = json.loads(lines[0])
assert header["_type"] == "metadata"
- assert header["schema_version"] == 13
+ assert header["schema_version"] == 14
assert header["embedding_model_name"] == "all-MiniLM-L12-v2"
assert header["embedding_dimension"] == 16
diff --git a/tests/test_ttl_auto_expiry.py b/tests/test_ttl_auto_expiry.py
index c369022..6b285c0 100644
--- a/tests/test_ttl_auto_expiry.py
+++ b/tests/test_ttl_auto_expiry.py
@@ -271,7 +271,7 @@ async def test_fresh_schema_version_current(self, db: aiosqlite.Connection) -> N
cursor = await db.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert row[0] == 13
+ assert row[0] == 14
async def test_expires_at_column_exists(self, db: aiosqlite.Connection) -> None:
cursor = await db.execute("PRAGMA table_info(thought)")
@@ -303,7 +303,7 @@ async def test_migration_from_v6_idempotent(self) -> None:
cursor = await conn.execute("PRAGMA user_version")
row = await cursor.fetchone()
assert row is not None
- assert row[0] == 13
+ assert row[0] == 14
await conn.close()
diff --git a/tests/test_valid_time_migration.py b/tests/test_valid_time_migration.py
index b97eac2..2c09f3f 100644
--- a/tests/test_valid_time_migration.py
+++ b/tests/test_valid_time_migration.py
@@ -396,7 +396,7 @@ async def test_ensure_schema_fresh_db_lands_at_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "thought")
assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "edge")
for index_name in _ALL_VALID_INDEXES:
@@ -413,7 +413,7 @@ async def test_ensure_schema_from_v12_to_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "thought")
assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "edge")
@@ -424,12 +424,12 @@ async def test_ensure_schema_idempotent_at_head(
"""Repeated ``ensure_schema`` calls stay at v13 without error."""
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
for _ in range(3):
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
async def test_ensure_schema_from_v12_backfills_and_preserves_counts(
@@ -463,7 +463,7 @@ async def test_ensure_schema_from_v12_backfills_and_preserves_counts(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert await _row_count(fresh_db, "thought") == 2
assert await _row_count(fresh_db, "edge") == 1
@@ -489,7 +489,7 @@ async def test_cascade_from_any_version_to_head(
fresh_db: aiosqlite.Connection,
source_version: int,
) -> None:
- """A DB stamped at any historical core version cascades to head v13.
+ """A DB stamped at any historical core version cascades to head v14.
Only the ``user_version`` PRAGMA is seeded; ``ensure_schema`` walks
the matching elif branch up to head, exactly as an in-place upgrade
@@ -504,19 +504,19 @@ async def test_cascade_from_any_version_to_head(
store = SqliteEngravaCore(fresh_db)
await store.ensure_schema()
- assert await _user_version(fresh_db) == 13
+ assert await _user_version(fresh_db) == 14
assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "thought")
assert {"valid_from", "valid_until"} <= await _table_columns(fresh_db, "edge")
# ---------------------------------------------------------------------------
-# Fresh-v13 == migrated-v13 structural equivalence
+# Fresh-head == migrated-head structural equivalence
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("table", ["thought", "edge"])
async def test_fresh_equals_migrated_schema(table: str) -> None:
- """A fresh-bootstrap v13 DB is structurally identical to a migrated-v13 DB.
+ """A fresh-bootstrap head DB is structurally identical to a migrated head DB.
Compares ``PRAGMA table_info`` (column name + declared type) and the
set of indexes for ``table`` between a database that ran the
@@ -532,7 +532,7 @@ async def test_fresh_equals_migrated_schema(table: str) -> None:
await _bootstrap_core_at_v12(migrated)
await SqliteEngravaCore(migrated).ensure_schema()
- assert await _user_version(fresh) == await _user_version(migrated) == 13
+ assert await _user_version(fresh) == await _user_version(migrated) == 14
assert await _table_info(fresh, table) == await _table_info(migrated, table)
assert await _index_names(fresh, table) == await _index_names(migrated, table)
finally:
From c3cf339d462e20819b0d9dda43b66c798a9ca847 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sat, 13 Jun 2026 14:15:46 +0200
Subject: [PATCH 27/40] test: isolate subprocess examples (offline,
single-thread, no stdin)
---
tests/docs/test_docs_examples_execute.py | 32 +++++++++++++++++
tests/examples/test_quickstart_runs.py | 44 +++++++++++++++++++++++-
2 files changed, 75 insertions(+), 1 deletion(-)
diff --git a/tests/docs/test_docs_examples_execute.py b/tests/docs/test_docs_examples_execute.py
index df71bb5..9e010f1 100644
--- a/tests/docs/test_docs_examples_execute.py
+++ b/tests/docs/test_docs_examples_execute.py
@@ -55,6 +55,7 @@
from __future__ import annotations
+import os
import subprocess
import sys
from typing import TYPE_CHECKING
@@ -69,6 +70,33 @@
# Bound for every documentation subprocess so a hung example cannot wedge CI.
_RUN_TIMEOUT_S = 120
+
+def _isolated_child_env() -> dict[str, str]:
+ """Return a deterministic, offline, single-threaded environment for a snippet.
+
+ Documentation snippets are run in a fresh subprocess. Forcing the offline
+ flags makes the run network-independent regardless of the caller's ambient
+ environment, and pinning the native thread pools keeps a snippet that
+ happens to import a heavy dependency from contending for native resources
+ with the rest of the suite.
+
+ Returns:
+ A copy of ``os.environ`` with the deterministic overrides applied.
+
+ """
+ env = dict(os.environ)
+ env.update(
+ {
+ "HF_HUB_OFFLINE": "1",
+ "TRANSFORMERS_OFFLINE": "1",
+ "OMP_NUM_THREADS": "1",
+ "MKL_NUM_THREADS": "1",
+ "TOKENIZERS_PARALLELISM": "false",
+ }
+ )
+ return env
+
+
# Self-contained, executable blocks, identified by (markdown path, an anchor
# substring that must appear in the block body). The anchor makes the binding
# robust to small line-number drift and documents *which* block is meant.
@@ -146,6 +174,10 @@ def _run_script(body: str, tmp_path: Path) -> subprocess.CompletedProcess[str]:
capture_output=True,
text=True,
timeout=_RUN_TIMEOUT_S,
+ # The snippets read nothing from stdin; closing it removes a stdin-
+ # inheritance wedge when the parent runs under pytest's output capture.
+ stdin=subprocess.DEVNULL,
+ env=_isolated_child_env(),
)
diff --git a/tests/examples/test_quickstart_runs.py b/tests/examples/test_quickstart_runs.py
index fbbe6c5..481b296 100644
--- a/tests/examples/test_quickstart_runs.py
+++ b/tests/examples/test_quickstart_runs.py
@@ -29,6 +29,43 @@
)
+def _isolated_child_env(**overrides: str) -> dict[str, str]:
+ """Return a deterministic, offline, single-threaded environment for a child.
+
+ ``quickstart.py`` loads ``sentence-transformers``/``torch`` in a fresh
+ subprocess. Two hazards make that spawn flaky if the child simply inherits
+ the ambient environment:
+
+ * **Network.** Without the offline flags a cold cache lets the encoder
+ reach for the HuggingFace Hub, so the test would block on a socket and
+ depend on the *caller* having exported the offline vars. Forcing them
+ here makes the suite network-independent by construction.
+ * **Native thread pools.** torch/OpenMP/MKL each spin up worker pools sized
+ to the host CPU. Pinning them to a single thread keeps a heavy model load
+ from contending for native resources with the rest of the suite — the
+ walkthrough does a one-shot encode where extra threads buy nothing.
+
+ Args:
+ **overrides: Extra variables to set on top of the isolated defaults.
+
+ Returns:
+ A copy of ``os.environ`` with the deterministic overrides applied.
+
+ """
+ env = dict(os.environ)
+ env.update(
+ {
+ "HF_HUB_OFFLINE": "1",
+ "TRANSFORMERS_OFFLINE": "1",
+ "OMP_NUM_THREADS": "1",
+ "MKL_NUM_THREADS": "1",
+ "TOKENIZERS_PARALLELISM": "false",
+ }
+ )
+ env.update(overrides)
+ return env
+
+
def _run_example(script_name: str, timeout_s: float = 120.0) -> subprocess.CompletedProcess[str]:
return subprocess.run( # noqa: S603 — trusted invocation of the shipped example script
[sys.executable, str(EXAMPLES_DIR / script_name)],
@@ -36,6 +73,10 @@ def _run_example(script_name: str, timeout_s: float = 120.0) -> subprocess.Compl
capture_output=True,
text=True,
timeout=timeout_s,
+ # The example reads nothing from stdin; closing it removes a stdin-
+ # inheritance wedge when the parent runs under pytest's output capture.
+ stdin=subprocess.DEVNULL,
+ env=_isolated_child_env(),
)
@@ -132,7 +173,7 @@ def test_quickstart_exits_with_actionable_message_without_extras(tmp_path: Path)
"_orig = importlib.util.find_spec\n"
"importlib.util.find_spec = _patched_find_spec\n",
)
- env = {**os.environ, "PYTHONPATH": str(tmp_path)}
+ env = _isolated_child_env(PYTHONPATH=str(tmp_path))
result = subprocess.run( # noqa: S603 — trusted invocation of the shipped example script
[sys.executable, str(EXAMPLES_DIR / "quickstart.py")],
check=False,
@@ -140,6 +181,7 @@ def test_quickstart_exits_with_actionable_message_without_extras(tmp_path: Path)
text=True,
env=env,
timeout=30,
+ stdin=subprocess.DEVNULL,
)
assert result.returncode == 2, (
f"expected exit code 2 when the extra is hidden; got {result.returncode}; "
From 59dcf5d6f0f7fb806bff95b6f57ac473563bec81 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sun, 14 Jun 2026 15:47:19 +0200
Subject: [PATCH 28/40] docs: drop reference to non-existent purity-check
script in CONTRIBUTING
---
CONTRIBUTING.md | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 42dfb50..a8e94a4 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -97,13 +97,8 @@ Public Engrava artifacts must stay free of internal tier references.
- Avoid internal branded example names in comments, docstrings, snippets, and tests.
- Prefer neutral names like `ThirdPartyHooks`, `CustomPlugin`, or `ConsumerApp`.
-Run the purity check before opening a PR:
-
-```bash
-python scripts/assert_purity.py
-```
-
-CI also runs the same check and will fail if a forbidden reference leaks into the public tree.
+Before opening a PR, re-read your diff against the three rules above and remove any internal
+reference. Maintainers verify this invariant during review.
## Code Style
From 6794436cf4eec8e16e01a67091d00435b29d1ca5 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 15 Jun 2026 00:07:12 +0200
Subject: [PATCH 29/40] fix(mcp): map write-tool errors and complete the 0.4.0
documentation
Map the duplicate link_thoughts edge, invalid field value, and illegal
lifecycle transition in the MCP _tool_errors handler to curated, actionable
messages so internal SQLite schema names, Pydantic model internals, and the
status-type name never reach the client; an unrecognised integrity error is
re-raised unchanged. Add client-level tests asserting no internal symbol leaks.
Also complete the 0.4.0 documentation pass: announce the bi-temporal model,
the MCP server, and execute_mindql in the changelog and API reference; document
the FTS keyword query model, the MindQL quoting/fullmatch rules and FIND row
cap, the SQLite tuning and hot-path/valid-time indexes, the OpenAI-compatible
embedding retry and max_seq_length behaviour, and the user_version 12->14
upgrade; correct the concurrency busy_timeout/synchronous notes; and add
valid-time glossary and concepts coverage.
---
CHANGELOG.md | 38 ++++++++++
docs/api-reference.md | 26 +++++++
docs/benchmarks.md | 10 +--
docs/concepts.md | 10 +++
docs/concurrency.md | 22 ++++--
docs/dreaming.md | 11 ++-
docs/glossary.md | 19 +++++
docs/guides/embeddings.md | 34 ++++++++-
docs/guides/mcp.md | 31 +++++---
docs/mindql.md | 27 ++++++-
docs/performance.md | 31 ++++++++
docs/search.md | 42 ++++++++++-
docs/troubleshooting.md | 59 ++++++++++++++-
docs/upgrade.md | 31 ++++++--
src/engrava/mcp/server.py | 65 +++++++++++++++--
tests/mcp/test_errors.py | 148 ++++++++++++++++++++++++++++++++++++++
16 files changed, 560 insertions(+), 44 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebc1c3b..17f32ee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,44 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/
## [Unreleased]
+### Added
+
+- **Bi-temporal model: track when a fact is *true*, not just when you stored
+ it.** `ThoughtRecord` and `EdgeRecord` gain two optional, nullable ISO-8601
+ fields — `valid_from` and `valid_until` — describing the half-open real-world
+ interval during which a fact holds (the upper bound is exclusive; a `None`
+ bound is treated as ±∞, so facts you never annotate keep matching every query).
+ Four opt-in MindQL `WHERE` predicates query this *valid time* on the `thoughts`
+ and `edges` tables: `valid_now`, `valid_at `, `valid_within `
+ (interval overlap), and `valid_between ` (fully contained — the one
+ predicate that excludes open-bounded rows). Two new store primitives,
+ `invalidate_thought(id, valid_until)` and `invalidate_edge(id, valid_until)`,
+ retire a fact by closing its interval instead of deleting it — deterministic,
+ idempotent, non-cascading, and fully auditable (the row stays on file and a
+ point-in-time query before the cut-off still finds it). Reflections built by
+ dreaming inherit their members' valid-time extent (open-on-either-side is
+ contagious). A query that uses no temporal predicate behaves exactly as before.
+ See the [Bi-temporal Model](docs/bitemporal.md) guide.
+
+- **MCP server: connect any MCP client to an engrava store.** A new optional
+ `mcp` extra (`pip install "engrava[mcp]"`) ships a Model Context Protocol
+ server that exposes a store over stdio to Claude Desktop, Claude Code, Cursor,
+ Windsurf, VS Code, and other MCP clients. Two entry points, `engrava-mcp` and
+ `python -m engrava.mcp`, build the same server. It registers eleven tools (six
+ read, five write), two static `engrava://` resources plus an
+ `engrava://thought/{thought_id}` resource template, and three prompt templates,
+ resolving its store from `ENGRAVA_MCP_CONFIG` (an `engrava.yaml`) or
+ `ENGRAVA_DB_PATH` (a bare database file). A read-only mode
+ (`ENGRAVA_MCP_READ_ONLY`) drops the five write tools entirely, leaving a
+ retrieval-only surface. The server is a pure API consumer — plain
+ `pip install engrava` is unaffected and stays dependency-light. See the
+ [MCP server](docs/guides/mcp.md) guide.
+
+- **`execute_mindql` on the store.** `SqliteEngravaCore.execute_mindql(query, *,
+ extensions=None)` runs a parsed `MindQLQuery` directly against the store's own
+ connection, returning a `MindQLResult` — a convenience over constructing a
+ `MindQLExecutor` by hand. See the [API Reference](docs/api-reference.md#mindql).
+
### Performance
- **Hot-path indexes and tuned SQLite PRAGMAs make the common reads faster.**
diff --git a/docs/api-reference.md b/docs/api-reference.md
index 3c6cc99..c482b53 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -65,6 +65,7 @@ keyword arguments and does **not** return a UUID string.
| `await list_thoughts(...)` | `list[ThoughtRecord]` | List with filters (keyword-only) |
| `await count_thoughts(...)` | `int` | Count with filters (keyword-only) |
| `await delete_thought(thought_id)` | `bool` | Hard delete; `True` if a row was removed |
+| `await invalidate_thought(thought_id, valid_until)` | `ThoughtRecord` | Close the thought's *valid-time* interval at the given ISO-8601 instant — deterministic, idempotent, non-cascading, and **not a delete** (the row stays on file and remains retrievable for instants before `valid_until`). Raises `ThoughtNotFoundError` if missing. See [Bi-temporal Model](bitemporal.md#invalidate-vs-delete) |
| `await record_access(thought_id)` | `None` | Mark a thought as accessed — bumps `access_count` and sets `last_accessed_at`; raises `ThoughtNotFoundError` if missing. Drives the access-frequency dreaming signal. |
```python
@@ -119,6 +120,7 @@ not exist.
| `await list_edges(*, edge_type=None, source=None, limit=5000)` | `list[EdgeRecord]` | List edges with optional filters |
| `await update_edge(edge_id, **changes)` | `EdgeRecord` | Update edge fields |
| `await delete_edge(edge_id)` | `bool` | Hard delete; `True` if a row was removed |
+| `await invalidate_edge(edge_id, valid_until)` | `EdgeRecord` | Close the edge's *valid-time* interval at the given ISO-8601 instant — deterministic, idempotent, and **not a delete** (the row stays on file). Invalidating a thought does **not** cascade to its edges; invalidate them separately. See [Bi-temporal Model](bitemporal.md#invalidate-vs-delete) |
```python
import uuid
@@ -270,6 +272,8 @@ create modified copies.
| `created_at` | `str \| None` | ISO-8601 datetime when persisted |
| `updated_at` | `str \| None` | ISO-8601 datetime of last mutation |
| `expires_at` | `str \| None` | ISO-8601 datetime when the thought expires (TTL) |
+| `valid_from` | `str \| None` | ISO-8601 start of the fact's real-world *valid time* (open lower bound when `None`); see [Bi-temporal Model](bitemporal.md) |
+| `valid_until` | `str \| None` | ISO-8601 end of *valid time*, **exclusive** (open upper bound when `None`); see [Bi-temporal Model](bitemporal.md) |
| `metadata` | `dict[str, MetadataValue]` | Caller-supplied structured attributes (default `{}`) |
#### `metadata` field
@@ -331,6 +335,8 @@ extension is recommended for filtering queries (`json_extract(metadata_json, '$.
| `created_cycle` | `int` | Creation cycle |
| `source` | `KnowledgeSource` | Provenance (default `EXPERIENCE`) |
| `decay_multiplier` | `float` | Decay rate multiplier (default `1.0`) |
+| `valid_from` | `str \| None` | ISO-8601 start of the edge's real-world *valid time* (open lower bound when `None`); see [Bi-temporal Model](bitemporal.md) |
+| `valid_until` | `str \| None` | ISO-8601 end of *valid time*, **exclusive** (open upper bound when `None`); see [Bi-temporal Model](bitemporal.md) |
### `EmbeddingRecord`
@@ -467,6 +473,26 @@ class EmbeddingProviderProtocol(Protocol):
See [MindQL](mindql.md) for the query language reference.
+### `execute_mindql` (on the store)
+
+`SqliteEngravaCore` exposes a convenience that runs a MindQL query directly
+against the store's own connection, so you don't have to construct a
+`MindQLExecutor` yourself. Like the executor, it takes a **parsed**
+`MindQLQuery` — `parse()` the string first.
+
+| Method | Returns | Description |
+|--------|---------|-------------|
+| `await execute_mindql(query, *, extensions=None)` | `MindQLResult` | Run a parsed `MindQLQuery` on the store's connection. `extensions` is an optional `dict[str, MindQLExtension]` registering extension commands. |
+
+```python
+from engrava import parse
+
+result = await store.execute_mindql(
+ parse("FIND thoughts WHERE lifecycle_status = 'ACTIVE' LIMIT 10")
+)
+print(result.rows)
+```
+
### `MindQLExecutor`
`MindQLExecutor` runs against an open `aiosqlite.Connection` (the same
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
index dc256b3..75c7218 100644
--- a/docs/benchmarks.md
+++ b/docs/benchmarks.md
@@ -24,12 +24,12 @@ into REFLECTIONs. This benchmark measures three properties:
2. **Direct retrieval neutrality (AC-9b, <= 0.05 in v0.3.0):** does
dreaming preserve FTS/vector retrieval performance on questions with
direct lexical answers? *Measures dreaming does NOT degrade baseline
- competence.* (v0.4.0 tightens the ceiling.)
+ competence.* (A future release aims to tighten the ceiling.)
3. **Sanity tolerance (AC-8, <= 0.05 in v0.3.0):** how much does dreaming
influence retrieval on scenarios where consolidation is irrelevant?
- *Measures absence of pathological behavior.* (v0.4.0 tightens the
- ceiling.)
+ *Measures absence of pathological behavior.* (A future release aims to
+ tighten the ceiling.)
## Default invocation — Binding acceptance measurements
@@ -98,7 +98,7 @@ scenarios show ~10pp regression in the dreaming-ON arm:
This is **expected v0.3.0 behavior**. REFLECTIONs participate in retrieval
at parity (`reflection_boost=1.0`) and occasionally displace correct
OBSERVATIONs from top-5 results for these specific scenario types.
-Ranking refinement landing in v0.4.0 will tighten this behavior — see the
+A planned ranking refinement will tighten this behavior — see the
"Roadmap" section below.
`abstract_theme_recall` showing 0.000 OFF and 0.000 ON is also expected:
@@ -116,7 +116,7 @@ mechanism and is verified at the data layer.
## Roadmap
-v0.4.0 will land a REFLECTION retrieval refinement that:
+A future release will land a REFLECTION retrieval refinement that:
- Tightens **AC-9b direct neutrality** back to <= 0.02
- Tightens **AC-8 sanity tolerance** back to <= 0.02
diff --git a/docs/concepts.md b/docs/concepts.md
index eecb4b5..6126d0f 100644
--- a/docs/concepts.md
+++ b/docs/concepts.md
@@ -47,6 +47,16 @@ Every thought carries **two** texts, and the split is deliberate:
> tight `essence`, not the whole `content`. Putting the same long text in both
> defeats the purpose. Think *headline* (`essence`) vs *article* (`content`).
+### `valid_from` / `valid_until` (optional valid time)
+
+A thought also carries two optional, nullable timestamps — `valid_from` and
+`valid_until` — that record **when the fact is true in the world**, a separate
+axis from when Engrava stored it (`created_at`) and from the [cycle](#cycle-the-agent-clock).
+Both default to `None` (an open interval = "valid for all time"), so you can
+ignore them entirely until you need point-in-time history. The same two fields
+exist on an [edge](#edge). See [The Bi-temporal Model](bitemporal.md) for the full
+semantics and the query predicates.
+
### Thought types
`ThoughtType` is a closed set — choose the one that fits what you're storing:
diff --git a/docs/concurrency.md b/docs/concurrency.md
index ca2df89..89b8e27 100644
--- a/docs/concurrency.md
+++ b/docs/concurrency.md
@@ -39,11 +39,23 @@ that loop, share it freely. (See
When a connection can't immediately get the lock it needs (another writer holds
it), SQLite waits up to the **busy timeout** before giving up with
-`database is locked`. Engrava inherits Python's `sqlite3` default of **5000 ms
-(5 s)** — it does not override it.
-
-For workloads with more write contention you can raise it on your own connection
-before handing it to the store, or after `from_config` via the store's
+`database is locked`.
+
+How the timeout is set depends on how the store opened its connection:
+
+- **`from_config` and `EngravaManager`** (engrava owns the connection) open it
+ with `PRAGMA busy_timeout=5000` **explicitly** — a second connection waits up to
+ **5 s** for a lock instead of failing immediately. These paths also set
+ `PRAGMA synchronous=NORMAL`, the documented-safe companion to WAL (durable
+ across an application crash; only the most recent transactions are at risk on an
+ OS crash or power loss).
+- **The manual `SqliteEngravaCore(conn)` constructor** (you own the connection)
+ changes none of these pragmas. The connection keeps whatever it was opened with;
+ Python's `sqlite3`/`aiosqlite` default `busy_timeout` already happens to be
+ **5000 ms**, and the default `synchronous` is `FULL`.
+
+For workloads with more write contention you can raise the timeout on your own
+connection before handing it to the store, or after `from_config` via the store's
connection:
```python
diff --git a/docs/dreaming.md b/docs/dreaming.md
index fdefd18..7c75bce 100644
--- a/docs/dreaming.md
+++ b/docs/dreaming.md
@@ -267,12 +267,9 @@ of `DreamingGates`, `EdgeCreationConfig`, and `SearchConfig` fields.
## Reflections (meta-consolidation)
-> **v0.4.0**
-
-Starting from v0.4.0, `run_consolidation()` runs a **third phase** after
-promotion and edge creation: it clusters semantically related thoughts
-and creates **`ThoughtType.REFLECTION`** meta-thoughts that aggregate
-each cluster.
+`run_consolidation()` runs a **third phase** after promotion and edge
+creation: it clusters semantically related thoughts and creates
+**`ThoughtType.REFLECTION`** meta-thoughts that aggregate each cluster.
### What is a REFLECTION?
@@ -341,7 +338,7 @@ extensions:
enable_reflections: true # set to false to skip phase 3 entirely
```
-### `ConsolidationResult` fields (v0.4.0)
+### `ConsolidationResult` fields
```python
result = await ext.run_consolidation(store, current_cycle=42)
diff --git a/docs/glossary.md b/docs/glossary.md
index 61c3568..bad3d33 100644
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -76,6 +76,25 @@ and dreaming's age gates. Leaving it at `None` makes recency inactive; freezing
at a constant makes recency useless and stalls dreaming. See
[Core Concepts → Cycle](concepts.md#cycle-the-agent-clock).
+### Valid time
+
+The second of Engrava's two time axes: **when a fact is true in the world**, as
+opposed to **transaction time** (when Engrava recorded it — `created_at` /
+`updated_at`). Valid time is carried by two optional, nullable ISO-8601 fields,
+`valid_from` and `valid_until`, on both `ThoughtRecord` and `EdgeRecord`. They
+describe a half-open interval (`valid_until` is exclusive); a `None` bound means
+*open* (±∞), so an un-annotated record is "valid for all time". Queried through
+the `valid_now` / `valid_at` / `valid_within` / `valid_between` MindQL predicates.
+See [The Bi-temporal Model](bitemporal.md).
+
+### Transaction time
+
+When Engrava *recorded or last changed* a fact — the `created_at` / `updated_at`
+bookkeeping timestamps it sets automatically. It never moves backwards and you do
+not manage it; contrast with [valid time](#valid-time) (the real-world axis you
+set) and the [cycle](#cycle) (the logical agent clock). See
+[The Bi-temporal Model](bitemporal.md).
+
### Signal
One scoring component that [hybrid search](#hybrid-search) computes for a
diff --git a/docs/guides/embeddings.md b/docs/guides/embeddings.md
index 6899d8b..98f2f4b 100644
--- a/docs/guides/embeddings.md
+++ b/docs/guides/embeddings.md
@@ -13,7 +13,10 @@ works.
## Two things a provider gives you
1. **Ingest-time embedding** — with `auto_embed=True`, every thought is embedded
- on write, so it becomes findable by meaning.
+ on write, so it becomes findable by meaning. When the `essence` is just the
+ leading prefix of `content` (a common convention, e.g. `essence = content[:200]`),
+ only `content` is embedded — the redundant prefix is dropped so it can't dominate
+ the vector. A genuinely distinct `essence` is still embedded alongside `content`.
2. **Query-time embedding** — at search time the query must also be a vector.
`search_hybrid` takes the query *text* and, when a provider is configured,
embeds it **for you** (unless you pass an explicit `query_vector`).
@@ -84,6 +87,13 @@ provider = SentenceTransformerProvider(
No API key, no network after the first model download. Best default for
self-hosting.
+On load, this provider raises the model's `max_seq_length` to the architecture's
+true maximum when the shipped checkpoint reports a conservatively-low value — the
+bundled `all-MiniLM-L12-v2`, for instance, ships `128` while its backbone supports
+`512`. Without this, the tail of any longer thought would be silently truncated
+before encoding. The value is read from the model, not hard-coded, so a model that
+already reports its real maximum is left unchanged.
+
### `OpenAICompatibleProvider` — OpenAI or any OpenAI-compatible API
Calls an OpenAI-style `/embeddings` endpoint. Requires the `embeddings-openai`
@@ -107,6 +117,26 @@ provider = OpenAICompatibleProvider(
`api_key` defaults to the `OPENAI_API_KEY` environment variable when omitted.
Set `base_url` to target a compatible gateway (Azure OpenAI, a local proxy, etc.).
+**Automatic retry on transient failures.** This provider retries a request with
+bounded exponential backoff when the endpoint reports a transient failure — a read
+timeout or network blip, or a transient HTTP status (`408`, `409`, `425`, `429`,
+`500`, `502`, `503`, `504`) — so a short outage is absorbed instead of failing your
+ingest. Non-transient statuses (`400`, `401`, `403`, `404`) surface immediately
+with no retry, and a transient failure that persists across every attempt is still
+raised (the call never loops forever). Two keyword-only knobs tune it:
+`max_attempts` (default `3`) and `base_retry_delay_s` (default `1.0`); the defaults
+leave the success path at a single request, so existing callers see no change. This
+applies to `OpenAICompatibleProvider` only — `OllamaProvider` and
+`HuggingFaceProvider` do not retry.
+
+```python
+provider = OpenAICompatibleProvider(
+ model_name="text-embedding-3-small",
+ max_attempts=5, # up to 5 tries on transient failures
+ base_retry_delay_s=0.5, # exponential backoff starting at 0.5s
+)
+```
+
### `OllamaProvider` — local Ollama server
Calls a running [Ollama](https://ollama.com) instance. Requires the
@@ -203,7 +233,7 @@ result = await store.search_similar(query_vec, top_k=5)
`engrava restore --re-embed`).
- **Dimension follows the model.** Local/HF providers infer it from the model;
`CallbackProvider` requires you to declare `dimension` to match what your
- callback returns. For the `sqlite-vec` ANN backend, set
+ callback returns. For the `sqlite-vec` vector backend, set
`extensions.vector.dimension` in config to match.
## Config-driven equivalents
diff --git a/docs/guides/mcp.md b/docs/guides/mcp.md
index b251d67..fb90718 100644
--- a/docs/guides/mcp.md
+++ b/docs/guides/mcp.md
@@ -231,6 +231,13 @@ instead.
passthrough (`SELECT`), aggregate `COUNT`, and any extension commands are
rejected over the wire. See [MindQL](../mindql.md) for the `FIND` grammar.
+`query_memory` also accepts the **valid-time predicates** (`valid_now`,
+`valid_at`, `valid_within`, `valid_between`) in the `WHERE` clause, e.g.
+`FIND thoughts WHERE valid_now` or
+`FIND thoughts WHERE valid_at '2026-01-01T00:00:00+00:00'`. This is the only way
+to do point-in-time / time-travel filtering over MCP. See the
+[Bi-temporal Model](../bitemporal.md) for the semantics.
+
### Write tools (hidden in read-only mode)
| Tool | Purpose | Key arguments | Annotation |
@@ -253,15 +260,17 @@ not idempotent. The valid `thought_type`, `lifecycle_status`, `priority`, and
Where tools are *invoked*, **resources** are addressable `engrava://` URIs that a
client surfaces as attachable context (drop them into a conversation, no tool
-call). Three resources are registered. They are reads by definition, so they are
-**always available** — they are *not* hidden by read-only mode — and each returns
-a JSON document (`application/json`).
+call). Three resources are registered — **two static** resources
+(`engrava://stats`, `engrava://recent`) and **one resource template**
+(`engrava://thought/{thought_id}`, parameterised by id). They are reads by
+definition, so they are **always available** — they are *not* hidden by read-only
+mode — and each returns a JSON document (`application/json`).
-| Resource | Returns |
-|---|---|
-| `engrava://thought/{thought_id}` | A single thought as JSON. Reading an unknown identifier yields a graceful not-found payload rather than an error. |
-| `engrava://stats` | Store-health counts and total size — the same payload as the `memory_stats` tool (both share one implementation, so they always agree). |
-| `engrava://recent` | The most-recently-updated thoughts (newest first) as JSON. |
+| Resource | Kind | Returns |
+|---|---|---|
+| `engrava://thought/{thought_id}` | template | A single thought as JSON. Reading an unknown identifier yields a graceful not-found payload rather than an error. |
+| `engrava://stats` | static | Store-health counts and total size — the same payload as the `memory_stats` tool (both share one implementation, so they always agree). |
+| `engrava://recent` | static | The most-recently-updated thoughts (newest first) as JSON. |
## Prompts
@@ -339,8 +348,10 @@ play.
- The server is **single-writer**, like engrava itself — point it at a store that
is not being written concurrently by another process (see
[Concurrency](../concurrency.md)).
-- Tool errors are returned as clean, actionable messages (for example, an unknown
- `thought_id`, or a non-`FIND` query) rather than raw tracebacks.
+- Tool errors are returned as clean, actionable messages — an unknown
+ `thought_id`, a non-`FIND` query, a duplicate `link_thoughts` edge, or an
+ invalid field value — rather than raw tracebacks, and they never expose internal
+ table/column names or other deployment internals.
- Thoughts and edges created through the write tools start at cycle `0`: this API
consumer has no notion of the agent [cycle clock](../concepts.md#cycle-the-agent-clock),
which your application owns.
diff --git a/docs/mindql.md b/docs/mindql.md
index 0337230..57f5bda 100644
--- a/docs/mindql.md
+++ b/docs/mindql.md
@@ -15,7 +15,10 @@ SELECT
- The command verb (`FIND`, `COUNT`, `SELECT`) is case-insensitive.
- `FIND` and `COUNT` **require a table name** as the second token.
- A `WHERE` clause is `field operator value`; string values must be
- single-quoted, bare numbers are coerced to `int`/`float`.
+ single-quoted, bare numbers are coerced to `int`/`float`. The quoting decides
+ the type: a single-quoted value is kept **verbatim as a string**, so a
+ zero-padded identifier like `source = '007'` matches the stored string `'007'`,
+ whereas an unquoted `created_cycle = 7` is coerced to the integer `7`.
- Operators: `=`, `!=`, `>`, `<`, `>=`, `<=`. Conditions chain with `AND`.
### Queryable tables
@@ -44,6 +47,13 @@ Filterable `thought` columns include `thought_type`, `lifecycle_status`,
`confirmation_count`, `created_cycle`, `updated_cycle`, and `thought_id`.
A column outside the per-table allowlist raises `MindQLParseError`.
+**Default row cap.** A `FIND` with no `LIMIT` clause is capped at **100 rows**
+when it runs, so an unqualified `FIND thoughts` can never trigger an unbounded
+scan. The cap is applied at execution, not at parse time — `parse("FIND thoughts")`
+leaves `query.limit` as `None`, and the executor substitutes the default only if
+no explicit `LIMIT` is present. An explicit `LIMIT` always overrides the default;
+`COUNT` queries are unaffected (they aggregate and never materialise the rows).
+
**Returns:** matching rows as dicts.
### COUNT
@@ -94,6 +104,13 @@ FIND thoughts WHERE valid_between '2026-01-01T00:00:00+00:00' '2026-12-31T00:00:
requires real bounds on both ends and therefore excludes open-bound rows.
- A query that uses **no** temporal predicate behaves exactly as before.
+> **Valid time is predicate-only, not a filterable column.** Query valid time
+> *only* through the four predicates above. `valid_from` and `valid_until` are not
+> in the per-table column allowlist, so an ordinary comparison such as
+> `WHERE valid_from = '2026-01-01T00:00:00+00:00'` is **rejected when the query
+> runs** (`MindQLParseError: Column 'valid_from' not allowed for table 'thought'`)
+> — use `valid_at` / `valid_within` / `valid_between` instead.
+
The semantics, the open-interval (`NULL` = ±∞) rule, and `invalidate` are
documented in full on [The Bi-temporal Model](bitemporal.md).
@@ -109,7 +126,8 @@ See [Extensions](extensions.md) for the registration flow.
### Parsing
`parse()` returns a `MindQLQuery` plan. Its fields are `command`, `table`,
-`conditions`, `limit`, `raw_sql`, `extension_name`, and `extension_args`.
+`conditions`, `temporal_predicates` (the parsed valid-time predicates, empty when
+none are used), `limit`, `raw_sql`, `extension_name`, and `extension_args`.
```python
from engrava import parse, MindQLParseError
@@ -152,6 +170,11 @@ print(f"Active thoughts: {count_result.count}")
outside a table's allowlist.
- Unknown command verbs raise `MindQLParseError` unless registered as an
extension command.
+- A `WHERE` fragment must match the `field operator value` grammar **in full**.
+ Trailing content after a condition (for example `WHERE priority = 'P1' OR 1=1`)
+ is rejected with a `MindQLParseError` rather than silently parsing only the
+ leading `priority = 'P1'` and discarding the rest — so a malformed condition
+ can never quietly change the result set.
## CLI Usage
diff --git a/docs/performance.md b/docs/performance.md
index ac36147..42f7df2 100644
--- a/docs/performance.md
+++ b/docs/performance.md
@@ -102,6 +102,37 @@ because the vectors are reused from the existing `embedding` table.
equal your embedding model's output. Mixing dimensions corrupts results (see
[Embedding Dimension Consistency](known-limitations.md#embedding-dimension-consistency)).
+## SQLite tuning & hot-path indexes
+
+Stores opened via `from_config` (and `EngravaManager`) come **pre-tuned** — you do
+not configure any of this:
+
+- **`synchronous=NORMAL`** — the documented-safe companion to WAL. It cuts an
+ `fsync` from every commit (lower write latency) while staying durable across an
+ application crash; only the most recent transactions are at risk on an OS crash
+ or power loss. (See [Concurrency](concurrency.md#busy-timeout).)
+- **`busy_timeout=5000`** — a second connection waits up to 5 s for a lock instead
+ of failing immediately with `database is locked`.
+- **Four hot-path indexes** back the equality filters and sort column that the
+ common reads hit, turning what were full-table scans into index lookups:
+
+ | Index | Column | Speeds up |
+ |---|---|---|
+ | `idx_edge_to_thought` | `edge(to_thought_id)` | `get_edges` (IN / BOTH) and the reflection-consolidation scan |
+ | `idx_embedding_owner` | `embedding(owner_id)` | `get_embedding` lookups by thought |
+ | `idx_thought_updated_cycle` | `thought(updated_cycle)` | `list_thoughts` recency ordering |
+ | `idx_thought_type` | `thought(thought_type)` | `thought_type` equality filters |
+
+The `0.4` schema also adds **valid-time indexes** so the
+[bi-temporal](bitemporal.md) predicates are index-backed: `valid_from`, a
+**partial** `valid_until` index (only non-`NULL` upper bounds are indexed — an open
+upper bound is the common case and stays overhead-free), and a composite
+`(valid_from, valid_until)` range index, on both the `thought` and `edge` tables.
+
+The indexes are created idempotently (`CREATE INDEX IF NOT EXISTS`) and an existing
+database gains them automatically through the additive `0.3 → 0.4` schema migration
+— zero data loss, no manual step. See the [Upgrade Guide](upgrade.md#03---04).
+
## Write throughput and bulk ingest
By default each mutating call commits its own transaction. For a bulk load that
diff --git a/docs/search.md b/docs/search.md
index 1d15c84..7aac2c7 100644
--- a/docs/search.md
+++ b/docs/search.md
@@ -26,6 +26,46 @@ its weight is **redistributed proportionally** across active signals.
- `graph_weight` is `0.0` → graph skipped, zero overhead.
- All signals disabled → fallback to `list_thoughts(LIMIT top_k)`.
+## Keyword query syntax (FTS)
+
+The keyword signal — and the `search_fts()` method and the MCP `search_keywords`
+tool that expose it directly — runs your text against an SQLite FTS5 index. engrava
+normalises the query before handing it to FTS5, with two modes that switch
+automatically on what you type:
+
+**Bare queries are matched with `OR`.** A plain natural-language query like
+`what was my sister doing` is treated as a bag of words joined with `OR`, so a
+document matches when it shares **any** word. BM25's IDF weighting then ranks the
+documents that share the most *distinctive* words first, so common function words
+(`what`, `was`, `my`) carry little weight and need no stopword list or stemmer —
+this works in any language. (Before this, the words were joined with FTS5's
+implicit `AND`, so a question only matched documents containing *every* word and
+relevant answers were missed.)
+
+```python
+# Bare query -> OR-matched: finds docs sharing any content word, best-ranked first
+hits = await store.search_fts("what was my sister doing", top_k=10)
+```
+
+**Expert syntax is preserved unchanged.** If your query uses FTS5 operators, it is
+passed through as written:
+
+- **quoted phrases** — `"machine learning"` matches the exact phrase;
+- **uppercase booleans** — `AND`, `OR`, `NOT` (must be uppercase) compose terms,
+ e.g. `python AND NOT snake`;
+- **prefix** — a trailing `*` does prefix matching, e.g. `neur*`;
+- **column filters** — `essence:` and `content:` restrict a term to that column,
+ e.g. `content:berlin`.
+
+**Punctuation never raises.** Unsafe characters split a token into separate terms
+rather than breaking the query: a contraction like `sister's` becomes `sister OR
+s`, so it still matches a stored `sister's dog`. Pasting a URL or a timestamp is
+safe too — only the real `essence:` / `content:` column filters are honoured, so
+`http://example.com` and `12:30` are treated as ordinary search terms (they do
+**not** become spurious `http:` / `12:` column filters). A genuinely malformed
+full-text expression is logged and degraded to zero FTS hits, so the rest of a
+hybrid search still returns results.
+
## Graph-Aware Ranking
The graph signal uses **1-hop-weighted neighbour boost**. If a
@@ -110,8 +150,6 @@ of `SearchConfig` fields.
## Querying reflections
-> **v0.4.0**
-
After `DreamingExtension.run_consolidation()` runs its clustering phase,
`ThoughtType.REFLECTION` meta-thoughts exist in the store. Three knobs
control how hybrid search handles them.
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index 047d084..3916756 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -72,7 +72,8 @@ erroring when its prerequisite is missing. Work through this checklist:
| You pass `query_text` but no provider and no `query_vector` | same as above — there is no vector to compare against. |
| `current_cycle` is `None` | the **recency** signal is skipped (it cannot compute an age). |
| `recency_weight` is `0.0` | recency is disabled even if `current_cycle` is set. |
-| The query shares no FTS tokens with any thought | FTS legitimately returns nothing — this is a real miss, not a bug. |
+| The query shares no FTS tokens with any thought | FTS legitimately returns nothing — this is a real miss, not a bug. Note a *bare* query is `OR`-matched (any shared word hits), so this is rarer than it looks; if you instead get **too many** hits, you may want strict matching — see below. |
+| You used lowercase `and` / `or` between words | These are **not** FTS5 operators — they are matched as ordinary words (and `OR`-joined like any bare query). Booleans must be **uppercase** (`AND`, `OR`, `NOT`). |
Inspect which signals actually ran via `HybridSearchResult.backends_used`:
@@ -86,6 +87,39 @@ embedding provider (see the [Embeddings guide](guides/embeddings.md)). If
`'recency'` is missing, pass a non-`None` `current_cycle` **and** a
`recency_weight > 0`.
+## Keyword search returns too many results (I wanted all words to match)
+
+**Symptom.** A multi-word `search_fts` / `search_keywords` query returns documents
+that contain only *some* of the words, not all of them.
+
+**Cause.** A **bare** keyword query is matched with `OR`, by design — a document
+matches when it shares *any* word, and BM25 ranks the ones sharing the most
+distinctive words first (see [Keyword query syntax](search.md#keyword-query-syntax-fts)).
+This is what lets natural-language questions find relevant answers; it is not a bug.
+
+**Fix.** When you genuinely need every term, use FTS5 expert syntax explicitly —
+**uppercase** `AND` between the words, or a quoted phrase for an exact sequence:
+
+```python
+# require both words
+await store.search_fts("python AND asyncio", top_k=10)
+# require an exact phrase
+await store.search_fts('"event loop"', top_k=10)
+```
+
+Lowercase `and`/`or` will **not** work — they are matched as ordinary words.
+
+## Pasting a URL, path, or timestamp into search
+
+**Symptom.** You expect a query containing `http://…`, `12:30`, or a Windows path
+to error or to be interpreted as an FTS column filter.
+
+**Cause / behaviour.** It does neither. Only the real `essence:` and `content:`
+column filters are honoured; any other `token:token` (a URL scheme, a clock time)
+is split into ordinary search terms, so the query is safe to run and a genuinely
+malformed FTS expression degrades to zero FTS hits rather than raising. No action
+needed — this is the intended robustness.
+
## Dreaming promotes nothing (consolidation is inert)
**Symptom.** `run_consolidation(...)` returns `promoted_count == 0` every time.
@@ -125,6 +159,29 @@ print(result.promoted_count)
See [Dreaming](dreaming.md) for the full gate-and-signal model.
+## Embedding ingest fails against an OpenAI-compatible endpoint
+
+**Symptom.** Writes that auto-embed (or explicit embed calls) raise after a pause,
+either immediately or after a few seconds of retrying.
+
+**Cause / what to expect.** `OpenAICompatibleProvider` retries a request with
+bounded exponential backoff on a *transient* failure — a read timeout or network
+blip, or a transient HTTP status (`408`, `409`, `425`, `429`, `500`, `502`, `503`,
+`504`). Two outcomes:
+
+- **A transient failure that persists across every attempt** is raised as a
+ `RuntimeError` once `max_attempts` is exhausted (it never loops forever). If you
+ see this under sustained `429`s, you are being rate-limited — raise
+ `base_retry_delay_s`, lower your ingest concurrency, or batch more slowly.
+- **A non-transient status** (`400`, `401`, `403`, `404`) is surfaced
+ **immediately with no retry** — it indicates a request/auth/model error, not a
+ blip. Check your `api_key`, `base_url`, and `model_name`.
+
+**Fix.** Tune the retry budget on the provider (`max_attempts`, default `3`;
+`base_retry_delay_s`, default `1.0`), or address the underlying cause above. Only
+`OpenAICompatibleProvider` retries — `OllamaProvider` / `HuggingFaceProvider` do
+not. See the [Embeddings guide](guides/embeddings.md#openaicompatibleprovider--openai-or-any-openai-compatible-api).
+
## `EmbeddingModelMismatchError` when opening an existing database
**Symptom.** A store that worked before now raises `EmbeddingModelMismatchError`
diff --git a/docs/upgrade.md b/docs/upgrade.md
index b5ae903..a20564d 100644
--- a/docs/upgrade.md
+++ b/docs/upgrade.md
@@ -148,8 +148,11 @@ predicates, and `invalidate`. From an upgrade standpoint, the change is
**The migration runs on first open, with zero data loss.** The first time a
0.4 process calls `ensure_schema()` (most apps already do this at startup), the
-core schema steps forward to the new version inside a transaction. `pip install
---upgrade engrava` plus your normal startup is all that is required:
+core schema steps forward from `user_version = 12` (the 0.3 schema) to
+`user_version = 14` in **two additive steps** (12 → 13 adds the valid-time
+columns and their indexes; 13 → 14 adds the hot-path indexes), each inside a
+transaction. `pip install --upgrade engrava` plus your normal startup is all that
+is required:
```bash
pip install --upgrade engrava
@@ -179,14 +182,27 @@ What the migration does:
and `busy_timeout=5000` (a PRAGMA-only change with no on-disk effect). Like
the valid-time step, it runs automatically on first open with zero data loss.
-**Existing queries are unchanged.** A query that uses no temporal predicate
-behaves exactly as it did on 0.3. And because a `NULL` bound is treated as an
-**open interval end** (−∞ / +∞), the open-from rows above still match
+**Structured (MindQL) queries are unchanged.** A query that uses no temporal
+predicate behaves exactly as it did on 0.3. And because a `NULL` bound is treated
+as an **open interval end** (−∞ / +∞), the open-from rows above still match
`valid_now` and `valid_at` queries — an un-dated fact is treated as "valid since
the beginning of time", not as "excluded". So adopting valid time is incremental:
you can start annotating new facts whenever you like, and the old ones keep
surfacing in temporal queries until you choose to bound them.
+**Search behavior changes (no migration, but worth knowing).** Two 0.4 fixes to
+keyword/full-text search are not schema changes but do change results:
+
+- **Bare full-text queries now `OR`-match** instead of `AND`-matching, so a
+ natural-language query that returned *nothing* on 0.3 (because no document
+ contained *every* word) may now return results. This is the intended fix; if you
+ relied on strict all-words matching, use uppercase `AND` or a quoted phrase
+ explicitly. See [Keyword query syntax](search.md#keyword-query-syntax-fts).
+- Stored embeddings are **not** re-computed by the upgrade — the full-content
+ embedding fix and the `max_seq_length` fix take effect only when a thought is
+ re-written (re-created, or its `essence`/`content` updated), at which point it is
+ re-embedded with the corrected input. Existing vectors are untouched until then.
+
> **Honest note about edges.** Because the upgrade cannot invent a `valid_from`
> for an edge that never had a date, every edge migrated from 0.3 carries
> `valid_from = NULL`. That is the correct "open lower bound", so those edges
@@ -194,6 +210,11 @@ surfacing in temporal queries until you choose to bound them.
> (which requires real bounds on both ends) until you set their bounds
> explicitly. This is expected, not a defect.
+**New optional MCP server.** 0.4 also ships an optional Model Context Protocol
+server behind a new `mcp` extra — `pip install "engrava[mcp]"`. It is purely
+additive: plain `pip install engrava` is unaffected and existing code needs no
+change. See the [MCP server guide](guides/mcp.md).
+
This is a schema-changing minor upgrade, so follow the
[rolling-upgrades](#rolling-upgrades-multiple-workers) procedure (back up,
quiesce writers, migrate once, start new workers) if you run multiple processes
diff --git a/src/engrava/mcp/server.py b/src/engrava/mcp/server.py
index 4e40657..b5e060f 100644
--- a/src/engrava/mcp/server.py
+++ b/src/engrava/mcp/server.py
@@ -95,6 +95,7 @@
import json
import os
+import sqlite3
import uuid
from contextlib import asynccontextmanager
from dataclasses import replace
@@ -104,9 +105,14 @@
from mcp.server.fastmcp import FastMCP
from mcp.server.fastmcp.exceptions import ToolError
from mcp.types import ToolAnnotations
+from pydantic import ValidationError
from engrava.domain.enums import EdgeType, LifecycleStatus, Priority, ThoughtType
-from engrava.domain.exceptions import ReferentialIntegrityError, ThoughtNotFoundError
+from engrava.domain.exceptions import (
+ InvalidTransitionError,
+ ReferentialIntegrityError,
+ ThoughtNotFoundError,
+)
from engrava.domain.models.edge import EdgeRecord
from engrava.domain.models.thought import ThoughtRecord
from engrava.mcp.config import ResolvedStore, resolve_store
@@ -237,7 +243,12 @@ async def _tool_errors() -> AsyncIterator[None]:
The messages name only the documented configuration *environment
variables* (never a filesystem path), carry no stack frames, and expose
no internal symbol names, so a misuse reply leaks nothing about the
- deployment.
+ deployment. In particular, a database constraint violation (a duplicate
+ ``link_thoughts`` edge), a domain-model validation error, and an illegal
+ lifecycle transition are mapped to curated messages here so the raw SQLite
+ table/column names, Pydantic's internal model details, and the internal
+ status-type name never reach the client; an unrecognised integrity error is
+ re-raised unchanged rather than described.
Yields:
``None``; the caller runs the guarded tool body inside the ``with``.
@@ -279,6 +290,19 @@ async def _tool_errors() -> AsyncIterator[None]:
"identifier, or use search_memory or list_memory to find it."
)
raise ToolError(msg) from exc
+ except InvalidTransitionError as exc:
+ # An illegal lifecycle change on update_thought (the wire status is
+ # coerced to the enum, so the state-machine guard fires). The raw
+ # message names the internal status type; surface the move in plain
+ # user terms instead. The state values are the public lifecycle names
+ # (CREATED/ACTIVE/DONE/ARCHIVED), not internal symbols.
+ msg = (
+ f"Cannot change lifecycle status from {exc.current_state} to "
+ f"{exc.target_state}: that transition is not allowed. The lifecycle "
+ "advances CREATED -> ACTIVE -> DONE -> ARCHIVED and cannot move "
+ "backwards or skip ahead."
+ )
+ raise ToolError(msg) from exc
except ReferentialIntegrityError as exc:
msg = (
f"Cannot link thoughts: no thought exists with id "
@@ -286,6 +310,36 @@ async def _tool_errors() -> AsyncIterator[None]:
"the identifier."
)
raise ToolError(msg) from exc
+ except ValidationError as exc:
+ # A field value rejected by the domain model (e.g. an essence below the
+ # minimum length, or a value outside an enum). Pydantic's own message
+ # names the internal model class and links errors.pydantic.dev, so it
+ # must NOT be echoed; surface the offending field names only.
+ fields = ", ".join(
+ ".".join(str(part) for part in err.get("loc", ())) for err in exc.errors()
+ )
+ detail = f" (check: {fields})" if fields else ""
+ msg = (
+ f"One or more fields are invalid{detail}. Correct the value(s) and "
+ "retry — see the tool's argument descriptions for the accepted "
+ "types and ranges."
+ )
+ raise ToolError(msg) from exc
+ except sqlite3.IntegrityError as exc:
+ # A constraint violation from the database. The raw message names the
+ # internal table and columns (e.g. a UNIQUE constraint over the edge
+ # endpoints), which must not reach the client. Map the reachable case —
+ # a duplicate edge from link_thoughts — to a schema-free message; any
+ # other integrity error is re-raised unchanged rather than silently
+ # described, so it is never masked.
+ if "UNIQUE" in str(exc):
+ msg = (
+ "An edge of that type already links those two thoughts. Edges "
+ "are unique per (source, target, type), so this link already "
+ "exists — no change was made."
+ )
+ raise ToolError(msg) from exc
+ raise
class StoreProvider:
@@ -766,8 +820,9 @@ async def update_thought_impl(
essence: New compact canonical text, if changing.
content: New full content, if changing.
priority: New urgency level, if changing.
- lifecycle_status: New lifecycle state, if changing. The store
- validates that the transition is allowed.
+ lifecycle_status: New lifecycle state, if changing. Supplied as the
+ string name of a :class:`~engrava.LifecycleStatus` member; the
+ store validates that the transition is allowed.
confidence: New reliability estimate in ``[0.0, 1.0]``, if changing.
Returns:
@@ -777,7 +832,7 @@ async def update_thought_impl(
Raises:
ThoughtNotFoundError: If no thought has the given identifier.
StaleDataError: If the thought changed concurrently.
- InvalidTransitionError: If a lifecycle change is not permitted.
+ InvalidTransitionError: If the lifecycle change is not permitted.
"""
changes: dict[str, object] = {}
diff --git a/tests/mcp/test_errors.py b/tests/mcp/test_errors.py
index 26dc3c6..e0cb4bb 100644
--- a/tests/mcp/test_errors.py
+++ b/tests/mcp/test_errors.py
@@ -57,6 +57,23 @@
"ReferentialIntegrityError",
"SqliteEngravaCore",
"lifespan",
+ # Database-constraint internals: a UNIQUE violation's raw message names the
+ # edge table and its columns — none of these may reach the client.
+ "IntegrityError",
+ "UNIQUE constraint",
+ "edge.from_thought_id",
+ "edge.to_thought_id",
+ "edge.edge_type",
+ # Domain-model-validation internals: Pydantic's raw message names the model
+ # class and links its docs site.
+ "ValidationError",
+ "ThoughtRecord",
+ "pydantic",
+ "errors.pydantic.dev",
+ # Lifecycle-transition internals: the raw InvalidTransitionError message
+ # names the internal status type.
+ "InvalidTransitionError",
+ "Invalid LifecycleStatus transition",
)
#: Phrases that would wrongly suggest raw SQL is runnable over the wire.
@@ -339,6 +356,137 @@ async def test_missing_endpoint_reports_id_with_hint(
_assert_no_leak(text)
+class TestDuplicateEdge:
+ """A duplicate ``link_thoughts`` edge is mapped to a schema-free message."""
+
+ async def test_duplicate_link_reports_clean_message_no_schema(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ # The two seeded thoughts can be linked once; a second identical link
+ # violates the (source, target, type) UNIQUE constraint. The raw
+ # sqlite3 message names the edge table and its columns — the client
+ # must instead get a curated, schema-free message.
+ async with _client_for(store) as client:
+ first = await client.call_tool(
+ "link_thoughts",
+ {
+ "from_thought_id": "thought-alpha",
+ "to_thought_id": "thought-beta",
+ "edge_type": "ASSOCIATED",
+ },
+ )
+ assert first.isError is False # the first link succeeds
+
+ duplicate = await client.call_tool(
+ "link_thoughts",
+ {
+ "from_thought_id": "thought-alpha",
+ "to_thought_id": "thought-beta",
+ "edge_type": "ASSOCIATED",
+ },
+ )
+
+ assert duplicate.isError is True
+ text = _error_text(duplicate.content)
+ # Actionable: it explains the uniqueness rule in user terms ...
+ assert "already" in text.lower()
+ # ... and leaks no table/column names, raw constraint text, or symbol.
+ _assert_no_leak(text)
+
+
+class TestInvalidFieldValue:
+ """An invalid field value is mapped without leaking Pydantic internals."""
+
+ async def test_empty_essence_reports_clean_validation_message(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ # ``essence`` has a minimum length; an empty string fails domain-model
+ # validation. The raw Pydantic error names the model class and links
+ # its docs site — the client must get a curated message instead.
+ async with _client_for(store) as client:
+ result = await client.call_tool(
+ "store_thought",
+ {"essence": "", "content": "some content"},
+ )
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ # Actionable: it points at the offending field and says it is invalid ...
+ assert "invalid" in text.lower()
+ assert "essence" in text.lower()
+ # ... and leaks no Pydantic URL, model class name, or symbol.
+ _assert_no_leak(text)
+
+ async def test_out_of_range_confidence_reports_clean_message(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ # ``confidence`` is constrained to [0.0, 1.0] by the domain model (not
+ # by the tool's argument schema), so an out-of-range value reaches the
+ # tool body and is rejected there — exercising the ValidationError
+ # mapping. The curated message names the field, never the model.
+ async with _client_for(store) as client:
+ result = await client.call_tool(
+ "store_thought",
+ {"essence": "ok", "content": "c", "confidence": 5.0},
+ )
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ assert "invalid" in text.lower()
+ assert "confidence" in text.lower()
+ _assert_no_leak(text)
+
+
+class TestIllegalTransition:
+ """An illegal lifecycle change is mapped without the internal type name."""
+
+ async def test_backwards_transition_reports_clean_message(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ # The seeded thoughts are ACTIVE; ACTIVE -> CREATED is backwards and
+ # illegal. FastMCP coerces the wire status to the LifecycleStatus enum,
+ # so the store's transition guard fires. The raw message names the
+ # internal status type — the client must get a curated message instead.
+ async with _client_for(store) as client:
+ result = await client.call_tool(
+ "update_thought",
+ {"thought_id": "thought-alpha", "lifecycle_status": "CREATED"},
+ )
+
+ assert result.isError is True
+ text = _error_text(result.content)
+ # Actionable: it states the move in plain terms (the public state names
+ # are fine; the internal type name is not) ...
+ assert "ACTIVE" in text
+ assert "CREATED" in text
+ assert "not allowed" in text.lower() or "cannot" in text.lower()
+ # ... and leaks neither the raw "Invalid LifecycleStatus transition"
+ # phrasing nor the exception class name.
+ _assert_no_leak(text)
+
+ async def test_illegal_transition_does_not_change_state(
+ self,
+ store: SqliteEngravaCore,
+ ) -> None:
+ # The rejection is real: the thought stays ACTIVE after an illegal
+ # update attempt, confirming the guard blocks the write (not just the
+ # message wrapper).
+ async with _client_for(store) as client:
+ await client.call_tool(
+ "update_thought",
+ {"thought_id": "thought-alpha", "lifecycle_status": "CREATED"},
+ )
+ after = await client.call_tool("get_thought", {"thought_id": "thought-alpha"})
+
+ assert after.isError is False
+ assert after.structuredContent is not None
+ assert after.structuredContent["thought"]["lifecycle_status"] == "ACTIVE"
+
+
class TestSuccessPathUnchanged:
"""Mapping errors must not alter what a successful tool call returns."""
From be9b110d24494f3641bd504a00c2fc838b55c867 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 15 Jun 2026 10:43:00 +0200
Subject: [PATCH 30/40] feat(api): add remember() and recall() convenience
methods on the store
---
CHANGELOG.md | 12 +
src/engrava/domain/models/thought.py | 4 +-
src/engrava/domain/protocols/engrava_core.py | 65 ++-
.../infrastructure/sqlite/engrava_core.py | 106 ++++
tests/test_remember_recall.py | 460 ++++++++++++++++++
5 files changed, 644 insertions(+), 3 deletions(-)
create mode 100644 tests/test_remember_recall.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 17f32ee..ae7381a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,18 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/
### Added
+- **`remember` and `recall`: store and retrieve in one call.** Two ergonomic
+ convenience methods on the store let you persist a string and get relevant
+ strings back without hand-building a `ThoughtRecord` or wiring up
+ `search_hybrid`. `remember(text, *, metadata=None, deduplicate=False)` stores
+ a string as a thought (deriving the essence from its opening) and honours
+ opt-in content deduplication; `recall(query, *, top_k=10, current_cycle=None)`
+ returns the ranked matches. Passing `current_cycle` to `recall` blends in the
+ recency signal; on a large store recalled without it, a single DEBUG log line
+ points out that a cycle would let recent thoughts rank higher. `ThoughtRecord`
+ now defaults `created_cycle` and `updated_cycle` to `0`, so callers that do
+ not track cognitive cycles can omit them.
+
- **Bi-temporal model: track when a fact is *true*, not just when you stored
it.** `ThoughtRecord` and `EdgeRecord` gain two optional, nullable ISO-8601
fields — `valid_from` and `valid_until` — describing the half-open real-world
diff --git a/src/engrava/domain/models/thought.py b/src/engrava/domain/models/thought.py
index 6c34f26..a5c7803 100644
--- a/src/engrava/domain/models/thought.py
+++ b/src/engrava/domain/models/thought.py
@@ -119,8 +119,8 @@ class ThoughtRecord(BaseModel):
content: str = Field(min_length=1)
priority: Priority
lifecycle_status: LifecycleStatus
- created_cycle: int = Field(ge=0)
- updated_cycle: int = Field(ge=0)
+ created_cycle: int = Field(default=0, ge=0)
+ updated_cycle: int = Field(default=0, ge=0)
source: str = Field(min_length=1)
confidence: float | None = Field(default=None, ge=0.0, le=1.0)
embedding_ref: str | None = None
diff --git a/src/engrava/domain/protocols/engrava_core.py b/src/engrava/domain/protocols/engrava_core.py
index 2681f81..49c8a9c 100644
--- a/src/engrava/domain/protocols/engrava_core.py
+++ b/src/engrava/domain/protocols/engrava_core.py
@@ -14,7 +14,7 @@
from engrava.domain.models.embedding import EmbeddingRecord
from engrava.domain.models.metrics import EngravaMetrics
from engrava.domain.models.search import HybridSearchResult
- from engrava.domain.models.thought import ThoughtRecord
+ from engrava.domain.models.thought import MetadataValue, ThoughtRecord
from engrava.domain.models.ttl import CleanupResult
@@ -60,6 +60,69 @@ async def create_thought(
"""
...
+ async def remember(
+ self,
+ text: str,
+ *,
+ metadata: dict[str, MetadataValue] | None = None,
+ deduplicate: bool = False,
+ ) -> ThoughtRecord:
+ """Store a string as a thought with one call.
+
+ Ergonomic shorthand over :meth:`create_thought` for the common
+ case of persisting a bare string: implementations build a
+ :class:`ThoughtRecord` (deriving ``essence`` from the opening of
+ ``text``) and delegate to ``create_thought``.
+
+ The thought is created at the store's default cognitive cycle
+ (cycle ``0``); callers that track cycles should build a
+ :class:`ThoughtRecord` explicitly and use ``create_thought``.
+
+ Args:
+ text: The content to remember. Becomes the thought's
+ ``content``; the opening is also used as its ``essence``.
+ metadata: Optional structured attributes (e.g. ``speaker``,
+ ``lang``, ``session_id``). Defaults to an empty mapping.
+ deduplicate: When ``True`` and a thought with byte-identical
+ ``content`` already exists, its ``confirmation_count`` is
+ incremented and the existing record is returned instead of
+ inserting a duplicate (delegated to
+ ``create_thought(deduplicate=True)``).
+
+ Returns:
+ The persisted thought record (or the existing record with a
+ bumped ``confirmation_count`` when deduplication hits).
+
+ """
+ ...
+
+ async def recall(
+ self,
+ query: str,
+ *,
+ top_k: int = 10,
+ current_cycle: int | None = None,
+ ) -> HybridSearchResult:
+ """Retrieve thoughts relevant to a query with one call.
+
+ Ergonomic shorthand over :meth:`search_hybrid` for the common
+ retrieval case: implementations delegate to ``search_hybrid`` with
+ the query text and the given ``top_k``/``current_cycle``.
+
+ Args:
+ query: Natural-language text to search for.
+ top_k: Maximum number of results to return.
+ current_cycle: Current cognitive cycle. When provided, the
+ recency signal is blended into ranking; when ``None``,
+ recency is skipped.
+
+ Returns:
+ A ``HybridSearchResult`` with the ranked matches and the set of
+ backends that contributed.
+
+ """
+ ...
+
async def get_thought(self, thought_id: str) -> ThoughtRecord | None:
"""Retrieve a thought by its ID.
diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py
index 99eef66..c31eac1 100644
--- a/src/engrava/infrastructure/sqlite/engrava_core.py
+++ b/src/engrava/infrastructure/sqlite/engrava_core.py
@@ -109,6 +109,12 @@ def _build_embed_input(essence: str, content: str) -> str:
#: Standalone uppercase boolean operators that switch a query into expert mode.
#: Lowercase ``and``/``or``/``not`` are ordinary words, not operators.
_FTS_BOOLEAN_OPERATORS = frozenset({"AND", "OR", "NOT"})
+#: Thought-count above which :meth:`SqliteEngravaCore.recall` emits a one-time
+#: DEBUG nudge when called without ``current_cycle`` (so the recency signal is
+#: silently inactive). Below this the omission is unremarkable; past it, a store
+#: large enough to benefit from recency that never receives a cycle is worth a
+#: single diagnostic breadcrumb (never a warning, never repeated).
+_RECENCY_NUDGE_THRESHOLD = 25
_SUPPRESS_SEARCH_METRICS: contextvars.ContextVar[bool] = contextvars.ContextVar(
"engrava_suppress_search_metrics",
default=False,
@@ -229,6 +235,8 @@ def __init__(
# row-level locking. Acquired only on the dedup branch — the
# legacy ``deduplicate=False`` path stays lock-free.
self._dedup_lock: asyncio.Lock = asyncio.Lock()
+ # Fires the recency-off nudge in ``recall`` at most once per instance.
+ self._recency_nudge_emitted: bool = False
@property
def journal(self) -> JournalWriter | None:
@@ -1776,6 +1784,104 @@ async def create_thought(
await self._maybe_auto_cleanup(exclude_id=thought.thought_id)
return await self._hooks.on_store(thought)
+ async def remember(
+ self,
+ text: str,
+ *,
+ metadata: dict[str, MetadataValue] | None = None,
+ deduplicate: bool = False,
+ ) -> ThoughtRecord:
+ """Store a string as a thought with one call.
+
+ Ergonomic shorthand over :meth:`create_thought` for the common case
+ of persisting a bare string. A :class:`ThoughtRecord` is built with a
+ fresh UUID, ``content=text`` and ``essence=text[:200]`` (the compact
+ canonical prefix used in prompts), then handed to ``create_thought``.
+
+ The thought is created at the store's default cognitive cycle
+ (``created_cycle == updated_cycle == 0``); callers that track cognitive
+ cycles should build a :class:`ThoughtRecord` explicitly and call
+ ``create_thought`` so the cycle is recorded.
+
+ Args:
+ text: The content to remember. Becomes the thought's ``content``;
+ its opening (capped at 200 characters) becomes the ``essence``.
+ metadata: Optional structured attributes (e.g. ``speaker``,
+ ``lang``, ``session_id``). Defaults to an empty mapping.
+ deduplicate: When ``True`` and a thought with byte-identical
+ ``content`` already exists, its ``confirmation_count`` is
+ incremented and the existing record is returned instead of
+ inserting a duplicate (forwarded to
+ ``create_thought(deduplicate=True)``). Default ``False``
+ inserts a new row on every call.
+
+ Returns:
+ The persisted thought record (or the existing record with a bumped
+ ``confirmation_count`` when deduplication hits).
+
+ """
+ thought = ThoughtRecord(
+ thought_id=str(_uuid.uuid4()),
+ thought_type=ThoughtType.NOTE,
+ essence=text[:200],
+ content=text,
+ priority=Priority.P3,
+ lifecycle_status=LifecycleStatus.ACTIVE,
+ source="remember",
+ metadata=metadata or {},
+ )
+ return await self.create_thought(thought, deduplicate=deduplicate)
+
+ async def recall(
+ self,
+ query: str,
+ *,
+ top_k: int = 10,
+ current_cycle: int | None = None,
+ ) -> HybridSearchResult:
+ """Retrieve thoughts relevant to a query with one call.
+
+ Ergonomic shorthand over :meth:`search_hybrid` for the common
+ retrieval case: the query text is passed straight through with the
+ given ``top_k`` and ``current_cycle``.
+
+ When ``current_cycle`` is ``None`` the recency signal is inactive
+ (see ``search_hybrid``). A store that holds more than
+ ``_RECENCY_NUDGE_THRESHOLD`` thoughts and recalls without a cycle emits
+ a single DEBUG-level breadcrumb on the module logger — once per store
+ instance — pointing out that passing ``current_cycle`` would let recent
+ thoughts rank higher. It is never a warning and never repeats.
+
+ Args:
+ query: Natural-language text to search for.
+ top_k: Maximum number of results to return.
+ current_cycle: Current cognitive cycle. When provided, the recency
+ signal is blended into ranking; when ``None``, recency is
+ skipped.
+
+ Returns:
+ A ``HybridSearchResult`` with the ranked matches and the set of
+ backends that contributed.
+
+ """
+ if current_cycle is None and not self._recency_nudge_emitted:
+ count_cursor = await self._db.execute("SELECT COUNT(*) FROM thought")
+ count_row = await count_cursor.fetchone()
+ total = int(count_row[0]) if count_row is not None else 0
+ if total > _RECENCY_NUDGE_THRESHOLD:
+ self._recency_nudge_emitted = True
+ logger.debug(
+ "recall() called without current_cycle on a store of %d thoughts; "
+ "passing current_cycle enables the recency signal so recent thoughts "
+ "rank higher",
+ total,
+ )
+ return await self.search_hybrid(
+ query_text=query,
+ top_k=top_k,
+ current_cycle=current_cycle,
+ )
+
async def cleanup_expired(
self,
now: str | None = None,
diff --git a/tests/test_remember_recall.py b/tests/test_remember_recall.py
new file mode 100644
index 0000000..57f6aac
--- /dev/null
+++ b/tests/test_remember_recall.py
@@ -0,0 +1,460 @@
+"""Functional contract for the ergonomic ``remember`` / ``recall`` pair.
+
+These two convenience methods are the smallest possible surface for storing a
+string and getting relevant strings back, so an agent author never has to hand-
+build a :class:`~engrava.domain.models.thought.ThoughtRecord` or call
+``search_hybrid`` with the right keyword arguments for the common case.
+
+Everything here is deterministic and network-free: query embeddings come from a
+:class:`~engrava.CallbackProvider` wrapping a bag-of-words hashing embedder (the
+same pattern the search-contract suite uses), so the vector arm is exercised
+without loading a model or reaching the network.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import logging
+from typing import TYPE_CHECKING
+
+import aiosqlite
+import pytest
+
+from engrava import CallbackProvider, SearchConfig, SqliteEngravaCore
+from engrava.domain.models.thought import ThoughtRecord
+from engrava.domain.protocols.engrava_core import EngravaCoreProtocol
+
+if TYPE_CHECKING:
+ from collections.abc import AsyncIterator
+
+
+# ---------------------------------------------------------------------------
+# Deterministic embedding provider (bag-of-words hashing — network-free)
+# ---------------------------------------------------------------------------
+
+_EMBED_DIM = 128
+
+
+def _tokenize(text: str) -> list[str]:
+ """Split text into lowercase alphanumeric word tokens.
+
+ Args:
+ text: Arbitrary input text.
+
+ Returns:
+ Lowercase word tokens, with punctuation stripped.
+ """
+ tokens: list[str] = []
+ current: list[str] = []
+ for char in text.lower():
+ if char.isalnum():
+ current.append(char)
+ elif current:
+ tokens.append("".join(current))
+ current = []
+ if current:
+ tokens.append("".join(current))
+ return tokens
+
+
+def _bag_of_words_embed(text: str) -> list[float]:
+ """Embed text as an L2-normalized bag-of-words hashing vector.
+
+ Cosine similarity between two such vectors grows with the fraction of
+ shared vocabulary, giving ``recall`` a deterministic, network-free
+ semantic signal.
+
+ Args:
+ text: Input text to embed.
+
+ Returns:
+ An ``_EMBED_DIM``-length unit vector (all-zero only for empty text).
+ """
+ vector = [0.0] * _EMBED_DIM
+ for token in _tokenize(text):
+ digest = hashlib.sha1(token.encode("utf-8")).digest() # noqa: S324
+ index = int.from_bytes(digest[:4], "big") % _EMBED_DIM
+ vector[index] += 1.0
+ norm = sum(value * value for value in vector) ** 0.5
+ if norm == 0.0:
+ return vector
+ return [value / norm for value in vector]
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def embedding_provider() -> CallbackProvider:
+ """Return a deterministic bag-of-words embedding provider.
+
+ Returns:
+ A :class:`CallbackProvider` wrapping the network-free hashing embedder.
+ """
+ return CallbackProvider(
+ callback=_bag_of_words_embed,
+ dimension=_EMBED_DIM,
+ model_name="bag-of-words-remember-recall",
+ )
+
+
+async def _make_store(
+ *,
+ embedding_provider: CallbackProvider | None = None,
+ auto_embed: bool = False,
+ search_config: SearchConfig | None = None,
+) -> SqliteEngravaCore:
+ """Build a schema-applied in-memory store.
+
+ Args:
+ embedding_provider: Optional provider for the vector arm.
+ auto_embed: Whether to auto-embed thoughts on write.
+ search_config: Optional search-weight configuration.
+
+ Returns:
+ A ready-to-use :class:`SqliteEngravaCore` over an in-memory database.
+ """
+ conn = await aiosqlite.connect(":memory:")
+ conn.row_factory = aiosqlite.Row
+ await conn.execute("PRAGMA journal_mode = WAL")
+ await conn.execute("PRAGMA foreign_keys = ON")
+ store = SqliteEngravaCore(
+ conn,
+ embedding_provider=embedding_provider,
+ auto_embed=auto_embed,
+ search_config=search_config,
+ )
+ await store.ensure_schema()
+ return store
+
+
+@pytest.fixture
+async def fts_store() -> AsyncIterator[SqliteEngravaCore]:
+ """Return an empty FTS-only store (no embedding provider).
+
+ Yields:
+ A :class:`SqliteEngravaCore` over an in-memory database.
+ """
+ store = await _make_store()
+ yield store
+ await store._db.close()
+
+
+@pytest.fixture
+async def hybrid_store(
+ embedding_provider: CallbackProvider,
+) -> AsyncIterator[SqliteEngravaCore]:
+ """Return an empty store with a deterministic vector arm.
+
+ Args:
+ embedding_provider: The network-free bag-of-words provider.
+
+ Yields:
+ A :class:`SqliteEngravaCore` with ``auto_embed`` enabled.
+ """
+ store = await _make_store(embedding_provider=embedding_provider, auto_embed=True)
+ yield store
+ await store._db.close()
+
+
+@pytest.fixture
+async def recency_store() -> AsyncIterator[SqliteEngravaCore]:
+ """Return an FTS-only store configured with a positive recency weight.
+
+ The recency arm only contributes when both a ``current_cycle`` is supplied
+ *and* the resolved recency weight is non-zero, so a store that means to
+ exercise recency must carry a :class:`SearchConfig` with a positive
+ ``default_recency_weight`` (the packaged default).
+
+ Yields:
+ A :class:`SqliteEngravaCore` whose ``SearchConfig`` enables recency.
+ """
+ store = await _make_store(search_config=SearchConfig(default_recency_weight=0.1))
+ yield store
+ await store._db.close()
+
+
+# ---------------------------------------------------------------------------
+# Test 1 — remember() persists a retrievable thought from a bare string
+# ---------------------------------------------------------------------------
+
+
+async def test_remember_persists_retrievable_thought(fts_store: SqliteEngravaCore) -> None:
+ """``remember`` turns a bare string into a stored, retrievable thought."""
+ text = "The alternator on the blue sedan is failing and needs replacement."
+ stored = await fts_store.remember(text)
+
+ assert isinstance(stored, ThoughtRecord)
+ assert stored.content == text
+ # essence is a compact prefix capped at 200 chars (here the whole string).
+ assert stored.essence == text[:200]
+
+ fetched = await fts_store.get_thought(stored.thought_id)
+ assert fetched is not None
+ assert fetched.content == text
+
+
+async def test_remember_truncates_essence_to_200_chars(fts_store: SqliteEngravaCore) -> None:
+ """A long body yields a 200-char essence while content is preserved whole."""
+ text = "alternator " * 60 # ~660 chars
+ stored = await fts_store.remember(text)
+
+ assert stored.content == text
+ assert stored.essence == text[:200]
+ assert len(stored.essence) == 200
+
+
+# ---------------------------------------------------------------------------
+# Test 2 — remember(deduplicate=True) collapses byte-identical content
+# ---------------------------------------------------------------------------
+
+
+async def test_remember_deduplicate_collapses_identical_content(
+ fts_store: SqliteEngravaCore,
+) -> None:
+ """``remember(deduplicate=True)`` bumps confirmation_count, not row count."""
+ text = "Standup is moved to 10am on Thursdays."
+ first = await fts_store.remember(text, deduplicate=True)
+ second = await fts_store.remember(text, deduplicate=True)
+
+ assert second.thought_id == first.thought_id
+ assert second.confirmation_count == first.confirmation_count + 1
+
+ # Exactly one row persisted despite two remember() calls.
+ cursor = await fts_store._db.execute("SELECT COUNT(*) FROM thought")
+ row = await cursor.fetchone()
+ assert row is not None
+ assert int(row[0]) == 1
+
+
+async def test_remember_without_dedup_creates_two_rows(fts_store: SqliteEngravaCore) -> None:
+ """The default ``deduplicate=False`` inserts a new row every call."""
+ text = "Standup is moved to 10am on Thursdays."
+ await fts_store.remember(text)
+ await fts_store.remember(text)
+
+ cursor = await fts_store._db.execute("SELECT COUNT(*) FROM thought")
+ row = await cursor.fetchone()
+ assert row is not None
+ assert int(row[0]) == 2
+
+
+# ---------------------------------------------------------------------------
+# Test 3 — recall() returns the relevant stored string
+# ---------------------------------------------------------------------------
+
+
+async def test_recall_returns_relevant_thought(hybrid_store: SqliteEngravaCore) -> None:
+ """``recall`` finds the turn whose vocabulary matches the query."""
+ await hybrid_store.remember(
+ "My sister's golden retriever Biscuit is terrified of thunderstorms."
+ )
+ await hybrid_store.remember("The quarterly budget spreadsheet is over by 1200 dollars.")
+ await hybrid_store.remember("We are flying to Paris in October near Montmartre.")
+
+ results = await hybrid_store.recall("what is my sister's dog afraid of", top_k=3)
+
+ assert results.results, "recall returned no results"
+ top_id = results.results[0][0]
+ top = await hybrid_store.get_thought(top_id)
+ assert top is not None
+ assert "Biscuit" in top.content
+
+
+async def test_recall_respects_top_k(fts_store: SqliteEngravaCore) -> None:
+ """``recall`` caps the number of returned results at ``top_k``."""
+ for i in range(5):
+ await fts_store.remember(f"Note number {i} about the office fiddle leaf fig plant.")
+
+ results = await fts_store.recall("fiddle leaf fig plant", top_k=2)
+ assert len(results.results) <= 2
+
+
+# ---------------------------------------------------------------------------
+# Test 4 — recall(current_cycle=...) activates the recency backend
+# ---------------------------------------------------------------------------
+
+
+async def test_recall_current_cycle_activates_recency(recency_store: SqliteEngravaCore) -> None:
+ """Passing ``current_cycle`` wires the recency signal into the fusion.
+
+ Recency contributes only when a ``current_cycle`` is supplied *and* the
+ resolved recency weight is positive; the ``recency_store`` fixture supplies
+ the weight, and ``recall`` threads the cycle through to ``search_hybrid``.
+ """
+ await recency_store.remember("The retro is locked in for half past noon on the calendar.")
+
+ results = await recency_store.recall("retro calendar", current_cycle=10)
+ assert "recency" in results.backends_used
+
+
+async def test_recall_without_current_cycle_skips_recency(
+ recency_store: SqliteEngravaCore,
+) -> None:
+ """Omitting ``current_cycle`` leaves the recency backend out of the fusion.
+
+ Even with a positive recency weight configured, ``recall`` without a
+ ``current_cycle`` must not activate the recency arm — confirming the cycle
+ is the gating input ``recall`` forwards.
+ """
+ await recency_store.remember("The retro is locked in for half past noon on the calendar.")
+
+ results = await recency_store.recall("retro calendar")
+ assert "recency" not in results.backends_used
+
+
+# ---------------------------------------------------------------------------
+# Test 5 — remember(metadata=...) round-trips structured attributes
+# ---------------------------------------------------------------------------
+
+
+async def test_remember_round_trips_metadata(fts_store: SqliteEngravaCore) -> None:
+ """Structured ``metadata`` survives a remember/get round trip byte-exact."""
+ stored = await fts_store.remember(
+ "Customer asked about the refund window.",
+ metadata={"speaker": "agent", "turn_index": 7, "lang": "en"},
+ )
+
+ fetched = await fts_store.get_thought(stored.thought_id)
+ assert fetched is not None
+ assert fetched.metadata == {"speaker": "agent", "turn_index": 7, "lang": "en"}
+
+
+async def test_remember_defaults_metadata_to_empty(fts_store: SqliteEngravaCore) -> None:
+ """Omitting ``metadata`` stores an empty dict, not ``None``."""
+ stored = await fts_store.remember("A note with no metadata.")
+ fetched = await fts_store.get_thought(stored.thought_id)
+ assert fetched is not None
+ assert fetched.metadata == {}
+
+
+# ---------------------------------------------------------------------------
+# Test 6 — protocol parity (amended): methods on core + protocol, dedup wiring
+# ---------------------------------------------------------------------------
+
+
+def test_core_satisfies_protocol_with_remember_recall() -> None:
+ """``remember`` / ``recall`` are declared on the protocol and present on core."""
+ # The methods are declared on the core protocol itself.
+ assert hasattr(EngravaCoreProtocol, "remember")
+ assert hasattr(EngravaCoreProtocol, "recall")
+
+ # They are concretely present on the SQLite core implementation.
+ assert callable(SqliteEngravaCore.remember)
+ assert callable(SqliteEngravaCore.recall)
+
+
+async def test_core_instance_is_runtime_checkable_protocol_member(
+ fts_store: SqliteEngravaCore,
+) -> None:
+ """A live ``SqliteEngravaCore`` satisfies the runtime-checkable protocol."""
+ assert isinstance(fts_store, EngravaCoreProtocol)
+
+
+async def test_remember_dedup_true_delegates_to_create_thought(
+ fts_store: SqliteEngravaCore,
+) -> None:
+ """``remember(deduplicate=True)`` forwards the flag to ``create_thought``.
+
+ Two checks together prove the flag is threaded through rather than dropped:
+
+ * The first (outer) ``create_thought`` invocation made by ``remember``
+ carries ``deduplicate=True`` — captured by a spy that records only the
+ caller-facing call, not the internal re-delegation the dedup branch makes
+ on a cache miss.
+ * Behaviourally, a second deduplicated remember lands on the *same* row
+ with a bumped ``confirmation_count`` — the observable effect of
+ ``create_thought(deduplicate=True)``.
+ """
+ outer_calls: list[bool] = []
+ original = fts_store.create_thought
+
+ async def _spy(thought: ThoughtRecord, **kwargs: object) -> ThoughtRecord:
+ outer_calls.append(bool(kwargs.get("deduplicate", False)))
+ return await original(thought, **kwargs)
+
+ fts_store.create_thought = _spy # type: ignore[method-assign]
+ first = await fts_store.remember("dedup wiring probe", deduplicate=True)
+ second = await fts_store.remember("dedup wiring probe", deduplicate=True)
+
+ # The first thing remember() calls is create_thought(deduplicate=True).
+ assert outer_calls[0] is True
+ # And the dedup behaviour is observable: same row, bumped confirmation.
+ assert second.thought_id == first.thought_id
+ assert second.confirmation_count == first.confirmation_count + 1
+
+
+# ---------------------------------------------------------------------------
+# Test 7 — one-time recency-off DEBUG nudge fires once per store instance
+# ---------------------------------------------------------------------------
+
+
+async def test_recall_emits_recency_nudge_once_over_threshold(
+ fts_store: SqliteEngravaCore,
+ caplog: pytest.LogCaptureFixture,
+) -> None:
+ """A large store recalled without ``current_cycle`` emits one DEBUG nudge."""
+ # Cross the threshold: more than 25 thoughts and no current_cycle passed.
+ for i in range(30):
+ await fts_store.remember(f"Recency nudge corpus item {i} about widgets and gadgets.")
+
+ logger_name = "engrava.infrastructure.sqlite.engrava_core"
+ with caplog.at_level(logging.DEBUG, logger=logger_name):
+ await fts_store.recall("widgets gadgets")
+ await fts_store.recall("widgets gadgets")
+ await fts_store.recall("widgets gadgets")
+
+ nudges = [
+ record
+ for record in caplog.records
+ if record.name == logger_name
+ and record.levelno == logging.DEBUG
+ and "recency" in record.getMessage().lower()
+ ]
+ assert len(nudges) == 1, f"expected exactly one recency nudge, got {len(nudges)}"
+
+
+async def test_recall_no_nudge_under_threshold(
+ fts_store: SqliteEngravaCore,
+ caplog: pytest.LogCaptureFixture,
+) -> None:
+ """A small store does not emit the recency nudge."""
+ for i in range(3):
+ await fts_store.remember(f"Small corpus item {i}.")
+
+ logger_name = "engrava.infrastructure.sqlite.engrava_core"
+ with caplog.at_level(logging.DEBUG, logger=logger_name):
+ await fts_store.recall("corpus item")
+
+ nudges = [
+ record
+ for record in caplog.records
+ if record.name == logger_name
+ and record.levelno == logging.DEBUG
+ and "recency" in record.getMessage().lower()
+ ]
+ assert nudges == []
+
+
+async def test_recall_no_nudge_when_current_cycle_supplied(
+ fts_store: SqliteEngravaCore,
+ caplog: pytest.LogCaptureFixture,
+) -> None:
+ """Supplying ``current_cycle`` suppresses the recency nudge even when large."""
+ for i in range(30):
+ await fts_store.remember(f"Nudge-suppression corpus item {i} about widgets.")
+
+ logger_name = "engrava.infrastructure.sqlite.engrava_core"
+ with caplog.at_level(logging.DEBUG, logger=logger_name):
+ await fts_store.recall("widgets", current_cycle=5)
+
+ nudges = [
+ record
+ for record in caplog.records
+ if record.name == logger_name
+ and record.levelno == logging.DEBUG
+ and "recency" in record.getMessage().lower()
+ ]
+ assert nudges == []
From edfe620c0c13b93cd8397bb13da110fbd841702e Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 15 Jun 2026 11:05:40 +0200
Subject: [PATCH 31/40] docs: lead quickstart with remember()/recall() short
path
---
docs/quickstart.md | 32 +++++++++++++++++++++++
tests/docs/test_docs_examples_behavior.py | 27 +++++++++++++++++++
2 files changed, 59 insertions(+)
diff --git a/docs/quickstart.md b/docs/quickstart.md
index a1689f7..d3b8883 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -114,6 +114,38 @@ asyncio.run(main())
> use `await SqliteEngravaCore.from_config("engrava.yaml")` (it opens and owns
> the connection for you).
+## Store and search a memory — the short way
+
+`remember()` and `recall()` are the two-call path for getting started: store a
+piece of text, then search for it. No IDs to generate, no record to assemble.
+
+```python
+await store.remember("User prefers concise answers")
+await store.remember("User works in Berlin")
+
+result = await store.recall("what does the user prefer?")
+for thought_id, score in result.results:
+ record = await store.get_thought(thought_id)
+ if record is not None:
+ print(f"{record.essence} (score: {score:.3f})")
+```
+
+`remember()` stores the text as a thought (generating its ID for you) and
+returns the stored `ThoughtRecord`. `recall()` runs the same hybrid search as
+`search_hybrid()` and returns the ranked results.
+
+> **A note on time.** `recall()` leaves *recency* ranking off until you pass a
+> `current_cycle`. A *cycle* is a logical clock **you** own (see
+> [Cycle](concepts.md#cycle-the-agent-clock)): increment it once per turn, pass
+> it to `remember(..., created_cycle=n)` on write and `recall(..., current_cycle=n)`
+> on read, and newer memories start ranking ahead of older ones. Until then,
+> search ranks on keyword + vector + priority only — nothing is faked.
+
+The rest of this page shows the full-control path: building a `ThoughtRecord`
+yourself, linking thoughts with edges, and querying with MindQL. Reach for it
+when you need to set fields `remember()` defaults for you (priority, thought
+type, metadata, the cycle clock).
+
## Add Thoughts
```python
diff --git a/tests/docs/test_docs_examples_behavior.py b/tests/docs/test_docs_examples_behavior.py
index 7e9cf57..3f86419 100644
--- a/tests/docs/test_docs_examples_behavior.py
+++ b/tests/docs/test_docs_examples_behavior.py
@@ -137,6 +137,33 @@ async def test_quickstart_search_returns_tuples() -> None:
assert all(isinstance(tid, str) and isinstance(sc, float) for tid, sc in results)
+async def test_quickstart_remember_and_recall() -> None:
+ """quickstart 'Store and search a memory — the short way'.
+
+ remember() stores text as a thought (generating its id) and returns the
+ ThoughtRecord; recall() returns a HybridSearchResult whose ``results`` are
+ (thought_id, score) tuples. Without a ``current_cycle``, recall() leaves
+ recency off — the documented contract — so ``backends_used`` carries no
+ recency marker.
+ """
+ async with aiosqlite.connect(":memory:") as conn:
+ conn.row_factory = aiosqlite.Row
+ store = SqliteEngravaCore(conn)
+ await store.ensure_schema()
+
+ stored = await store.remember("User prefers concise answers")
+ assert stored.thought_id # generated for the caller
+ assert stored.essence == "User prefers concise answers"
+ await store.remember("User works in Berlin")
+
+ result = await store.recall("what does the user prefer?")
+ for thought_id, score in result.results:
+ assert isinstance(thought_id, str)
+ assert isinstance(score, float)
+ # Documented: recency stays off until a current_cycle is supplied.
+ assert "recency" not in result.backends_used
+
+
async def test_quickstart_mindql_find_and_count() -> None:
"""README + quickstart + mindql.md MindQL usage.
From 8c00368f974bdc54b797c96fbce1f264885e9c42 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 15 Jun 2026 11:39:40 +0200
Subject: [PATCH 32/40] =?UTF-8?q?docs:=20fix=20quickstart=20cycle=20note?=
=?UTF-8?q?=20=E2=80=94=20remember()=20takes=20no=20created=5Fcycle;=20use?=
=?UTF-8?q?=20ThoughtRecord=20for=20write-side=20cycle?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
docs/quickstart.md | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/docs/quickstart.md b/docs/quickstart.md
index d3b8883..067cd86 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -136,15 +136,17 @@ returns the stored `ThoughtRecord`. `recall()` runs the same hybrid search as
> **A note on time.** `recall()` leaves *recency* ranking off until you pass a
> `current_cycle`. A *cycle* is a logical clock **you** own (see
-> [Cycle](concepts.md#cycle-the-agent-clock)): increment it once per turn, pass
-> it to `remember(..., created_cycle=n)` on write and `recall(..., current_cycle=n)`
-> on read, and newer memories start ranking ahead of older ones. Until then,
-> search ranks on keyword + vector + priority only — nothing is faked.
+> [Cycle](concepts.md#cycle-the-agent-clock)): increment it once per turn and pass
+> it to `recall(..., current_cycle=n)` on read, and newer memories start ranking
+> ahead of older ones. `remember()` stamps both cycle fields at `0`; when you need
+> to set the cycle on a *write*, build a `ThoughtRecord` with `created_cycle=n` and
+> call `create_thought()` (shown below). Until you supply a cycle, search ranks on
+> keyword + vector + priority only — nothing is faked.
The rest of this page shows the full-control path: building a `ThoughtRecord`
yourself, linking thoughts with edges, and querying with MindQL. Reach for it
when you need to set fields `remember()` defaults for you (priority, thought
-type, metadata, the cycle clock).
+type, metadata, and the cycle clock on writes).
## Add Thoughts
From 3f415e2292e9634e510b784527c10763fff2c9d3 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 15 Jun 2026 21:04:35 +0200
Subject: [PATCH 33/40] docs: lead README Basic Usage with remember()/recall()
---
README.md | 38 ++++++++++++++++++--------------------
1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/README.md b/README.md
index 329b0c4..08d6ef2 100644
--- a/README.md
+++ b/README.md
@@ -40,13 +40,15 @@ pip install engrava[embeddings-openai] # OpenAI-compatible embeddings
### Basic Usage
+Store a memory and search for it in two calls — no IDs to generate, no record
+to assemble:
+
```python
import asyncio
-import uuid
import aiosqlite
-from engrava import LifecycleStatus, Priority, SqliteEngravaCore, ThoughtRecord, ThoughtType
+from engrava import SqliteEngravaCore
async def main() -> None:
@@ -56,29 +58,25 @@ async def main() -> None:
store = SqliteEngravaCore(conn)
await store.ensure_schema()
- # Build a ThoughtRecord and persist it with create_thought.
- observation = ThoughtRecord(
- thought_id=str(uuid.uuid4()),
- thought_type=ThoughtType.OBSERVATION,
- essence="Python is great for AI",
- content="Python's async ecosystem makes it ideal for AI agents.",
- priority=Priority.P2,
- lifecycle_status=LifecycleStatus.ACTIVE,
- created_cycle=0,
- updated_cycle=0,
- source="human",
- )
- await store.create_thought(observation)
-
- # Retrieve it
- thought = await store.get_thought(observation.thought_id)
- if thought is not None:
- print(f"Stored: {thought.essence}")
+ await store.remember("Python is great for AI agents")
+ await store.remember("SQLite needs no server")
+
+ result = await store.recall("what language is good for agents?")
+ for thought_id, score in result.results:
+ thought = await store.get_thought(thought_id)
+ if thought is not None:
+ print(f"{thought.essence} (score: {score:.3f})")
asyncio.run(main())
```
+`remember()` stores the text as a thought (generating its ID for you) and
+returns the stored `ThoughtRecord`; `recall()` runs the same hybrid search as
+`search_hybrid()` and returns the ranked results. For full control — setting
+priority, thought type, metadata, or the cognitive cycle on a write — build a
+`ThoughtRecord` yourself and call `create_thought()`.
+
From here, link thoughts with [typed edges](#edge-based-knowledge-graph),
query them with [MindQL](#mindql-query-language), or run the full
ingest → dream → search tour in the [Quick Start guide](docs/quickstart.md).
From d42a85240020577e3231d7e1f705019f3d35b879 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 15 Jun 2026 21:48:24 +0200
Subject: [PATCH 34/40] docs: correct reflection_boost default to 1.0; add test
verifying documented config defaults
---
CHANGELOG.md | 6 +-
docs/architecture.md | 2 +-
docs/search.md | 9 +-
.../infrastructure/sqlite/engrava_core.py | 2 +-
tests/docs/test_docs_config_defaults.py | 134 ++++++++++++++++++
5 files changed, 144 insertions(+), 9 deletions(-)
create mode 100644 tests/docs/test_docs_config_defaults.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ae7381a..34586f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -787,13 +787,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **`search_hybrid()` — `include_reflections` + `reflection_boost`** — callers
can now pass `include_reflections=False` to exclude REFLECTION thoughts from
hybrid search results, or a custom `reflection_boost` multiplier to re-rank
- them. The default boost (`SearchConfig.reflection_boost = 1.2`) gives
- REFLECTION thoughts a mild up-ranking.
+ them. The default boost (`SearchConfig.reflection_boost = 1.0`) leaves
+ REFLECTION thoughts competing on equal footing until raised.
- **`search_reflections_only()`** — convenience method on `SqliteEngravaCore`
that returns only `ThoughtType.REFLECTION` thoughts ranked by hybrid score.
- **`list_edges()`** — new `SqliteEngravaCore` method for querying edges by
optional `edge_type` / `source` filters with configurable `limit`.
-- **`SearchConfig.reflection_boost`** — new field (default `1.2`) parsed from
+- **`SearchConfig.reflection_boost`** — new field (default `1.0`) parsed from
YAML `search.reflection_boost`.
- **Dream-created edges** — `run_consolidation()` now creates
diff --git a/docs/architecture.md b/docs/architecture.md
index 319d099..38ec895 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -130,7 +130,7 @@ reasoners) belongs in consumers, not in engrava core.
- `ConsolidationResult.reflections_created` is a new field (default `0`).
- New `DreamingGates` fields: `min_cluster_size`, `cluster_similarity_threshold`,
`cluster_algorithm`, `enable_reflections` — all backward-compatible defaults.
-- New `SearchConfig.reflection_boost` field (default `1.2`).
+- New `SearchConfig.reflection_boost` field (default `1.0`).
- New `search_hybrid()` params: `include_reflections` (default `True`),
`reflection_boost` (default `None` → uses config).
- New `search_reflections_only()` helper method.
diff --git a/docs/search.md b/docs/search.md
index 7aac2c7..9a848aa 100644
--- a/docs/search.md
+++ b/docs/search.md
@@ -168,11 +168,12 @@ result = await store.search_hybrid(
)
```
-### `reflection_boost` (default `SearchConfig.reflection_boost = 1.2`)
+### `reflection_boost` (default `SearchConfig.reflection_boost = 1.0`)
When REFLECTIONs are included, their final score is multiplied by this
-factor. The default `1.2` gives a modest upranking so high-level
-abstractions surface for broad queries without dominating narrow ones.
+factor. The default `1.0` leaves REFLECTIONs competing on equal footing;
+raise it above `1.0` for a modest upranking so high-level abstractions
+surface for broad queries without dominating narrow ones.
```python
# Stronger boost — reflections rank near the top for broad queries
@@ -194,7 +195,7 @@ Configure the default in YAML:
```yaml
search:
- reflection_boost: 1.2 # applies when reflection_boost not overridden per-call
+ reflection_boost: 1.0 # applies when reflection_boost not overridden per-call
```
### `search_reflections_only()`
diff --git a/src/engrava/infrastructure/sqlite/engrava_core.py b/src/engrava/infrastructure/sqlite/engrava_core.py
index c31eac1..0fd4c15 100644
--- a/src/engrava/infrastructure/sqlite/engrava_core.py
+++ b/src/engrava/infrastructure/sqlite/engrava_core.py
@@ -3676,7 +3676,7 @@ async def search_hybrid( # noqa: C901, PLR0912, PLR0915
excluded from results.
reflection_boost: Multiplier applied to REFLECTION thought
scores. ``None`` uses the value from ``SearchConfig``
- (default ``1.2``).
+ (default ``1.0``).
Returns:
``HybridSearchResult`` with ranked results and diagnostics.
diff --git a/tests/docs/test_docs_config_defaults.py b/tests/docs/test_docs_config_defaults.py
new file mode 100644
index 0000000..5046c36
--- /dev/null
+++ b/tests/docs/test_docs_config_defaults.py
@@ -0,0 +1,134 @@
+"""Layer 4 of the documentation-example tests — documented config defaults.
+
+The compile / phantom-API / behaviour layers catch syntactically-wrong or
+nonexistent API, but none of them verify a *documented default value* against
+the shipped configuration object. A doc line like
+``reflection_boost (default 1.2)`` compiles fine, names a real field, and runs
+fine — yet silently misleads a user who copies the value into their config when
+the code actually ships ``1.0``.
+
+This module closes that gap. For every config field whose default is quoted in
+the docs, it asserts the documented value equals the runtime default on the
+shipped dataclass — and that the value actually appears at the documented
+location, so the test fails loudly if either the docs or this registry drift.
+
+To add a new checked default: append a row to ``DOCUMENTED_DEFAULTS`` with the
+config class, field name, and the doc files that state it. The expected value is
+read from the *code*, never hard-coded here, so the code stays the single source
+of truth.
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+
+import pytest
+
+from engrava import DreamingGates, SearchConfig, TTLConfig
+from tests.docs._md_blocks import REPO_ROOT
+
+
+@dataclass(frozen=True)
+class DocumentedDefault:
+ """A config default that the documentation quotes.
+
+ Attributes:
+ config_factory: Zero-arg callable returning a default config instance.
+ field: The attribute whose default is documented.
+ doc_files: Doc paths (relative to repo root) that state the default.
+ label: Human-readable id for test parametrisation.
+
+ """
+
+ config_factory: type
+ field: str
+ doc_files: tuple[str, ...]
+ label: str
+
+
+# Each entry pairs a documented config default with the shipped dataclass that
+# owns it. The expected value is read from the dataclass at runtime (never
+# hard-coded), so the assertion is "the docs match the code", with the code as
+# the single source of truth.
+DOCUMENTED_DEFAULTS: tuple[DocumentedDefault, ...] = (
+ DocumentedDefault(
+ config_factory=SearchConfig,
+ field="reflection_boost",
+ doc_files=("docs/search.md", "docs/architecture.md"),
+ label="SearchConfig.reflection_boost",
+ ),
+ DocumentedDefault(
+ config_factory=TTLConfig,
+ field="check_every_n_operations",
+ doc_files=("docs/data-lifecycle.md",),
+ label="TTLConfig.check_every_n_operations",
+ ),
+ DocumentedDefault(
+ config_factory=DreamingGates,
+ field="min_age_cycles",
+ doc_files=("docs/troubleshooting.md",),
+ label="DreamingGates.min_age_cycles",
+ ),
+)
+
+
+def _format_default(value: object) -> str:
+ """Render a default value the way the docs quote it (``1.0`` not ``1``)."""
+ if isinstance(value, bool):
+ return str(value)
+ if isinstance(value, float) and value.is_integer():
+ return f"{value:.1f}"
+ return str(value)
+
+
+@pytest.mark.parametrize(
+ "spec",
+ DOCUMENTED_DEFAULTS,
+ ids=[d.label for d in DOCUMENTED_DEFAULTS],
+)
+def test_documented_default_matches_shipped(spec: DocumentedDefault) -> None:
+ """Each documented config default equals the shipped runtime default."""
+ shipped = getattr(spec.config_factory(), spec.field)
+ rendered = _format_default(shipped)
+
+ # The value the docs MUST state (e.g. ``1.0``), in the backtick form the
+ # docs use for inline code.
+ expected_token = f"`{rendered}`"
+
+ # A line documents the CONFIG-FIELD default (not a method-param default)
+ # when it names the field, says "default", and quotes a numeric literal.
+ # Lines whose default is ``None`` / "uses config" describe the per-call
+ # method parameter, not the config field — they are skipped.
+ numeric_default = re.compile(r"`?\d+(?:\.\d+)?`?")
+
+ for rel in spec.doc_files:
+ path = REPO_ROOT / rel
+ text = path.read_text(encoding="utf-8")
+
+ field_default_lines = [
+ ln
+ for ln in text.splitlines()
+ if spec.field in ln
+ and "default" in ln.lower()
+ and "none" not in ln.lower()
+ and numeric_default.search(ln) is not None
+ ]
+ assert field_default_lines, (
+ f"{rel} names no numeric documented default for {spec.field!r}; "
+ f"update DOCUMENTED_DEFAULTS or the doc."
+ )
+ for line in field_default_lines:
+ assert expected_token in line or f"= {rendered}" in line or f": {rendered}" in line, (
+ f"{rel} documents {spec.field} with a default that does not match "
+ f"the shipped value {rendered!r} (from {spec.config_factory.__name__}). "
+ f"Offending line: {line.strip()!r}. Fix the doc to state {rendered!r}."
+ )
+
+
+def test_registry_is_nonempty() -> None:
+ """Guard against the registry silently emptying (vacuous pass)."""
+ assert len(DOCUMENTED_DEFAULTS) >= 3, (
+ "DOCUMENTED_DEFAULTS shrank unexpectedly; documented config defaults "
+ "would no longer be verified against the shipped code."
+ )
From 2e80d0485ae0993babe2678a9ab0d05021d2ac86 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Mon, 15 Jun 2026 21:50:08 +0200
Subject: [PATCH 35/40] docs: document percept/utterance/thought metadata
helpers in api-reference
---
docs/api-reference.md | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/docs/api-reference.md b/docs/api-reference.md
index c482b53..463534b 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -415,6 +415,31 @@ for thought_id, score in result.results:
...
```
+## Metadata Helpers
+
+Three exported helpers build the structured `metadata` dict that pins a
+thought's origin (self vs external, source id, language). They are pure
+functions — same arguments always return an equal dict — and you are free to
+pass a literal dict instead; the helpers exist to remove typo-driven shape
+mismatches at the call site.
+
+| Helper | Signature | Use for |
+|--------|-----------|---------|
+| `percept` | `percept(*, is_self=False, source_id=None, label=None, confidence="high", lang="en")` | Input arriving from outside (user message, document) |
+| `utterance` | `utterance(*, lang="en")` | The agent's own output sent to the world |
+| `thought` | `thought(*, lang="en")` | The agent's internal cognition (reflection, plan) |
+
+```python
+from engrava import percept
+
+metadata = percept(source_id="user-1", label="user")
+# -> {'perspective': 'percept',
+# 'source': {'is_self': False, 'confidence': 'high', 'id': 'user-1', 'label': 'user'},
+# 'lang': 'en', 'content_type': 'natural_language'}
+```
+
+Pass the returned dict as `ThoughtRecord(..., metadata=...)`.
+
## Enums
All enums are `StrEnum` — JSON-serializable and stored as strings.
From c55f0adac8e23864ccadafeca658400f86186b80 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Tue, 16 Jun 2026 22:03:50 +0200
Subject: [PATCH 36/40] docs: list all installable extras in README (mcp +
ollama/hf embeddings); note dreaming needs no extra
---
README.md | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 08d6ef2..0b93e90 100644
--- a/README.md
+++ b/README.md
@@ -33,11 +33,17 @@ pip install engrava
Optional extras:
```bash
-pip install engrava[vec] # sqlite-vec vector search backend
-pip install engrava[embeddings-local] # sentence-transformers embeddings
-pip install engrava[embeddings-openai] # OpenAI-compatible embeddings
+pip install engrava[vec] # sqlite-vec vector search backend
+pip install engrava[mcp] # MCP server (engrava-mcp) for Claude Desktop/Code, Cursor, …
+pip install engrava[embeddings-local] # sentence-transformers embeddings (local model)
+pip install engrava[embeddings-openai] # OpenAI-compatible embeddings API
+pip install engrava[embeddings-ollama] # Ollama local embeddings server
+pip install engrava[embeddings-hf] # HuggingFace Inference API embeddings
```
+Dreaming/consolidation and the knowledge graph need **no extra** — they are part
+of the base install.
+
### Basic Usage
Store a memory and search for it in two calls — no IDs to generate, no record
From c6fa9466082af13d58cab0836cbe6d90264a927e Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Tue, 16 Jun 2026 22:10:25 +0200
Subject: [PATCH 37/40] build: drop no-op 'dreaming' extra (empty deps;
dreaming is in the base install)
---
pyproject.toml | 1 -
1 file changed, 1 deletion(-)
diff --git a/pyproject.toml b/pyproject.toml
index 7d298bd..3de3ee0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,6 @@ Changelog = "https://github.com/sovantica/engrava/blob/main/CHANGELOG.md"
[project.optional-dependencies]
vec = ["sqlite-vec>=0.1.0,<0.2.0"]
mcp = ["mcp>=1.27.0"]
-dreaming = []
embeddings-local = ["sentence-transformers>=3.0.0", "torch>=2.0.0"]
embeddings-openai = ["httpx>=0.27.0"]
embeddings-ollama = ["httpx>=0.27.0"]
From ed8b1843678d5eaa3f63d025e6b2f4a4cb783af9 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Tue, 16 Jun 2026 22:44:39 +0200
Subject: [PATCH 38/40] docs(tests): de-reference internal principle name in
doc-test rationale
---
tests/docs/test_docs_examples_compile.py | 4 ++--
tests/docs/test_docs_examples_execute.py | 3 ++-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/tests/docs/test_docs_examples_compile.py b/tests/docs/test_docs_examples_compile.py
index 5860288..0e0a367 100644
--- a/tests/docs/test_docs_examples_compile.py
+++ b/tests/docs/test_docs_examples_compile.py
@@ -12,8 +12,8 @@
*not* to exist in engrava (fabricated enum members, removed/renamed
symbols, wrong attribute names) must never appear in a doc code block.
This is the inverse of a secret-scanner: it enumerates only **public,
- nonexistent** identifiers, so it is safe on the public surface and cannot
- leak anything (cf. AGENT_PRINCIPLES Principle 1).
+ nonexistent** identifiers, so it is safe to ship and cannot leak any
+ internal term.
These guards are what would have caught the 0.3.x documentation drift
(``thought_type="INSIGHT"``, ``create_edge(..., "ASSOCIATION")``,
diff --git a/tests/docs/test_docs_examples_execute.py b/tests/docs/test_docs_examples_execute.py
index 9e010f1..df6b8b8 100644
--- a/tests/docs/test_docs_examples_execute.py
+++ b/tests/docs/test_docs_examples_execute.py
@@ -34,7 +34,8 @@
explicit entry in ``EXECUTABLE_BLOCKS`` or ``CONCATENATED_PAGES`` below. The
opt-in lives entirely in this test module — there is no special fence syntax or
marker in the Markdown — so the public docs (and the engrava.ai mirror) need no
-magic annotations to be executed (cf. AGENT_PRINCIPLES Principle 1).
+magic annotations to be executed: published Markdown stays clean of any
+test-only markers.
* To execute a **single** self-contained block, add a
``(markdown_path, anchor_substring)`` entry to ``EXECUTABLE_BLOCKS``. The
From cd5df19af3ac02ae027e9a6dc10344d6b223802e Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Tue, 16 Jun 2026 23:17:35 +0200
Subject: [PATCH 39/40] docs: bring architecture + CLI docs up to 0.4.0
(bi-temporal + MCP)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Update the architecture overview for 0.4.0 — add the bi-temporal valid-time
surface and the MCP server to the Layer Model and Core Components, and rewrite
the upgrade-path sections (REFLECTION clustering correctly attributed to 0.3.0;
v0.3->v0.4 now documents bi-temporal + MCP). Also document the gc --expired
flag and a bi-temporal query in the CLI guide, and drop an internal reference
from a module docstring.
---
docs/architecture.md | 54 ++++++++++++++++++++++++++++++-----------
docs/cli.md | 9 +++++--
src/engrava/cli/main.py | 9 ++++---
3 files changed, 52 insertions(+), 20 deletions(-)
diff --git a/docs/architecture.md b/docs/architecture.md
index 38ec895..a565d24 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -10,7 +10,7 @@ Imports flow **downward only**:
```
┌──────────────────────────────────────────────────┐
-│ CLI / Consumer apps, scripts, benchmarks │
+│ CLI / MCP server / Consumer apps, scripts, … │
├──────────────────────────────────────────────────┤
│ Extensions / Embeddings / MindQL │
│ (dreaming, hooks, providers, query language) │
@@ -33,6 +33,11 @@ Imports flow **downward only**:
- **Embeddings** (`src/engrava/embeddings/`) — pluggable embedding
providers.
- **CLI** (`src/engrava/cli/`) — Click-based command-line interface.
+- **MCP server** (`src/engrava/mcp/`) — an optional [Model Context
+ Protocol](https://modelcontextprotocol.io) server behind the `mcp` extra. Like
+ the CLI it is a **top-layer API *consumer***, not part of engrava core: it wraps
+ the public async API over stdio so MCP clients (Claude Desktop, Cursor, …) can
+ use a store. See [MCP server](../docs/guides/mcp.md).
## Core Components
@@ -45,6 +50,11 @@ The primary store implementation. Provides:
- Embedding storage and vector similarity search
- Full-text search (FTS5)
- Hybrid search (5-signal fusion — see below)
+- **Bi-temporal valid time** — optional `valid_from` / `valid_until` bounds on
+ thoughts *and* edges (a second time axis: *when a fact is true*, distinct from
+ when it was recorded), queried via the four valid-time MindQL predicates, with
+ `invalidate_thought` / `invalidate_edge` to close an interval without deleting.
+ See [The Bi-temporal Model](../docs/bitemporal.md).
- Schema management and migrations
### Hybrid Search (5-Signal Fusion)
@@ -121,19 +131,35 @@ reasoners) belongs in consumers, not in engrava core.
- New `DreamingConfig.edges` block defaults to `enabled=True`.
- Existing databases receive dream-created edges on the next
consolidation run; no retroactive edge creation for historical data.
+- **REFLECTION clustering** (the third dreaming phase) also shipped in 0.3.0:
+ `run_consolidation()` clusters semantically related thoughts and creates
+ `ThoughtType.REFLECTION` meta-thoughts. Opt-out via
+ `DreamingGates.enable_reflections = False`. New fields —
+ `ConsolidationResult.reflections_created` (default `0`); `DreamingGates`
+ `min_cluster_size` / `cluster_similarity_threshold` / `cluster_algorithm` /
+ `enable_reflections`; `SearchConfig.reflection_boost` (default `1.0`);
+ `search_hybrid()` `include_reflections` (default `True`) and `reflection_boost`
+ (default `None` → uses config); the `search_reflections_only()` and
+ `thought_exists_by_source()` helpers — all with backward-compatible defaults.
+ REFLECTIONs are created on the next consolidation run; no retroactive clustering.
## Upgrade Path (v0.3 → v0.4)
-- **Additive only** — no breaking changes.
-- `run_consolidation()` gains a third phase (clustering + REFLECTION
- creation). Opt-out via `DreamingGates.enable_reflections = False`.
-- `ConsolidationResult.reflections_created` is a new field (default `0`).
-- New `DreamingGates` fields: `min_cluster_size`, `cluster_similarity_threshold`,
- `cluster_algorithm`, `enable_reflections` — all backward-compatible defaults.
-- New `SearchConfig.reflection_boost` field (default `1.0`).
-- New `search_hybrid()` params: `include_reflections` (default `True`),
- `reflection_boost` (default `None` → uses config).
-- New `search_reflections_only()` helper method.
-- New `thought_exists_by_source()` utility method.
-- Existing databases: no migration needed. REFLECTIONs are created on
- the next consolidation run; no retroactive clustering.
+- **Additive only** — no breaking changes; existing code is unaffected and a query
+ that uses no temporal predicate behaves exactly as before.
+- **Bi-temporal model.** Thoughts and edges gain two optional, nullable ISO-8601
+ fields, `valid_from` and `valid_until` (an open bound = ±∞). Four opt-in MindQL
+ `WHERE` predicates query *valid time* — `valid_now`, `valid_at`, `valid_within`,
+ `valid_between` — and `invalidate_thought` / `invalidate_edge` close an interval
+ without deleting. REFLECTIONs inherit their members' valid-time extent. The schema
+ migration is **additive** (`user_version 12 → 14`, two steps), zero data loss; a
+ legacy row keeps open (`NULL`) bounds and still matches point-in-time queries.
+ See [The Bi-temporal Model](../docs/bitemporal.md).
+- **MCP server.** A new optional `mcp` extra (`pip install "engrava[mcp]"`) ships a
+ Model Context Protocol server over stdio (`engrava-mcp`). It is a pure API
+ *consumer* — plain `pip install engrava` is unaffected and stays dependency-light.
+ See [MCP server](../docs/guides/mcp.md).
+- **`execute_mindql`** — a store-level convenience that runs a parsed `MindQLQuery`
+ against the store's own connection.
+- Existing databases: the valid-time columns and indexes are added automatically on
+ first open; no manual migration step.
diff --git a/docs/cli.md b/docs/cli.md
index 6f0e4b9..e8527c5 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -85,8 +85,12 @@ or registered extension commands:
engrava query "FIND thoughts WHERE lifecycle_status = 'ACTIVE'"
engrava query "COUNT thoughts WHERE priority = 'P1'"
engrava --format json query "SELECT thought_id, essence FROM thought LIMIT 5"
+engrava query "FIND thoughts WHERE valid_now" # only currently-valid facts
```
+The bi-temporal `valid_now`, `valid_at`, `valid_within`, and `valid_between`
+predicates work here too — see [MindQL](mindql.md) for their full semantics.
+
### `snapshot`
Exports the **entire** database to a JSONL snapshot (one record per line).
@@ -119,8 +123,9 @@ engrava --db /data/engrava.db snapshot --service tenant_a # -> /data/tenant_a.
engrava --config engrava.yaml snapshot --service tenant_a # data_dir from config
```
-> A snapshot exports `thought`, `edge`, `embedding`, and `action` records — but
-> **not** the audit journal (`journal_entry`). See
+> A snapshot exports every column of the `thought`, `edge`, `embedding`, and
+> `action` records — including the bi-temporal `valid_from` / `valid_until`
+> fields — but **not** the audit journal (`journal_entry`). See
> [Backup & Recovery](backup-and-recovery.md) for what this means and when to use
> a physical file backup instead.
diff --git a/src/engrava/cli/main.py b/src/engrava/cli/main.py
index faf85ab..45d03ee 100644
--- a/src/engrava/cli/main.py
+++ b/src/engrava/cli/main.py
@@ -11,9 +11,6 @@
engrava gc
engrava migrate
engrava export -o thoughts.json
-
-Related:
- - REQ-OPS-018 (CLI tooling for engrava)
"""
from __future__ import annotations
@@ -950,7 +947,11 @@ async def _gc_archived(
)
@click.pass_context
def gc(ctx: click.Context, *, dry_run: bool, expired: bool) -> None:
- """Garbage-collect archived thoughts and their orphaned edges."""
+ """Garbage-collect archived thoughts and their orphaned edges.
+
+ With ``--expired``, also clean up expired TTL thoughts first (archived
+ or deleted per the configured ``ttl.strategy``).
+ """
cfg: EngravaCLIConfig = ctx.obj["config"]
async def _gc() -> None:
From bb5dff17492cb3aa80db5c398c5a26dc5a33f163 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Wed, 17 Jun 2026 23:30:05 +0200
Subject: [PATCH 40/40] docs: fix 0.4.0 drift found in the full doc audit
Correct seven low/medium doc inaccuracies vs shipped 0.4.0: created_cycle/
updated_cycle optional (default 0) not required; fix three dead "& multi-tenancy"
anchor links; add the MCP server to the quickstart Next Steps; type the
subclassing example param as aiosqlite.Row; qualify search_keywords as an MCP
tool not a store method; the priority signal shipped in 0.3.0 not v0.2.1; and
point the core scope at infrastructure/sqlite.
---
docs/concepts.md | 7 ++++---
docs/extensions.md | 4 +++-
docs/guides/migrating-from-other-memory.md | 6 +++---
docs/known-limitations.md | 2 +-
docs/quickstart.md | 1 +
docs/scopes.md | 2 +-
docs/troubleshooting.md | 5 +++--
7 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/docs/concepts.md b/docs/concepts.md
index 6126d0f..2c58c7d 100644
--- a/docs/concepts.md
+++ b/docs/concepts.md
@@ -139,9 +139,10 @@ turn / interaction / scheduled pass.
Three fields use it:
-- **`created_cycle`** / **`updated_cycle`** — required on every `ThoughtRecord`
- (the model enforces `updated_cycle >= created_cycle`). They stamp *when, in
- your agent's logical time*, a thought appeared and last changed.
+- **`created_cycle`** / **`updated_cycle`** — optional on `ThoughtRecord`, both
+ default to `0` (so callers that don't track cognitive cycles can omit them);
+ when set, the model enforces `updated_cycle >= created_cycle`. They stamp
+ *when, in your agent's logical time*, a thought appeared and last changed.
- **`current_cycle`** — the value you pass into `search_hybrid(...)` and
`run_consolidation(...)` to tell Engrava "it is now tick N."
diff --git a/docs/extensions.md b/docs/extensions.md
index 4ffc72f..59589e7 100644
--- a/docs/extensions.md
+++ b/docs/extensions.md
@@ -322,10 +322,12 @@ For deeper customization, subclass `SqliteEngravaCore` and override
the template methods:
```python
+import aiosqlite
+
from engrava import SqliteEngravaCore, ThoughtRecord
class ExtendedStore(SqliteEngravaCore):
- def _row_to_thought(self, row: dict) -> ThoughtRecord:
+ def _row_to_thought(self, row: aiosqlite.Row) -> ThoughtRecord:
"""Override to produce a richer model type."""
# Add custom field mapping here
return super()._row_to_thought(row)
diff --git a/docs/guides/migrating-from-other-memory.md b/docs/guides/migrating-from-other-memory.md
index 2b7954c..3f3f919 100644
--- a/docs/guides/migrating-from-other-memory.md
+++ b/docs/guides/migrating-from-other-memory.md
@@ -10,7 +10,7 @@ It covers three things:
2. [Porting your write/read calls](#porting-your-calls) with before/after snippets.
3. [Bulk-importing](#bulk-import) an existing corpus efficiently.
-It ends with [filtering, scoping & multi-tenancy](#filtering-scoping-and-multi-tenancy)
+It ends with [filtering, scoping & multi-tenancy](#filtering-scoping--multi-tenancy)
— the one area where Engrava's defaults differ most from a hosted service, and
what to do about it.
@@ -28,7 +28,7 @@ common concepts onto Engrava:
| "Memory" / "record" / "document" | **`ThoughtRecord`** | The unit you store. Has `essence` (short) + `content` (full). |
| "Memory type" / "role" | **`thought_type`** (`OBSERVATION`, `BELIEF`, `TASK`, …) | A small fixed taxonomy; see [Core Concepts](../concepts.md). |
| Free-form metadata / `metadata={...}` | **`ThoughtRecord.metadata`** | An arbitrary JSON dict, persisted and round-tripped. |
-| "User id" / "session id" / namespace | A key inside **`metadata`** (or `source`) | Engrava has no built-in tenant field — see [scoping](#filtering-scoping-and-multi-tenancy). |
+| "User id" / "session id" / namespace | A key inside **`metadata`** (or `source`) | Engrava has no built-in tenant field — see [scoping](#filtering-scoping--multi-tenancy). |
| Relationship / link between memories | **`EdgeRecord`** (typed, weighted) | First-class graph; edges also feed ranking. |
| Embedding / vector | Stored on write only with `embedding_provider=...` **and** `auto_embed=True`; otherwise call `store_embedding(thought_id, vector)` yourself | See the [Embeddings guide](embeddings.md). |
| Vector / similarity search | **`search_similar(query_vector, …)`** | Needs a ready query vector. |
@@ -88,7 +88,7 @@ for thought_id, score in result.results:
print(score, record.essence)
```
-See [filtering, scoping & multi-tenancy](#filtering-scoping-and-multi-tenancy)
+See [filtering, scoping & multi-tenancy](#filtering-scoping--multi-tenancy)
for why the post-filter is there and how to do it better.
## Bulk import
diff --git a/docs/known-limitations.md b/docs/known-limitations.md
index e7851a7..df3382b 100644
--- a/docs/known-limitations.md
+++ b/docs/known-limitations.md
@@ -86,7 +86,7 @@ issues. Performance depends on index coverage and query patterns.
## `HybridSearchResult.backends_used` Is an Open Set
`backends_used` is a `frozenset[str]` that may grow as new scoring signals
-are added (e.g. `"priority"` was added in v0.2.1). Do **not** compare it
+are added (e.g. `"priority"` was added in v0.3.0). Do **not** compare it
with exact equality (`== {"fts5", "vector"}`). Use subset checks instead:
```python
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 067cd86..f478709 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -296,5 +296,6 @@ Build something next, then reach for the references:
- [Configuration](configuration.md) — YAML-based setup for production use
- [API Reference](api-reference.md) — full class and method reference
- [MindQL](mindql.md) — complete query language reference
+- [MCP server](guides/mcp.md) — connect an MCP client (Claude Desktop, Cursor, …) to a store
- [Troubleshooting](troubleshooting.md) — when something doesn't work as expected
- [FAQ](faq.md) — quick answers to common questions
diff --git a/docs/scopes.md b/docs/scopes.md
index db131e4..50f6733 100644
--- a/docs/scopes.md
+++ b/docs/scopes.md
@@ -7,7 +7,7 @@ Example: `feat(dreaming): add priority signal to hybrid search`
## Scopes
-- `core` — domain models, core engine (`src/engrava/domain/`, `src/engrava/infrastructure/core/`)
+- `core` — domain models, core engine (`src/engrava/domain/`, `src/engrava/infrastructure/sqlite/`)
- `domain` — domain-layer models and protocols (`src/engrava/domain/`)
- `infra` — infrastructure layer (SQLite, persistence)
- `dreaming` — consolidation cycle, reflection, edges, priority
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index 3916756..e898cf0 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -89,8 +89,9 @@ embedding provider (see the [Embeddings guide](guides/embeddings.md)). If
## Keyword search returns too many results (I wanted all words to match)
-**Symptom.** A multi-word `search_fts` / `search_keywords` query returns documents
-that contain only *some* of the words, not all of them.
+**Symptom.** A multi-word `search_fts` query (or the MCP `search_keywords` tool that
+wraps it — note `search_keywords` is an MCP tool name, not a `SqliteEngravaCore`
+method) returns documents that contain only *some* of the words, not all of them.
**Cause.** A **bare** keyword query is matched with `OR`, by design — a document
matches when it shares *any* word, and BM25 ranks the ones sharing the most