Skip to content
Merged
55 changes: 38 additions & 17 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ Orb/
│ │ │ # depends on nothing else (see Data Contracts below)
│ │ ├── connection.py # DB_PATH, get_db() async context manager, _build_set_clause
│ │ ├── schema.py # CREATE TABLES script
│ │ ├── preset_schema.py # Preset engine product/security policy (single source of
│ │ │ # truth): DOMAIN_ROOTS, EXCLUDED/SECRET/PRESERVED cols
│ │ ├── seeds.py # SEED_* / DEFAULT_* constants
│ │ ├── bootstrap.py # init_db() (schema + inline ALTERs + seed inserts), reset_to_defaults()
│ │ ├── queries/ # Per-domain CRUD modules (one file per table group)
Expand All @@ -79,7 +81,9 @@ Orb/
│ ├── macros.py # Macro resolution ({{user}}, {{char}}, {{roll}}, etc.)
│ ├── kv_tracker.py # Debug: logs messages/tools to JSON for inspection
│ ├── presets.py # Preset/backup engine: selective export, merge-import,
│ │ # full snapshots/restore (sqlite ATTACH + VACUUM INTO)
│ │ # full snapshots/restore (sqlite ATTACH + VACUUM INTO).
│ │ # Schema-driven: mechanics derived from live schema via
│ │ # PRAGMA; policy declared in database/preset_schema.py
│ ├── locks.py # Cross-module asyncio locks (workflow_state / character_state / config / maintenance)
│ ├── utils.py # Shared utilities
│ ├── passes/
Expand Down Expand Up @@ -239,22 +243,39 @@ grouped into coarse **domains** (`characters`, `chats`, `lorebooks`, `fragments`
`phrase_bank`, `configs`); a *preset* carries a chosen subset, a *snapshot* is a
full-domain preset, and both live in one on-disk library described by an
`orb_preset_meta` row. Two ways data crosses back in: **apply** (merge by
identity — UUID rows upsert, child collections replace wholesale, integer-PK rows
reinsert with remapped references) and **restore** (roll back to the file — a
full-coverage file is swapped in whole via `restore_full`; a partial file is
restored *domain-scoped* via `restore_partial`/`apply_preset(replace=True)`,
which empties each covered domain before the merge so those domains match the
file exactly while uncovered ones are untouched). Both work on any library file —
imported ones included; restore's auto-snapshot makes the overwrite reversible.
**Import** is non-destructive: it just lands an external `.db` in the library
(the user then applies or restores it). Destructive ops auto-snapshot first.

The single source of truth for *which tables belong to which domain* is the
`DOMAIN_TABLES` map at the top of `presets.py`. **When you add a table** (or a
table sprouts a cross-domain FK), update that map and the per-domain merge logic
there — keep the domain grouping current rather than expanding this section. Runs
synchronously off the event loop via `asyncio.to_thread` under
`backend.locks.maintenance_lock`.
identity) and **restore** (roll back to the file — a full-coverage file is swapped
in whole via `restore_full`; a partial file is restored *domain-scoped* via
`restore_partial`/`apply_preset(replace=True)`, which empties each covered domain
before the merge so those domains match the file exactly while uncovered ones are
untouched). Both work on any library file — imported ones included; restore's
auto-snapshot makes the overwrite reversible. **Import** is non-destructive: it
just lands an external `.db` in the library (the user then applies or restores it).
Destructive ops auto-snapshot first.

**The merge engine is schema-driven.** It introspects the live schema with
`PRAGMA` (`_build_schema_model()`) to derive *all* of its mechanics — per-table
classification (`singleton` = a `CHECK (id = 1)` table updated in place; `stable`
= portable identity, upserted by PK; `surrogate` = autoincrement rowid, reinserted
under fresh ids with an old→new map), the FK graph, the topological insert order,
and which edges to defer (self refs + cross-table cycles, inserted NULL then fixed
up). Ownership (`ON DELETE CASCADE`) edges define the entity tree and the
child-replace scope; non-CASCADE edges are soft cross-references, reconciled after
a full replace. **Adding a child table or an FK column needs zero edits in
`presets.py`** — the model just grows.

The *only* hand-maintained input is the product/security **policy** in
`backend/database/preset_schema.py`: `DOMAIN_ROOTS` (root table → user-facing
domain; every non-root auto-joins its root's domain by climbing ownership edges),
`EXCLUDED_TABLES`, `SECRET_COLUMNS` (blanked when `configs` isn't exported),
`IMPLIED_DOMAINS` (e.g. `chats` pulls in `characters`), `PRESERVED_COLUMNS`
(local-only cols kept across the settings-singleton overwrite), and the
`SENSITIVE_*` markers. **When you add a table** that introduces a new entity root,
or a column that looks secret, update that file. A drift backstop —
`schema_coverage_problems()`, asserted by `tests/integration/test_preset_schema_coverage.py`
— fails loudly the moment a freshly-migrated table maps to no domain, an FK
references an unclassified parent, or a sensitive-looking column is missing from
`SECRET_COLUMNS`. Runs synchronously off the event loop via `asyncio.to_thread`
under `backend.locks.maintenance_lock`.

## Data Contracts (the model layer)

Expand Down
3 changes: 2 additions & 1 deletion backend/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from __future__ import annotations

from .bootstrap import init_db, reset_to_defaults
from .bootstrap import init_db, reset_to_defaults, schema_safety_problems
from .connection import DB_PATH, get_db
from .queries.character_cards import (
create_character_card,
Expand Down Expand Up @@ -198,6 +198,7 @@
"get_world",
"get_world_by_name",
"get_worlds",
"schema_safety_problems",
"init_db",
"insert_alternate_greeting_swipes",
"insert_workflow_attachment_row",
Expand Down
32 changes: 32 additions & 0 deletions backend/database/bootstrap.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import json
import sqlite3

from .connection import get_db
from .schema import CREATE_TABLES_SQL
Expand All @@ -13,6 +14,37 @@
)


def schema_safety_problems() -> list[str]:
    """List the reasons the live DB schema is unsafe for the preset engine.

    An empty list means the schema is safe. Intended to be called right after
    ``run_pending`` so that a schema change the preset engine cannot handle -- a new
    uncovered table, a stale policy constant, or a migration that leaves a table
    unlike ``CREATE_TABLES_SQL`` (the 0026 persona_lock_id / 0008 vestigial-column
    class of bug) -- is reported at boot, naming the constant or migration to fix.

    The result is advisory rather than fatal: the check protects *preset/backup*
    operations, not ordinary app queries, so a schema quirk should produce a loud
    warning instead of refusing startup on every real install. The preset
    operations still call ``presets.assert_schema_safe`` themselves and fail hard
    on the same problems, so no backup is built or applied against an unsafe
    schema.

    Both imports are deliberately function-local. ``backend.presets`` pulls in the
    migration runner and would close an import cycle through this package if
    imported at module level. ``DB_PATH`` is looked up on the ``connection`` module
    at call time (not through an import-time binding) so a path monkeypatched in
    tests is honoured.

    Returns:
        The problem descriptions produced by ``presets.schema_safety_problems``.
    """
    from .. import presets
    from . import connection

    # A short-lived, synchronous connection used only for this inspection.
    db = sqlite3.connect(connection.DB_PATH)
    try:
        problems = presets.schema_safety_problems(db)
    finally:
        db.close()
    return problems


async def init_db():
"""Create the latest schema for fresh installs and seed empty tables.

Expand Down
71 changes: 71 additions & 0 deletions backend/database/migrations/0027_rebuild_persona_lock_fks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""0027_rebuild_persona_lock_fks — give persona_lock_id a real foreign key on
databases migrated through 0026.

0026 added ``persona_lock_id`` to ``conversations`` and ``character_cards`` as a
bare ``INTEGER`` (an ALTER-added column cannot carry an enforced REFERENCES
clause), while fresh installs declare it
``INTEGER REFERENCES user_personas(id) ON DELETE SET NULL``
(see backend/database/schema.py). The preset engine builds its merge/FK model
from the *live* ``PRAGMA foreign_key_list``, so on a migrated DB those columns
were invisible to the FK machinery: the merge copied lock ids verbatim instead
of remapping them through the personas id-map, and an export that drops the
configs domain never SET-NULLed them. This rebuilds the two tables to the
canonical DDL so the live schema matches a fresh install.

Idempotent: a table whose ``persona_lock_id`` edge already exists (every fresh
install, and any DB already through 0027) is skipped. Run with foreign keys
OFF for the duration — the standard SQLite "other kinds of schema changes"
recipe — so dropping the old table neither cascades into ``messages`` nor trips
a constraint. Both tables have TEXT primary keys, so child references
(``messages.conversation_id`` …) keep resolving across the drop/rename.

The rebuilt DDL is derived from ``schema.table_create_sql`` rather than pasted,
so this migration can never disagree with the schema-equivalence gate.
"""

from __future__ import annotations

import sqlite3

from backend.database import schema

# Tables whose ``persona_lock_id`` column 0026 added as a bare INTEGER; migrate()
# rebuilds each one unless its foreign key to ``user_personas`` already exists.
_TABLES = ("conversations", "character_cards")


def _has_persona_lock_fk(conn: sqlite3.Connection, table: str) -> bool:
    """Tell whether *table* already declares ``persona_lock_id -> user_personas``.

    Args:
        conn: Open connection to the database being migrated.
        table: Name of the table whose foreign keys are inspected.

    Returns:
        ``True`` when some foreign key on *table* goes from the
        ``persona_lock_id`` column to the ``user_personas`` table, else ``False``
        (including when the table declares no foreign keys at all).
    """
    # PRAGMA foreign_key_list row: (id, seq, parent_table, from, to, on_update, on_delete, match)
    edges = conn.execute(f"PRAGMA foreign_key_list({table})").fetchall()
    return any(
        (parent, child_col) == ("user_personas", "persona_lock_id")
        for _fk_id, _seq, parent, child_col, *_rest in edges
    )


def _rebuild(conn: sqlite3.Connection, table: str) -> None:
    """Rebuild *table* from the canonical DDL, keeping the data of shared columns.

    Follows the SQLite create-copy-drop-rename recipe: create ``<table>_new``
    from the DDL returned by ``schema.table_create_sql``, copy across every
    column that exists in both the old and the new table, drop the old table,
    then rename the new one into place. The caller is expected to have foreign
    key enforcement switched off and to commit afterwards.

    Args:
        conn: Open connection to the database being migrated.
        table: Name of the table to rebuild.
    """
    staging = f"{table}_new"
    # With Python's default sqlite3 transaction handling a DDL statement does not
    # open a transaction, so the CREATE below can be persisted on its own while
    # the later INSERT/DROP/RENAME are rolled back by a failure or crash. Clear
    # any such leftover first; otherwise every retry of this migration would die
    # on "table <table>_new already exists".
    conn.execute(f"DROP TABLE IF EXISTS {staging}")
    # presumably the "CREATE TABLE IF NOT EXISTS <table> (...)" statement from the
    # canonical schema -- the replace below relies on that exact prefix.
    block = schema.table_create_sql(table)
    new_ddl = block.replace(f"CREATE TABLE IF NOT EXISTS {table}", f"CREATE TABLE {staging}", 1)
    conn.execute(new_ddl)
    # Copy only the columns both shapes share, in the new table's column order.
    new_cols = [r[1] for r in conn.execute(f"PRAGMA table_info({staging})").fetchall()]
    old_cols = {r[1] for r in conn.execute(f"PRAGMA table_info({table})").fetchall()}
    cols = ", ".join(c for c in new_cols if c in old_cols)
    conn.execute(f"INSERT INTO {staging} ({cols}) SELECT {cols} FROM {table}")
    conn.execute(f"DROP TABLE {table}")
    conn.execute(f"ALTER TABLE {staging} RENAME TO {table}")
    print(f"[migrations] 0027: rebuilt {table} with the persona_lock_id foreign key")


def migrate(conn: sqlite3.Connection) -> None:
    """Rebuild every table in ``_TABLES`` that lacks the persona_lock_id foreign key.

    Tables that already carry the edge are skipped, so the migration is safe to
    re-run. Foreign key enforcement is switched off for the rebuild and restored
    to its prior state afterwards, on success and on failure alike.

    Args:
        conn: Open connection to the database being migrated.
    """
    # PRAGMA foreign_keys is a no-op inside a transaction, and DROP/RENAME under
    # FK enforcement could cascade or fail; the runner has committed before this
    # call, so close any stray transaction, flip FKs off for the rebuild, then
    # restore the prior state.
    conn.commit()
    had_fk = conn.execute("PRAGMA foreign_keys").fetchone()[0]
    conn.execute("PRAGMA foreign_keys=OFF")
    try:
        for table in _TABLES:
            if not _has_persona_lock_fk(conn, table):
                _rebuild(conn, table)
                # Commit per table: each rebuild is independently idempotent, so
                # a failure on a later table keeps the earlier work.
                conn.commit()
    except BaseException:
        # A failed rebuild leaves its transaction open, and the PRAGMA in the
        # ``finally`` below would then silently do nothing, leaving this
        # connection with foreign keys OFF. Discard the partial work first.
        conn.rollback()
        raise
    finally:
        if had_fk:
            conn.execute("PRAGMA foreign_keys=ON")
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""0028_drop_vestigial_schema_artifacts — drop every table/column an earlier build
left behind that the fresh-install DDL (backend/database/schema.py) never carried,
so a migrated DB stops diverging from ``CREATE_TABLES_SQL``.

All four artefact groups are the same class of bug: a feature (or an early cut of
one) shipped schema via a since-rewritten migration or the old inline ``init_db``
path, the feature was removed or redesigned, and nothing dropped the leftovers from
databases that booted in the window. Each tripped the fresh-vs-migrated
schema-equivalence gate (backend/presets.py ``assert_schema_safe``), refusing every
preset export/snapshot/restore. The full inventory, found by fresh-installing every
historical DDL version in git history and migrating it to HEAD:

1. ``settings.active_model_config_id`` — superseded when the active-model pointer
moved to ``endpoints.active_model_config_id`` (migration 0010); the old
settings-level pointer was never read again and never dropped.
2. ``settings.active_agent_endpoint_id`` / ``settings.active_agent_model_config_id``
— an early version of the agent-endpoint feature (later rewritten into what is
now 0013) put this pointer pair on ``settings``; the redesign kept only
``settings.agent_endpoint_id`` + ``endpoints.agent_active_model_config_id``.
3. ``voice_profiles`` table and ``conversation_logs.reasoning_feedback`` /
``conversation_logs.feedback_latency_ms`` — legacy TTS storage (0015) ported and
dropped by 0020, but re-created by bootstrap while the table was still in the
then-current DDL; and an early cut of the feedback sub-step whose split columns
0024 consolidated into the single ``feedback`` JSON column.
4. ``settings.tts_scripter_enabled`` / ``settings.tts_scripter_prompt`` — the
detached LLM speech scripter (84bf39e), removed by 16a4288, which deleted the
DDL and inline ALTERs but not the columns already on disk.

``voice_profiles`` is dropped only when empty: on any DB that reaches 0028, 0020
has already run, so any real rows were ported long ago; a non-empty table would
mean un-ported data, so we leave it for a human rather than silently lose it (the
equivalence gate keeps complaining, which is the intended loud signal).

Idempotent: every drop is skipped when the table/column is already absent (fresh
installs, or a DB already through 0028). ``ALTER TABLE … DROP COLUMN`` is the same
mechanism migration 0016 uses; foreign keys are flipped off for the ``settings``
column drops since several carry a ``REFERENCES`` clause.
"""

from __future__ import annotations

import sqlite3

# Columns on ``settings`` left behind by removed or redesigned features.
_VESTIGIAL_SETTINGS_COLUMNS = (
    "active_model_config_id",
    "active_agent_endpoint_id",
    "active_agent_model_config_id",
    "tts_scripter_enabled",
    "tts_scripter_prompt",
)
# Split feedback columns on ``conversation_logs`` that are no longer in the DDL.
_VESTIGIAL_LOG_COLUMNS = ("reasoning_feedback", "feedback_latency_ms")


def migrate(conn: sqlite3.Connection) -> None:
    """Drop the vestigial columns and the empty ``voice_profiles`` table.

    Works in three steps, each skipped when its target is already gone, so a
    second run changes nothing:

    1. drop the listed ``settings`` columns, with foreign keys switched off;
    2. drop ``voice_profiles`` if it exists and holds no rows (a non-empty table
       is reported and left in place);
    3. drop the listed ``conversation_logs`` columns.

    Args:
        conn: Open connection to the database being migrated.
    """

    def existing_columns(table: str) -> set[str]:
        # PRAGMA table_info puts the column name at index 1 of each row and
        # yields no rows for a table that does not exist.
        return {info[1] for info in conn.execute(f"PRAGMA table_info({table})").fetchall()}

    # --- 1. settings columns -------------------------------------------------
    present = existing_columns("settings")
    to_drop = [name for name in _VESTIGIAL_SETTINGS_COLUMNS if name in present]
    if to_drop:
        # PRAGMA foreign_keys is a no-op inside a transaction; the runner has
        # committed before this call. Flip FKs off for the column drops (several
        # carry a REFERENCES clause), then restore prior state.
        conn.commit()
        fk_was_on = conn.execute("PRAGMA foreign_keys").fetchone()[0]
        conn.execute("PRAGMA foreign_keys=OFF")
        try:
            for col in to_drop:
                conn.execute(f"ALTER TABLE settings DROP COLUMN {col}")
                conn.commit()
                print(f"[migrations] 0028: dropped vestigial settings.{col}")
        finally:
            if fk_was_on:
                conn.execute("PRAGMA foreign_keys=ON")

    # --- 2. voice_profiles table ---------------------------------------------
    table_names = {
        entry[0]
        for entry in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
    }
    if "voice_profiles" in table_names:
        rows = conn.execute("SELECT COUNT(*) FROM voice_profiles").fetchone()[0]
        if rows:
            # Un-ported rows: refuse to drop and lose data. The equivalence gate stays
            # red on purpose so this surfaces for a human instead of vanishing.
            print(
                f"[migrations] 0028: voice_profiles has {rows} row(s); leaving it in place "
                f"(0020 should have ported and dropped it — investigate before dropping)"
            )
        else:
            conn.execute("DROP TABLE voice_profiles")
            print("[migrations] 0028: dropped vestigial empty voice_profiles table")

    # --- 3. conversation_logs columns ----------------------------------------
    log_present = existing_columns("conversation_logs")
    for col in (name for name in _VESTIGIAL_LOG_COLUMNS if name in log_present):
        conn.execute(f"ALTER TABLE conversation_logs DROP COLUMN {col}")
        print(f"[migrations] 0028: dropped vestigial conversation_logs.{col}")
Loading
Loading