From d6e66adfb0b397014c3858d4ff3857a91abfbe3c Mon Sep 17 00:00:00 2001 From: JS Ng Date: Fri, 13 Mar 2026 13:50:30 +0800 Subject: [PATCH 01/18] docs: add storage migration protocol (issue #27) Proposes a simple, explicit migration system for Campus storage: - Migration files with upgrade()/downgrade() functions - State tracking via _migrations table - CLI: `campus migrate` and `campus rollback` - Yapper integration for audit logging - Supports PostgreSQL tables and MongoDB documents Addresses requirements from issue #27: 1. Initialize app via auto-run on deploy 2. Data migration support with batching 3. Cross-database migration handlers 4. Rollback capability with downgrade() 5. Audit logging via Yapper events Co-Authored-By: Claude Opus 4.6 --- docs/migration-protocol.md | 605 +++++++++++++++++++++++++++++++++++++ 1 file changed, 605 insertions(+) create mode 100644 docs/migration-protocol.md diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md new file mode 100644 index 00000000..28e9af8f --- /dev/null +++ b/docs/migration-protocol.md @@ -0,0 +1,605 @@ +# Campus Storage Migration Protocol + +## Overview + +This document defines the storage migration protocol for Campus. Migrations are versioned changes to database schemas and data that evolve the storage layer over time while maintaining data integrity and enabling rollback. + +**Design Philosophy**: Simple, explicit, and auditable. For ~2000 users, we prioritize clarity over automation complexity. + +--- + +## Requirements + +Per [issue #27](https://github.com/nyjc-computing/campus/issues/27): + +1. **Initialize the app** - Apply pending migrations on deployment +2. **Update existing data** - Transform data when schemas change +3. **Cross-database migration** - Support PostgreSQL tables and MongoDB documents +4. **Rollback/restore** - Ability to revert migrations +5. **Audit logging** - Record all migration actions for investigation + +--- + +## Architecture + +### Storage Types + +| Storage Type | Backend | Use Case | +|-------------|---------|----------| +| **Tables** | PostgreSQL | Relational data with fixed schema (users, sessions, clients) | +| **Documents** | MongoDB | Flexible JSON-like objects (assignments, submissions) | +| **Objects** | S3-compatible | Binary blobs (files, attachments) | + +### Migration Structure + +``` +campus/storage/migrations/ +├── __init__.py # Migration runner interface +├── state.py # Migration state tracking (uses storage layer) +├── runner.py # Main migration execution logic +├── scripts/ +│ ├── migrate.py # CLI entry point: `campus migrate` +│ └── rollback.py # CLI entry point: `campus rollback` +└── migrations/ + ├── 001_init_assignments.py + ├── 002_init_submissions.py + ├── 003_add_response_timestamps.py + └── ... +``` + +--- + +## Migration File Format + +Each migration is a Python module with: + +1. **Revision ID** - Sequential 3-digit number +2. **Description** - Human-readable change summary +3. **upgrade()** - Forward transformation +4. **downgrade()** - Reverse transformation +5. **Storage type** - One of: `table`, `document`, `object` + +```python +"""Add response timestamps to submissions. + +Revision ID: 003 +Create Date: 2025-03-13 +Storage Type: document +""" + +from datetime import datetime, UTC +from campus.storage import get_collection + + +REVISION_ID = "003" +DESCRIPTION = "Add response timestamps to submissions" +STORAGE_TYPE = "document" + + +def upgrade(): + """Add created_at field to existing responses.""" + submissions = get_collection("submissions") + + # Get all submissions with responses + all_docs = submissions.get_matching({}) + + for doc in all_docs: + updated_responses = [] + for response in doc.get("responses", []): + # Add timestamp if not present + if "created_at" not in response: + response["created_at"] = datetime.now(UTC).isoformat() + updated_responses.append(response) + + # Update document + if updated_responses: + submissions.update_by_id(doc["id"], {"responses": updated_responses}) + + +def downgrade(): + """Remove created_at field from responses.""" + submissions = get_collection("submissions") + + all_docs = submissions.get_matching({}) + + for doc in all_docs: + updated_responses = [] + for response in doc.get("responses", []): + # Strip out created_at + response.pop("created_at", None) + updated_responses.append(response) + + submissions.update_by_id(doc["id"], {"responses": updated_responses}) +``` + +--- + +## Migration State Tracking + +Migrations track their state using the storage layer itself: + +### PostgreSQL Schema (for state tracking) + +```sql +CREATE TABLE IF NOT EXISTS "_migrations" ( + "id" TEXT PRIMARY KEY, -- Revision ID (e.g., "001", "002") + "description" TEXT NOT NULL, -- Human-readable description + "storage_type" TEXT NOT NULL, -- "table", "document", "object" + "applied_at" TIMESTAMP NOT NULL, -- When migration was applied + "rollback_at" TIMESTAMP NULL -- When migration was rolled back (if applicable) +); +``` + +### State Interface + +```python +# campus/storage/migrations/state.py + +from campus.storage import get_table +from datetime import datetime, UTC + +class MigrationState: + """Track migration application state.""" + + def __init__(self): + self._table = get_table("_migrations") + + def init_state_table(self): + """Initialize the _migrations table if not exists.""" + # Uses init_from_schema() - allowed for system table + sql = '''CREATE TABLE IF NOT EXISTS "_migrations" ( + "id" TEXT PRIMARY KEY, + "description" TEXT NOT NULL, + "storage_type" TEXT NOT NULL, + "applied_at" TIMESTAMP NOT NULL, + "rollback_at" TIMESTAMP NULL + );''' + from campus.storage.tables.backend.postgres import PostgreSQLTable + table = PostgreSQLTable("_migrations") + table.init_from_schema(sql) + + def record_migration(self, revision_id: str, description: str, storage_type: str): + """Record a successful migration.""" + self._table.insert_one({ + "id": revision_id, + "description": description, + "storage_type": storage_type, + "applied_at": datetime.now(UTC).isoformat(), + "rollback_at": None + }) + + def get_applied_migrations(self) -> set[str]: + """Get set of applied migration IDs.""" + return {row["id"] for row in self._table.get_matching({})} + + def mark_rollback(self, revision_id: str): + """Mark a migration as rolled back.""" + self._table.update_by_id(revision_id, { + "rollback_at": datetime.now(UTC).isoformat() + }) + + def is_applied(self, revision_id: str) -> bool: + """Check if a migration has been applied.""" + try: + self._table.get_by_id(revision_id) + return True + except NotFoundError: + return False +``` + +--- + +## Migration Runner + +```python +# campus/storage/migrations/runner.py + +import importlib +from pathlib import Path +from campus.yapper import Yapper +from .state import MigrationState + +class MigrationRunner: + """Execute and track migrations.""" + + def __init__(self, migrations_dir: Path): + self.migrations_dir = migrations_dir + self.state = MigrationState() + self.logger = Yapper() + + def discover_migrations(self) -> list[dict]: + """Find all migration files and return sorted list.""" + migrations = [] + for file in self.migrations_dir.glob("*.py"): + if file.name.startswith("_"): + continue + + module = importlib.import_module(f".migrations.{file.stem}", package="campus.storage") + migrations.append({ + "id": module.REVISION_ID, + "description": module.DESCRIPTION, + "storage_type": module.STORAGE_TYPE, + "module": module, + "path": file + }) + + return sorted(migrations, key=lambda m: m["id"]) + + def upgrade(self, target: str | None = None): + """Apply pending migrations up to target revision.""" + self.state.init_state_table() + applied = self.state.get_applied_migrations() + migrations = self.discover_migrations() + + for migration in migrations: + if migration["id"] in applied: + continue + if target and migration["id"] > target: + break + + print(f"Applying {migration['id']}: {migration['description']}") + migration["module"].upgrade() + self.state.record_migration( + migration["id"], + migration["description"], + migration["storage_type"] + ) + + # Log migration event via Yapper + self.logger.emit("migration.applied", { + "revision_id": migration["id"], + "description": migration["description"], + "storage_type": migration["storage_type"] + }) + print(f" ✓ Applied {migration['id']}") + + def downgrade(self, target: str): + """Rollback migrations back to target revision.""" + self.state.init_state_table() + applied = self.state.get_applied_migrations() + migrations = list(reversed(self.discover_migrations())) + + for migration in migrations: + if migration["id"] not in applied: + continue + if migration["id"] <= target: + break + + print(f"Rolling back {migration['id']}: {migration['description']}") + migration["module"].downgrade() + self.state.mark_rollback(migration["id"]) + + # Log rollback event via Yapper + self.logger.emit("migration.rolled_back", { + "revision_id": migration["id"], + "description": migration["description"], + "storage_type": migration["storage_type"] + }) + print(f" ✓ Rolled back {migration['id']}") +``` + +--- + +## CLI Interface + +Add to `campus` CLI via `main.py`: + +```python +# main.py additions + +def migrate(target: str | None = None): + """Run database migrations.""" + from campus.storage.migrations import MigrationRunner + from pathlib import Path + + migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" + runner = MigrationRunner(migrations_dir) + runner.upgrade(target) + print("Migration complete.") + + +def rollback(target: str): + """Rollback to a specific migration.""" + from campus.storage.migrations import MigrationRunner + from pathlib import Path + + migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" + runner = MigrationRunner(migrations_dir) + runner.downgrade(target) + print("Rollback complete.") + + +if __name__ == "__main__": + import sys + command = sys.argv[1] if len(sys.argv) >= 2 else None + + if command == "migrate": + target = sys.argv[2] if len(sys.argv) >= 3 else None + migrate(target) + elif command == "rollback": + target = sys.argv[2] + rollback(target) + else: + main(deployment=command) +``` + +### Usage + +```bash +# Apply all pending migrations +campus migrate + +# Apply up to specific revision +campus migrate 003 + +# Rollback to specific revision +campus rollback 002 +``` + +--- + +## Deployment Integration + +### Automatic Migration on Deploy + +Add to deployment configuration (`wsgi.py` or app factory): + +```python +# wsgi.py or apps/*/factory.py + +from campus.storage.migrations import MigrationRunner +from pathlib import Path +import os + +def create_app(): + app = Flask(__name__) + + # Run migrations on app startup in production + if os.getenv("ENV") in ("staging", "production"): + try: + migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" + runner = MigrationRunner(migrations_dir) + runner.upgrade() + except Exception as e: + # Log but don't fail deployment + app.logger.error(f"Migration failed: {e}") + + return app +``` + +### Pre-Deployment Checklist + +1. **Backup database** before running migrations in production +2. **Test migrations** on staging with production-like data +3. **Review downgrade logic** - ensure rollback path exists +4. **Monitor logs** via Yapper events for migration completion + +--- + +## Best Practices + +### 1. Idempotent Migrations + +Migrations should be safe to run multiple times: + +```python +def upgrade(): + """Add index if not exists.""" + table = get_table("submissions") + # Check before creating + try: + table.execute('CREATE INDEX idx_submissions_course ON "submissions"("course_id")') + except psycopg2.errors.DuplicateTable: + pass # Index already exists +``` + +### 2. Zero-Downtime for Schema Changes + +For PostgreSQL schema changes with active users: + +1. **Add columns** as nullable first +2. **Backfill data** in separate migration +3. **Add constraints** only after backfill complete +4. **Drop old columns** only after verifying usage + +### 3. Data Migration Batching + +For large collections, process in batches: + +```python +def upgrade(): + """Backfill data for all users.""" + users = get_table("users") + batch_size = 100 + offset = 0 + + while True: + batch = users.get_matching({}).skip(offset).limit(batch_size) + if not batch: + break + + for user in batch: + # Transform data + users.update_by_id(user["id"], {...}) + + offset += batch_size +``` + +### 4. Transaction Safety + +Wrap related changes in transactions: + +```python +def upgrade(): + """Make multiple related changes atomically.""" + from campus.storage.tables.backend.postgres import PostgreSQLTable + + table = PostgreSQLTable("assignments") + with table._get_connection() as conn: + with conn.cursor() as cursor: + cursor.execute("ALTER TABLE assignments ADD COLUMN new_field TEXT") + cursor.execute("UPDATE assignments SET new_field = default_value") + conn.commit() +``` + +--- + +## Migration Types + +### Type 1: Schema Migration (DDL) + +Changes to table/collection structure: + +```python +# PostgreSQL - Add column +def upgrade(): + table = PostgreSQLTable("users") + table.init_from_schema( + 'ALTER TABLE "users" ADD COLUMN IF NOT EXISTS "display_name" TEXT;' + ) + +def downgrade(): + table = PostgreSQLTable("users") + table.init_from_schema( + 'ALTER TABLE "users" DROP COLUMN IF EXISTS "display_name";' + ) +``` + +### Type 2: Data Migration (DML) + +Transform existing data: + +```python +def upgrade(): + users = get_table("users") + all_users = users.get_matching({}) + + for user in all_users: + if "email" in user and "@" not in user["email"]: + # Fix malformed emails + users.update_by_id(user["id"], {"email": None}) +``` + +### Type 3: Cross-Storage Migration + +Move data between storage types: + +```python +def upgrade(): + """Migrate from table to document storage.""" + old_table = get_table("sessions") + new_collection = get_collection("sessions") + + for session in old_table.get_matching({}): + new_collection.insert_one(session) +``` + +--- + +## Rollback Protocol + +### Pre-Rollback Checklist + +1. **Backup current state** - Export data before rollback +2. **Verify downgrade logic** - Test on staging first +3. **Check dependencies** - Ensure no later migrations depend on this one +4. **Plan forward migration** - Know how you'll re-apply after rollback + +### Rollback Execution + +```bash +# 1. Check current state +campus migrate status + +# 2. Backup production data +pg_dump $POSTGRESDB_URI > backup_$(date +%Y%m%d).sql + +# 3. Rollback to safe revision +campus rollback 002 + +# 4. Verify application health +curl -f https://api.campus.nyjc.app/health || exit 1 +``` + +--- + +## Event Logging via Yapper + +All migrations emit events for audit and monitoring: + +```python +# Events emitted: +logger.emit("migration.applied", { + "revision_id": "003", + "description": "Add response timestamps", + "storage_type": "document", + "applied_at": "2025-03-13T10:30:00Z" +}) + +logger.emit("migration.rolled_back", { + "revision_id": "003", + "description": "Add response timestamps", + "storage_type": "document", + "rolled_back_at": "2025-03-13T11:00:00Z" +}) + +logger.emit("migration.failed", { + "revision_id": "004", + "error": "Duplicate key violation", + "traceback": "..." +}) +``` + +### Consuming Migration Events + +```python +from campus.yapper import Yapper + +yapper = Yapper() + +@yapper.on_event("migration.applied") +def log_migration(event): + """Send migration notification to admins.""" + send_alert(f"Migration {event.data['revision_id']} applied: {event.data['description']}") +``` + +--- + +## Appendix: Migration Template + +```python +"""{Description}. + +Revision ID: {XXX} +Create Date: {YYYY-MM-DD} +Storage Type: {table/document/object} +""" + +from campus.storage import {{get_table, get_collection}} + +REVISION_ID = "{XXX}" +DESCRIPTION = "{Description}" +STORAGE_TYPE = "{table/document/object}" + + +def upgrade(): + """Apply this migration.""" + # TODO: Implement forward migration + pass + + +def downgrade(): + """Reverse this migration.""" + # TODO: Implement rollback + pass +``` + +--- + +## References + +- [Issue #27: Storage Migration Protocol](https://github.com/nyjc-computing/campus/issues/27) +- [Campus Storage Architecture](../architecture.md) +- [PostgreSQL Table Backend](../campus/storage/tables/backend/postgres.py) +- [MongoDB Document Backend](../campus/storage/documents/backend/mongodb.py) +- [Yapper Event Framework](../campus/yapper/base.py) From 1904be999a49195a3f2de3708a6689d6be0b1da1 Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 10:59:20 +0800 Subject: [PATCH 02/18] docs: clarify transaction strategy for migrations (PR review) Addresses review question comparing autocommit vs atomic transaction approaches. Key changes: - Add "Transaction Strategy" section with trade-off analysis - Explain hybrid approach: one migration = one transaction boundary - Document pros/cons of each strategy - Add guidance for large-scale migrations requiring batching Rationale: - Autocommit per migration enables handling large datasets - Atomic within migration ensures all-or-nothing for that change - Cross-storage compatible (PostgreSQL + MongoDB independently) - Matches Campus scale (~2000 users) and existing patterns Co-Authored-By: Claude Opus 4.6 --- docs/migration-protocol.md | 97 +++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index 28e9af8f..fe85ef08 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -49,6 +49,95 @@ campus/storage/migrations/ --- +## Transaction Strategy + +### Design Decision: Autocommit per Migration, Atomic Within + +Each migration file commits independently, but operations within a single migration are atomic. + +```python +# Each migration is ONE transaction boundary +def upgrade(): + """All operations in this migration commit together.""" + with PostgreSQLTransaction(): + # Multiple statements execute atomically + cursor.execute("ALTER TABLE submissions ADD COLUMN status TEXT") + cursor.execute("UPDATE submissions SET status = 'pending'") + # If anything fails, entire upgrade() rolls back +``` + +**Transaction boundary = migration file**, not individual statements. + +### Why This Approach? + +| Factor | Autocommit per Migration | Single Atomic Transaction | +|--------|-------------------------|---------------------------| +| **Failure recovery** | Manual rollback via `downgrade()` | Automatic rollback | +| **Large datasets** | ✅ Handles 10k+ rows with batching | ❌ Transaction timeout | +| **Cross-storage** | ✅ Works with PostgreSQL + MongoDB | ❌ No cross-DB transactions | +| **Intermediate states** | ⚠️ Partial if migration fails | ✅ Always consistent | +| **Testing** | ⚠️ Must test downgrade path | ✅ Simpler error handling | + +### Trade-off Analysis + +**Autocommit with Downgrade (Chosen)** + +*Pros:* +- Resilient to partial failures - failed migration leaves earlier migrations intact +- Works with large datasets - no single transaction timeout for bulk updates +- Cross-database compatible - MongoDB and PostgreSQL can migrate independently +- Progress visibility - `_migrations` table shows exactly what's been applied +- Safe for long-running operations - data backfills won't hit transaction limits + +*Cons:* +- No automatic atomic rollback - if migration fails, database in intermediate state +- Manual cleanup required - must run `campus rollback` to reverse failed migrations +- Potential inconsistency - app could start mid-migration if deploy continues unchecked +- Harder testing - must explicitly test `downgrade()` path + +**Alternative: All-or-Nothing Transaction** + +*Pros:* +- Automatic safety - any failure rolls back entire migration +- No intermediate states - database always in consistent schema +- Simpler error handling - just let transaction fail + +*Cons:* +- Transaction limits - PostgreSQL has size/duration constraints +- Fails with large datasets - updating thousands of rows times out +- Not cross-storage compatible - can't atomically update PostgreSQL + MongoDB +- Blocks longer - tables locked during entire transaction +- Memory pressure - large transactions consume connection resources + +### For Campus (~2000 users) + +The hybrid approach balances safety with practicality: + +1. **One migration = one logical change** - Natural atomic boundary +2. **Manageable transaction sizes** - ~2000 users means reasonable single-migration volume +3. **Clear rollback semantics** - `downgrade()` reverses at same boundary +4. **Matches existing patterns** - `init_from_schema()` already uses explicit transactions + +### For Large Migrations + +Document batched approach for operations that exceed single transaction limits: + +```python +def upgrade(): + """For very large tables, process in batches.""" + batch_size = 100 + for offset in range(0, total_rows, batch_size): + with PostgreSQLTransaction(): + # Each batch is atomic + for row in get_batch(offset, batch_size): + transform_and_update(row) + # Commits between batches +``` + +This pattern provides atomicity at the batch level while avoiding transaction timeouts. + +--- + ## Migration File Format Each migration is a Python module with: @@ -427,21 +516,25 @@ def upgrade(): ### 4. Transaction Safety -Wrap related changes in transactions: +Wrap related changes in transactions. See [Transaction Strategy](#transaction-strategy) for rationale. ```python def upgrade(): - """Make multiple related changes atomically.""" + """Make multiple related changes atomically within this migration.""" from campus.storage.tables.backend.postgres import PostgreSQLTable table = PostgreSQLTable("assignments") with table._get_connection() as conn: with conn.cursor() as cursor: + # These statements commit together as one unit cursor.execute("ALTER TABLE assignments ADD COLUMN new_field TEXT") cursor.execute("UPDATE assignments SET new_field = default_value") conn.commit() + # If either fails, both roll back automatically ``` +**Key principle:** Each `upgrade()`/`downgrade()` function is its own transaction boundary. Don't commit mid-migration. + --- ## Migration Types From 97fd5315ae98d7738e8254f2efc2e096666c8c0e Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 11:35:02 +0800 Subject: [PATCH 03/18] docs: add type-safe resource access option (PR review) Addresses review comment about fragility to typos in collection/table names. - Show both import-based (type-safe) and string-based access patterns - Recommend resource imports for production migrations - Keep string access as simpler alternative for quick migrations Reference: PR #382 review comment --- docs/migration-protocol.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index fe85ef08..e52e5107 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -148,6 +148,32 @@ Each migration is a Python module with: 4. **downgrade()** - Reverse transformation 5. **Storage type** - One of: `table`, `document`, `object` +### Accessing Storage: Type-Safe vs String-Based + +**Option 1: Import from resource module (type-safe, recommended)** + +```python +from campus.api.resources import SubmissionsResource + +def upgrade(): + """Access collection through resource - no typos possible.""" + submissions = SubmissionsResource._storage # Internal access +``` + +**Option 2: Direct string access (simple, common in migrations)** + +```python +from campus.storage import get_collection + +def upgrade(): + """Direct access - be careful with typos.""" + submissions = get_collection("submissions") +``` + +> **Note:** Using resource imports (`campus.api.resources.*`) provides type safety and prevents typos. Direct string access is simpler but requires careful testing. + +### Example Migration + ```python """Add response timestamps to submissions. From 960a0eb9653f5876f71afd72f2779a92f131cfd7 Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 11:41:25 +0800 Subject: [PATCH 04/18] docs: add one-step revision policy (PR review) Addresses review comment confirming migrations transform one revision at a time. - upgrade(): N-1 to N - downgrade(): N to N-1 - Never skip versions - run sequentially Ensures predictable transformation chain and easy rollback. Reference: PR #382 review comment --- docs/migration-protocol.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index e52e5107..d8776757 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -227,6 +227,20 @@ def downgrade(): submissions.update_by_id(doc["id"], {"responses": updated_responses}) ``` +### One-Step Revision Policy + +Each migration's `upgrade()` and `downgrade()` functions transform **exactly one revision step**. + +- `upgrade()` transforms schema from version *N-1* to *N* +- `downgrade()` transforms schema from version *N* to *N-1* +- Never skip versions - migrations run sequentially + +This ensures: +- Predictable transformation chain +- Easy rollback of any single migration +- Clear dependency order +``` + --- ## Migration State Tracking From 482920db96aebf1f280d676ee975358d3d30ddd2 Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 11:42:34 +0800 Subject: [PATCH 05/18] docs: add migration status field and archival strategy (PR review) Addresses review comment about migration status tracking and archival. - Add "status" field: pending/applied/rolled_back/failed - Add "error_message" field for failed migrations - Document archival strategy: PostgreSQL table is primary audit log - Explain why PostgreSQL > JSON/bucket for this use case Reference: PR #382 review comment --- docs/migration-protocol.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index d8776757..91305e62 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -254,11 +254,35 @@ CREATE TABLE IF NOT EXISTS "_migrations" ( "id" TEXT PRIMARY KEY, -- Revision ID (e.g., "001", "002") "description" TEXT NOT NULL, -- Human-readable description "storage_type" TEXT NOT NULL, -- "table", "document", "object" + "status" TEXT NOT NULL, -- "pending", "applied", "rolled_back", "failed" "applied_at" TIMESTAMP NOT NULL, -- When migration was applied - "rollback_at" TIMESTAMP NULL -- When migration was rolled back (if applicable) + "rollback_at" TIMESTAMP NULL, -- When migration was rolled back (if applicable) + "error_message" TEXT NULL -- Error details if status is "failed" ); ``` +### Status Values + +| Status | Meaning | +|--------|---------| +| `pending` | Discovered but not yet applied | +| `applied` | Successfully applied to database | +| `rolled_back` | Reversed via `downgrade()` | +| `failed` | Migration execution failed (see `error_message`) | + +### Archival Strategy + +The `_migrations` table **is** the permanent audit log. For long-term archival: + +1. **Primary**: Keep `_migrations` table indefinitely (small, low overhead) +2. **Secondary**: Yapper events provide real-time streaming to external systems +3. **Optional**: Export to object storage via bucket backup scripts + +PostgreSQL table is preferred over JSON/bucket storage because: +- Queryable for status checks (`SELECT * FROM _migrations WHERE status = 'failed'`) +- Transactional consistency with schema changes +- Supports joins and aggregations for reporting + ### State Interface ```python From 6a7c855289cbb29fa8e0f98efc013f10e665e0ec Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 11:46:20 +0800 Subject: [PATCH 06/18] docs: update StateInterface for status field (PR review) Update MigrationState class to reflect new schema: - Add status field to init_state_table() - Update record_migration() to set status="applied" - Add record_failure() for failed migrations with error_message - Update get_applied_migrations() to filter by status="applied" - Update mark_rollback() to set status="rolled_back" - Update is_applied() to check status value Reference: PR #382 review comment --- docs/migration-protocol.md | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index 91305e62..17398e41 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -299,13 +299,14 @@ class MigrationState: def init_state_table(self): """Initialize the _migrations table if not exists.""" - # Uses init_from_schema() - allowed for system table sql = '''CREATE TABLE IF NOT EXISTS "_migrations" ( "id" TEXT PRIMARY KEY, "description" TEXT NOT NULL, "storage_type" TEXT NOT NULL, + "status" TEXT NOT NULL, "applied_at" TIMESTAMP NOT NULL, - "rollback_at" TIMESTAMP NULL + "rollback_at" TIMESTAMP NULL, + "error_message" TEXT NULL );''' from campus.storage.tables.backend.postgres import PostgreSQLTable table = PostgreSQLTable("_migrations") @@ -317,25 +318,40 @@ class MigrationState: "id": revision_id, "description": description, "storage_type": storage_type, + "status": "applied", "applied_at": datetime.now(UTC).isoformat(), - "rollback_at": None + "rollback_at": None, + "error_message": None + }) + + def record_failure(self, revision_id: str, description: str, storage_type: str, error: str): + """Record a failed migration attempt.""" + self._table.insert_one({ + "id": revision_id, + "description": description, + "storage_type": storage_type, + "status": "failed", + "applied_at": datetime.now(UTC).isoformat(), + "rollback_at": None, + "error_message": error }) def get_applied_migrations(self) -> set[str]: - """Get set of applied migration IDs.""" - return {row["id"] for row in self._table.get_matching({})} + """Get set of successfully applied migration IDs.""" + return {row["id"] for row in self._table.get_matching({"status": "applied"})} def mark_rollback(self, revision_id: str): """Mark a migration as rolled back.""" self._table.update_by_id(revision_id, { + "status": "rolled_back", "rollback_at": datetime.now(UTC).isoformat() }) def is_applied(self, revision_id: str) -> bool: - """Check if a migration has been applied.""" + """Check if a migration has been successfully applied.""" try: - self._table.get_by_id(revision_id) - return True + row = self._table.get_by_id(revision_id) + return row["status"] == "applied" except NotFoundError: return False ``` From 4d095329c687e4bae059b8b50817b4ebcf2296e0 Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 11:47:44 +0800 Subject: [PATCH 07/18] docs: clarify CLI vs automated deployment (PR review) Addresses review question about whether CLI is necessary given automated deployment. - Add "When is CLI Needed?" section - Automated deployment: primary path (staging, PR validation, CI/CD) - CLI: secondary for manual operations (local dev, rollback, status, recovery) - Add `campus status` command to query migration state CLI remains useful for manual operations even with automated deployment. Reference: PR #382 review comment --- docs/migration-protocol.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index 17398e41..533a8c3f 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -451,6 +451,22 @@ class MigrationRunner: ## CLI Interface +### When is CLI Needed? + +**Automated deployment (primary):** +- Staging/production: migrations run automatically on app startup +- PR validation: migrations tested against backup database +- CI/CD: status reported via deployment checks + +**CLI (secondary, for manual operations):** +- Local development: apply migrations during feature development +- Staging manual run: re-run migrations after fixing failures +- Rollback: manual `rollback` when deployment needs quick revert +- Status check: query migration state without starting app +- Recovery: repair failed migration states + +### CLI Implementation + Add to `campus` CLI via `main.py`: ```python @@ -478,6 +494,20 @@ def rollback(target: str): print("Rollback complete.") +def status(): + """Show current migration state.""" + from campus.storage.migrations import MigrationState + + state = MigrationState() + state.init_state_table() + applied = state.get_applied_migrations() + + print(f"Applied migrations: {len(applied)}") + for mid in sorted(applied): + print(f" ✓ {mid}") + print(f"Pending: compute by listing migrations/ vs applied") + + if __name__ == "__main__": import sys command = sys.argv[1] if len(sys.argv) >= 2 else None @@ -488,6 +518,8 @@ if __name__ == "__main__": elif command == "rollback": target = sys.argv[2] rollback(target) + elif command == "status": + status() else: main(deployment=command) ``` @@ -503,6 +535,9 @@ campus migrate 003 # Rollback to specific revision campus rollback 002 + +# Show migration state +campus status ``` --- From 6af038fcfa63873dd50c0ccecbf30323ab8fd892 Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 11:48:28 +0800 Subject: [PATCH 08/18] docs: add pre-flight checks to MigrationRunner (PR review) Addresses review comment about integrating pre-deployment checklist into runner. - Add pre_flight_checks() method to MigrationRunner - Warns when running in production - Validates downgrade() exists for applied migrations - Shows pending migrations count - Requires confirmation in production - Add deployment checklist table showing built-in vs manual checks Moves validation from external checklist to automated pre-flight checks. Reference: PR #382 review comment --- docs/migration-protocol.md | 64 +++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index 533a8c3f..9279975f 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -571,12 +571,66 @@ def create_app(): return app ``` -### Pre-Deployment Checklist +### Pre-Deployment Checks (Built into MigrationRunner) -1. **Backup database** before running migrations in production -2. **Test migrations** on staging with production-like data -3. **Review downgrade logic** - ensure rollback path exists -4. **Monitor logs** via Yapper events for migration completion +The migration runner should validate before executing: + +```python +# campus/storage/migrations/runner.py + +class MigrationRunner: + """Execute and track migrations.""" + + def pre_flight_checks(self) -> list[str]: + """Validate environment before running migrations. + Returns list of warnings (empty if all checks pass). + """ + warnings = [] + + # 1. Check if running in production + if os.getenv("ENV") == "production": + warnings.append("Running in PRODUCTION - ensure database backup exists") + + # 2. Verify downgrade() exists for applied migrations + for mid in self.state.get_applied_migrations(): + module = self._load_migration_module(mid) + if not hasattr(module, 'downgrade'): + warnings.append(f"Migration {mid} has no downgrade()") + + # 3. Check for pending migrations + applied = self.state.get_applied_migrations() + available = {m["id"] for m in self.discover_migrations()} + pending = available - applied + if pending: + warnings.append(f"Pending migrations: {sorted(pending)}") + + return warnings + + def upgrade(self, target: str | None = None): + """Apply pending migrations with pre-flight checks.""" + # Run pre-flight checks + for warning in self.pre_flight_checks(): + print(f"⚠️ {warning}") + + # Confirm if in production + if os.getenv("ENV") == "production": + response = input("Continue with migration? (yes/no): ") + if response.lower() != "yes": + raise RuntimeError("Migration cancelled by user") + + # ... rest of upgrade logic +``` + +### Deployment Checklist + +| Check | Built-in | Manual | +|-------|----------|--------| +| Database backup exists | ⚠️ Warning only | ✅ Verify backup completed | +| Downgrade logic exists | ✅ Validates in code | | +| Pending migrations count | ✅ Shows in pre-flight | | +| Test on staging first | | ✅ PR validation required | +| Monitor Yapper events | ✅ Auto-emits events | ✅ Confirm delivery | +| Health check after deploy | | ✅ Verify application status | --- From 11edcbdbb1bad2a8a83af5ec169f833022f61aa3 Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 11:58:38 +0800 Subject: [PATCH 09/18] docs: use resource imports (Option 1) in migration example (PR review) Reviewer choice: Use resource imports for type safety. - Remove Option 2 (string-based access) from example - Update example migration to use SubmissionsResource._storage - Emphasize benefits: no typos, IDE autocomplete, single source of truth Reference: PR #382 review comment --- docs/migration-protocol.md | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index 9279975f..040eeb7d 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -148,29 +148,22 @@ Each migration is a Python module with: 4. **downgrade()** - Reverse transformation 5. **Storage type** - One of: `table`, `document`, `object` -### Accessing Storage: Type-Safe vs String-Based +### Accessing Storage in Migrations -**Option 1: Import from resource module (type-safe, recommended)** +Migrations should import from resource modules for type safety: ```python from campus.api.resources import SubmissionsResource def upgrade(): """Access collection through resource - no typos possible.""" - submissions = SubmissionsResource._storage # Internal access + submissions = SubmissionsResource._storage # Internal storage access ``` -**Option 2: Direct string access (simple, common in migrations)** - -```python -from campus.storage import get_collection - -def upgrade(): - """Direct access - be careful with typos.""" - submissions = get_collection("submissions") -``` - -> **Note:** Using resource imports (`campus.api.resources.*`) provides type safety and prevents typos. Direct string access is simpler but requires careful testing. +**Benefits:** +- ✅ No typos in collection/table names +- ✅ IDE autocomplete and type checking +- ✅ Single source of truth for storage configuration ### Example Migration @@ -183,7 +176,7 @@ Storage Type: document """ from datetime import datetime, UTC -from campus.storage import get_collection +from campus.api.resources import SubmissionsResource REVISION_ID = "003" @@ -193,7 +186,7 @@ STORAGE_TYPE = "document" def upgrade(): """Add created_at field to existing responses.""" - submissions = get_collection("submissions") + submissions = SubmissionsResource._storage # Get all submissions with responses all_docs = submissions.get_matching({}) @@ -213,7 +206,7 @@ def upgrade(): def downgrade(): """Remove created_at field from responses.""" - submissions = get_collection("submissions") + submissions = SubmissionsResource._storage all_docs = submissions.get_matching({}) From 9f33261608ce3c636e84f52ebc0a887405297b1e Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 11:59:17 +0800 Subject: [PATCH 10/18] docs: analyze separate migrations DB vs existing DBs (PR review) Addresses question about where to store _migrations table. Current architecture: campus.auth and campus.api have separate DBs. Option A (future): Dedicated migrations/audit DB - Centralized audit trail, clear separation - Additional infrastructure to manage Option B (current): Store in each service DB - Simpler, existing connections - Fragmented audit trail Recommendation: Start with Option B, move to A when needed. Reference: PR #382 review comment --- docs/migration-protocol.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index 040eeb7d..aebe7d06 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -276,6 +276,36 @@ PostgreSQL table is preferred over JSON/bucket storage because: - Transactional consistency with schema changes - Supports joins and aggregations for reporting +### Database Placement Strategy + +**Current architecture:** Separate PostgreSQL databases for `campus.auth` and `campus.api`. + +**Option A: Dedicated migrations/audit database (recommended for future)** + +| Pros | Cons | +|------|------| +| Centralized audit trail across all services | Additional infrastructure to manage | +| Clear separation of concerns | Extra database connection | +| Easier backup/retention policies | Cross-database queries not possible | +| Can host other admin/audit tables | | +| Migration state survives service rebuilds | | + +**Option B: Store _migrations in each service database (current proposal)** + +| Pros | Cons | +|------|------| +| No additional infrastructure | Fragmented audit trail | +| Simpler deployment | Harder to query across services | +| Existing connections | Service-specific data | +| Low overhead | | + +**Recommendension:** Start with Option B (each DB tracks its own migrations). Move to Option A when: +- Multiple services need unified audit view +- Additional admin/audit tables emerge +- Compliance requires centralized logging + +This keeps the protocol simple now while leaving room to grow. + ### State Interface ```python From 2f2cc1337131d099f8aac66b8a44d3409e3cf09e Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 14:05:52 +0800 Subject: [PATCH 11/18] docs: separate admin CLI from user CLI (PR review) Reviewer clarification: campus CLI is for user access, not admin control. - Create separate campus-admin package for operational commands - campus CLI: user features (auth login, api list) - campus-admin CLI: migrations, health checks, configuration Rationale: - User CLI should not expose dangerous admin operations - Admin commands require different auth/authorization - Clear boundary prevents accidental misuse Reference: PR #382 review comment --- docs/migration-protocol.md | 61 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index aebe7d06..dc7846b2 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -474,14 +474,71 @@ class MigrationRunner: ## CLI Interface -### When is CLI Needed? +### Admin CLI vs User CLI + +**Campus CLI (`campus`) = User features** +- End-user commands for interacting with Campus services +- Example: `campus auth login`, `campus api list` + +**Admin CLI (`campus-admin`) = Operational control** +- Database migrations, health checks, configuration +- Example: `campus-admin migrate`, `campus-admin status` + +**Separation rationale:** +- User CLI should not expose dangerous admin operations +- Admin commands require different authentication/authorization +- Clear boundary prevents accidental misuse + +### Admin CLI Implementation + +Create separate `campus-admin` package: + +```python +# campus-admin/main.py + +def migrate(target: str | None = None): + """Run database migrations.""" + from campus.storage.migrations import MigrationRunner + from pathlib import Path + + migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" + runner = MigrationRunner(migrations_dir) + runner.upgrade(target) + print("Migration complete.") + + +def rollback(target: str): + """Rollback to a specific migration.""" + from campus.storage.migrations import MigrationRunner + from pathlib import Path + + migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" + runner = MigrationRunner(migrations_dir) + runner.downgrade(target) + print("Rollback complete.") + + +def status(): + """Show current migration state.""" + from campus.storage.migrations import MigrationState + + state = MigrationState() + state.init_state_table() + applied = state.get_applied_migrations() + + print(f"Applied migrations: {len(applied)}") + for mid in sorted(applied): + print(f" ✓ {mid}") +``` + +### When is Admin CLI Needed? **Automated deployment (primary):** - Staging/production: migrations run automatically on app startup - PR validation: migrations tested against backup database - CI/CD: status reported via deployment checks -**CLI (secondary, for manual operations):** +**Admin CLI (secondary, for manual operations):** - Local development: apply migrations during feature development - Staging manual run: re-run migrations after fixing failures - Rollback: manual `rollback` when deployment needs quick revert From 781a9abacca8df63812af41c434dc2738efbfcce Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 14:15:48 +0800 Subject: [PATCH 12/18] docs: analyze storage interface for migrations (PR review) Addresses question: Should migrations use a different storage interface? Option A (current): Use existing storage layer via get_table() - Reuses infrastructure, consistent with app code - Risk: circular dependency if storage layer needs migration Option B: Direct database connection for migrations - Independent bootstrapping, can migrate storage layer - Con: duplicate connection management, more code Recommendation: Option A for Campus simplicity - Migrations are part of app, not external tools - Storage layer changes are infrequent - Document "bootstrap migrations" as special case if needed Reference: PR #382 review comment --- docs/migration-protocol.md | 48 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index dc7846b2..fe66a385 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -379,9 +379,53 @@ class MigrationState: return False ``` ---- +### Storage Interface for Migrations + +**Current approach:** `MigrationState` uses `get_table("_migrations")` through the normal storage layer. + +**Question:** Should migrations use a dedicated storage interface? + +**Option A: Use existing storage layer (current)** + +```python +# Current approach +from campus.storage import get_table + +def __init__(self): + self._table = get_table("_migrations") +``` + +| Pros | Cons | +|------|------| +| Reuses existing infrastructure | Migrations depend on storage layer working | +| Single connection pool | Can't migrate the storage layer itself | +| Consistent with app code | Circular dependency risk | + +**Option B: Direct database connection for migrations** + +```python +# Dedicated migration interface +import psycopg2 +from campus.common.env import get_postgres_url + +class MigrationState: + def __init__(self): + self._conn = psycopg2.connect(get_postgres_url()) +``` + +| Pros | Cons | +|------|------| +| Bootstraps independently | Duplicate connection management | +| Can migrate storage layer itself | More code to maintain | +| No circular dependencies | Inconsistent with app code | + +**Recommendation:** Option A (existing storage layer) for Campus because: +- Migrations are part of the app, not external tools +- Storage layer changes are infrequent +- Simpler to maintain one connection pattern +- If storage layer needs migration, handle with special-case init -## Migration Runner +For complex scenarios requiring Option B, document as "bootstrap migrations" in implementation. ```python # campus/storage/migrations/runner.py From eccee44bbc9e18ed5772a867770a0074a82a7f14 Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 15:37:56 +0800 Subject: [PATCH 13/18] docs: rename CLI from campus-admin to admin (PR review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewer decision: Use 'admin' as program name, not 'campus-admin'. Changes: - Rename campus-admin → admin throughout - Remove duplicate CLI Implementation section - Update all usage examples to use 'admin' command - Clarify separate package/project status Reference: PR #382 review comment --- docs/migration-protocol.md | 80 ++++++-------------------------------- 1 file changed, 11 insertions(+), 69 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index fe66a385..47292db2 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -524,21 +524,22 @@ class MigrationRunner: - End-user commands for interacting with Campus services - Example: `campus auth login`, `campus api list` -**Admin CLI (`campus-admin`) = Operational control** +**Admin CLI (`admin`) = Operational control** - Database migrations, health checks, configuration -- Example: `campus-admin migrate`, `campus-admin status` +- Example: `admin migrate`, `admin status` **Separation rationale:** - User CLI should not expose dangerous admin operations - Admin commands require different authentication/authorization - Clear boundary prevents accidental misuse +- Separate package/project, not bundled with campus ### Admin CLI Implementation -Create separate `campus-admin` package: +Create separate `admin` package: ```python -# campus-admin/main.py +# admin/main.py def migrate(target: str | None = None): """Run database migrations.""" @@ -589,79 +590,20 @@ def status(): - Status check: query migration state without starting app - Recovery: repair failed migration states -### CLI Implementation - -Add to `campus` CLI via `main.py`: - -```python -# main.py additions - -def migrate(target: str | None = None): - """Run database migrations.""" - from campus.storage.migrations import MigrationRunner - from pathlib import Path - - migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" - runner = MigrationRunner(migrations_dir) - runner.upgrade(target) - print("Migration complete.") - - -def rollback(target: str): - """Rollback to a specific migration.""" - from campus.storage.migrations import MigrationRunner - from pathlib import Path - - migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" - runner = MigrationRunner(migrations_dir) - runner.downgrade(target) - print("Rollback complete.") - - -def status(): - """Show current migration state.""" - from campus.storage.migrations import MigrationState - - state = MigrationState() - state.init_state_table() - applied = state.get_applied_migrations() - - print(f"Applied migrations: {len(applied)}") - for mid in sorted(applied): - print(f" ✓ {mid}") - print(f"Pending: compute by listing migrations/ vs applied") - - -if __name__ == "__main__": - import sys - command = sys.argv[1] if len(sys.argv) >= 2 else None - - if command == "migrate": - target = sys.argv[2] if len(sys.argv) >= 3 else None - migrate(target) - elif command == "rollback": - target = sys.argv[2] - rollback(target) - elif command == "status": - status() - else: - main(deployment=command) -``` - ### Usage ```bash # Apply all pending migrations -campus migrate +admin migrate # Apply up to specific revision -campus migrate 003 +admin migrate 003 # Rollback to specific revision -campus rollback 002 +admin rollback 002 # Show migration state -campus status +admin status ``` --- @@ -895,13 +837,13 @@ def upgrade(): ```bash # 1. Check current state -campus migrate status +admin status # 2. Backup production data pg_dump $POSTGRESDB_URI > backup_$(date +%Y%m%d).sql # 3. Rollback to safe revision -campus rollback 002 +admin rollback 002 # 4. Verify application health curl -f https://api.campus.nyjc.app/health || exit 1 From ba5eef4052688f4bf115dd397af2dd4d6f50605e Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 15:48:25 +0800 Subject: [PATCH 14/18] docs: address storage interface optimization for audit logging (PR review) Reviewer question: Should audit/migration logging use raw queries for performance? Context: Audit logs trace every API call (high volume), unlike regular API traffic. Analysis: - Raw queries: faster but create mixed patterns, slippery slope - Storage layer: consistent but has abstraction overhead Recommendation: Storage layer with optimization hooks - Add bulk_insert() for high-volume audit - Keep single pattern throughout codebase - Avoid slippery slope of developers bypassing storage layer Decision: Storage layer + OptimizedTable for bulk operations, not raw queries. Reference: PR #382 review comment --- docs/migration-protocol.md | 53 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index 47292db2..e82dd962 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -427,6 +427,59 @@ class MigrationState: For complex scenarios requiring Option B, document as "bootstrap migrations" in implementation. +### High-Volume Audit Logging + +**Context:** Audit/migration logging traces every API call, unlike regular API traffic which is user-initiated and lower volume. + +**Question:** Should audit logging bypass the storage layer and use raw queries? + +**Storage layer approach (current):** +```python +# Using storage layer +from campus.storage import get_table +audit = get_table("_audit") +audit.insert_one({...}) +``` + +**Raw query approach (alternative):** +```python +# Direct SQL for high-volume logging +import psycopg2 +conn = psycopg2.connect(get_postgres_url()) +cursor = conn.cursor() +cursor.execute("INSERT INTO _audit (...) VALUES (...)") +``` + +**Trade-off analysis:** + +| Factor | Storage Layer | Raw Queries | +|--------|---------------|-------------| +| **Performance** | Abstraction overhead | Faster execution | +| **Consistency** | Single pattern | Mixed patterns in codebase | +| **Maintainability** | Centralized query logic | Scattered SQL | +| **Optimization** | Harder to tune | Can batch/bulk insert | +| **Safety** | Validated schemas | Manual SQL validation | + +**Recommendation:** Use storage layer for migrations, but add optimization hooks for high-volume audit: + +```python +# campus/storage/tables/optimized.py +class OptimizedTable(TableInterface): + """Table interface optimized for high-volume inserts.""" + + def bulk_insert(self, rows: list[dict]) -> None: + """Batch insert for audit/migration logging.""" + # Bypass row-by-row validation for bulk operations + with self._get_connection() as conn: + with conn.cursor() as cursor: + cursor.executemany("INSERT INTO _table (...) VALUES (...)", rows) + conn.commit() +``` + +This keeps the storage layer pattern while providing optimization for heavy audit workloads. Mixing raw queries throughout the codebase creates a slippery slope where developers bypass the storage layer for convenience rather than performance needs. + +**Decision:** Storage layer with optimization hooks, not raw query access. + ```python # campus/storage/migrations/runner.py From dc6f10bfb311891063a8db3646537ba02e5d797a Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 16:30:29 +0800 Subject: [PATCH 15/18] chore: trigger CI From 3a445e30ad4fd4894c693c25313c6eb93fdda843 Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 16:49:12 +0800 Subject: [PATCH 16/18] docs: separate admin storage layer (final decision) Architecture decision: Auditing & migrations live in campus-admin as separate apps with their own schema and storage layer. Key changes: - Add architecture diagram showing Campus vs campus-admin separation - Update storage structure: campus-admin/storage/migrations/ - Document separate campus_admin_db for _migrations and _audit_log - Update all imports: campus_admin.storage instead of campus.storage - Explain rationale: domain data vs operational infrastructure - Document raw SQL for audit (high-volume write optimization) Rationale: - Concern separation: Domain models vs operational infrastructure - Performance isolation: Admin write-heavy won't impact Campus queries - Different backup policies: Admin longer retention than business data - Independent deployment: Campus updates don't depend on admin schema - Clear ownership: campus-admin owns its storage, not campus.storage Reference: PR #382 final decision --- docs/migration-protocol.md | 362 ++++++++++++++++++++++++------------- 1 file changed, 239 insertions(+), 123 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index e82dd962..fb6b4234 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -8,6 +8,89 @@ This document defines the storage migration protocol for Campus. Migrations are --- +## Architecture Decision: Separate Admin Storage + +**Final Decision:** Auditing & migrations live in `campus-admin` as separate apps with their own schema and storage layer. + +### Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Campus Applications │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ campus.auth │ │ campus.api │ │ +│ │ │ │ │ │ +│ │ Users │ │ Assignments │ │ +│ │ Sessions │ │ Submissions │ │ +│ │ OAuth Clients │ │ Circles │ │ +│ │ │ │ Timetables │ │ +│ └────────┬─────────┘ └────────┬─────────┘ │ +│ │ │ │ +│ └────────────┬───────────────┘ │ +│ ▼ │ +│ ┌────────────────────────────────┐ │ +│ │ Campus Storage Layer │ │ +│ │ (PostgreSQL + MongoDB) │ │ +│ └────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────┐ +│ campus-admin (Separate) │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ Migrations │ │ Audit │ │ +│ │ │ │ │ │ +│ │ _migrations │ │ _audit_log │ │ +│ │ State tracking │ │ API traces │ │ +│ │ Rollback │ │ Event history │ │ +│ └────────┬─────────┘ └────────┬─────────┘ │ +│ │ │ │ +│ └────────────┬───────────────┘ │ +│ ▼ │ +│ ┌────────────────────────────────┐ │ +│ │ Admin Storage Layer │ │ +│ │ (Separate PostgreSQL DB) │ │ +│ │ - Separate schema │ │ +│ │ - Optimized for high volume │ │ +│ └────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Key Decisions + +| Decision | Rationale | +|----------|-----------| +| **Separate apps** | Migrations/audit are operational concerns, not domain models | +| **Separate storage** | Campus schema focuses on business data, admin on operational data | +| **campus-admin package** | Clear boundary: campus = user features, admin = operational control | +| **`admin` CLI** | Separate command-line tool for admin operations | + +### Benefits of Separation + +1. **Clear concern boundaries** - Domain models vs operational infrastructure +2. **Independent deployment** - Admin changes don't require Campus deployment +3. **Different storage optimization** - Admin optimized for high-volume writes, Campus for queries +4. **Security isolation** - Admin data protected separately from user data +5. **Development velocity** - Campus team can ship features without worrying about admin schema + +### Storage Separation + +| Aspect | Campus Storage | Admin Storage | +|--------|---------------|---------------| +| **Database** | Separate DBs for auth/api | Dedicated admin DB | +| **Schema** | Business domain models | Operational infrastructure | +| **Access pattern** | Query-heavy (user requests) | Write-heavy (audit/ migrations) | +| **Optimization** | Indexed for reads | Optimized for bulk inserts | +| **Connection** | Via `campus.storage` | Direct to admin DB | +| **Tables** | users, sessions, assignments... | _migrations, _audit_log | + +--- + ## Requirements Per [issue #27](https://github.com/nyjc-computing/campus/issues/27): @@ -18,11 +101,13 @@ Per [issue #27](https://github.com/nyjc-computing/campus/issues/27): 4. **Rollback/restore** - Ability to revert migrations 5. **Audit logging** - Record all migration actions for investigation +**Implementation:** All requirements implemented in `campus-admin` package. + --- ## Architecture -### Storage Types +### Campus Storage Types | Storage Type | Backend | Use Case | |-------------|---------|----------| @@ -30,23 +115,54 @@ Per [issue #27](https://github.com/nyjc-computing/campus/issues/27): | **Documents** | MongoDB | Flexible JSON-like objects (assignments, submissions) | | **Objects** | S3-compatible | Binary blobs (files, attachments) | -### Migration Structure +### Admin Storage Structure ``` -campus/storage/migrations/ -├── __init__.py # Migration runner interface -├── state.py # Migration state tracking (uses storage layer) -├── runner.py # Main migration execution logic -├── scripts/ -│ ├── migrate.py # CLI entry point: `campus migrate` -│ └── rollback.py # CLI entry point: `campus rollback` -└── migrations/ - ├── 001_init_assignments.py - ├── 002_init_submissions.py - ├── 003_add_response_timestamps.py - └── ... +campus-admin/ +├── storage/ +│ ├── __init__.py # Admin storage interface +│ ├── migrations/ +│ │ ├── __init__.py # Migration runner interface +│ │ ├── state.py # Migration state tracking +│ │ ├── runner.py # Main migration execution logic +│ │ └── migrations/ +│ │ ├── 001_init_assignments.py +│ │ ├── 002_init_submissions.py +│ │ └── ... +│ └── audit/ +│ ├── __init__.py # Audit logging interface +│ ├── log.py # High-volume audit writer +│ └── queries.py # Audit query helpers +└── main.py # `admin` CLI entry point ``` +### Migration Structure (Updated) + +**Migrations run from campus-admin, operate on Campus data:** + +``` +campus-admin/ +├── storage/ +│ ├── __init__.py +│ ├── migrations/ +│ │ ├── __init__.py # Migration runner interface +│ │ ├── state.py # State tracking (in admin DB) +│ │ ├── runner.py # Executes migrations +│ │ └── migrations/ +│ │ ├── 001_init_assignments.py # Operates on campus.api +│ │ ├── 002_init_submissions.py # Operates on campus.api +│ │ ├── 003_add_response_timestamps.py +│ │ └── ... +│ └── audit/ +│ ├── __init__.py +│ ├── log.py # High-volume audit writer +│ └── queries.py # Audit query helpers +├── __init__.py +└── main.py # `admin` CLI entry point +``` + +**Key point:** Migration files live in `campus-admin` but import from `campus.api.resources` or `campus.auth.resources` to operate on Campus data. + --- ## Transaction Strategy @@ -278,40 +394,63 @@ PostgreSQL table is preferred over JSON/bucket storage because: ### Database Placement Strategy -**Current architecture:** Separate PostgreSQL databases for `campus.auth` and `campus.api`. +**Decision:** Dedicated admin/audit database for `campus-admin`. -**Option A: Dedicated migrations/audit database (recommended for future)** +``` +PostgreSQL Instances: +├── campus_auth_db (campus.auth users, sessions, clients) +├── campus_api_db (campus.api assignments, submissions, circles) +└── campus_admin_db (campus-admin _migrations, _audit_log) +``` -| Pros | Cons | -|------|------| -| Centralized audit trail across all services | Additional infrastructure to manage | -| Clear separation of concerns | Extra database connection | -| Easier backup/retention policies | Cross-database queries not possible | -| Can host other admin/audit tables | | -| Migration state survives service rebuilds | | +### Rationale for Separate Admin Database -**Option B: Store _migrations in each service database (current proposal)** +| Consideration | Decision | Reasoning | +|---------------|----------|-----------| +| **Concern separation** | ✅ Separate DB | Domain data vs operational infrastructure | +| **Performance isolation** | ✅ Separate DB | Admin write-heavy won't impact Campus queries | +| **Backup policies** | ✅ Separate DB | Different retention: admin longer, business data GDPR | +| **Deployment coupling** | ✅ Separate DB | Campus updates don't depend on admin schema | +| **Cross-service audit** | ✅ Separate DB | Single audit trail across auth + API + future services | +| **Infrastructure cost** | ⚠️ Additional DB | Managed via Railway/RDS, minimal overhead | +| **Query complexity** | ⚠️ No cross-DB joins | Not needed - audit is append-only log | -| Pros | Cons | -|------|------| -| No additional infrastructure | Fragmented audit trail | -| Simpler deployment | Harder to query across services | -| Existing connections | Service-specific data | -| Low overhead | | +### Storage Interface Separation -**Recommendension:** Start with Option B (each DB tracks its own migrations). Move to Option A when: -- Multiple services need unified audit view -- Additional admin/audit tables emerge -- Compliance requires centralized logging +**Campus storage layer:** +- Accessed via `campus.storage.get_table()` / `get_collection()` +- Optimized for domain queries +- Validates against Campus models -This keeps the protocol simple now while leaving room to grow. +**Admin storage layer:** +- Accessed via `campus_admin.storage.get_table()` +- Optimized for high-volume writes (audit) +- Separate connection pool +- May use raw SQL for bulk inserts (audit logging optimization) + +```python +# Campus app uses campus storage +from campus.storage import get_table +users = get_table("users") + +# Admin app uses admin storage +from campus_admin.storage import get_table +migrations = get_table("_migrations") + +# High-volume audit may use optimized writer +from campus_admin.storage.audit import AuditWriter +audit = AuditWriter() +audit.bulk_log(events) # Uses executemany for performance +``` ### State Interface +Located in `campus-admin/storage/migrations/state.py`: + ```python -# campus/storage/migrations/state.py +# campus-admin/storage/migrations/state.py -from campus.storage import get_table +from campus_admin.storage import get_table from datetime import datetime, UTC class MigrationState: @@ -381,111 +520,86 @@ class MigrationState: ### Storage Interface for Migrations -**Current approach:** `MigrationState` uses `get_table("_migrations")` through the normal storage layer. - -**Question:** Should migrations use a dedicated storage interface? - -**Option A: Use existing storage layer (current)** +**Decision:** Migrations use dedicated `campus_admin.storage` interface. ```python -# Current approach -from campus.storage import get_table - -def __init__(self): - self._table = get_table("_migrations") -``` - -| Pros | Cons | -|------|------| -| Reuses existing infrastructure | Migrations depend on storage layer working | -| Single connection pool | Can't migrate the storage layer itself | -| Consistent with app code | Circular dependency risk | +# campus-admin/storage/migrations/state.py -**Option B: Direct database connection for migrations** - -```python -# Dedicated migration interface -import psycopg2 -from campus.common.env import get_postgres_url +from campus_admin.storage import get_table class MigrationState: def __init__(self): - self._conn = psycopg2.connect(get_postgres_url()) + # Connects to campus_admin_db, not campus databases + self._table = get_table("_migrations") ``` -| Pros | Cons | -|------|------| -| Bootstraps independently | Duplicate connection management | -| Can migrate storage layer itself | More code to maintain | -| No circular dependencies | Inconsistent with app code | - -**Recommendation:** Option A (existing storage layer) for Campus because: -- Migrations are part of the app, not external tools -- Storage layer changes are infrequent -- Simpler to maintain one connection pattern -- If storage layer needs migration, handle with special-case init - -For complex scenarios requiring Option B, document as "bootstrap migrations" in implementation. +**Why dedicated interface:** +- **Separation of concerns** - Admin infrastructure independent of Campus domain models +- **Independent deployment** - Campus schema changes don't break admin/migrations +- **Different optimization** - Admin optimized for audit writes, Campus for queries +- **Clear ownership** - `campus_admin` package owns its storage, not `campus.storage` -### High-Volume Audit Logging - -**Context:** Audit/migration logging traces every API call, unlike regular API traffic which is user-initiated and lower volume. +**Migration runners still operate on Campus data:** +```python +# campus-admin/storage/migrations/migrations/001_init_assignments.py -**Question:** Should audit logging bypass the storage layer and use raw queries? +from campus.api.resources import AssignmentsResource -**Storage layer approach (current):** -```python -# Using storage layer -from campus.storage import get_table -audit = get_table("_audit") -audit.insert_one({...}) +def upgrade(): + """Migrate Campus schema via Campus resources.""" + assignments = AssignmentsResource._storage + # Operate on Campus data using Campus resources ``` -**Raw query approach (alternative):** +**State tracking stays in admin DB:** ```python -# Direct SQL for high-volume logging -import psycopg2 -conn = psycopg2.connect(get_postgres_url()) -cursor = conn.cursor() -cursor.execute("INSERT INTO _audit (...) VALUES (...)") +# campus-admin tracks migration state +from campus_admin.storage import get_table +state_table = get_table("_migrations") +state_table.insert_one({"id": "001", "status": "applied", ...}) ``` -**Trade-off analysis:** - -| Factor | Storage Layer | Raw Queries | -|--------|---------------|-------------| -| **Performance** | Abstraction overhead | Faster execution | -| **Consistency** | Single pattern | Mixed patterns in codebase | -| **Maintainability** | Centralized query logic | Scattered SQL | -| **Optimization** | Harder to tune | Can batch/bulk insert | -| **Safety** | Validated schemas | Manual SQL validation | +### High-Volume Audit Logging -**Recommendation:** Use storage layer for migrations, but add optimization hooks for high-volume audit: +**Decision:** Admin storage uses optimized bulk inserts for audit. ```python -# campus/storage/tables/optimized.py -class OptimizedTable(TableInterface): - """Table interface optimized for high-volume inserts.""" +# campus-admin/storage/audit/log.py + +class AuditWriter: + """High-volume audit logging for admin database.""" + + def __init__(self): + # Direct connection for performance + self._conn = psycopg2.connect(get_admin_postgres_url()) def bulk_insert(self, rows: list[dict]) -> None: - """Batch insert for audit/migration logging.""" - # Bypass row-by-row validation for bulk operations - with self._get_connection() as conn: - with conn.cursor() as cursor: - cursor.executemany("INSERT INTO _table (...) VALUES (...)", rows) - conn.commit() + """Batch insert for audit - optimized for high volume.""" + with self._conn.cursor() as cursor: + cursor.executemany( + "INSERT INTO _audit_log (event_type, data, created_at) VALUES (%s, %s, %s)", + [(r["event_type"], json.dumps(r["data"]), r["created_at"]) for r in rows] + ) + self._conn.commit() ``` -This keeps the storage layer pattern while providing optimization for heavy audit workloads. Mixing raw queries throughout the codebase creates a slippery slope where developers bypass the storage layer for convenience rather than performance needs. +**Why raw queries for audit:** +- **Performance** - Traces every API call (high volume) +- **Write pattern** - Append-only, no read-modify-write +- **No validation needed** - Audit data structure controlled internally +- **Different pattern** - Audit is infrastructure, not domain model -**Decision:** Storage layer with optimization hooks, not raw query access. +**Campus domain models still use campus.storage:** +- Domain entities (users, assignments) use validated storage layer +- Only admin/audit infrastructure uses optimized direct access +- Clear boundary enforced by package separation ```python -# campus/storage/migrations/runner.py +# campus-admin/storage/migrations/runner.py import importlib from pathlib import Path -from campus.yapper import Yapper +from campus_admin.yapper import Yapper from .state import MigrationState class MigrationRunner: @@ -589,14 +703,14 @@ class MigrationRunner: ### Admin CLI Implementation -Create separate `admin` package: +Located in `campus-admin/main.py`: ```python -# admin/main.py +# campus-admin/main.py def migrate(target: str | None = None): """Run database migrations.""" - from campus.storage.migrations import MigrationRunner + from campus_admin.storage.migrations import MigrationRunner from pathlib import Path migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" @@ -607,7 +721,7 @@ def migrate(target: str | None = None): def rollback(target: str): """Rollback to a specific migration.""" - from campus.storage.migrations import MigrationRunner + from campus_admin.storage.migrations import MigrationRunner from pathlib import Path migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" @@ -618,7 +732,7 @@ def rollback(target: str): def status(): """Show current migration state.""" - from campus.storage.migrations import MigrationState + from campus_admin.storage.migrations import MigrationState state = MigrationState() state.init_state_table() @@ -665,12 +779,12 @@ admin status ### Automatic Migration on Deploy -Add to deployment configuration (`wsgi.py` or app factory): +Migrations run from `campus-admin`, triggered during Campus deployment: ```python # wsgi.py or apps/*/factory.py -from campus.storage.migrations import MigrationRunner +from campus_admin.storage.migrations import MigrationRunner from pathlib import Path import os @@ -680,7 +794,7 @@ def create_app(): # Run migrations on app startup in production if os.getenv("ENV") in ("staging", "production"): try: - migrations_dir = Path(__file__).parent / "storage" / "migrations" / "migrations" + migrations_dir = Path(__file__).parent.parent / "campus-admin" / "storage" / "migrations" / "migrations" runner = MigrationRunner(migrations_dir) runner.upgrade() except Exception as e: @@ -690,12 +804,14 @@ def create_app(): return app ``` +**Note:** Campus app triggers migrations, but migration logic lives in `campus-admin`. + ### Pre-Deployment Checks (Built into MigrationRunner) The migration runner should validate before executing: ```python -# campus/storage/migrations/runner.py +# campus-admin/storage/migrations/runner.py class MigrationRunner: """Execute and track migrations.""" @@ -934,7 +1050,7 @@ logger.emit("migration.failed", { ### Consuming Migration Events ```python -from campus.yapper import Yapper +from campus_admin.yapper import Yapper yapper = Yapper() From eb2cf48a7955d4ec63cf92e62a0055ce5e98e8a9 Mon Sep 17 00:00:00 2001 From: JS Ng Date: Thu, 26 Mar 2026 16:51:18 +0800 Subject: [PATCH 17/18] fix: ASCII art fixes --- docs/migration-protocol.md | 88 +++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index fb6b4234..a26c84d8 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -15,50 +15,50 @@ This document defines the storage migration protocol for Campus. Migrations are ### Architecture ``` -┌─────────────────────────────────────────────────────────────┐ -│ Campus Applications │ -├─────────────────────────────────────────────────────────────┤ -│ │ -│ ┌──────────────────┐ ┌──────────────────┐ │ -│ │ campus.auth │ │ campus.api │ │ -│ │ │ │ │ │ -│ │ Users │ │ Assignments │ │ -│ │ Sessions │ │ Submissions │ │ -│ │ OAuth Clients │ │ Circles │ │ -│ │ │ │ Timetables │ │ -│ └────────┬─────────┘ └────────┬─────────┘ │ -│ │ │ │ -│ └────────────┬───────────────┘ │ -│ ▼ │ -│ ┌────────────────────────────────┐ │ -│ │ Campus Storage Layer │ │ -│ │ (PostgreSQL + MongoDB) │ │ -│ └────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────┘ - -┌─────────────────────────────────────────────────────────────┐ -│ campus-admin (Separate) │ -├─────────────────────────────────────────────────────────────┤ -│ │ -│ ┌──────────────────┐ ┌──────────────────┐ │ -│ │ Migrations │ │ Audit │ │ -│ │ │ │ │ │ -│ │ _migrations │ │ _audit_log │ │ -│ │ State tracking │ │ API traces │ │ -│ │ Rollback │ │ Event history │ │ -│ └────────┬─────────┘ └────────┬─────────┘ │ -│ │ │ │ -│ └────────────┬───────────────┘ │ -│ ▼ │ -│ ┌────────────────────────────────┐ │ -│ │ Admin Storage Layer │ │ -│ │ (Separate PostgreSQL DB) │ │ -│ │ - Separate schema │ │ -│ │ - Optimized for high volume │ │ -│ └────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────┘ +┌───────────────────────────────────────────────────────────┐ +│ Campus Applications │ +├───────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────────┐ ┌─────────────────┐ │ +│ │ campus.auth │ │ campus.api │ │ +│ │ │ │ │ │ +│ │ Users │ │ Assignments │ │ +│ │ Sessions │ │ Submissions │ │ +│ │ OAuth Clients │ │ Circles │ │ +│ │ │ │ Timetables │ │ +│ └────────┬─────────┘ └────────┬────────┘ │ +│ │ │ │ +│ └────────────┬───────────────┘ │ +│ ▼ │ +│ ┌───────────────────────────────┐ │ +│ │ Campus Storage Layer │ │ +│ │ (PostgreSQL + MongoDB) │ │ +│ └───────────────────────────────┘ │ +│ │ +└───────────────────────────────────────────────────────────┘ + +┌───────────────────────────────────────────────────────────┐ +│ campus-admin (Separate) │ +├───────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ Migrations │ │ Audit │ │ +│ │ │ │ │ │ +│ │ _migrations │ │ _audit_log │ │ +│ │ State tracking │ │ API traces │ │ +│ │ Rollback │ │ Event history │ │ +│ └────────┬─────────┘ └────────┬─────────┘ │ +│ │ │ │ +│ └────────────┬───────────────┘ │ +│ ▼ │ +│ ┌────────────────────────────────┐ │ +│ │ Admin Storage Layer │ │ +│ │ (Separate PostgreSQL DB) │ │ +│ │ - Separate schema │ │ +│ │ - Optimized for high volume │ │ +│ └────────────────────────────────┘ │ +│ │ +└───────────────────────────────────────────────────────────┘ ``` ### Key Decisions From 6fedb1fdc9b9f504155ab59e89e947b5caa1b16c Mon Sep 17 00:00:00 2001 From: JS Ng Date: Fri, 27 Mar 2026 09:56:50 +0800 Subject: [PATCH 18/18] docs: clarify campus-admin storage module and manual deployment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implementation updates: - campus-admin creates its own storage module (campus-admin/storage/) - NOT under campus.storage, separate module - Does NOT use campus.common.devops.deploy infrastructure - Migrations and deployments are manual (no auto-run) Storage module clarification: - campus/storage/ → Campus domain storage - campus-admin/storage/ → Admin operational storage (separate repo) - Import: from campus_admin.storage import get_table Deployment changes: - Removed automatic migration trigger from Campus apps - Document manual workflow: ssh → admin migrate → admin status - Future auto-run would require campus-admin as dependency (not recommended initially) Reference: PR #382 implementation discussion --- docs/migration-protocol.md | 77 +++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/docs/migration-protocol.md b/docs/migration-protocol.md index a26c84d8..6989bb4c 100644 --- a/docs/migration-protocol.md +++ b/docs/migration-protocol.md @@ -88,6 +88,9 @@ This document defines the storage migration protocol for Campus. Migrations are | **Optimization** | Indexed for reads | Optimized for bulk inserts | | **Connection** | Via `campus.storage` | Direct to admin DB | | **Tables** | users, sessions, assignments... | _migrations, _audit_log | +| **Package** | `campus/storage/` | `campus-admin/storage/` (separate module) | + +**Important:** `campus-admin` creates its own storage module at `campus-admin/storage/`. It is NOT part of `campus.storage` and does NOT share the `campus.common.devops.deploy` infrastructure. --- @@ -418,31 +421,40 @@ PostgreSQL Instances: ### Storage Interface Separation **Campus storage layer:** -- Accessed via `campus.storage.get_table()` / `get_collection()` +- Package: `campus/storage/` +- Accessed via `from campus.storage import get_table` - Optimized for domain queries - Validates against Campus models **Admin storage layer:** -- Accessed via `campus_admin.storage.get_table()` +- Package: `campus-admin/storage/` (separate module, not under campus/) +- Accessed via `from campus_admin.storage import get_table` - Optimized for high-volume writes (audit) - Separate connection pool - May use raw SQL for bulk inserts (audit logging optimization) +- Does NOT use `campus.common.devops.deploy` (manual deployment) ```python # Campus app uses campus storage from campus.storage import get_table users = get_table("users") -# Admin app uses admin storage +# Admin app uses campus-admin storage (separate module) from campus_admin.storage import get_table migrations = get_table("_migrations") -# High-volume audit may use optimized writer +# High-volume audit uses optimized writer from campus_admin.storage.audit import AuditWriter audit = AuditWriter() audit.bulk_log(events) # Uses executemany for performance ``` +**Module structure:** +``` +campus/storage/ → Campus domain storage +campus-admin/storage/ → Admin operational storage (separate repo) +``` + ### State Interface Located in `campus-admin/storage/migrations/state.py`: @@ -777,34 +789,47 @@ admin status ## Deployment Integration -### Automatic Migration on Deploy +### Deployment Approach -Migrations run from `campus-admin`, triggered during Campus deployment: +**Current: Manual deployment for campus-admin** -```python -# wsgi.py or apps/*/factory.py +Migrations are run manually via the `admin` CLI. There is NO automatic migration trigger from Campus apps. -from campus_admin.storage.migrations import MigrationRunner -from pathlib import Path -import os +```bash +# Manual migration workflow +1. ssh into server +2. cd /path/to/campus-admin +3. admin migrate # Apply pending migrations +4. admin status # Verify success +``` -def create_app(): - app = Flask(__name__) +**Future: Optional auto-run (if needed)** - # Run migrations on app startup in production - if os.getenv("ENV") in ("staging", "production"): - try: - migrations_dir = Path(__file__).parent.parent / "campus-admin" / "storage" / "migrations" / "migrations" - runner = MigrationRunner(migrations_dir) - runner.upgrade() - except Exception as e: - # Log but don't fail deployment - app.logger.error(f"Migration failed: {e}") - - return app -``` +Campus apps could potentially trigger migrations, but this is NOT implemented initially. If added, it would require: -**Note:** Campus app triggers migrations, but migration logic lives in `campus-admin`. +1. Campus app depends on campus-admin as a submodule +2. Import from campus-admin.storage.migrations +3. Handle campus-admin not being installed +4. Manual approval before running migrations + +**Recommendation:** Keep manual migrations initially. Auto-run adds complexity and failure modes that aren't justified for ~2000 users. + +### Manual Deployment Steps + +```bash +# 1. Deploy campus-admin code +git pull campus-admin main +poetry install + +# 2. Run migrations manually +admin migrate + +# 3. Verify success +admin status + +# 4. Deploy Campus app (if needed) +# Campus deployment is separate from campus-admin deployment +``` ### Pre-Deployment Checks (Built into MigrationRunner)