diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..3e6c63e --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,247 @@ +# AGENTS.md — Agent-Diff Developer Guide + +## Project Overview + +Agent-Diff is a benchmarking platform for evaluating AI agents that interact with +real-world SaaS APIs (Slack, Linear, Box, Google Calendar). It provides **isolated, +reproducible environments** backed by PostgreSQL schema cloning. + +## Architecture + +``` +┌──────────────────────────┐ ┌──────────────────────┐ +│ Evaluation Client │ │ Agent Sandbox │ +│ (prime eval / SDK) │──────▶│ (Docker container) │ +│ │ │ │ +│ 1. initEnv │ │ Runs agent code │ +│ 2. startRun │ │ Makes API calls ──┐ │ +│ 3. evaluateRun │ └────────────────────┼─┘ +│ 4. getResults │ │ +└──────────┬───────────────┘ │ + │ │ + ▼ ▼ +┌──────────────────────────────────────────────────────────┐ +│ AgentDiff Backend (FastAPI/Starlette) │ +│ │ +│ Platform API (/api/platform/*) │ +│ - initEnv, startRun, evaluateRun, diffRun │ +│ - Template & test suite management │ +│ │ +│ Service APIs (/api/env/{env_id}/services/{service}/*) │ +│ - Box REST API replica (/services/box/2.0/*) │ +│ - Slack API replica (/services/slack/*) │ +│ - Linear GraphQL replica (/services/linear/*) │ +│ - Calendar API replica (/services/calendar/*) │ +│ │ +│ Middleware: │ +│ PlatformMiddleware → API key auth for platform calls │ +│ IsolationMiddleware → per-env DB session + auth │ +└──────────────────────────────────────────────────────────┘ +``` + +## Environment Lifecycle + +### 1. Create an Isolated Environment (initEnv) + +Every evaluation starts by creating an isolated copy of a template database schema. + +**Via SDK (Python):** +```python +from agent_diff import AgentDiff + +client = AgentDiff( + api_key="ad_live_sk_...", + base_url="https://api.agentdiff.dev", # or http://localhost:8000 +) + +env = client.init_env( + templateService="box", # "box" | "linear" | "slack" | "calendar" + templateName="box_default", # name of the seeded template + impersonateUserId="27512847635", # user ID from the seed data +) +# env.environmentId → hex string, e.g. "824d0c408eeb42368f20e24d2d9f03c3" +# env.environmentUrl → "/api/env/{env_id}/services/box" +``` + +**Via curl:** +```bash +curl -X POST https://api.agentdiff.dev/api/platform/initEnv \ + -H "X-API-Key: ad_live_sk_..." \ + -H "Content-Type: application/json" \ + -d '{ + "templateService": "box", + "templateName": "box_default", + "impersonateUserId": "27512847635" + }' +``` + +**What happens internally:** +1. `templateManager.resolve_init_template()` finds the template by service+name +2. `CoreIsolationEngine.create_environment()` clones the template PostgreSQL schema +3. A new `state_` schema is created with all tables and data copied +4. A `RunTimeEnvironment` record is stored in the meta schema with TTL + +### 2. Make API Calls Against the Environment + +Once the environment is created, API calls go to the service replica endpoints: + +``` +Base URL: {base_url}/api/env/{env_id}/services/{service} + +Box: /api/env/{env_id}/services/box/2.0/search?query=fomc +Linear: /api/env/{env_id}/services/linear/graphql +Slack: /api/env/{env_id}/services/slack/conversations.list +Calendar: /api/env/{env_id}/services/calendar/calendars/{calendarId}/events +``` + +Each request goes through `IsolationMiddleware` which: +1. Validates the API key via control plane (`get_principal_id`) +2. Looks up the environment in meta DB to get impersonate_user_id +3. Opens a DB session scoped to the environment's `state_` schema +4. 
Passes the request to the service route handler + +### 3. Start a Run & Evaluate + +```python +run = client.start_run(envId=env.environmentId) +# ... agent makes API calls that modify the environment ... +result = client.evaluate_run(runId=run.runId, expectedOutput={...}) +results = client.get_results_for_run(runId=run.runId) +``` + +### 4. Cleanup + +```python +client.delete_env(envId=env.environmentId) +``` + +## Available Templates + +| Service | Template Name | Impersonate User ID | +|----------|-------------------|----------------------------------------| +| box | box_default | 27512847635 | +| linear | linear_default | 2790a7ee-fde0-4537-9588-e233aa5a68d1 | +| slack | slack_default | U01AGENBOT9 | +| calendar | calendar_base | (varies by seed) | + +## Writing Tests + +### Integration Tests (in-process, no HTTP server) + +Tests create environments via `core_isolation_engine.create_environment()` and +wire up an `AsyncClient` with middleware that injects the DB session: + +```python +@pytest_asyncio.fixture +async def box_client(test_user_id, core_isolation_engine, session_manager, environment_handler): + env_result = core_isolation_engine.create_environment( + template_schema="box_default", + ttl_seconds=3600, + created_by=test_user_id, + impersonate_user_id="27512847635", + ) + + async def add_db_session(request, call_next): + with session_manager.with_session_for_environment(env_result.environment_id) as session: + request.state.db_session = session + request.state.environment_id = env_result.environment_id + request.state.impersonate_user_id = "27512847635" + request.state.impersonate_email = None + response = await call_next(request) + return response + + from src.services.box.api.routes import routes as box_routes + app = Starlette(routes=box_routes) + app.middleware("http")(add_db_session) + + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + + environment_handler.drop_schema(env_result.schema_name) +``` + +### Running Tests + +```bash +cd backend +# Requires DATABASE_URL in .env or environment +pytest tests/performance/test_box_bench_perf.py -v -s +pytest tests/integration/ -v +``` + +## Running Evaluations Locally + +```bash +# 1. Activate the bench environment's venv +source third_party/prime-environments/environments/agent_diff_bench/.venv/bin/activate + +# 2. Install the environment package +cd third_party/prime-environments/environments/agent_diff_bench +uv pip install -e . + +# 3. Run evaluation (from the agent_diff_bench directory) +uv run prime eval run agent-diff-bench \ + -m "openai/gpt-5-mini" \ + -n 5 -r 3 -s \ + -a '{"agentdiff_api_key": "ad_live_sk_..."}' +``` + +Results are saved to: `third_party/prime-environments/environments/agent_diff_bench/eval_results/` + +## Database Seeding + +Templates are seeded from JSON files in `backend/seeds/` (Docker) or `examples/` (local). + +Seed scripts in `backend/utils/`: +- `seed_box_template.py` — creates box_default, box_base templates +- `seed_linear_template.py` — creates linear_default, linear_base, linear_expanded +- `seed_slack_template.py` — creates slack_default, slack_bench_default +- `seed_calendar_template.py` — creates calendar_base +- `seed_tests.py` — loads test suite JSON files + +On Railway, seeding runs automatically on deploy when `SEED=true` env var is set. +The Dockerfile startup script runs Alembic migrations then all seed scripts. 
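To reproduce that sequence against a local database, the rough shape is: apply the Alembic migrations, then run each seed script. A minimal sketch — the exact invocation style, script arguments, and the `DATABASE_URL` value below are assumptions; check the Dockerfile startup script and `backend/utils/` for the canonical commands:

```bash
cd backend
# Point the scripts at your local Postgres (placeholder value — adjust to your setup)
export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/agentdiff

# 1. Apply schema migrations first
alembic upgrade head

# 2. Seed templates and test suites (one script per service)
python utils/seed_box_template.py        # box_default, box_base
python utils/seed_linear_template.py     # linear_default, linear_base, linear_expanded
python utils/seed_slack_template.py      # slack_default, slack_bench_default
python utils/seed_calendar_template.py   # calendar_base
python utils/seed_tests.py               # load test suite JSON files
```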
+ +## Performance Profiling + +All `[PERF]` log lines are instrumented for performance tracking: + +- **Middleware**: `[PERF] GET /api/env/.../services/box/... total=Xms auth=Xms meta_db=Xms handler=Xms` +- **Box operations**: `[PERF] search_content TOTAL=Xms`, `[PERF] get_folder_by_id(...) time=Xms` +- **Box schema**: `[PERF] File._get_path_collection depth=N time=Xms` +- **Calendar**: `[PERF] Calendar events_list took Xms` + +Filter with: `grep "\[PERF\]"` in Railway logs. + +## Key Directories + +``` +backend/ + src/ + platform/ # Platform API (initEnv, runs, evaluation) + services/ + box/ # Box API replica + slack/ # Slack API replica + linear/ # Linear API replica + calendar/ # Calendar API replica + tests/ + integration/ # Full-stack integration tests + performance/ # Performance/benchmark tests + validation/ # API parity tests + unit/ # Unit tests + utils/ # Seed scripts + seeds/ # Seed data JSON files (for Docker) + +sdk/agent-diff-python/ # Python SDK (agent_diff package) + +examples/ + box/ # Box seed data + test suites + linear/ # Linear seed data + test suites + slack/ # Slack seed data + test suites + calendar/ # Calendar seed data + +third_party/prime-environments/environments/agent_diff_bench/ + agent_diff_bench.py # Entry point for prime eval + src/environment.py # Environment setup (initEnv, startRun, etc.) +``` diff --git a/backend/src/platform/api/middleware.py b/backend/src/platform/api/middleware.py index 2259ea8..a19e61e 100644 --- a/backend/src/platform/api/middleware.py +++ b/backend/src/platform/api/middleware.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import time from starlette.middleware.base import BaseHTTPMiddleware from starlette.requests import Request @@ -86,6 +87,8 @@ async def dispatch(self, request: Request, call_next) -> Response: if not path.startswith("/api/env/"): return await call_next(request) + t_total_start = time.perf_counter() + try: path_after_prefix = path[len("/api/env/") :] env_id = path_after_prefix.split("/")[0] if path_after_prefix else "" @@ -106,8 +109,11 @@ async def dispatch(self, request: Request, call_next) -> Response: status_code=status.HTTP_401_UNAUTHORIZED, ) + t_auth_start = time.perf_counter() principal_id = await get_principal_id(api_key_hdr, action="api_request") + t_auth_ms = (time.perf_counter() - t_auth_start) * 1000 + t_meta_start = time.perf_counter() with self.session_manager.with_meta_session() as meta_session: request.state.principal_id = principal_id @@ -125,11 +131,26 @@ async def dispatch(self, request: Request, call_next) -> Response: logger.debug( f"Could not load impersonation data for env {env_id}: {e}" ) + t_meta_ms = (time.perf_counter() - t_meta_start) * 1000 + t_handler_start = time.perf_counter() with self.session_manager.with_session_for_environment(env_id) as session: request.state.db_session = session request.state.environment_id = env_id - return await call_next(request) + response = await call_next(request) + t_handler_ms = (time.perf_counter() - t_handler_start) * 1000 + + t_total_ms = (time.perf_counter() - t_total_start) * 1000 + # Extract service from path for easier log filtering + parts = path_after_prefix.split("/") + service_name = parts[2] if len(parts) > 2 else "unknown" + logger.info( + f"[PERF] {request.method} {path} | service={service_name} " + f"total={t_total_ms:.0f}ms auth={t_auth_ms:.0f}ms " + f"meta_db={t_meta_ms:.0f}ms handler={t_handler_ms:.0f}ms " + f"status={response.status_code}" + ) + return response except PermissionError as exc: return 
JSONResponse( diff --git a/backend/src/platform/db/migrations/versions/a1b2c3d4e5f6_calendar_composite_indexes.py b/backend/src/platform/db/migrations/versions/a1b2c3d4e5f6_calendar_composite_indexes.py new file mode 100644 index 0000000..7b8c201 --- /dev/null +++ b/backend/src/platform/db/migrations/versions/a1b2c3d4e5f6_calendar_composite_indexes.py @@ -0,0 +1,57 @@ +"""Add composite indexes for calendar event queries + +Adds composite indexes on calendar_events to optimize the most common +query patterns: time-range filtering, status filtering, and sync-token +incremental queries. + +Revision ID: a1b2c3d4e5f6 +Revises: merge_heads_20260130 +Create Date: 2026-02-11 12:00:00.000000 + +""" + +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "a1b2c3d4e5f6" +down_revision: Union[str, None] = "merge_heads_20260130" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Composite index for the most common list_events query pattern: + # WHERE calendar_id = X AND status != 'cancelled' AND start_datetime < Y + op.create_index( + "ix_event_cal_status_start", + "calendar_events", + ["calendar_id", "status", "start_datetime"], + unique=False, + ) + + # Composite index for time-range queries (list_events with timeMin/timeMax, freebusy): + # WHERE calendar_id = X AND start_datetime >= Y AND end_datetime <= Z + op.create_index( + "ix_event_cal_start_end", + "calendar_events", + ["calendar_id", "start_datetime", "end_datetime"], + unique=False, + ) + + # Composite index for sync-token incremental queries: + # WHERE calendar_id = X AND updated_at > Y + op.create_index( + "ix_event_cal_updated", + "calendar_events", + ["calendar_id", "updated_at"], + unique=False, + ) + + +def downgrade() -> None: + op.drop_index("ix_event_cal_updated", table_name="calendar_events") + op.drop_index("ix_event_cal_start_end", table_name="calendar_events") + op.drop_index("ix_event_cal_status_start", table_name="calendar_events") diff --git a/backend/src/platform/isolationEngine/environment.py b/backend/src/platform/isolationEngine/environment.py index dd382bf..3483304 100644 --- a/backend/src/platform/isolationEngine/environment.py +++ b/backend/src/platform/isolationEngine/environment.py @@ -44,8 +44,56 @@ def migrate_schema(self, template_schema: str, target_schema: str) -> None: ) meta.create_all(translated) + # Copy GIN / non-standard indexes that MetaData.reflect doesn't capture + self._copy_custom_indexes(template_schema, target_schema) + self._set_replica_identity(target_schema) + def _copy_custom_indexes(self, src_schema: str, dst_schema: str) -> None: + """Copy GIN trigram and other custom indexes from template to target schema.""" + with self.session_manager.base_engine.begin() as conn: + rows = conn.execute( + text( + """ + SELECT indexname, indexdef + FROM pg_indexes + WHERE schemaname = :schema + AND indexdef LIKE '%gin%' + """ + ), + {"schema": src_schema}, + ).fetchall() + for idx_name, idx_def in rows: + # Rewrite the CREATE INDEX to target the new schema + new_def = idx_def.replace(f" ON {src_schema}.", f" ON {dst_schema}.") + # Avoid name collisions by prefixing with target schema + new_idx_name = f"{dst_schema}_{idx_name}" + # Handle both CREATE INDEX and CREATE UNIQUE INDEX + new_def = new_def.replace( + f"CREATE UNIQUE INDEX {idx_name}", + f"CREATE UNIQUE INDEX IF NOT EXISTS {new_idx_name}", + ) + new_def = new_def.replace( + f"CREATE INDEX 
{idx_name}", + f"CREATE INDEX IF NOT EXISTS {new_idx_name}", + ) + try: + # Use a savepoint so a single index failure doesn't abort + # the entire transaction and block subsequent indexes. + nested = conn.begin_nested() + try: + conn.execute(text(new_def)) + nested.commit() + except Exception as exc: + nested.rollback() + logger.warning( + f"Could not copy index {idx_name} to {dst_schema}: {exc}" + ) + except Exception as exc: + logger.warning( + f"Could not copy index {idx_name} to {dst_schema}: {exc}" + ) + def _list_tables(self, conn, schema: str) -> list[str]: rows = conn.execute( text( diff --git a/backend/src/services/box/api/routes.py b/backend/src/services/box/api/routes.py index ef3ca37..8ee5883 100644 --- a/backend/src/services/box/api/routes.py +++ b/backend/src/services/box/api/routes.py @@ -12,6 +12,8 @@ from __future__ import annotations import json +import logging +import time from typing import Any, List, NoReturn, Optional from starlette.requests import Request @@ -33,6 +35,8 @@ bad_request_error, ) +logger = logging.getLogger(__name__) + # Session & User Management @@ -276,6 +280,7 @@ async def get_file_by_id(request: Request) -> Response: "context_info": {"errors": [{"reason": "invalid_parameter", ...}]}} """ try: + t_start = time.perf_counter() session = _session(request) file_id = request.path_params["file_id"] fields = _parse_fields(request) @@ -283,7 +288,8 @@ async def get_file_by_id(request: Request) -> Response: # Get If-None-Match header for conditional GET if_none_match = request.headers.get("if-none-match") - file = ops.get_file_by_id(session, file_id) + file = ops.get_file_by_id(session, file_id, eager_serialize=True) + t_db_ms = (time.perf_counter() - t_start) * 1000 if not file: _box_error( BoxErrorCode.NOT_FOUND, @@ -303,9 +309,18 @@ async def get_file_by_id(request: Request) -> Response: if if_none_match and file.etag == if_none_match: return Response(status_code=status.HTTP_304_NOT_MODIFIED) + t_ser_start = time.perf_counter() file_data = file.to_dict() + t_ser_ms = (time.perf_counter() - t_ser_start) * 1000 filtered_data = _filter_fields(file_data, fields) + t_total_ms = (time.perf_counter() - t_start) * 1000 + if t_total_ms > 20: + logger.info( + f"[PERF] GET /files/{file_id} total={t_total_ms:.0f}ms " + f"db={t_db_ms:.0f}ms serialize={t_ser_ms:.0f}ms" + ) + return _json_response(filtered_data) except BoxAPIError as e: @@ -658,7 +673,8 @@ async def create_folder(request: Request) -> Response: ) folder_with_items = ops.get_folder_by_id( - session, new_folder.id, load_children=True, load_files=True + session, new_folder.id, load_children=True, load_files=True, + eager_serialize=True, ) assert folder_with_items is not None folder_data = folder_with_items.to_dict(include_items=True) @@ -695,6 +711,7 @@ async def get_folder_by_id(request: Request) -> Response: 404 Not Found - if folder doesn't exist """ try: + t_start = time.perf_counter() session = _session(request) folder_id = request.path_params["folder_id"] fields = _parse_fields(request) @@ -702,8 +719,10 @@ async def get_folder_by_id(request: Request) -> Response: # Load children and files for item_collection folder = ops.get_folder_by_id( - session, folder_id, load_children=True, load_files=True + session, folder_id, load_children=True, load_files=True, + eager_serialize=True, ) + t_db_ms = (time.perf_counter() - t_start) * 1000 if not folder: _box_error( BoxErrorCode.NOT_FOUND, @@ -724,9 +743,18 @@ async def get_folder_by_id(request: Request) -> Response: return 
Response(status_code=status.HTTP_304_NOT_MODIFIED) # Box API always returns item_collection with entries for GET /folders/{id} + t_ser_start = time.perf_counter() folder_data = folder.to_dict(include_items=True) + t_ser_ms = (time.perf_counter() - t_ser_start) * 1000 filtered_data = _filter_fields(folder_data, fields) + t_total_ms = (time.perf_counter() - t_start) * 1000 + if t_total_ms > 20: + logger.info( + f"[PERF] GET /folders/{folder_id} total={t_total_ms:.0f}ms " + f"db={t_db_ms:.0f}ms serialize={t_ser_ms:.0f}ms" + ) + return _json_response(filtered_data) except BoxAPIError as e: @@ -810,7 +838,8 @@ async def update_folder_by_id(request: Request) -> Response: # Re-fetch with children and files for item_collection folder_with_items = ops.get_folder_by_id( - session, updated_folder.id, load_children=True, load_files=True + session, updated_folder.id, load_children=True, load_files=True, + eager_serialize=True, ) # folder_with_items should never be None since we just updated it assert folder_with_items is not None @@ -915,6 +944,7 @@ async def list_folder_items(request: Request) -> Response: - Includes order array in response """ try: + t_start = time.perf_counter() session = _session(request) folder_id = request.path_params["folder_id"] fields = _parse_fields(request) @@ -958,6 +988,13 @@ async def list_folder_items(request: Request) -> Response: ], } + t_total_ms = (time.perf_counter() - t_start) * 1000 + if t_total_ms > 20: + logger.info( + f"[PERF] GET /folders/{folder_id}/items total={t_total_ms:.0f}ms " + f"items={result['total_count']}" + ) + return _json_response(response_data) except BoxAPIError as e: @@ -1029,6 +1066,7 @@ async def search_content(request: Request) -> Response: content_types = [item_type] # Search - returns dict with total_count, entries, offset, limit + t_search_start = time.perf_counter() search_result = ops.search_content( session, query=query, @@ -1036,6 +1074,7 @@ async def search_content(request: Request) -> Response: limit=limit, offset=offset, ) + t_search_ms = (time.perf_counter() - t_search_start) * 1000 # Build entries with field filtering entries = [] @@ -1052,6 +1091,11 @@ async def search_content(request: Request) -> Response: "type": "search_results_items", } + logger.info( + f"[PERF] GET /search query='{query}' total={t_search_ms:.0f}ms " + f"results={search_result['total_count']}" + ) + return _json_response(response_data) except BoxAPIError as e: diff --git a/backend/src/services/box/database/operations.py b/backend/src/services/box/database/operations.py index c7e3e72..26a4c1f 100644 --- a/backend/src/services/box/database/operations.py +++ b/backend/src/services/box/database/operations.py @@ -3,7 +3,9 @@ """ import hashlib +import logging import re +import time import uuid from datetime import datetime from typing import Optional, Literal, cast @@ -11,6 +13,8 @@ from sqlalchemy import select, func, or_, and_ from sqlalchemy.orm import Session, joinedload +logger = logging.getLogger(__name__) + from .schema import ( User, Folder, @@ -19,6 +23,7 @@ FileContent, Comment, Task, + TaskAssignment, Hub, HubItem, Collection, @@ -46,6 +51,100 @@ ) +# --------------------------------------------------------------------------- +# Materialized-path helpers +# --------------------------------------------------------------------------- + + +def _compute_folder_path(parent: Folder) -> str: + """Compute the materialized path for a new child of *parent*. 
+ + The convention is: + root (id=0): path = "/" + child of root: path = "/0/" + child of X whose path is "/0/A/": path = "/0/A/X/" + """ + if parent.path is None: + # Parent has no path yet (legacy data) – fall back + return "/" + return f"{parent.path}{parent.id}/" + + +def _prefetch_ancestor_folders( + session: Session, items: list[Folder | File] +) -> dict[str, dict]: + """Bulk-fetch folder mini-dicts for all ancestors referenced in *items*' paths. + + Returns a dict mapping folder ID → mini-dict (type, id, sequence_id, etag, name). + This replaces per-item ``_get_path_collection`` lazy-load queries with a + single ``SELECT ... WHERE id IN (...)`` for the whole result set. + """ + all_ids: set[str] = set() + for item in items: + if item.path and item.path != "/": + for seg in item.path.strip("/").split("/"): + if seg: + all_ids.add(seg) + if not all_ids: + return {} + + rows = session.execute( + select(Folder.id, Folder.sequence_id, Folder.etag, Folder.name).where( + Folder.id.in_(all_ids) + ) + ).all() + return { + r.id: { + "type": "folder", + "id": r.id, + "sequence_id": r.sequence_id, + "etag": r.etag, + "name": r.name, + } + for r in rows + } + + +def _cascade_path_update(session: Session, folder: Folder, old_path: str) -> None: + """After moving *folder*, update paths of all descendants in bulk. + + Works by replacing the *old_path* prefix with the folder's new path + on every descendant folder and file whose ``path`` starts with the + old prefix. Two SQL UPDATEs – one for folders, one for files. + """ + new_prefix = f"{folder.path}{folder.id}/" + old_prefix = f"{old_path}{folder.id}/" + + if old_prefix == new_prefix: + return # Nothing changed + + from sqlalchemy import update + + # Update descendant folders + session.execute( + update(Folder) + .where(Folder.path.startswith(old_prefix)) + .values( + path=func.concat( + new_prefix, + func.substr(Folder.path, len(old_prefix) + 1), + ) + ) + ) + + # Update descendant files + session.execute( + update(File) + .where(File.path.startswith(old_prefix)) + .values( + path=func.concat( + new_prefix, + func.substr(File.path, len(old_prefix) + 1), + ) + ) + ) + + # CONSTANTS # Allowed sort fields for folder/file listing @@ -195,17 +294,41 @@ def get_folder_by_id( *, load_children: bool = False, load_files: bool = False, + eager_serialize: bool = False, ) -> Optional[Folder]: - """Get a folder by ID, optionally with children and files.""" + """Get a folder by ID, optionally with children and files. + + Args: + eager_serialize: If True, eagerly load all relationships needed by + ``to_dict()`` (parent, created_by, modified_by, owned_by) to + avoid lazy-load N+1 queries during serialization. 
+ """ + t_start = time.perf_counter() stmt = select(Folder).where(Folder.id == folder_id) + if eager_serialize: + stmt = stmt.options( + joinedload(Folder.parent), + joinedload(Folder.created_by), + joinedload(Folder.modified_by), + joinedload(Folder.owned_by), + ) + if load_children: stmt = stmt.options(joinedload(Folder.children)) if load_files: # Also load file versions for file_version in to_mini_dict() stmt = stmt.options(joinedload(Folder.files).joinedload(File.versions)) - return session.execute(stmt).scalars().first() + result = session.execute(stmt).scalars().unique().first() + t_ms = (time.perf_counter() - t_start) * 1000 + if t_ms > 10: + logger.info( + f"[PERF] get_folder_by_id({folder_id}) time={t_ms:.0f}ms " + f"load_children={load_children} load_files={load_files} " + f"eager_serialize={eager_serialize}" + ) + return result def get_root_folder(session: Session) -> Optional[Folder]: @@ -290,6 +413,7 @@ def create_folder( name=name, description=description, parent_id=parent_id, + path=_compute_folder_path(parent), created_by_id=user_id, owned_by_id=user_id, modified_by_id=user_id, @@ -298,7 +422,9 @@ def create_folder( ) session.add(folder) session.flush() - return folder + + # Re-fetch with eager loads so to_dict() won't trigger lazy queries + return get_folder_by_id(session, folder.id, eager_serialize=True) or folder def _is_descendant_of( @@ -343,7 +469,7 @@ def update_folder( shared_link: Optional[dict] | _Unset = UNSET, ) -> Folder: """Update a folder's properties.""" - folder = get_folder_by_id(session, folder_id) + folder = get_folder_by_id(session, folder_id, eager_serialize=True) if not folder: raise not_found_error("folder", folder_id) @@ -419,7 +545,12 @@ def update_folder( {"type": "folder", "id": existing.id, "name": existing.name} ], ) + old_path = folder.path or "/" folder.parent_id = parent_id + folder.parent = new_parent # Keep ORM relationship in sync with FK + folder.path = _compute_folder_path(new_parent) + # Cascade path changes to all descendants + _cascade_path_update(session, folder, old_path) if tags is not None: folder.tags = tags @@ -504,6 +635,7 @@ def list_folder_items( Box API returns folders first, then files, each sorted by the specified field. This matches the real Box API behavior. """ + t_start = time.perf_counter() # Validate limit (Box rejects negative values) if limit < 0: raise bad_request_error("Invalid value for 'limit'. Must be non-negative.") @@ -638,6 +770,13 @@ def list_folder_items( files = session.execute(files_query).scalars().unique().all() entries.extend([f.to_mini_dict() for f in files]) + t_ms = (time.perf_counter() - t_start) * 1000 + if t_ms > 10: + logger.info( + f"[PERF] list_folder_items({folder_id}) time={t_ms:.0f}ms " + f"total={total_count} returned={len(entries)}" + ) + return { "total_count": total_count, "entries": entries, @@ -655,14 +794,37 @@ def get_file_by_id( file_id: str, *, load_versions: bool = False, + eager_serialize: bool = False, ) -> Optional[File]: - """Get a file by ID, optionally with versions.""" + """Get a file by ID, optionally with versions. + + Args: + eager_serialize: If True, eagerly load all relationships needed by + ``to_dict()`` (parent, created_by, modified_by, owned_by, + versions) to avoid lazy-load N+1 queries during serialization. 
+ """ + t_start = time.perf_counter() stmt = select(File).where(File.id == file_id) - if load_versions: + if eager_serialize: + stmt = stmt.options( + joinedload(File.parent), + joinedload(File.created_by), + joinedload(File.modified_by), + joinedload(File.owned_by), + joinedload(File.versions), + ) + elif load_versions: stmt = stmt.options(joinedload(File.versions)) - return session.execute(stmt).scalars().first() + result = session.execute(stmt).scalars().unique().first() + t_ms = (time.perf_counter() - t_start) * 1000 + if t_ms > 10: + logger.info( + f"[PERF] get_file_by_id({file_id}) time={t_ms:.0f}ms " + f"load_versions={load_versions} eager_serialize={eager_serialize}" + ) + return result def create_file( @@ -727,6 +889,7 @@ def create_file( description=description, size=len(content), parent_id=parent_id, + path=_compute_folder_path(parent), # same formula: parent.path + parent.id created_by_id=user_id, owned_by_id=user_id, modified_by_id=user_id, @@ -756,7 +919,8 @@ def create_file( file.file_version_id = version.id session.flush() - return file + # Re-fetch with eager loads so to_dict() won't trigger lazy queries + return get_file_by_id(session, file.id, eager_serialize=True) or file def update_file( @@ -790,7 +954,7 @@ def update_file( Matches SDK FilesManager.update_file_by_id parameters. """ - file = get_file_by_id(session, file_id) + file = get_file_by_id(session, file_id, eager_serialize=True) if not file: raise not_found_error("file", file_id) @@ -856,6 +1020,8 @@ def update_file( conflicts=[{"type": "file", "id": existing.id, "name": existing.name}], ) file.parent_id = parent_id + file.parent = new_parent # Keep ORM relationship in sync with FK + file.path = _compute_folder_path(new_parent) # new_parent.path + new_parent.id if tags is not None: file.tags = tags if shared_link is not UNSET: @@ -929,7 +1095,7 @@ def upload_file_version( Matches SDK UploadsManager.upload_file_version parameters. 
""" - file = get_file_by_id(session, file_id, load_versions=True) + file = get_file_by_id(session, file_id, eager_serialize=True) if not file: raise not_found_error("file", file_id) @@ -1101,12 +1267,14 @@ def list_file_comments( comments = ( session.execute( select(Comment) + .options(joinedload(Comment.created_by)) .where(Comment.file_id == file_id) .order_by(Comment.created_at.desc()) .offset(offset) .limit(limit) ) .scalars() + .unique() .all() ) @@ -1171,7 +1339,18 @@ def create_comment( ) session.add(comment) session.flush() - return comment + + # Re-fetch with eager loads so to_dict() won't trigger lazy queries + loaded = ( + session.execute( + select(Comment) + .where(Comment.id == comment.id) + .options(joinedload(Comment.created_by)) + ) + .scalars() + .first() + ) + return loaded or comment def update_comment( @@ -1219,9 +1398,18 @@ def get_task_by_id(session: Session, task_id: str) -> Optional[Task]: """Get a task by ID.""" return ( session.execute( - select(Task).where(Task.id == task_id).options(joinedload(Task.assignments)) + select(Task) + .where(Task.id == task_id) + .options( + joinedload(Task.assignments).joinedload(TaskAssignment.assigned_to), + joinedload(Task.assignments).joinedload(TaskAssignment.assigned_by), + joinedload(Task.assignments).joinedload(TaskAssignment.item).joinedload(File.versions), + joinedload(Task.created_by), + joinedload(Task.item).joinedload(File.versions), + ) ) .scalars() + .unique() .first() ) @@ -1244,12 +1432,17 @@ def list_file_tasks( or 0 ) - # Get tasks + # Get tasks with all relationships eagerly loaded tasks = ( session.execute( select(Task) .where(Task.item_id == file_id) - .options(joinedload(Task.assignments)) + .options( + joinedload(Task.assignments).joinedload(TaskAssignment.assigned_to), + joinedload(Task.assignments).joinedload(TaskAssignment.assigned_by), + joinedload(Task.created_by), + joinedload(Task.item).joinedload(File.versions), + ) .order_by(Task.created_at.desc()) .offset(offset) .limit(limit) @@ -1309,7 +1502,22 @@ def create_task( ) session.add(task) session.flush() - return task + + # Re-fetch with eager loads so to_dict() won't trigger lazy queries + loaded = ( + session.execute( + select(Task) + .where(Task.id == task.id) + .options( + joinedload(Task.created_by), + joinedload(Task.item).joinedload(File.versions), + joinedload(Task.assignments), + ) + ) + .scalars() + .first() + ) + return loaded or task def update_task( @@ -1368,12 +1576,15 @@ def get_hub_by_id( load_items: bool = False, ) -> Optional[Hub]: """Get a hub by ID.""" - stmt = select(Hub).where(Hub.id == hub_id) + stmt = select(Hub).where(Hub.id == hub_id).options( + joinedload(Hub.created_by), + joinedload(Hub.updated_by), + ) if load_items: stmt = stmt.options(joinedload(Hub.items)) - return session.execute(stmt).scalars().first() + return session.execute(stmt).scalars().unique().first() def list_hubs( @@ -1386,12 +1597,17 @@ def list_hubs( # Count total total_count = session.execute(select(func.count()).select_from(Hub)).scalar() or 0 - # Get hubs + # Get hubs with eager-loaded relationships for serialization hubs = ( session.execute( - select(Hub).order_by(Hub.created_at.desc()).offset(offset).limit(limit) + select(Hub) + .options(joinedload(Hub.created_by), joinedload(Hub.updated_by)) + .order_by(Hub.created_at.desc()) + .offset(offset) + .limit(limit) ) .scalars() + .unique() .all() ) @@ -1421,7 +1637,9 @@ def create_hub( ) session.add(hub) session.flush() - return hub + + # Re-fetch with eager loads so to_dict() won't trigger lazy queries + 
return get_hub_by_id(session, hub.id) or hub def update_hub( @@ -1576,7 +1794,9 @@ def search_content( ancestor_folder_ids: Limit to specific folder trees file_extensions: Filter files by extension """ + t_start = time.perf_counter() results = [] + all_items: list[File | Folder] = [] # collected before bulk-prefetch # Default to searching both files and folders if content_types is None: @@ -1584,6 +1804,7 @@ def search_content( # Search files if "file" in content_types: + t_file_q = time.perf_counter() file_query = ( select(File) .options( @@ -1605,18 +1826,26 @@ def search_content( ) if file_extensions: - # Filter by extensions - ext_conditions = [File.name.ilike(f"%.{ext}") for ext in file_extensions] - file_query = file_query.where(or_(*ext_conditions)) + # Filter by extension column (indexed) instead of ILIKE on name + normalized_exts = [ext.lower().lstrip(".") for ext in file_extensions] + file_query = file_query.where( + func.lower(File.extension).in_(normalized_exts) + ) if ancestor_folder_ids: file_query = file_query.where(File.parent_id.in_(ancestor_folder_ids)) files = session.execute(file_query).scalars().unique().all() - results.extend([f.to_search_dict() for f in files]) + t_file_db_ms = (time.perf_counter() - t_file_q) * 1000 + all_items.extend(files) + logger.info( + f"[PERF] search_content files: query_db={t_file_db_ms:.0f}ms " + f"rows={len(files)}" + ) # Search folders if "folder" in content_types: + t_folder_q = time.perf_counter() folder_query = ( select(Folder) .options( @@ -1641,12 +1870,38 @@ def search_content( folder_query = folder_query.where(Folder.parent_id.in_(ancestor_folder_ids)) folders = session.execute(folder_query).scalars().unique().all() - results.extend([f.to_search_dict() for f in folders]) + t_folder_db_ms = (time.perf_counter() - t_folder_q) * 1000 + all_items.extend(folders) + logger.info( + f"[PERF] search_content folders: query_db={t_folder_db_ms:.0f}ms " + f"rows={len(folders)}" + ) + + # Bulk-prefetch ancestor folder mini-dicts (1 query for ALL results) + t_prefetch = time.perf_counter() + ancestor_cache = _prefetch_ancestor_folders(session, all_items) + t_prefetch_ms = (time.perf_counter() - t_prefetch) * 1000 + + # Serialize with pre-fetched cache (zero extra DB queries) + t_ser = time.perf_counter() + results = [item.to_search_dict(ancestor_cache) for item in all_items] + t_ser_ms = (time.perf_counter() - t_ser) * 1000 + if t_prefetch_ms > 5 or t_ser_ms > 5: + logger.info( + f"[PERF] search_content prefetch={t_prefetch_ms:.0f}ms " + f"serialize={t_ser_ms:.0f}ms items={len(all_items)}" + ) # Paginate total_count = len(results) paginated = results[offset : offset + limit] + t_total_ms = (time.perf_counter() - t_start) * 1000 + logger.info( + f"[PERF] search_content TOTAL={t_total_ms:.0f}ms " + f"query='{query}' total_results={total_count} returned={len(paginated)}" + ) + return { "total_count": total_count, "entries": paginated, @@ -1749,6 +2004,7 @@ def get_collection_items( Returns files and folders that have this collection ID in their collections array. 
""" + t_start = time.perf_counter() collection = get_collection_by_id(session, collection_id) if not collection: raise not_found_error("collection", collection_id) @@ -1756,6 +2012,7 @@ def get_collection_items( entries = [] # Find folders in this collection + t_folder_q = time.perf_counter() folders = ( session.execute( select(Folder) @@ -1776,9 +2033,11 @@ def get_collection_items( .unique() .all() ) + t_folder_ms = (time.perf_counter() - t_folder_q) * 1000 entries.extend([f.to_mini_dict() for f in folders]) # Find files in this collection + t_file_q = time.perf_counter() files = ( session.execute( select(File) @@ -1799,11 +2058,19 @@ def get_collection_items( .unique() .all() ) + t_file_ms = (time.perf_counter() - t_file_q) * 1000 entries.extend([f.to_mini_dict() for f in files]) total_count = len(entries) paginated = entries[offset : offset + limit] + t_total_ms = (time.perf_counter() - t_start) * 1000 + logger.info( + f"[PERF] get_collection_items TOTAL={t_total_ms:.0f}ms " + f"collection_id={collection_id} folder_q={t_folder_ms:.0f}ms " + f"file_q={t_file_ms:.0f}ms total_items={total_count}" + ) + return { "total_count": total_count, "entries": paginated, @@ -1830,7 +2097,7 @@ def update_folder_collections( Returns: Updated folder """ - folder = get_folder_by_id(session, folder_id) + folder = get_folder_by_id(session, folder_id, eager_serialize=True) if not folder: raise not_found_error("folder", folder_id) @@ -1878,7 +2145,7 @@ def update_file_collections( Returns: Updated file """ - file = get_file_by_id(session, file_id) + file = get_file_by_id(session, file_id, eager_serialize=True) if not file: raise not_found_error("file", file_id) diff --git a/backend/src/services/box/database/schema.py b/backend/src/services/box/database/schema.py index 9def3f7..ba2a7dd 100644 --- a/backend/src/services/box/database/schema.py +++ b/backend/src/services/box/database/schema.py @@ -256,6 +256,11 @@ class Folder(Base): parent_id: Mapped[Optional[str]] = mapped_column( String(20), ForeignKey("box_folders.id"), nullable=True, index=True ) + # Materialized path: slash-separated ancestor IDs from root to parent. + # Root folder has path="/", children of root have path="/0/", + # deeper folders have path="/0///..." etc. + # This eliminates N+1 queries in _get_path_collection(). + path: Mapped[Optional[str]] = mapped_column(String(500), default="/") # Ownership created_by_id: Mapped[Optional[str]] = mapped_column( @@ -352,7 +357,11 @@ class Folder(Base): ) # Indexes - __table_args__ = (Index("ix_box_folders_parent_name", "parent_id", "name"),) + __table_args__ = ( + Index("ix_box_folders_parent_name", "parent_id", "name"), + Index("ix_box_folders_item_status", "item_status"), + Index("ix_box_folders_status_parent", "item_status", "parent_id"), + ) def to_mini_dict(self) -> dict: """Return minimal folder representation (Folder--Mini).""" @@ -393,8 +402,14 @@ def to_item_dict(self) -> dict: "folder_upload_email": self.folder_upload_email, } - def to_search_dict(self) -> dict: - """Return folder representation for search results (Box search API format).""" + def to_search_dict(self, ancestor_cache: dict | None = None) -> dict: + """Return folder representation for search results (Box search API format). + + Args: + ancestor_cache: Optional pre-fetched {folder_id: mini_dict} map. + When provided, ``_get_path_collection`` skips its + own DB query and uses the cache instead. 
+ """ return { "id": self.id, "type": "folder", @@ -406,7 +421,7 @@ def to_search_dict(self) -> dict: "trashed_at": self.trashed_at.isoformat() if self.trashed_at else None, "modified_at": self.modified_at.isoformat() if self.modified_at else None, "created_at": self.created_at.isoformat() if self.created_at else None, - "path_collection": self._get_path_collection(), + "path_collection": self._get_path_collection(ancestor_cache), "modified_by": self.modified_by.to_mini_dict() if self.modified_by else None, @@ -507,17 +522,68 @@ def to_dict(self, include_items: bool = False) -> dict: return result - def _get_path_collection(self) -> dict: - """Build the path collection from root to this folder.""" + def _get_path_collection(self, ancestor_cache: dict | None = None) -> dict: + """Build the path collection from root to this folder. + + Uses the materialized ``path`` column (e.g. "/0/123/456/") to avoid + N+1 lazy-load queries up the parent chain. + + Args: + ancestor_cache: Optional pre-fetched ``{folder_id: mini_dict}`` + map. When provided, the method does pure dict lookups with + **zero** DB queries. + """ + # Fast path – use materialized path column + if self.path and self.path != "/": + ancestor_ids = [seg for seg in self.path.strip("/").split("/") if seg] + if not ancestor_ids: + return {"total_count": 0, "entries": []} + + # Use pre-fetched cache when available (bulk search path) + if ancestor_cache is not None: + entries = [ + ancestor_cache[aid] + for aid in ancestor_ids + if aid in ancestor_cache + ] + return {"total_count": len(entries), "entries": entries} + + # Single-item fallback – one IN query + from sqlalchemy.orm import object_session + from sqlalchemy import select as sa_select + + session = object_session(self) + if session is not None: + rows = session.execute( + sa_select( + Folder.id, Folder.sequence_id, Folder.etag, Folder.name, + ).where(Folder.id.in_(ancestor_ids)) + ).all() + lookup = {r.id: r for r in rows} + entries = [ + { + "type": "folder", "id": r.id, + "sequence_id": r.sequence_id, + "etag": r.etag, "name": r.name, + } + for aid in ancestor_ids + if (r := lookup.get(aid)) + ] + return {"total_count": len(entries), "entries": entries} + + return {"total_count": 0, "entries": []} + + # path is "/" or None → root folder / legacy data + if self.path == "/": + return {"total_count": 0, "entries": []} + + # Fallback – walk parent chain (legacy data without path) entries = [] current = self.parent while current: entries.insert(0, current.to_mini_dict()) current = current.parent - return { - "total_count": len(entries), - "entries": entries, - } + return {"total_count": len(entries), "entries": entries} def _get_collections_dict(self) -> list: """Build the collections array for this folder. @@ -578,6 +644,10 @@ class File(Base): parent_id: Mapped[Optional[str]] = mapped_column( String(20), ForeignKey("box_folders.id"), index=True ) + # Materialized path: slash-separated ancestor IDs from root to parent folder. + # E.g. "/0/" for files in root, "/0//" for files one level deep. + # This eliminates N+1 queries in _get_path_collection(). 
+ path: Mapped[Optional[str]] = mapped_column(String(500), default="/0/") # Ownership created_by_id: Mapped[Optional[str]] = mapped_column( @@ -702,7 +772,12 @@ class File(Base): tasks: Mapped[List["Task"]] = relationship("Task", back_populates="item") # Indexes - __table_args__ = (Index("ix_box_files_parent_name", "parent_id", "name"),) + __table_args__ = ( + Index("ix_box_files_parent_name", "parent_id", "name"), + Index("ix_box_files_item_status", "item_status"), + Index("ix_box_files_status_parent", "item_status", "parent_id"), + Index("ix_box_files_extension", "extension"), + ) def to_mini_dict(self) -> dict: """Return minimal file representation (File--Mini).""" @@ -746,8 +821,12 @@ def to_item_dict(self) -> dict: "path_collection": self._get_path_collection(), } - def to_search_dict(self) -> dict: - """Return file representation for search results (Box search API format).""" + def to_search_dict(self, ancestor_cache: dict | None = None) -> dict: + """Return file representation for search results (Box search API format). + + Args: + ancestor_cache: Optional pre-fetched {folder_id: mini_dict} map. + """ return { "id": self.id, "type": "file", @@ -759,7 +838,7 @@ def to_search_dict(self) -> dict: "trashed_at": self.trashed_at.isoformat() if self.trashed_at else None, "modified_at": self.modified_at.isoformat() if self.modified_at else None, "created_at": self.created_at.isoformat() if self.created_at else None, - "path_collection": self._get_path_collection(), + "path_collection": self._get_path_collection(ancestor_cache), "modified_by": self.modified_by.to_mini_dict() if self.modified_by else None, @@ -853,8 +932,62 @@ def _get_file_version_dict(self) -> dict | None: return self.versions[0].to_mini_dict() return None - def _get_path_collection(self) -> dict: - """Build the path collection from root to this file's parent.""" + def _get_path_collection(self, ancestor_cache: dict | None = None) -> dict: + """Build the path collection from root to this file's parent. + + Uses the materialized ``path`` column (e.g. "/0/123/456/") to avoid + N+1 lazy-load queries up the parent chain. + + Args: + ancestor_cache: Optional pre-fetched ``{folder_id: mini_dict}`` + map. When provided, the method does pure dict lookups with + **zero** DB queries. 
+ """ + # Fast path – use materialized path column + if self.path and self.path != "/": + ancestor_ids = [seg for seg in self.path.strip("/").split("/") if seg] + if not ancestor_ids: + return {"total_count": 0, "entries": []} + + # Use pre-fetched cache when available (bulk search path) + if ancestor_cache is not None: + entries = [ + ancestor_cache[aid] + for aid in ancestor_ids + if aid in ancestor_cache + ] + return {"total_count": len(entries), "entries": entries} + + # Single-item fallback – one IN query + from sqlalchemy.orm import object_session + from sqlalchemy import select as sa_select + + session = object_session(self) + if session is not None: + rows = session.execute( + sa_select( + Folder.id, Folder.sequence_id, Folder.etag, Folder.name, + ).where(Folder.id.in_(ancestor_ids)) + ).all() + lookup = {r.id: r for r in rows} + entries = [ + { + "type": "folder", "id": r.id, + "sequence_id": r.sequence_id, + "etag": r.etag, "name": r.name, + } + for aid in ancestor_ids + if (r := lookup.get(aid)) + ] + return {"total_count": len(entries), "entries": entries} + + return {"total_count": 0, "entries": []} + + # path is "/" or None + if self.path == "/": + return {"total_count": 0, "entries": []} + + # Fallback – walk parent chain (legacy data without path) entries = [] current = self.parent while current: diff --git a/backend/src/services/calendar/api/methods.py b/backend/src/services/calendar/api/methods.py index bd56dbe..492a4dc 100644 --- a/backend/src/services/calendar/api/methods.py +++ b/backend/src/services/calendar/api/methods.py @@ -9,6 +9,7 @@ import json import logging +import time from datetime import timezone from typing import Any, Callable, Awaitable, Optional from functools import wraps @@ -158,7 +159,7 @@ def parse_watch_expiration(expiration: Any) -> Optional[int]: dt = dt.replace(tzinfo=timezone.utc) # Convert to milliseconds since epoch return int(dt.timestamp() * 1000) - except (ValueError, AttributeError): + except ValueError, AttributeError: pass # Invalid format @@ -181,7 +182,7 @@ def parse_watch_expiration(expiration: Any) -> Optional[int]: def _get_session(request: Request) -> Session: """ Get the database session from request state. - + The IsolationMiddleware sets request.state.db_session to a session that is scoped to the environment's schema. """ @@ -194,26 +195,26 @@ def _get_session(request: Request) -> Session: def get_user_id(request: Request) -> str: """ Extract user ID from request state. - + The IsolationMiddleware sets request.state.impersonate_user_id and request.state.impersonate_email from the environment configuration. - + This follows the same pattern as the Slack API replica. """ impersonate_user_id = getattr(request.state, "impersonate_user_id", None) impersonate_email = getattr(request.state, "impersonate_email", None) - + # First try direct user ID if impersonate_user_id is not None and str(impersonate_user_id).strip() != "": return str(impersonate_user_id) - + # Then try to resolve from email if impersonate_email: session = _get_session(request) user = get_user_by_email(session, impersonate_email) if user is not None: return user.id - + raise UnauthorizedError("Missing user authentication") @@ -225,10 +226,10 @@ def get_user_email(request: Request) -> Optional[str]: def resolve_calendar_id(request: Request, calendar_id: str) -> str: """ Resolve 'primary' to actual calendar ID. - + In Google Calendar, 'primary' resolves to the user's primary calendar, which is typically identified by their email address. - + For the replica, we: 1. 
First try to use the impersonate_email (which matches calendar ID pattern) 2. Fall back to looking up the calendar list entry with primary=True @@ -236,28 +237,32 @@ def resolve_calendar_id(request: Request, calendar_id: str) -> str: """ if calendar_id.lower() != "primary": return calendar_id - + # Try using impersonate_email first (matches Google's pattern) email = get_user_email(request) if email: return email - + # Fall back to looking up the primary calendar list entry session = _get_session(request) user_id = get_user_id(request) - + from ..database import CalendarListEntry from sqlalchemy import select - - primary_entry = session.execute( - select(CalendarListEntry) - .where(CalendarListEntry.user_id == user_id) - .where(CalendarListEntry.primary == True) - ).scalars().first() - + + primary_entry = ( + session.execute( + select(CalendarListEntry) + .where(CalendarListEntry.user_id == user_id) + .where(CalendarListEntry.primary == True) + ) + .scalars() + .first() + ) + if primary_entry: return primary_entry.calendar_id - + # Last resort: use user_id return user_id @@ -290,25 +295,28 @@ def get_if_none_match(request: Request) -> Optional[str]: class InvalidParameterError(Exception): """Raised when a query parameter has an invalid value.""" + def __init__(self, param_name: str, message: str): self.param_name = param_name self.message = message super().__init__(message) -def parse_int_param(params: dict[str, str], name: str, default: int, max_value: Optional[int] = None) -> int: +def parse_int_param( + params: dict[str, str], name: str, default: int, max_value: Optional[int] = None +) -> int: """ Parse an integer query parameter with validation. - + Args: params: Query parameters dict name: Parameter name (e.g., "maxResults") default: Default value if parameter not provided max_value: Maximum allowed value (clamps result) - + Returns: Parsed integer value - + Raises: InvalidParameterError: If value is not a valid integer """ @@ -318,9 +326,9 @@ def parse_int_param(params: dict[str, str], name: str, default: int, max_value: else: try: value = int(raw_value) - except (ValueError, TypeError): + except ValueError, TypeError: raise InvalidParameterError(name, f"{name} must be a valid integer") - + if max_value is not None: value = min(value, max_value) return value @@ -329,14 +337,14 @@ def parse_int_param(params: dict[str, str], name: str, default: int, max_value: def parse_optional_int_param(params: dict[str, str], name: str) -> Optional[int]: """ Parse an optional integer query parameter with validation. 
- + Args: params: Query parameters dict name: Parameter name (e.g., "maxAttendees") - + Returns: Parsed integer value or None if not provided - + Raises: InvalidParameterError: If value is provided but not a valid integer """ @@ -345,7 +353,7 @@ def parse_optional_int_param(params: dict[str, str], name: str) -> Optional[int] return None try: return int(raw_value) - except (ValueError, TypeError): + except ValueError, TypeError: raise InvalidParameterError(name, f"{name} must be a valid integer") @@ -355,19 +363,20 @@ def parse_optional_int_param(params: dict[str, str], name: str) -> Optional[int] def api_handler( - handler: Callable[[Request], Awaitable[JSONResponse]] + handler: Callable[[Request], Awaitable[JSONResponse]], ) -> Callable[[Request], Awaitable[JSONResponse]]: """ Decorator that wraps API handlers with: - Database session access (from IsolationMiddleware) - Error handling and conversion to JSON responses - Consistent response formatting - + The IsolationMiddleware provides: - request.state.db_session: Database session scoped to environment schema - request.state.impersonate_user_id: User ID to impersonate - request.state.impersonate_email: User email to impersonate """ + @wraps(handler) async def wrapper(request: Request) -> JSONResponse: # Get session from middleware (already scoped to environment schema) @@ -389,11 +398,18 @@ async def wrapper(request: Request) -> JSONResponse: }, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, ) - + try: # Alias for backward compatibility with handlers using request.state.db request.state.db = session + t_handler_start = time.perf_counter() response = await handler(request) + t_handler_ms = (time.perf_counter() - t_handler_start) * 1000 + if t_handler_ms > 50: + logger.info( + f"[PERF] Calendar {handler.__name__} took {t_handler_ms:.0f}ms " + f"status={response.status_code}" + ) # Note: Session commit is handled by the IsolationMiddleware context manager return response except CalendarAPIError as e: @@ -453,7 +469,7 @@ async def wrapper(request: Request) -> JSONResponse: status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, ) # Note: Session lifecycle (commit/rollback/close) is handled by IsolationMiddleware - + return wrapper @@ -466,27 +482,27 @@ async def wrapper(request: Request) -> JSONResponse: async def calendars_get(request: Request) -> JSONResponse: """ GET /calendars/{calendarId} - + Returns metadata for a calendar. - + Parameters: - calendarId (path): Calendar identifier - + Headers: - If-None-Match: Return 304 if ETag matches """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] - + # Resolve "primary" to actual calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get calendar calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Check If-None-Match for conditional GET if_none_match = get_if_none_match(request) if if_none_match and etags_match(if_none_match, calendar.etag): @@ -495,7 +511,7 @@ async def calendars_get(request: Request) -> JSONResponse: status_code=status.HTTP_304_NOT_MODIFIED, headers={"ETag": calendar.etag}, ) - + # Serialize and return response_data = serialize_calendar(calendar) return JSONResponse( @@ -509,9 +525,9 @@ async def calendars_get(request: Request) -> JSONResponse: async def calendars_insert(request: Request) -> JSONResponse: """ POST /calendars - + Creates a secondary calendar. 
- + Request body: - summary (required): Title of the calendar - description: Description of the calendar @@ -521,12 +537,12 @@ async def calendars_insert(request: Request) -> JSONResponse: session: Session = request.state.db user_id = get_user_id(request) body = await get_request_body(request) - + # Validate required fields summary = body.get("summary") if not summary: raise RequiredFieldError("summary") - + # Create calendar # Note: conferenceProperties is accepted in request but not stored # as our replica doesn't support Google Meet integration @@ -538,7 +554,7 @@ async def calendars_insert(request: Request) -> JSONResponse: location=body.get("location"), time_zone=body.get("timeZone"), ) - + # Serialize and return response_data = serialize_calendar(calendar) return JSONResponse( @@ -552,44 +568,44 @@ async def calendars_insert(request: Request) -> JSONResponse: async def calendars_update(request: Request) -> JSONResponse: """ PUT /calendars/{calendarId} - + Updates metadata for a calendar (full replacement). - + Parameters: - calendarId (path): Calendar identifier - + Headers: - If-Match: Only update if ETag matches (optional but recommended) - + Request body: Full calendar resource """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] body = await get_request_body(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get existing calendar calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Check ownership if calendar.owner_id != user_id: raise ForbiddenError("You do not have permission to update this calendar") - + # Check If-Match for conditional update if_match = get_if_match(request) if if_match and not etags_match(if_match, calendar.etag): raise PreconditionFailedError("ETag mismatch - calendar was modified") - + # Validate required fields for PUT (full replacement) summary = body.get("summary") if not summary: raise RequiredFieldError("summary") - + # Update calendar # Note: conferenceProperties is accepted in request but not stored # as our replica doesn't support Google Meet integration @@ -602,7 +618,7 @@ async def calendars_update(request: Request) -> JSONResponse: location=body.get("location"), time_zone=body.get("timeZone"), ) - + # Serialize and return response_data = serialize_calendar(calendar) return JSONResponse( @@ -616,39 +632,39 @@ async def calendars_update(request: Request) -> JSONResponse: async def calendars_patch(request: Request) -> JSONResponse: """ PATCH /calendars/{calendarId} - + Updates metadata for a calendar (partial update). 
- + Parameters: - calendarId (path): Calendar identifier - + Headers: - If-Match: Only update if ETag matches (optional but recommended) - + Request body: Partial calendar resource (only fields to update) """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] body = await get_request_body(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get existing calendar calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Check ownership if calendar.owner_id != user_id: raise ForbiddenError("You do not have permission to update this calendar") - + # Check If-Match for conditional update if_match = get_if_match(request) if if_match and not etags_match(if_match, calendar.etag): raise PreconditionFailedError("ETag mismatch - calendar was modified") - + # Build update kwargs - only include fields that are present in body update_kwargs: dict[str, Any] = {} if "summary" in body: @@ -660,7 +676,7 @@ async def calendars_patch(request: Request) -> JSONResponse: if "timeZone" in body: update_kwargs["time_zone"] = body["timeZone"] # Note: conferenceProperties is accepted but not stored - + # Update calendar calendar = update_calendar( session=session, @@ -668,7 +684,7 @@ async def calendars_patch(request: Request) -> JSONResponse: user_id=user_id, **update_kwargs, ) - + # Serialize and return response_data = serialize_calendar(calendar) return JSONResponse( @@ -682,32 +698,32 @@ async def calendars_patch(request: Request) -> JSONResponse: async def calendars_delete(request: Request) -> JSONResponse: """ DELETE /calendars/{calendarId} - + Deletes a secondary calendar. Cannot delete primary calendar. - + Parameters: - calendarId (path): Calendar identifier """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] - + # Cannot delete primary calendar if calendar_id == "primary": raise ForbiddenError("Cannot delete primary calendar") - + # Get existing calendar calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Check ownership if calendar.owner_id != user_id: raise ForbiddenError("You do not have permission to delete this calendar") - + # Delete calendar delete_calendar(session, calendar_id, user_id) - + # Return empty response (204 No Content style, but Google returns 200 with empty) return JSONResponse( content=None, @@ -719,33 +735,33 @@ async def calendars_delete(request: Request) -> JSONResponse: async def calendars_clear(request: Request) -> JSONResponse: """ POST /calendars/{calendarId}/clear - + Clears a primary calendar. Only works on the primary calendar. Removes all events from the calendar. 
- + Parameters: - calendarId (path): Calendar identifier (must be "primary") """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] - + # Normalize to get actual calendar ID actual_calendar_id = resolve_calendar_id(request, calendar_id) - + # Clear only works on primary calendar # For simplicity, we allow clearing any owned calendar calendar = get_calendar(session, actual_calendar_id) if calendar is None: raise CalendarNotFoundError(actual_calendar_id) - + # Check ownership if calendar.owner_id != user_id: raise ForbiddenError("You do not have permission to clear this calendar") - + # Clear all events from calendar clear_calendar(session, actual_calendar_id, user_id) - + return JSONResponse( content=None, status_code=status.HTTP_204_NO_CONTENT, @@ -761,9 +777,9 @@ async def calendars_clear(request: Request) -> JSONResponse: async def calendar_list_list(request: Request) -> JSONResponse: """ GET /users/me/calendarList - + Returns the calendars on the user's calendar list. - + Query Parameters: - maxResults: Maximum entries per page (default 100, max 250) - minAccessRole: Filter by minimum access role @@ -775,7 +791,7 @@ async def calendar_list_list(request: Request) -> JSONResponse: session: Session = request.state.db user_id = get_user_id(request) params = get_query_params(request) - + # Parse query parameters with validation max_results = parse_int_param(params, "maxResults", default=100, max_value=250) min_access_role = params.get("minAccessRole") @@ -786,7 +802,9 @@ async def calendar_list_list(request: Request) -> JSONResponse: # Validate: syncToken and minAccessRole cannot be used together if sync_token and min_access_role: - raise ValidationError("syncToken and minAccessRole cannot be specified together") + raise ValidationError( + "syncToken and minAccessRole cannot be specified together" + ) # List calendar entries entries, next_page_token, next_sync_token = list_calendar_list_entries( @@ -799,10 +817,10 @@ async def calendar_list_list(request: Request) -> JSONResponse: show_hidden=show_hidden, sync_token=sync_token, ) - + # Generate list-level etag based on entries and sync state list_etag = generate_etag(f"{user_id}:{next_sync_token or ''}") - + # Serialize response response_data = serialize_calendar_list( entries=entries, @@ -810,7 +828,7 @@ async def calendar_list_list(request: Request) -> JSONResponse: next_sync_token=next_sync_token, etag=list_etag, ) - + return JSONResponse(content=response_data, status_code=status.HTTP_200_OK) @@ -818,24 +836,24 @@ async def calendar_list_list(request: Request) -> JSONResponse: async def calendar_list_get(request: Request) -> JSONResponse: """ GET /users/me/calendarList/{calendarId} - + Returns a calendar from the user's calendar list. 
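The pageToken/syncToken pair above supports the usual Google-style sync loop: page through a full listing once, keep the nextSyncToken, and later ask only for entries changed since then. A hedged sketch, assuming the list response mirrors Google's `items` / `nextPageToken` / `nextSyncToken` shape (placeholder base URL; auth omitted):

```python
import httpx

BASE = "http://localhost:8000/api/env/ENV_ID/services/calendar"  # placeholder

def full_sync() -> str:
    """Page through the calendar list, returning the sync token for incremental syncs."""
    params: dict[str, str] = {"maxResults": "100"}
    while True:
        page = httpx.get(f"{BASE}/users/me/calendarList", params=params).json()
        for entry in page.get("items", []):
            print(entry.get("id"), entry.get("summary"))
        token = page.get("nextPageToken")
        if not token:
            return page.get("nextSyncToken", "")
        params["pageToken"] = token

# Later: only entries updated since the token was issued come back.
# syncToken cannot be combined with minAccessRole, and an expired token
# should trigger a fresh full sync.
changed = httpx.get(f"{BASE}/users/me/calendarList", params={"syncToken": full_sync()})
```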
- + Parameters: - calendarId (path): Calendar identifier """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] - + # Normalize "primary" to user's primary calendar calendar_id = resolve_calendar_id(request, calendar_id) - + # Get calendar list entry entry = get_calendar_list_entry(session, user_id, calendar_id) if entry is None: raise NotFoundError(f"Calendar {calendar_id} not found in user's calendar list") - + # Check If-None-Match for conditional GET if_none_match = get_if_none_match(request) if if_none_match and etags_match(if_none_match, entry.etag): @@ -844,7 +862,7 @@ async def calendar_list_get(request: Request) -> JSONResponse: status_code=status.HTTP_304_NOT_MODIFIED, headers={"ETag": entry.etag}, ) - + # Serialize and return response_data = serialize_calendar_list_entry(entry) return JSONResponse( @@ -858,32 +876,32 @@ async def calendar_list_get(request: Request) -> JSONResponse: async def calendar_list_insert(request: Request) -> JSONResponse: """ POST /users/me/calendarList - + Inserts an existing calendar into the user's calendar list. - + Query Parameters: - colorRgbFormat: Use RGB colors instead of colorId - + Request body: CalendarListEntry with at least 'id' field """ session: Session = request.state.db user_id = get_user_id(request) body = await get_request_body(request) params = get_query_params(request) - + # The 'id' field in the body is the calendar ID to add calendar_id = body.get("id") if not calendar_id: raise RequiredFieldError("id") - + # Check if calendar exists calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Insert into user's calendar list color_rgb_format = params.get("colorRgbFormat", "").lower() == "true" - + entry = insert_calendar_list_entry( session=session, user_id=user_id, @@ -897,7 +915,7 @@ async def calendar_list_insert(request: Request) -> JSONResponse: default_reminders=body.get("defaultReminders"), notification_settings=body.get("notificationSettings"), ) - + # Serialize and return response_data = serialize_calendar_list_entry(entry) return JSONResponse( @@ -911,12 +929,12 @@ async def calendar_list_insert(request: Request) -> JSONResponse: async def calendar_list_update(request: Request) -> JSONResponse: """ PUT /users/me/calendarList/{calendarId} - + Updates an entry on the user's calendar list (full replacement). 
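Adding an existing calendar to a user's list only requires its id; anything else is an optional per-user override (the override field names here are assumed to match the PATCH handler below). Sketch with placeholder ids; auth omitted:

```python
import httpx

BASE = "http://localhost:8000/api/env/ENV_ID/services/calendar"  # placeholder

resp = httpx.post(
    f"{BASE}/users/me/calendarList",
    json={
        "id": "SECONDARY_CAL_ID",             # must reference an existing calendar
        "summaryOverride": "Design reviews",  # optional per-user display name
    },
)
print(resp.status_code)
```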
- + Parameters: - calendarId (path): Calendar identifier - + Query Parameters: - colorRgbFormat: Use RGB colors instead of colorId """ @@ -925,22 +943,22 @@ async def calendar_list_update(request: Request) -> JSONResponse: calendar_id = request.path_params["calendarId"] body = await get_request_body(request) params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get existing entry entry = get_calendar_list_entry(session, user_id, calendar_id) if entry is None: raise NotFoundError(f"Calendar {calendar_id} not found in user's calendar list") - + # Check If-Match for conditional update if_match = get_if_match(request) if if_match and not etags_match(if_match, entry.etag): raise PreconditionFailedError("ETag mismatch - entry was modified") - + color_rgb_format = params.get("colorRgbFormat", "").lower() == "true" - + # Update entry (full replacement) entry = update_calendar_list_entry( session=session, @@ -955,7 +973,7 @@ async def calendar_list_update(request: Request) -> JSONResponse: default_reminders=body.get("defaultReminders"), notification_settings=body.get("notificationSettings"), ) - + # Serialize and return response_data = serialize_calendar_list_entry(entry) return JSONResponse( @@ -969,12 +987,12 @@ async def calendar_list_update(request: Request) -> JSONResponse: async def calendar_list_patch(request: Request) -> JSONResponse: """ PATCH /users/me/calendarList/{calendarId} - + Updates an entry on the user's calendar list (partial update). - + Parameters: - calendarId (path): Calendar identifier - + Query Parameters: - colorRgbFormat: Use RGB colors instead of colorId """ @@ -983,22 +1001,22 @@ async def calendar_list_patch(request: Request) -> JSONResponse: calendar_id = request.path_params["calendarId"] body = await get_request_body(request) params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get existing entry entry = get_calendar_list_entry(session, user_id, calendar_id) if entry is None: raise NotFoundError(f"Calendar {calendar_id} not found in user's calendar list") - + # Check If-Match for conditional update if_match = get_if_match(request) if if_match and not etags_match(if_match, entry.etag): raise PreconditionFailedError("ETag mismatch - entry was modified") - + color_rgb_format = params.get("colorRgbFormat", "").lower() == "true" - + # Build update kwargs - only include fields present in body update_kwargs: dict[str, Any] = {} if "summaryOverride" in body: @@ -1018,7 +1036,7 @@ async def calendar_list_patch(request: Request) -> JSONResponse: update_kwargs["default_reminders"] = body["defaultReminders"] if "notificationSettings" in body: update_kwargs["notification_settings"] = body["notificationSettings"] - + # Update entry entry = update_calendar_list_entry( session=session, @@ -1026,7 +1044,7 @@ async def calendar_list_patch(request: Request) -> JSONResponse: calendar_id=calendar_id, **update_kwargs, ) - + # Serialize and return response_data = serialize_calendar_list_entry(entry) return JSONResponse( @@ -1040,31 +1058,31 @@ async def calendar_list_patch(request: Request) -> JSONResponse: async def calendar_list_delete(request: Request) -> JSONResponse: """ DELETE /users/me/calendarList/{calendarId} - + Removes a calendar from the user's calendar list. 
- + Parameters: - calendarId (path): Calendar identifier """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get existing entry entry = get_calendar_list_entry(session, user_id, calendar_id) if entry is None: raise NotFoundError(f"Calendar {calendar_id} not found in user's calendar list") - + # Cannot remove primary calendar if entry.primary: raise ForbiddenError("Cannot remove primary calendar from calendar list") - + # Delete entry delete_calendar_list_entry(session, user_id, calendar_id) - + return JSONResponse(content=None, status_code=status.HTTP_204_NO_CONTENT) @@ -1072,25 +1090,27 @@ async def calendar_list_delete(request: Request) -> JSONResponse: async def calendar_list_watch(request: Request) -> JSONResponse: """ POST /users/me/calendarList/watch - + Watch for changes to the user's calendar list. - + Request body: Channel resource """ session: Session = request.state.db user_id = get_user_id(request) body = await get_request_body(request) - + # Validate required fields channel_id = body.get("id") if not channel_id: raise RequiredFieldError("id") - + channel_type = body.get("type") if not channel_type: raise RequiredFieldError("type") if channel_type != "web_hook": - raise ValidationError(f"Invalid channel type: {channel_type}. Must be 'web_hook'.") + raise ValidationError( + f"Invalid channel type: {channel_type}. Must be 'web_hook'." + ) address = body.get("address") if not address: @@ -1115,10 +1135,10 @@ async def calendar_list_watch(request: Request) -> JSONResponse: payload=body.get("payload", False), user_id=user_id, # Track ownership ) - + session.add(channel) session.flush() - + # Return channel info response_data = serialize_channel(channel) return JSONResponse(content=response_data, status_code=status.HTTP_200_OK) @@ -1133,9 +1153,9 @@ async def calendar_list_watch(request: Request) -> JSONResponse: async def events_list(request: Request) -> JSONResponse: """ GET /calendars/{calendarId}/events - + Returns events on the specified calendar. 
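Listing events is the workhorse endpoint; a typical query bounds the time window, expands recurring events into single instances, and orders by start time — which, as validated further down, is only allowed together with singleEvents=true. A sketch assuming the response mirrors Google's `items` shape (placeholder base URL; auth omitted):

```python
import httpx

BASE = "http://localhost:8000/api/env/ENV_ID/services/calendar"  # placeholder

resp = httpx.get(
    f"{BASE}/calendars/primary/events",
    params={
        "timeMin": "2024-01-01T00:00:00Z",  # RFC 3339 window bounds
        "timeMax": "2024-02-01T00:00:00Z",
        "singleEvents": "true",             # expand recurring events into instances
        "orderBy": "startTime",             # only valid together with singleEvents=true
        "maxResults": "50",
    },
)
for event in resp.json().get("items", []):
    print(event.get("id"), event.get("summary"), event.get("start"))
```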
- + Query Parameters: - maxResults: Maximum entries per page (default 250, max 2500) - pageToken: Token for pagination @@ -1156,15 +1176,15 @@ async def events_list(request: Request) -> JSONResponse: user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists and user has access calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Parse query parameters with validation max_results = parse_int_param(params, "maxResults", default=250, max_value=2500) page_token = params.get("pageToken") @@ -1202,7 +1222,9 @@ async def events_list(request: Request) -> JSONResponse: # Validate: orderBy='startTime' requires singleEvents=true if order_by == "startTime" and not single_events: - raise ValidationError("orderBy='startTime' is only available when singleEvents is true") + raise ValidationError( + "orderBy='startTime' is only available when singleEvents is true" + ) if not time_min: time_min = REPLICA_NOW_RFC3339 @@ -1211,8 +1233,9 @@ async def events_list(request: Request) -> JSONResponse: calendar_entry = get_calendar_list_entry(session, user_id, calendar_id) access_role = calendar_entry.access_role.value if calendar_entry else "reader" default_reminders = calendar_entry.default_reminders if calendar_entry else [] - - # List events + + # List events — pass already-fetched calendar to skip redundant DB lookups + t_db_start = time.perf_counter() events, next_page_token, next_sync_token = list_events( session=session, calendar_id=calendar_id, @@ -1228,15 +1251,18 @@ async def events_list(request: Request) -> JSONResponse: sync_token=sync_token, updated_min=updated_min, ical_uid=ical_uid, + _verified_calendar=calendar, ) - + t_db_ms = (time.perf_counter() - t_db_start) * 1000 + # Get user email for self fields user_email = get_user_email(request) - + # Generate list-level etag based on calendar and sync state list_etag = generate_etag(f"{calendar.etag}:{next_sync_token or ''}") - + # Serialize response + t_ser_start = time.perf_counter() response_data = serialize_events_list( events=events, user_email=user_email, @@ -1251,7 +1277,16 @@ async def events_list(request: Request) -> JSONResponse: max_attendees=max_attendees, time_zone=time_zone, ) - + t_ser_ms = (time.perf_counter() - t_ser_start) * 1000 + + t_total_ms = t_db_ms + t_ser_ms + if t_total_ms > 20: + logger.info( + f"[PERF] GET /calendars/{calendar_id}/events " + f"total={t_total_ms:.0f}ms db={t_db_ms:.0f}ms " + f"serialize={t_ser_ms:.0f}ms events={len(events)}" + ) + return JSONResponse(content=response_data, status_code=status.HTTP_200_OK) @@ -1259,13 +1294,13 @@ async def events_list(request: Request) -> JSONResponse: async def events_get(request: Request) -> JSONResponse: """ GET /calendars/{calendarId}/events/{eventId} - + Returns an event based on its Google Calendar ID. 
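The timing code threaded through this handler follows one pattern used across the diff: take perf_counter deltas around the expensive phases and emit a single [PERF] line only when the total crosses a threshold, so the log stays quiet on the fast path. A condensed sketch of that pattern (the helper name and threshold here are illustrative, not the module's own):

```python
import logging
import time

logger = logging.getLogger(__name__)

PERF_THRESHOLD_MS = 20  # only log requests slower than this

def timed_list(fetch, serialize):
    """Run fetch + serialize, logging one [PERF] line when the request is slow."""
    t0 = time.perf_counter()
    rows = fetch()
    db_ms = (time.perf_counter() - t0) * 1000

    t1 = time.perf_counter()
    payload = serialize(rows)
    ser_ms = (time.perf_counter() - t1) * 1000

    total_ms = db_ms + ser_ms
    if total_ms > PERF_THRESHOLD_MS:
        logger.info(
            f"[PERF] total={total_ms:.0f}ms db={db_ms:.0f}ms "
            f"serialize={ser_ms:.0f}ms rows={len(rows)}"
        )
    return payload
```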
- + Parameters: - calendarId (path): Calendar identifier - eventId (path): Event identifier - + Query Parameters: - maxAttendees: Maximum number of attendees to include - timeZone: Time zone for response @@ -1275,15 +1310,15 @@ async def events_get(request: Request) -> JSONResponse: calendar_id = request.path_params["calendarId"] event_id = request.path_params["eventId"] params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get event event = get_event(session, calendar_id, event_id, user_id) if event is None: raise EventNotFoundError(event_id) - + # Check If-None-Match for conditional GET if_none_match = get_if_none_match(request) if if_none_match and etags_match(if_none_match, event.etag): @@ -1292,12 +1327,12 @@ async def events_get(request: Request) -> JSONResponse: status_code=status.HTTP_304_NOT_MODIFIED, headers={"ETag": event.etag}, ) - + # Parse optional parameters max_attendees = parse_optional_int_param(params, "maxAttendees") time_zone = params.get("timeZone") user_email = get_user_email(request) - + # Serialize and return response_data = serialize_event( event=event, @@ -1316,12 +1351,12 @@ async def events_get(request: Request) -> JSONResponse: async def events_insert(request: Request) -> JSONResponse: """ POST /calendars/{calendarId}/events - + Creates an event. - + Parameters: - calendarId (path): Calendar identifier - + Query Parameters: - sendUpdates: Who to send notifications (all, externalOnly, none) - conferenceDataVersion: Conference data version (0 or 1) @@ -1333,15 +1368,15 @@ async def events_insert(request: Request) -> JSONResponse: calendar_id = request.path_params["calendarId"] body = await get_request_body(request) params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Validate required fields start = body.get("start") end = body.get("end") @@ -1349,7 +1384,7 @@ async def events_insert(request: Request) -> JSONResponse: raise RequiredFieldError("start") if not end: raise RequiredFieldError("end") - + # Get user email for creator/organizer user_email = get_user_email(request) or f"{user_id}@calendar.local" @@ -1394,10 +1429,10 @@ async def events_insert(request: Request) -> JSONResponse: recurring_event_id=body.get("recurringEventId"), original_start_time=original_start_time, ) - + # Parse optional response parameters max_attendees = parse_optional_int_param(params, "maxAttendees") - + # Serialize and return response_data = serialize_event( event=event, @@ -1415,13 +1450,13 @@ async def events_insert(request: Request) -> JSONResponse: async def events_update(request: Request) -> JSONResponse: """ PUT /calendars/{calendarId}/events/{eventId} - + Updates an event (full replacement). 
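Creating an event needs little more than the start and end fields the handler validates above; creator and organizer fall back to the impersonated user's email. Minimal sketch — fields beyond start/end are assumed to follow the Google event shape (placeholder base URL; auth omitted):

```python
import httpx

BASE = "http://localhost:8000/api/env/ENV_ID/services/calendar"  # placeholder

event = httpx.post(
    f"{BASE}/calendars/primary/events",
    json={
        "summary": "Design sync",
        "start": {"dateTime": "2024-03-12T10:00:00Z"},  # required
        "end": {"dateTime": "2024-03-12T10:30:00Z"},    # required
        "attendees": [{"email": "teammate@example.com"}],
    },
).json()
print(event.get("id"), event.get("status"))
```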
- + Parameters: - calendarId (path): Calendar identifier - eventId (path): Event identifier - + Query Parameters: - sendUpdates: Who to send notifications (all, externalOnly, none) - conferenceDataVersion: Conference data version (0 or 1) @@ -1434,20 +1469,20 @@ async def events_update(request: Request) -> JSONResponse: event_id = request.path_params["eventId"] body = await get_request_body(request) params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get existing event event = get_event(session, calendar_id, event_id, user_id) if event is None: raise EventNotFoundError(event_id) - + # Check If-Match for conditional update if_match = get_if_match(request) if if_match and not etags_match(if_match, event.etag): raise PreconditionFailedError("ETag mismatch - event was modified") - + # Validate required fields for PUT (full replacement) start = body.get("start") end = body.get("end") @@ -1455,13 +1490,13 @@ async def events_update(request: Request) -> JSONResponse: raise RequiredFieldError("start") if not end: raise RequiredFieldError("end") - + # Get user email user_email = get_user_email(request) or f"{user_id}@calendar.local" - + # Check if this is a recurring event instance base_id, original_time_str = parse_instance_id(event_id) - + if original_time_str: # This is a recurring instance - create/update an exception event = update_recurring_instance( @@ -1518,10 +1553,10 @@ async def events_update(request: Request) -> JSONResponse: anyone_can_add_self=body.get("anyoneCanAddSelf", False), sequence=body.get("sequence"), ) - + # Parse optional response parameters max_attendees = parse_optional_int_param(params, "maxAttendees") - + # Serialize and return response_data = serialize_event( event=event, @@ -1539,13 +1574,13 @@ async def events_update(request: Request) -> JSONResponse: async def events_patch(request: Request) -> JSONResponse: """ PATCH /calendars/{calendarId}/events/{eventId} - + Updates an event (partial update). 
- + Parameters: - calendarId (path): Calendar identifier - eventId (path): Event identifier - + Query Parameters: - sendUpdates: Who to send notifications (all, externalOnly, none) - conferenceDataVersion: Conference data version (0 or 1) @@ -1558,26 +1593,26 @@ async def events_patch(request: Request) -> JSONResponse: event_id = request.path_params["eventId"] body = await get_request_body(request) params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get existing event event = get_event(session, calendar_id, event_id, user_id) if event is None: raise EventNotFoundError(event_id) - + # Check If-Match for conditional update if_match = get_if_match(request) if if_match and not etags_match(if_match, event.etag): raise PreconditionFailedError("ETag mismatch - event was modified") - + # Get user email user_email = get_user_email(request) or f"{user_id}@calendar.local" - + # Build update kwargs - only include fields present in body update_kwargs: dict[str, Any] = {} - + field_mappings = { "summary": "summary", "description": "description", @@ -1602,14 +1637,14 @@ async def events_patch(request: Request) -> JSONResponse: "anyoneCanAddSelf": "anyone_can_add_self", "sequence": "sequence", } - + for json_key, python_key in field_mappings.items(): if json_key in body: update_kwargs[python_key] = body[json_key] - + # Check if this is a recurring event instance base_id, original_time_str = parse_instance_id(event_id) - + if original_time_str: # This is a recurring instance - create/update an exception event = update_recurring_instance( @@ -1629,10 +1664,10 @@ async def events_patch(request: Request) -> JSONResponse: user_id=user_id, **update_kwargs, ) - + # Parse optional response parameters max_attendees = parse_optional_int_param(params, "maxAttendees") - + # Serialize and return response_data = serialize_event( event=event, @@ -1650,14 +1685,14 @@ async def events_patch(request: Request) -> JSONResponse: async def events_delete(request: Request) -> JSONResponse: """ DELETE /calendars/{calendarId}/events/{eventId} - + Deletes an event. For recurring event instances, creates a cancelled exception event. - + Parameters: - calendarId (path): Calendar identifier - eventId (path): Event identifier (can be instance ID like "abc_20180618T100000Z") - + Query Parameters: - sendUpdates: Who to send notifications (all, externalOnly, none) """ @@ -1665,13 +1700,13 @@ async def events_delete(request: Request) -> JSONResponse: user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] event_id = request.path_params["eventId"] - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Check if this is a recurring event instance base_id, original_time_str = parse_instance_id(event_id) - + if original_time_str: # This is a recurring instance - create cancelled exception delete_recurring_instance(session, calendar_id, event_id, user_id) @@ -1681,7 +1716,7 @@ async def events_delete(request: Request) -> JSONResponse: if event is None: raise EventNotFoundError(event_id) delete_event(session, calendar_id, event_id, user_id) - + return JSONResponse(content=None, status_code=status.HTTP_204_NO_CONTENT) @@ -1689,17 +1724,17 @@ async def events_delete(request: Request) -> JSONResponse: async def events_import(request: Request) -> JSONResponse: """ POST /calendars/{calendarId}/events/import - + Imports an event. Used to add a private copy of an existing event. 
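The instance-id convention referenced above ("abc_20180618T100000Z") is the base event id plus the instance's original start time in basic UTC format; deleting such an id cancels just that occurrence rather than the whole series. A hedged client-side sketch of splitting one apart — this mirrors, but is not, the module's own parse_instance_id:

```python
from datetime import datetime, timezone
from typing import Optional

def split_instance_id(event_id: str) -> tuple[str, Optional[datetime]]:
    """Split 'baseId_YYYYMMDDTHHMMSSZ' into the base id and original start time."""
    base, sep, suffix = event_id.rpartition("_")
    if not sep:
        return event_id, None
    try:
        original = datetime.strptime(suffix, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc)
    except ValueError:
        return event_id, None  # not an instance id after all
    return base, original

print(split_instance_id("abc_20180618T100000Z"))
# ('abc', datetime.datetime(2018, 6, 18, 10, 0, tzinfo=datetime.timezone.utc))
```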
Only events with eventType "default" may be imported. - + Parameters: - calendarId (path): Calendar identifier - + Query Parameters: - conferenceDataVersion: Conference data version (0 or 1) - supportsAttachments: Whether attachments are supported - + Request body: Event with iCalUID required """ session: Session = request.state.db @@ -1707,15 +1742,15 @@ async def events_import(request: Request) -> JSONResponse: calendar_id = request.path_params["calendarId"] body = await get_request_body(request) params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Validate required fields for import ical_uid = body.get("iCalUID") if not ical_uid: @@ -1731,11 +1766,13 @@ async def events_import(request: Request) -> JSONResponse: # Validate eventType - only 'default' events may be imported event_type = body.get("eventType", "default") if event_type != "default": - raise ValidationError(f"Only events with eventType 'default' may be imported. Got: '{event_type}'") - + raise ValidationError( + f"Only events with eventType 'default' may be imported. Got: '{event_type}'" + ) + # Get user email user_email = get_user_email(request) or f"{user_id}@calendar.local" - + # Import event event = import_event( session=session, @@ -1757,10 +1794,10 @@ async def events_import(request: Request) -> JSONResponse: transparency=body.get("transparency"), sequence=body.get("sequence", 0), ) - + # Parse optional response parameters max_attendees = parse_optional_int_param(params, "maxAttendees") - + # Serialize and return response_data = serialize_event( event=event, @@ -1778,14 +1815,14 @@ async def events_import(request: Request) -> JSONResponse: async def events_move(request: Request) -> JSONResponse: """ POST /calendars/{calendarId}/events/{eventId}/move - + Moves an event to another calendar (changes organizer). Only default events can be moved. 
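Import differs from insert in that the caller supplies the identity of an event that already exists elsewhere, so iCalUID is mandatory and only plain "default" events are accepted. Sketch with placeholder values; auth omitted:

```python
import httpx

BASE = "http://localhost:8000/api/env/ENV_ID/services/calendar"  # placeholder

imported = httpx.post(
    f"{BASE}/calendars/primary/events/import",
    json={
        "iCalUID": "4f2c8e9a-demo@example.com",        # required for import
        "summary": "Vendor review",
        "start": {"dateTime": "2024-03-14T09:00:00Z"},
        "end": {"dateTime": "2024-03-14T09:45:00Z"},
        "eventType": "default",                        # anything else is rejected
    },
)
print(imported.status_code)
```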
- + Parameters: - calendarId (path): Source calendar identifier - eventId (path): Event identifier - + Query Parameters: - destination (required): Target calendar identifier - sendUpdates: Who to send notifications (all, externalOnly, none) @@ -1795,27 +1832,27 @@ async def events_move(request: Request) -> JSONResponse: calendar_id = request.path_params["calendarId"] event_id = request.path_params["eventId"] params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Get destination calendar (required) destination = params.get("destination") if not destination: raise RequiredFieldError("destination") - + destination = resolve_calendar_id(request, destination) - + # Verify destination calendar exists dest_calendar = get_calendar(session, destination) if dest_calendar is None: raise CalendarNotFoundError(destination) - + # Get existing event event = get_event(session, calendar_id, event_id, user_id) if event is None: raise EventNotFoundError(event_id) - + # Move event event = move_event( session=session, @@ -1824,10 +1861,10 @@ async def events_move(request: Request) -> JSONResponse: destination_calendar_id=destination, user_id=user_id, ) - + # Get user email user_email = get_user_email(request) - + # Serialize and return response_data = serialize_event(event=event, user_email=user_email) return JSONResponse( @@ -1841,12 +1878,12 @@ async def events_move(request: Request) -> JSONResponse: async def events_quick_add(request: Request) -> JSONResponse: """ POST /calendars/{calendarId}/events/quickAdd - + Creates an event from a simple text string. - + Parameters: - calendarId (path): Calendar identifier - + Query Parameters: - text (required): Text describing the event - sendUpdates: Who to send notifications (all, externalOnly, none) @@ -1855,23 +1892,23 @@ async def events_quick_add(request: Request) -> JSONResponse: user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Get text (required) text = params.get("text") if not text: raise RequiredFieldError("text") - + # Get user email user_email = get_user_email(request) or f"{user_id}@calendar.local" - + # Quick add event (parses text to create event) event = quick_add_event( session=session, @@ -1880,7 +1917,7 @@ async def events_quick_add(request: Request) -> JSONResponse: user_email=user_email, text=text, ) - + # Serialize and return response_data = serialize_event(event=event, user_email=user_email) return JSONResponse( @@ -1894,13 +1931,13 @@ async def events_quick_add(request: Request) -> JSONResponse: async def events_instances(request: Request) -> JSONResponse: """ GET /calendars/{calendarId}/events/{eventId}/instances - + Returns instances of the specified recurring event. 
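quickAdd builds an event out of a single text query parameter, which makes it convenient for agents that only have a natural-language description. Sketch (placeholder base URL; auth omitted):

```python
import httpx

BASE = "http://localhost:8000/api/env/ENV_ID/services/calendar"  # placeholder

resp = httpx.post(
    f"{BASE}/calendars/primary/events/quickAdd",
    params={"text": "Coffee with Priya tomorrow at 9am"},  # required; parsed into an event
).json()
print(resp.get("summary"), resp.get("start"))
```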
- + Parameters: - calendarId (path): Calendar identifier - eventId (path): Recurring event identifier - + Query Parameters: - maxResults: Maximum instances per page - pageToken: Token for pagination @@ -1913,20 +1950,20 @@ async def events_instances(request: Request) -> JSONResponse: calendar_id = request.path_params["calendarId"] event_id = request.path_params["eventId"] params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Get existing event (must be recurring) event = get_event(session, calendar_id, event_id, user_id) if event is None: raise EventNotFoundError(event_id) - + # Parse query parameters with validation max_results = parse_int_param(params, "maxResults", default=250, max_value=2500) page_token = params.get("pageToken") @@ -1953,16 +1990,16 @@ async def events_instances(request: Request) -> JSONResponse: show_deleted=show_deleted, original_start=original_start, ) - + # Get user email user_email = get_user_email(request) - + # Generate etag for instances list list_etag = generate_etag(f"instances:{event_id}:{next_page_token or ''}") - + # Get default reminders from calendar entry default_reminders = calendar_entry.default_reminders if calendar_entry else [] - + # Serialize response response_data = serialize_event_instances( events=instances, @@ -1978,7 +2015,7 @@ async def events_instances(request: Request) -> JSONResponse: max_attendees=max_attendees, time_zone=time_zone, ) - + return JSONResponse(content=response_data, status_code=status.HTTP_200_OK) @@ -1986,37 +2023,39 @@ async def events_instances(request: Request) -> JSONResponse: async def events_watch(request: Request) -> JSONResponse: """ POST /calendars/{calendarId}/events/watch - + Watch for changes to Events resources. - + Parameters: - calendarId (path): Calendar identifier - + Request body: Channel resource """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] body = await get_request_body(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Validate required fields channel_id = body.get("id") if not channel_id: raise RequiredFieldError("id") - + channel_type = body.get("type") if not channel_type: raise RequiredFieldError("type") if channel_type != "web_hook": - raise ValidationError(f"Invalid channel type: {channel_type}. Must be 'web_hook'.") + raise ValidationError( + f"Invalid channel type: {channel_type}. Must be 'web_hook'." + ) address = body.get("address") if not address: @@ -2040,10 +2079,10 @@ async def events_watch(request: Request) -> JSONResponse: payload=body.get("payload", False), user_id=user_id, # Track ownership ) - + session.add(channel) session.flush() - + # Return channel info response_data = serialize_channel(channel) return JSONResponse(content=response_data, status_code=status.HTTP_200_OK) @@ -2058,12 +2097,12 @@ async def events_watch(request: Request) -> JSONResponse: async def acl_list(request: Request) -> JSONResponse: """ GET /calendars/{calendarId}/acl - + Returns the rules in the access control list for the calendar. 
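All of the watch endpoints accept the same Channel resource: a caller-chosen id, type "web_hook", and a delivery address. A sketch of the round trip from opening a channel to stopping it — assuming the serialized channel exposes a Google-style resourceId (placeholder URLs; auth omitted):

```python
import httpx
import uuid

BASE = "http://localhost:8000/api/env/ENV_ID/services/calendar"  # placeholder

channel_id = str(uuid.uuid4())
channel = httpx.post(
    f"{BASE}/calendars/primary/events/watch",
    json={
        "id": channel_id,                                 # caller-chosen channel id (required)
        "type": "web_hook",                               # the only accepted type
        "address": "https://example.com/hooks/calendar",  # delivery endpoint (required)
    },
).json()

# Stopping needs both the channel id and the server-assigned resourceId
httpx.post(
    f"{BASE}/channels/stop",
    json={"id": channel_id, "resourceId": channel.get("resourceId")},
)
```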
- + Parameters: - calendarId (path): Calendar identifier - + Query Parameters: - maxResults: Maximum entries per page (default 100, max 250) - pageToken: Token for pagination @@ -2074,21 +2113,21 @@ async def acl_list(request: Request) -> JSONResponse: user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] params = get_query_params(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Parse query parameters with validation max_results = parse_int_param(params, "maxResults", default=100, max_value=250) page_token = params.get("pageToken") show_deleted = params.get("showDeleted", "").lower() == "true" sync_token = params.get("syncToken") - + # List ACL rules rules, next_page_token, next_sync_token = list_acl_rules( session=session, @@ -2099,10 +2138,10 @@ async def acl_list(request: Request) -> JSONResponse: show_deleted=show_deleted, sync_token=sync_token, ) - + # Generate etag for the list list_etag = generate_etag(f"acl:{calendar_id}:{len(rules)}") - + # Serialize response response_data = serialize_acl_list( rules=rules, @@ -2110,7 +2149,7 @@ async def acl_list(request: Request) -> JSONResponse: next_sync_token=next_sync_token, etag=list_etag, ) - + return JSONResponse(content=response_data, status_code=status.HTTP_200_OK) @@ -2118,9 +2157,9 @@ async def acl_list(request: Request) -> JSONResponse: async def acl_get(request: Request) -> JSONResponse: """ GET /calendars/{calendarId}/acl/{ruleId} - + Returns an access control rule. - + Parameters: - calendarId (path): Calendar identifier - ruleId (path): ACL rule identifier @@ -2129,20 +2168,20 @@ async def acl_get(request: Request) -> JSONResponse: user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] rule_id = request.path_params["ruleId"] - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Get ACL rule rule = get_acl_rule(session, calendar_id, rule_id, user_id) if rule is None: raise AclNotFoundError(rule_id) - + # Check If-None-Match for conditional GET if_none_match = get_if_none_match(request) if if_none_match and etags_match(if_none_match, rule.etag): @@ -2151,7 +2190,7 @@ async def acl_get(request: Request) -> JSONResponse: status_code=status.HTTP_304_NOT_MODIFIED, headers={"ETag": rule.etag}, ) - + # Serialize and return response_data = serialize_acl_rule(rule) return JSONResponse( @@ -2165,61 +2204,63 @@ async def acl_get(request: Request) -> JSONResponse: async def acl_insert(request: Request) -> JSONResponse: """ POST /calendars/{calendarId}/acl - + Creates an access control rule. 
- + Parameters: - calendarId (path): Calendar identifier - + Query Parameters: - sendNotifications: Whether to send notifications (default True) - + Request body: AclRule with role and scope required """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] body = await get_request_body(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists and user is owner calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + if calendar.owner_id != user_id: - raise ForbiddenError("You do not have permission to modify ACL for this calendar") - + raise ForbiddenError( + "You do not have permission to modify ACL for this calendar" + ) + # Validate required fields role = body.get("role") if not role: raise RequiredFieldError("role") - + # Validate role value matches Google Calendar API valid_roles = {"none", "freeBusyReader", "reader", "writer", "owner"} if role not in valid_roles: raise ValidationError( f"Invalid role value: '{role}'. Must be one of: {', '.join(sorted(valid_roles))}", - field="role" + field="role", ) - + scope = body.get("scope") if not scope: raise RequiredFieldError("scope") - + scope_type = scope.get("type") if not scope_type: raise RequiredFieldError("scope.type") - + # Support both "value" and "emailAddress" (some agents use the wrong field name) scope_value = scope.get("value") or scope.get("emailAddress") - + # Validate that scope.value is required for non-default scope types if scope_type != "default" and not scope_value: raise RequiredFieldError("scope.value") - + # Create ACL rule rule = create_acl_rule( session=session, @@ -2229,7 +2270,7 @@ async def acl_insert(request: Request) -> JSONResponse: scope_type=scope_type, scope_value=scope_value, ) - + # Serialize and return response_data = serialize_acl_rule(rule) return JSONResponse( @@ -2243,13 +2284,13 @@ async def acl_insert(request: Request) -> JSONResponse: async def acl_update(request: Request) -> JSONResponse: """ PUT /calendars/{calendarId}/acl/{ruleId} - + Updates an access control rule (full replacement). 
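Granting access comes down to a role plus a scope, and the handler above tolerates agents that send scope.emailAddress instead of scope.value. Sketch (placeholder base URL; auth omitted):

```python
import httpx

BASE = "http://localhost:8000/api/env/ENV_ID/services/calendar"  # placeholder

rule = httpx.post(
    f"{BASE}/calendars/primary/acl",
    json={
        "role": "reader",  # one of: none, freeBusyReader, reader, writer, owner
        "scope": {
            "type": "user",                  # scope.value required for non-default types
            "value": "teammate@example.com",
        },
    },
)
print(rule.status_code, rule.json())
```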
- + Parameters: - calendarId (path): Calendar identifier - ruleId (path): ACL rule identifier - + Query Parameters: - sendNotifications: Whether to send notifications (default True) """ @@ -2258,41 +2299,43 @@ async def acl_update(request: Request) -> JSONResponse: calendar_id = request.path_params["calendarId"] rule_id = request.path_params["ruleId"] body = await get_request_body(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists and user is owner calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + if calendar.owner_id != user_id: - raise ForbiddenError("You do not have permission to modify ACL for this calendar") - + raise ForbiddenError( + "You do not have permission to modify ACL for this calendar" + ) + # Get existing rule rule = get_acl_rule(session, calendar_id, rule_id, user_id) if rule is None: raise AclNotFoundError(rule_id) - + # Check If-Match for conditional update if_match = get_if_match(request) if if_match and not etags_match(if_match, rule.etag): raise PreconditionFailedError("ETag mismatch - ACL rule was modified") - + # Validate required fields role = body.get("role") if not role: raise RequiredFieldError("role") - + # Validate role value matches Google Calendar API valid_roles = {"none", "freeBusyReader", "reader", "writer", "owner"} if role not in valid_roles: raise ValidationError( f"Invalid role value: '{role}'. Must be one of: {', '.join(sorted(valid_roles))}", - field="role" + field="role", ) - + # Update ACL rule rule = update_acl_rule( session=session, @@ -2301,7 +2344,7 @@ async def acl_update(request: Request) -> JSONResponse: user_id=user_id, role=role, ) - + # Serialize and return response_data = serialize_acl_rule(rule) return JSONResponse( @@ -2315,13 +2358,13 @@ async def acl_update(request: Request) -> JSONResponse: async def acl_patch(request: Request) -> JSONResponse: """ PATCH /calendars/{calendarId}/acl/{ruleId} - + Updates an access control rule (partial update). - + Parameters: - calendarId (path): Calendar identifier - ruleId (path): ACL rule identifier - + Query Parameters: - sendNotifications: Whether to send notifications (default True) """ @@ -2330,28 +2373,30 @@ async def acl_patch(request: Request) -> JSONResponse: calendar_id = request.path_params["calendarId"] rule_id = request.path_params["ruleId"] body = await get_request_body(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists and user is owner calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + if calendar.owner_id != user_id: - raise ForbiddenError("You do not have permission to modify ACL for this calendar") - + raise ForbiddenError( + "You do not have permission to modify ACL for this calendar" + ) + # Get existing rule rule = get_acl_rule(session, calendar_id, rule_id, user_id) if rule is None: raise AclNotFoundError(rule_id) - + # Check If-Match for conditional update if_match = get_if_match(request) if if_match and not etags_match(if_match, rule.etag): raise PreconditionFailedError("ETag mismatch - ACL rule was modified") - + # Build update kwargs - only include fields present in body update_kwargs: dict[str, Any] = {} if "role" in body: @@ -2361,10 +2406,10 @@ async def acl_patch(request: Request) -> JSONResponse: if role not in valid_roles: raise ValidationError( f"Invalid role value: '{role}'. 
Must be one of: {', '.join(sorted(valid_roles))}", - field="role" + field="role", ) update_kwargs["role"] = role - + # Update ACL rule rule = update_acl_rule( session=session, @@ -2373,7 +2418,7 @@ async def acl_patch(request: Request) -> JSONResponse: user_id=user_id, **update_kwargs, ) - + # Serialize and return response_data = serialize_acl_rule(rule) return JSONResponse( @@ -2387,9 +2432,9 @@ async def acl_patch(request: Request) -> JSONResponse: async def acl_delete(request: Request) -> JSONResponse: """ DELETE /calendars/{calendarId}/acl/{ruleId} - + Deletes an access control rule. - + Parameters: - calendarId (path): Calendar identifier - ruleId (path): ACL rule identifier @@ -2398,26 +2443,28 @@ async def acl_delete(request: Request) -> JSONResponse: user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] rule_id = request.path_params["ruleId"] - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists and user is owner calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + if calendar.owner_id != user_id: - raise ForbiddenError("You do not have permission to modify ACL for this calendar") - + raise ForbiddenError( + "You do not have permission to modify ACL for this calendar" + ) + # Get existing rule rule = get_acl_rule(session, calendar_id, rule_id, user_id) if rule is None: raise AclNotFoundError(rule_id) - + # Delete ACL rule delete_acl_rule(session, calendar_id, rule_id, user_id) - + return JSONResponse(content=None, status_code=status.HTTP_204_NO_CONTENT) @@ -2425,37 +2472,39 @@ async def acl_delete(request: Request) -> JSONResponse: async def acl_watch(request: Request) -> JSONResponse: """ POST /calendars/{calendarId}/acl/watch - + Watch for changes to ACL resources. - + Parameters: - calendarId (path): Calendar identifier - + Request body: Channel resource """ session: Session = request.state.db user_id = get_user_id(request) calendar_id = request.path_params["calendarId"] body = await get_request_body(request) - + # Normalize calendar ID calendar_id = resolve_calendar_id(request, calendar_id) - + # Verify calendar exists calendar = get_calendar(session, calendar_id) if calendar is None: raise CalendarNotFoundError(calendar_id) - + # Validate required fields channel_id = body.get("id") if not channel_id: raise RequiredFieldError("id") - + channel_type = body.get("type") if not channel_type: raise RequiredFieldError("type") if channel_type != "web_hook": - raise ValidationError(f"Invalid channel type: {channel_type}. Must be 'web_hook'.") + raise ValidationError( + f"Invalid channel type: {channel_type}. Must be 'web_hook'." + ) address = body.get("address") if not address: @@ -2479,10 +2528,10 @@ async def acl_watch(request: Request) -> JSONResponse: params=body.get("params"), payload=body.get("payload", False), ) - + session.add(channel) session.flush() - + # Return channel info response_data = serialize_channel(channel) return JSONResponse(content=response_data, status_code=status.HTTP_200_OK) @@ -2497,36 +2546,36 @@ async def acl_watch(request: Request) -> JSONResponse: async def channels_stop(request: Request) -> JSONResponse: """ POST /channels/stop - + Stop watching resources through this channel. 
- + Request body: Channel resource with id and resourceId required """ session: Session = request.state.db user_id = get_user_id(request) body = await get_request_body(request) - + # Validate required fields channel_id = body.get("id") if not channel_id: raise RequiredFieldError("id") - + resource_id = body.get("resourceId") if not resource_id: raise RequiredFieldError("resourceId") - + # Get channel channel = get_channel(session, channel_id, resource_id) if channel is None: raise ChannelNotFoundError(channel_id) - + # Validate ownership - only the user who created the channel can stop it if channel.user_id is not None and channel.user_id != user_id: raise ForbiddenError("You do not have permission to stop this channel") - + # Delete channel delete_channel(session, channel_id, resource_id) - + return JSONResponse(content=None, status_code=status.HTTP_204_NO_CONTENT) @@ -2539,7 +2588,7 @@ async def channels_stop(request: Request) -> JSONResponse: async def colors_get(request: Request) -> JSONResponse: """ GET /colors - + Returns the color definitions for calendars and events. """ # Colors are static - return predefined Google Calendar colors @@ -2556,26 +2605,27 @@ async def colors_get(request: Request) -> JSONResponse: async def freebusy_query(request: Request) -> JSONResponse: """ POST /freeBusy - + Returns free/busy information for a set of calendars. - + Request body: FreeBusyRequest with timeMin, timeMax, items required """ session: Session = request.state.db user_id = get_user_id(request) body = await get_request_body(request) - + # Validate required fields time_min = body.get("timeMin") if not time_min: raise RequiredFieldError("timeMin") - + time_max = body.get("timeMax") if not time_max: raise RequiredFieldError("timeMax") - + # Validate time range - timeMin must be before timeMax from dateutil import parser as date_parser + try: min_dt = date_parser.parse(time_min) max_dt = date_parser.parse(time_max) @@ -2583,19 +2633,20 @@ async def freebusy_query(request: Request) -> JSONResponse: raise ValidationError("timeMax must be after timeMin", field="timeMax") except ValueError: raise ValidationError("Invalid datetime format", field="timeMin") - + items = body.get("items", []) - + # Extract calendar IDs from items (keep original IDs, query_free_busy handles resolution) calendar_ids = [] for item in items: cal_id = item.get("id") if cal_id: calendar_ids.append(cal_id) - + # Query free/busy information # Note: groupExpansionMax and calendarExpansionMax are accepted but not used # as our replica doesn't support group expansion + t_fb_start = time.perf_counter() result = query_free_busy( session=session, user_id=user_id, @@ -2604,7 +2655,12 @@ async def freebusy_query(request: Request) -> JSONResponse: calendar_ids=calendar_ids, time_zone=body.get("timeZone"), ) - + t_fb_ms = (time.perf_counter() - t_fb_start) * 1000 + if t_fb_ms > 20: + logger.info( + f"[PERF] POST /freeBusy total={t_fb_ms:.0f}ms calendars={len(calendar_ids)}" + ) + # query_free_busy already returns formatted response return JSONResponse(content=result, status_code=status.HTTP_200_OK) @@ -2618,9 +2674,9 @@ async def freebusy_query(request: Request) -> JSONResponse: async def settings_list(request: Request) -> JSONResponse: """ GET /users/me/settings - + Returns all user settings for the authenticated user. 
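A free/busy query bundles the time window and the calendars of interest into one POST body; timeMin must precede timeMax or the handler rejects it. Sketch, assuming the response mirrors Google's calendars/busy shape (placeholder ids; auth omitted):

```python
import httpx

BASE = "http://localhost:8000/api/env/ENV_ID/services/calendar"  # placeholder

fb = httpx.post(
    f"{BASE}/freeBusy",
    json={
        "timeMin": "2024-03-11T00:00:00Z",  # required, must be before timeMax
        "timeMax": "2024-03-15T00:00:00Z",  # required
        "timeZone": "UTC",
        "items": [{"id": "primary"}, {"id": "SECONDARY_CAL_ID"}],
    },
).json()
for cal_id, info in fb.get("calendars", {}).items():
    print(cal_id, info.get("busy"))
```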
- + Query Parameters: - maxResults: Maximum entries per page (default 100, max 250) - pageToken: Token for pagination @@ -2629,12 +2685,12 @@ async def settings_list(request: Request) -> JSONResponse: session: Session = request.state.db user_id = get_user_id(request) params = get_query_params(request) - + # Parse query parameters with validation max_results = parse_int_param(params, "maxResults", default=100, max_value=250) page_token = params.get("pageToken") sync_token = params.get("syncToken") - + # List settings settings, next_page_token, next_sync_token = list_settings( session=session, @@ -2643,10 +2699,10 @@ async def settings_list(request: Request) -> JSONResponse: page_token=page_token, sync_token=sync_token, ) - + # Generate list-level etag based on user and sync state list_etag = generate_etag(f"settings:{user_id}:{next_sync_token or ''}") - + # Serialize response response_data = serialize_settings_list( settings=settings, @@ -2654,7 +2710,7 @@ async def settings_list(request: Request) -> JSONResponse: next_sync_token=next_sync_token, etag=list_etag, ) - + return JSONResponse(content=response_data, status_code=status.HTTP_200_OK) @@ -2662,16 +2718,16 @@ async def settings_list(request: Request) -> JSONResponse: async def settings_get(request: Request) -> JSONResponse: """ GET /users/me/settings/{setting} - + Returns a single user setting. - + Parameters: - setting (path): The id of the user setting """ session: Session = request.state.db user_id = get_user_id(request) setting_id = request.path_params["setting"] - + # Default settings that should be returned even if not explicitly stored default_settings = { "timezone": "UTC", @@ -2687,7 +2743,7 @@ async def settings_get(request: Request) -> JSONResponse: "autoAddHangouts": "false", "remindOnRespondedEventsOnly": "false", } - + # Try to get setting from database try: setting = get_setting(session, user_id, setting_id) @@ -2700,17 +2756,20 @@ def __init__(self, sid: str, val: str): self.setting_id = sid # Matches serialize_setting expectation self.value = val self.etag = generate_etag(f"setting:{sid}:{val}") + setting = VirtualSetting(setting_id, default_settings[setting_id]) else: # Re-raise for unknown settings raise SettingNotFoundError(setting_id) - + # Serialize and return response_data = serialize_setting(setting) return JSONResponse( content=response_data, status_code=status.HTTP_200_OK, - headers={"ETag": setting.etag} if hasattr(setting, 'etag') and setting.etag else {}, + headers={"ETag": setting.etag} + if hasattr(setting, "etag") and setting.etag + else {}, ) @@ -2718,25 +2777,27 @@ def __init__(self, sid: str, val: str): async def settings_watch(request: Request) -> JSONResponse: """ POST /users/me/settings/watch - + Watch for changes to Settings resources. - + Request body: Channel resource """ session: Session = request.state.db user_id = get_user_id(request) body = await get_request_body(request) - + # Validate required fields channel_id = body.get("id") if not channel_id: raise RequiredFieldError("id") - + channel_type = body.get("type") if not channel_type: raise RequiredFieldError("type") if channel_type != "web_hook": - raise ValidationError(f"Invalid channel type: {channel_type}. Must be 'web_hook'.") + raise ValidationError( + f"Invalid channel type: {channel_type}. Must be 'web_hook'." 
+ ) address = body.get("address") if not address: @@ -2760,10 +2821,10 @@ async def settings_watch(request: Request) -> JSONResponse: payload=body.get("payload", False), user_id=user_id, # Track ownership ) - + session.add(channel) session.flush() - + # Return channel info response_data = serialize_channel(channel) return JSONResponse(content=response_data, status_code=status.HTTP_200_OK) @@ -3029,14 +3090,12 @@ async def settings_handler(request: Request) -> JSONResponse: calendar_routes = [ # POST /calendars - Create a new secondary calendar Route("/calendars", calendars_insert, methods=["POST"]), - # GET/PUT/PATCH/DELETE /calendars/{calendarId} Route( "/calendars/{calendarId}", calendar_by_id_handler, methods=["GET", "PUT", "PATCH", "DELETE"], ), - # POST /calendars/{calendarId}/clear - Clear primary calendar Route( "/calendars/{calendarId}/clear", @@ -3053,14 +3112,12 @@ async def settings_handler(request: Request) -> JSONResponse: calendar_list_handler, methods=["GET", "POST"], ), - # POST /users/me/calendarList/watch - must come before {calendarId} Route( "/users/me/calendarList/watch", calendar_list_watch, methods=["POST"], ), - # GET/PUT/PATCH/DELETE /users/me/calendarList/{calendarId} Route( "/users/me/calendarList/{calendarId}", @@ -3077,42 +3134,36 @@ async def settings_handler(request: Request) -> JSONResponse: events_handler, methods=["GET", "POST"], ), - # POST /calendars/{calendarId}/events/import - must come before {eventId} Route( "/calendars/{calendarId}/events/import", events_import, methods=["POST"], ), - # POST /calendars/{calendarId}/events/quickAdd - must come before {eventId} Route( "/calendars/{calendarId}/events/quickAdd", events_quick_add, methods=["POST"], ), - # POST /calendars/{calendarId}/events/watch - must come before {eventId} Route( "/calendars/{calendarId}/events/watch", events_watch, methods=["POST"], ), - # GET /calendars/{calendarId}/events/{eventId}/instances Route( "/calendars/{calendarId}/events/{eventId}/instances", events_instances, methods=["GET"], ), - # POST /calendars/{calendarId}/events/{eventId}/move Route( "/calendars/{calendarId}/events/{eventId}/move", events_move, methods=["POST"], ), - # GET/PUT/PATCH/DELETE /calendars/{calendarId}/events/{eventId} Route( "/calendars/{calendarId}/events/{eventId}", @@ -3129,14 +3180,12 @@ async def settings_handler(request: Request) -> JSONResponse: acl_handler, methods=["GET", "POST"], ), - # POST /calendars/{calendarId}/acl/watch - must come before {ruleId} Route( "/calendars/{calendarId}/acl/watch", acl_watch, methods=["POST"], ), - # GET/PUT/PATCH/DELETE /calendars/{calendarId}/acl/{ruleId} Route( "/calendars/{calendarId}/acl/{ruleId}", @@ -3183,14 +3232,12 @@ async def settings_handler(request: Request) -> JSONResponse: settings_handler, methods=["GET"], ), - # POST /users/me/settings/watch - must come before {setting} Route( "/users/me/settings/watch", settings_watch, methods=["POST"], ), - # GET /users/me/settings/{setting} Route( "/users/me/settings/{setting}", diff --git a/backend/src/services/calendar/database/operations.py b/backend/src/services/calendar/database/operations.py index a5f8e67..426ec3b 100644 --- a/backend/src/services/calendar/database/operations.py +++ b/backend/src/services/calendar/database/operations.py @@ -2,11 +2,12 @@ # CRUD operations for all Calendar API resources import logging +import time from datetime import datetime, timezone, timedelta from typing import Any, Optional logger = logging.getLogger(__name__) -from sqlalchemy.orm import Session +from sqlalchemy.orm 
import Session, selectinload, joinedload from sqlalchemy import select, and_, or_, func, update, delete from sqlalchemy.exc import IntegrityError @@ -131,9 +132,7 @@ def get_user(session: Session, user_id: str) -> Optional[User]: def get_user_by_email(session: Session, email: str) -> Optional[User]: """Get a user by email.""" - return session.execute( - select(User).where(User.email == email) - ).scalar_one_or_none() + return session.execute(select(User).where(User.email == email)).scalar_one_or_none() def _create_default_settings(session: Session, user_id: str) -> None: @@ -236,12 +235,16 @@ def get_calendar( user_id: Optional[str] = None, ) -> Calendar: """Get a calendar by ID, optionally resolving 'primary'.""" + t_start = time.perf_counter() if calendar_id == "primary" and user_id: user = session.get(User, user_id) if user: calendar_id = user.email calendar = session.get(Calendar, calendar_id) + t_ms = (time.perf_counter() - t_start) * 1000 + if t_ms > 10: + logger.info(f"[PERF] get_calendar({calendar_id}) time={t_ms:.0f}ms") if calendar is None or calendar.deleted: raise CalendarNotFoundError(calendar_id) @@ -426,6 +429,7 @@ def list_calendar_list_entries( Returns: (entries, next_page_token, next_sync_token) """ + t_start = time.perf_counter() # Handle sync token if sync_token: token_record = session.execute( @@ -442,15 +446,21 @@ def list_calendar_list_entries( raise SyncTokenExpiredError() # Return only items updated since token was created - query = select(CalendarListEntry).where( - and_( - CalendarListEntry.user_id == user_id, - CalendarListEntry.updated_at > token_record.snapshot_time, + query = ( + select(CalendarListEntry) + .options(joinedload(CalendarListEntry.calendar)) + .where( + and_( + CalendarListEntry.user_id == user_id, + CalendarListEntry.updated_at > token_record.snapshot_time, + ) ) ) else: - query = select(CalendarListEntry).where( - CalendarListEntry.user_id == user_id + query = ( + select(CalendarListEntry) + .options(joinedload(CalendarListEntry.calendar)) + .where(CalendarListEntry.user_id == user_id) ) # Apply filters @@ -481,7 +491,7 @@ def list_calendar_list_entries( offset, _ = PageToken.decode(page_token) query = query.offset(offset).limit(max_results + 1) - entries = list(session.execute(query).scalars().all()) + entries = list(session.execute(query).unique().scalars().all()) # Check if there are more results next_page_token = None @@ -495,6 +505,12 @@ def list_calendar_list_entries( # Only generate sync token for initial full sync next_sync_token = _create_sync_token(session, user_id, "calendarList") + t_ms = (time.perf_counter() - t_start) * 1000 + if t_ms > 10: + logger.info( + f"[PERF] list_calendar_list_entries({user_id}) time={t_ms:.0f}ms " + f"entries={len(entries)}" + ) return entries, next_page_token, next_sync_token @@ -576,6 +592,7 @@ def create_event( **kwargs: Any, ) -> Event: """Create a new event.""" + t_start_perf = time.perf_counter() calendar = get_calendar(session, calendar_id, user_id) _check_calendar_access(session, calendar.id, user_id, AccessRole.writer) @@ -607,7 +624,9 @@ def create_event( # Use provided values if present, otherwise default to current user # This matches Google Calendar API behavior where imports preserve original organizer organizer_email = kwargs.pop("organizer_email", None) or user.email - organizer_display_name = kwargs.pop("organizer_display_name", None) or user.display_name + organizer_display_name = ( + kwargs.pop("organizer_display_name", None) or user.display_name + ) organizer_self = organizer_email == 
user.email event = Event( @@ -663,6 +682,12 @@ def create_event( session.rollback() raise DuplicateError(f"Event already exists: {event_id}") + t_ms = (time.perf_counter() - t_start_perf) * 1000 + if t_ms > 10: + logger.info( + f"[PERF] create_event({calendar_id}, {event_id}) time={t_ms:.0f}ms " + f"attendees={len(attendees) if attendees else 0}" + ) return event @@ -675,25 +700,26 @@ def get_event( ) -> Event: """ Get an event by ID, including recurring event instances. - + This function handles three cases: 1. Regular event: Returns the event directly 2. Persisted exception: Returns the exception event 3. Virtual instance: Creates and returns a virtual Event object - + Args: session: Database session calendar_id: Calendar ID event_id: Event ID (may be an instance ID like "abc123_20180618T100000Z") user_id: User ID for access check time_zone: Optional timezone for response formatting - + Returns: Event object (may be virtual for recurring instances) - + Raises: EventNotFoundError: If event not found or cancelled """ + t_start = time.perf_counter() calendar = get_calendar(session, calendar_id, user_id) _check_calendar_access(session, calendar.id, user_id, AccessRole.reader) @@ -702,25 +728,30 @@ def get_event( if event is not None and event.calendar_id == calendar.id: if event.status == EventStatus.cancelled: raise EventNotFoundError(event_id) + t_ms = (time.perf_counter() - t_start) * 1000 + if t_ms > 10: + logger.info( + f"[PERF] get_event({calendar_id}, {event_id}) time={t_ms:.0f}ms" + ) return event - + # Check if this is a recurring instance ID base_id, original_time_str = parse_instance_id(event_id) if not original_time_str: # Not an instance ID and not found as regular event raise EventNotFoundError(event_id) - + # Get the master recurring event master = session.get(Event, base_id) if master is None or master.calendar_id != calendar.id or not master.recurrence: raise EventNotFoundError(event_id) - + if master.status == EventStatus.cancelled: raise EventNotFoundError(event_id) - + # Parse the original start time original_dt = parse_original_start_time(original_time_str) - + # Check for a cancelled exception for this instance cancelled = session.execute( select(Event).where( @@ -730,14 +761,14 @@ def get_event( ) ) ).scalar_one_or_none() - + if cancelled: raise EventNotFoundError(event_id) - + # Verify this instance exists in the recurrence time_min = original_dt - timedelta(minutes=1) time_max = original_dt + timedelta(minutes=1) - + instance_dates = expand_recurrence( recurrence=master.recurrence, start=master.start_datetime, @@ -745,7 +776,7 @@ def get_event( time_max=time_max, max_instances=10, ) - + # Check if the original_dt is in the expanded instances instance_found = False for inst_dt in instance_dates: @@ -754,15 +785,21 @@ def get_event( inst_dt = inst_dt.replace(tzinfo=timezone.utc) else: inst_dt = inst_dt.astimezone(timezone.utc) - + if abs((inst_dt - original_dt).total_seconds()) < 60: # Within 1 minute instance_found = True break - + if not instance_found: raise EventNotFoundError(event_id) # Create virtual instance with attendees inherited from master + t_ms = (time.perf_counter() - t_start) * 1000 + if t_ms > 10: + logger.info( + f"[PERF] get_event({calendar_id}, {event_id}) time={t_ms:.0f}ms " + f"(virtual instance)" + ) return _create_virtual_instance(master, original_dt, event_id, master.attendees) @@ -781,14 +818,23 @@ def list_events( show_deleted: bool = False, sync_token: Optional[str] = None, ical_uid: Optional[str] = None, + _verified_calendar: 
Optional[Calendar] = None, ) -> tuple[list[Event], Optional[str], Optional[str]]: """ List events from a calendar. Returns: (events, next_page_token, next_sync_token) + + Args: + _verified_calendar: If the caller has already fetched and verified access + to the calendar, pass it here to avoid redundant DB lookups. """ - calendar = get_calendar(session, calendar_id, user_id) - _check_calendar_access(session, calendar.id, user_id, AccessRole.reader) + t_start = time.perf_counter() + if _verified_calendar is not None: + calendar = _verified_calendar + else: + calendar = get_calendar(session, calendar_id, user_id) + _check_calendar_access(session, calendar.id, user_id, AccessRole.reader) # Handle sync token if sync_token: @@ -806,14 +852,22 @@ def list_events( if token_record is None or token_record.expires_at < calendar_now(): raise SyncTokenExpiredError() - query = select(Event).where( - and_( - Event.calendar_id == calendar.id, - Event.updated_at > token_record.snapshot_time, + query = ( + select(Event) + .options(selectinload(Event.attendees)) + .where( + and_( + Event.calendar_id == calendar.id, + Event.updated_at > token_record.snapshot_time, + ) ) ) else: - query = select(Event).where(Event.calendar_id == calendar.id) + query = ( + select(Event) + .options(selectinload(Event.attendees)) + .where(Event.calendar_id == calendar.id) + ) # Apply filters if not show_deleted: @@ -870,7 +924,7 @@ def list_events( # Add the recurrence predicate to get only recurring masters recurring_query = query.where(Event.recurrence != None) # noqa: E711 recurring_masters = list(session.execute(recurring_query).scalars().all()) - + # Exclude recurring masters from main query (we'll merge expanded instances) query = query.where(Event.recurrence == None) # noqa: E711 @@ -886,49 +940,51 @@ def list_events( if single_events and recurring_masters: from ..core.utils import expand_recurrence, format_rfc3339, parse_rfc3339 from datetime import timedelta - + # Parse page_token offset BEFORE expansion so we know how many instances to generate offset = 0 if page_token: offset, _ = PageToken.decode(page_token) - + # Calculate how many instances we need: offset + max_results + 1 (for next page check) instances_needed = offset + max_results + 1 - + # Get all non-recurring events first (no pagination yet) all_events = list(session.execute(query).scalars().all()) - + # Determine time bounds for expansion now = calendar_now() min_dt = parse_rfc3339(time_min) if time_min else now - timedelta(days=30) max_dt = parse_rfc3339(time_max) if time_max else now + timedelta(days=365) - + # Ensure timezone-aware if min_dt.tzinfo is None: min_dt = min_dt.replace(tzinfo=timezone.utc) if max_dt.tzinfo is None: max_dt = max_dt.replace(tzinfo=timezone.utc) - + # Expand each recurring master into instances for master in recurring_masters: if not master.start_datetime or not master.recurrence: continue - + start_dt = master.start_datetime if start_dt.tzinfo is None: start_dt = start_dt.replace(tzinfo=timezone.utc) - + # Calculate duration duration = timedelta(hours=1) if master.end_datetime and master.start_datetime: duration = master.end_datetime - master.start_datetime - + # Get all exceptions for this master event - exceptions_query = select(Event).where( - Event.recurring_event_id == master.id + exceptions_query = ( + select(Event) + .options(selectinload(Event.attendees)) + .where(Event.recurring_event_id == master.id) ) exceptions = list(session.execute(exceptions_query).scalars().all()) - + # Build a set of exception original start 
times (for excluding from virtual instances) exception_times: set[str] = set() for exc in exceptions: @@ -937,8 +993,8 @@ def list_events( exc_dt = parse_rfc3339(exc.original_start_time["dateTime"]) if exc_dt.tzinfo is None: exc_dt = exc_dt.replace(tzinfo=timezone.utc) - exception_times.add(exc_dt.strftime('%Y%m%dT%H%M%SZ')) - + exception_times.add(exc_dt.strftime("%Y%m%dT%H%M%SZ")) + # Add exception events to results (modified or cancelled if show_deleted) for exc in exceptions: if exc.status == EventStatus.cancelled and not show_deleted: @@ -950,7 +1006,7 @@ def list_events( exc_start = exc_start.replace(tzinfo=timezone.utc) if exc_start >= min_dt and exc_start < max_dt: all_events.append(exc) - + try: instance_dates = expand_recurrence( recurrence=master.recurrence, @@ -977,7 +1033,7 @@ def list_events( inst_start = inst_start.replace(tzinfo=timezone.utc) # Skip if there's an exception for this instance - inst_time_str = inst_start.strftime('%Y%m%dT%H%M%SZ') + inst_time_str = inst_start.strftime("%Y%m%dT%H%M%SZ") if inst_time_str in exception_times: continue @@ -1002,12 +1058,21 @@ def list_events( organizer_display_name=master.organizer_display_name, organizer_profile_id=master.organizer_profile_id, organizer_self=master.organizer_self, - start={"dateTime": format_rfc3339(inst_start), "timeZone": master.start.get("timeZone", "UTC")}, - end={"dateTime": format_rfc3339(inst_end), "timeZone": master.end.get("timeZone", "UTC")}, + start={ + "dateTime": format_rfc3339(inst_start), + "timeZone": master.start.get("timeZone", "UTC"), + }, + end={ + "dateTime": format_rfc3339(inst_end), + "timeZone": master.end.get("timeZone", "UTC"), + }, start_datetime=inst_start, end_datetime=inst_end, recurring_event_id=master.id, - original_start_time={"dateTime": format_rfc3339(inst_start), "timeZone": master.start.get("timeZone", "UTC")}, + original_start_time={ + "dateTime": format_rfc3339(inst_start), + "timeZone": master.start.get("timeZone", "UTC"), + }, sequence=master.sequence, etag=generate_etag(f"{master.id}:{inst_start.isoformat()}"), html_link=master.html_link, @@ -1039,7 +1104,7 @@ def list_events( instance.attendees.append(virtual_attendee) all_events.append(instance) - + # Helper to normalize datetime to timezone-aware for comparison def _normalize_dt(dt: Optional[datetime]) -> datetime: if dt is None: @@ -1052,18 +1117,20 @@ def _normalize_dt(dt: Optional[datetime]) -> datetime: if order_by == "startTime": all_events.sort(key=lambda e: (_normalize_dt(e.start_datetime), e.id)) elif order_by == "updated": - all_events.sort(key=lambda e: (_normalize_dt(e.updated_at), e.id), reverse=True) + all_events.sort( + key=lambda e: (_normalize_dt(e.updated_at), e.id), reverse=True + ) else: all_events.sort(key=lambda e: (_normalize_dt(e.start_datetime), e.id)) - + # Apply pagination to combined results (offset already decoded above) - paginated_events = all_events[offset:offset + max_results + 1] - + paginated_events = all_events[offset : offset + max_results + 1] + next_page_token = None if len(paginated_events) > max_results: paginated_events = paginated_events[:max_results] next_page_token = PageToken.encode(offset + max_results) - + events = paginated_events else: # Standard pagination for non-single_events queries @@ -1087,6 +1154,12 @@ def _normalize_dt(dt: Optional[datetime]) -> datetime: session, user_id, "events", resource_id=calendar.id ) + t_ms = (time.perf_counter() - t_start) * 1000 + if t_ms > 10: + logger.info( + f"[PERF] list_events({calendar_id}) time={t_ms:.0f}ms " + 
f"events={len(events)} single_events={single_events} q={q!r}" + ) return events, next_page_token, next_sync_token @@ -1140,9 +1213,7 @@ def update_event( # Handle attendees update if "attendees" in kwargs and kwargs["attendees"] is not None: # Remove existing attendees - session.execute( - delete(EventAttendee).where(EventAttendee.event_id == event_id) - ) + session.execute(delete(EventAttendee).where(EventAttendee.event_id == event_id)) # Add new attendees user = session.get(User, user_id) for idx, attendee_data in enumerate(kwargs["attendees"]): @@ -1307,29 +1378,29 @@ def _get_master_event_for_instance( ) -> tuple[Optional[Event], Optional[str], Optional[datetime]]: """ Get the master event for a recurring instance. - + Args: session: Database session calendar_id: Calendar ID instance_id: Instance ID (may include time suffix) user_id: User ID for access check - + Returns: Tuple of (master_event, original_time_str, original_datetime) or (None, None, None) """ base_id, original_time_str = parse_instance_id(instance_id) - + if not original_time_str: return None, None, None - + # Get the master event master = session.get(Event, base_id) if master is None or master.calendar_id != calendar_id: return None, None, None - + if not master.recurrence: return None, None, None - + original_dt = parse_original_start_time(original_time_str) return master, original_time_str, original_dt @@ -1344,10 +1415,10 @@ def update_recurring_instance( ) -> Event: """ Update a single instance of a recurring event. - + Creates a persisted exception event with the modifications. If an exception already exists for this instance, updates it. - + Args: session: Database session calendar_id: Calendar ID @@ -1355,32 +1426,32 @@ def update_recurring_instance( user_id: User ID user_email: User's email address **kwargs: Fields to update - + Returns: The created/updated exception event """ calendar = get_calendar(session, calendar_id, user_id) _check_calendar_access(session, calendar.id, user_id, AccessRole.writer) - + # Check if an exception already exists existing = session.get(Event, instance_id) if existing and existing.calendar_id == calendar.id: # Update existing exception return update_event(session, calendar_id, instance_id, user_id, **kwargs) - + # Get master event info master, original_time_str, original_dt = _get_master_event_for_instance( session, calendar.id, instance_id, user_id ) - + if not master or not original_dt: raise EventNotFoundError(instance_id) - + # Validate that this instance date is valid for the recurrence # (not excluded by EXDATE and within the recurrence pattern) time_min = original_dt - timedelta(minutes=1) time_max = original_dt + timedelta(minutes=1) - + instance_dates = expand_recurrence( recurrence=master.recurrence, start=master.start_datetime, @@ -1388,7 +1459,7 @@ def update_recurring_instance( time_max=time_max, max_instances=10, ) - + # Check if the original_dt is in the expanded instances instance_found = False for inst_dt in instance_dates: @@ -1397,37 +1468,43 @@ def update_recurring_instance( inst_dt = inst_dt.replace(tzinfo=timezone.utc) else: inst_dt = inst_dt.astimezone(timezone.utc) - + if abs((inst_dt - original_dt).total_seconds()) < 60: # Within 1 minute instance_found = True break - + if not instance_found: raise EventNotFoundError(instance_id) - + # Calculate default start/end for this instance duration = timedelta(hours=1) if master.end_datetime and master.start_datetime: duration = master.end_datetime - master.start_datetime - + tz = master.start.get("timeZone", "UTC") 
- + # Use provided start/end or calculate from original - new_start = kwargs.get("start", { - "dateTime": format_rfc3339(original_dt), - "timeZone": tz, - }) - new_end = kwargs.get("end", { - "dateTime": format_rfc3339(original_dt + duration), - "timeZone": master.end.get("timeZone", tz), - }) - + new_start = kwargs.get( + "start", + { + "dateTime": format_rfc3339(original_dt), + "timeZone": tz, + }, + ) + new_end = kwargs.get( + "end", + { + "dateTime": format_rfc3339(original_dt + duration), + "timeZone": master.end.get("timeZone", tz), + }, + ) + # Build originalStartTime original_start_time = build_original_start_time(original_dt, tz) - + # Get user for creator info user = session.get(User, user_id) - + # Create exception event exception = Event( id=instance_id, @@ -1450,8 +1527,12 @@ def update_recurring_instance( organizer_id=master.organizer_id, organizer_email=master.organizer_email, organizer_display_name=master.organizer_display_name, - creator_self=master.creator_email == user_email if master.creator_email else False, - organizer_self=master.organizer_email == user_email if master.organizer_email else False, + creator_self=master.creator_email == user_email + if master.creator_email + else False, + organizer_self=master.organizer_email == user_email + if master.organizer_email + else False, # Status and visibility status=EventStatus.confirmed, visibility=kwargs.get("visibility", master.visibility), @@ -1461,18 +1542,26 @@ def update_recurring_instance( hangout_link=kwargs.get("hangout_link", master.hangout_link), conference_data=kwargs.get("conference_data", master.conference_data), reminders=kwargs.get("reminders", master.reminders), - extended_properties=kwargs.get("extended_properties", master.extended_properties), + extended_properties=kwargs.get( + "extended_properties", master.extended_properties + ), # Guest permissions - guests_can_invite_others=kwargs.get("guests_can_invite_others", master.guests_can_invite_others), + guests_can_invite_others=kwargs.get( + "guests_can_invite_others", master.guests_can_invite_others + ), guests_can_modify=kwargs.get("guests_can_modify", master.guests_can_modify), - guests_can_see_other_guests=kwargs.get("guests_can_see_other_guests", master.guests_can_see_other_guests), - anyone_can_add_self=kwargs.get("anyone_can_add_self", master.anyone_can_add_self), + guests_can_see_other_guests=kwargs.get( + "guests_can_see_other_guests", master.guests_can_see_other_guests + ), + anyone_can_add_self=kwargs.get( + "anyone_can_add_self", master.anyone_can_add_self + ), # Generate etag etag=generate_etag(f"{instance_id}:{calendar_now().isoformat()}"), ) - + session.add(exception) - + # Handle attendees - if provided use them, otherwise inherit from master attendees = kwargs.get("attendees") if attendees is not None: @@ -1492,10 +1581,14 @@ def update_recurring_instance( session.add(attendee) else: # No attendees provided - inherit from master event - master_attendees = session.execute( - select(EventAttendee).where(EventAttendee.event_id == master.id) - ).scalars().all() - + master_attendees = ( + session.execute( + select(EventAttendee).where(EventAttendee.event_id == master.id) + ) + .scalars() + .all() + ) + for master_att in master_attendees: attendee = EventAttendee( event_id=instance_id, @@ -1509,7 +1602,7 @@ def update_recurring_instance( additional_guests=master_att.additional_guests, ) session.add(attendee) - + session.flush() return exception @@ -1522,10 +1615,10 @@ def delete_recurring_instance( ) -> None: """ Delete a single instance of a 
recurring event. - + Creates a cancelled exception event. If an exception already exists, marks it as cancelled. - + Args: session: Database session calendar_id: Calendar ID @@ -1534,7 +1627,7 @@ def delete_recurring_instance( """ calendar = get_calendar(session, calendar_id, user_id) _check_calendar_access(session, calendar.id, user_id, AccessRole.writer) - + # Check if an exception already exists existing = session.get(Event, instance_id) if existing and existing.calendar_id == calendar.id: @@ -1543,19 +1636,19 @@ def delete_recurring_instance( existing.updated_at = calendar_now() existing.etag = generate_etag(f"{instance_id}:cancelled") return - + # Get master event info master, original_time_str, original_dt = _get_master_event_for_instance( session, calendar.id, instance_id, user_id ) - + if not master or not original_dt: raise EventNotFoundError(instance_id) - + # Validate that this instance date is valid for the recurrence time_min = original_dt - timedelta(minutes=1) time_max = original_dt + timedelta(minutes=1) - + instance_dates = expand_recurrence( recurrence=master.recurrence, start=master.start_datetime, @@ -1563,7 +1656,7 @@ def delete_recurring_instance( time_max=time_max, max_instances=10, ) - + # Check if the original_dt is in the expanded instances instance_found = False for inst_dt in instance_dates: @@ -1571,24 +1664,24 @@ def delete_recurring_instance( inst_dt = inst_dt.replace(tzinfo=timezone.utc) else: inst_dt = inst_dt.astimezone(timezone.utc) - + if abs((inst_dt - original_dt).total_seconds()) < 60: instance_found = True break - + if not instance_found: raise EventNotFoundError(instance_id) - + # Calculate default times for this instance duration = timedelta(hours=1) if master.end_datetime and master.start_datetime: duration = master.end_datetime - master.start_datetime - + tz = master.start.get("timeZone", "UTC") - + # Build originalStartTime original_start_time = build_original_start_time(original_dt, tz) - + # Create cancelled exception event exception = Event( id=instance_id, @@ -1616,7 +1709,7 @@ def delete_recurring_instance( status=EventStatus.cancelled, # Key difference - cancelled status etag=generate_etag(f"{instance_id}:cancelled"), ) - + session.add(exception) session.flush() @@ -1629,48 +1722,48 @@ def get_or_create_instance( ) -> Optional[Event]: """ Get an event, including virtual recurring instances. - + This function handles three cases: 1. Regular event: Returns the event directly 2. Persisted exception: Returns the exception event 3. 
Virtual instance: Creates and returns a virtual Event object - + Args: session: Database session calendar_id: Calendar ID instance_id: Event or instance ID user_id: User ID for access check - + Returns: Event object (may be virtual for instances) or None if not found """ calendar = get_calendar(session, calendar_id, user_id) _check_calendar_access(session, calendar.id, user_id, AccessRole.reader) - + # First, try to find the event directly (handles regular events and exceptions) event = session.get(Event, instance_id) if event and event.calendar_id == calendar.id: if event.status == EventStatus.cancelled: return None return event - + # Check if this is an instance ID base_id, original_time_str = parse_instance_id(instance_id) if not original_time_str: # Not an instance ID and not found as regular event return None - + # Get the master event master = session.get(Event, base_id) if not master or master.calendar_id != calendar.id or not master.recurrence: return None - + if master.status == EventStatus.cancelled: return None - + # Parse the original start time original_dt = parse_original_start_time(original_time_str) - + # Verify this date is valid for the recurrence (not excluded) # Check for a cancelled exception cancelled = session.execute( @@ -1681,14 +1774,14 @@ def get_or_create_instance( ) ) ).scalar_one_or_none() - + if cancelled: return None - + # Expand recurrence to verify this instance exists time_min = original_dt - timedelta(minutes=1) time_max = original_dt + timedelta(minutes=1) - + instance_dates = expand_recurrence( recurrence=master.recurrence, start=master.start_datetime, @@ -1696,7 +1789,7 @@ def get_or_create_instance( time_max=time_max, max_instances=10, ) - + # Check if the original_dt is in the expanded instances instance_found = False for inst_dt in instance_dates: @@ -1705,11 +1798,11 @@ def get_or_create_instance( inst_dt = inst_dt.replace(tzinfo=timezone.utc) else: inst_dt = inst_dt.astimezone(timezone.utc) - + if abs((inst_dt - original_dt).total_seconds()) < 60: # Within 1 minute instance_found = True break - + if not instance_found: return None @@ -2011,7 +2104,7 @@ def get_event_instances( exc_dt = parse_rfc3339(exc.original_start_time["dateTime"]) if exc_dt.tzinfo is None: exc_dt = exc_dt.replace(tzinfo=timezone.utc) - exception_times.add(exc_dt.strftime('%Y%m%dT%H%M%SZ')) + exception_times.add(exc_dt.strftime("%Y%m%dT%H%M%SZ")) # Collect all instances (virtual + exceptions) all_instances = [] @@ -2042,7 +2135,9 @@ def get_event_instances( # Log and return empty if recurrence expansion fails # Keep broad exception to maintain graceful degradation (matching Google's behavior) logger.warning( - "Failed to expand recurrence for event %s in get_instances: %s", master.id, e + "Failed to expand recurrence for event %s in get_instances: %s", + master.id, + e, ) return [], None, None @@ -2061,7 +2156,7 @@ def get_event_instances( inst_start = inst_start.replace(tzinfo=timezone.utc) # Skip if there's a persisted exception for this instance - inst_time_str = inst_start.strftime('%Y%m%dT%H%M%SZ') + inst_time_str = inst_start.strftime("%Y%m%dT%H%M%SZ") if inst_time_str in exception_times: continue @@ -2088,12 +2183,21 @@ def get_event_instances( organizer_display_name=master.organizer_display_name, organizer_profile_id=master.organizer_profile_id, organizer_self=master.organizer_self, - start={"dateTime": format_rfc3339(inst_start), "timeZone": master.start.get("timeZone", "UTC")}, - end={"dateTime": format_rfc3339(inst_end), "timeZone": master.end.get("timeZone", 
"UTC")}, + start={ + "dateTime": format_rfc3339(inst_start), + "timeZone": master.start.get("timeZone", "UTC"), + }, + end={ + "dateTime": format_rfc3339(inst_end), + "timeZone": master.end.get("timeZone", "UTC"), + }, start_datetime=inst_start, end_datetime=inst_end, recurring_event_id=master.id, - original_start_time={"dateTime": format_rfc3339(inst_start), "timeZone": master.start.get("timeZone", "UTC")}, + original_start_time={ + "dateTime": format_rfc3339(inst_start), + "timeZone": master.start.get("timeZone", "UTC"), + }, sequence=master.sequence, etag=generate_etag(f"{master.id}:{inst_start.isoformat()}"), html_link=master.html_link, @@ -2131,7 +2235,7 @@ def get_event_instances( target_dt = parse_rfc3339(original_start) if target_dt.tzinfo is None: target_dt = target_dt.replace(tzinfo=timezone.utc) - target_str = target_dt.strftime('%Y%m%dT%H%M%SZ') + target_str = target_dt.strftime("%Y%m%dT%H%M%SZ") filtered_instances = [] for inst in all_instances: @@ -2139,13 +2243,16 @@ def get_event_instances( inst_dt = parse_rfc3339(inst.original_start_time["dateTime"]) if inst_dt.tzinfo is None: inst_dt = inst_dt.replace(tzinfo=timezone.utc) - if inst_dt.strftime('%Y%m%dT%H%M%SZ') == target_str: + if inst_dt.strftime("%Y%m%dT%H%M%SZ") == target_str: filtered_instances.append(inst) all_instances = filtered_instances # Sort by start time all_instances.sort( - key=lambda e: (e.start_datetime or datetime.min.replace(tzinfo=timezone.utc), e.id) + key=lambda e: ( + e.start_datetime or datetime.min.replace(tzinfo=timezone.utc), + e.id, + ) ) # Limit to max_results @@ -2445,18 +2552,19 @@ def query_free_busy( ) -> dict[str, Any]: """ Query free/busy information for calendars. - + Following Google Calendar API behavior: - If timeZone is not provided, times are returned in UTC (with Z suffix) - If timeZone is provided, times are converted to that timezone (with offset like -08:00) """ + t_start = time.perf_counter() from ..core.utils import parse_rfc3339, format_rfc3339 from zoneinfo import ZoneInfo from datetime import timezone as dt_timezone min_dt = parse_rfc3339(time_min) max_dt = parse_rfc3339(time_max) - + # Determine target timezone for output target_tz = None if time_zone: @@ -2468,34 +2576,102 @@ def query_free_busy( calendars_result: dict[str, dict[str, Any]] = {} + # --- Phase 1: Resolve calendar IDs and batch-check access --- + # Resolve "primary" once + user = session.get(User, user_id) + resolved_map: dict[str, str] = {} # original_id -> resolved_id for cal_id in calendar_ids: - original_cal_id = cal_id # Keep original for response key - try: - # Resolve primary to actual calendar ID - resolved_cal_id = cal_id - if cal_id == "primary": - user = session.get(User, user_id) - if user: - resolved_cal_id = user.email - - # Check access - access_role = _get_user_access_role(session, resolved_cal_id, user_id) - if access_role is None: - calendars_result[original_cal_id] = { - "errors": [ - { - "domain": "calendar", - "reason": "notFound", - } - ] - } - continue + if cal_id == "primary" and user: + resolved_map[cal_id] = user.email + else: + resolved_map[cal_id] = cal_id + + # Batch access check: get all CalendarListEntry rows for this user + these calendars + resolved_ids = list(set(resolved_map.values())) + accessible_entries = { + row.calendar_id: row.access_role + for row in session.execute( + select(CalendarListEntry.calendar_id, CalendarListEntry.access_role).where( + and_( + CalendarListEntry.user_id == user_id, + CalendarListEntry.calendar_id.in_(resolved_ids), + 
CalendarListEntry.deleted == False, # noqa: E712 + ) + ) + ).all() + } - # Get events in time range - events = session.execute( + # For calendars not in CalendarListEntry, do a single batched ACL check + missing_ids = [cid for cid in resolved_ids if cid not in accessible_entries] + acl_access: dict[str, AccessRole] = {} + if missing_ids and user: + acl_conditions = [ + and_( + AclRule.scope_type == AclScopeType.user, + AclRule.scope_value == user.email, + ), + and_( + AclRule.scope_type == AclScopeType.default, + ), + ] + if "@" in user.email: + domain = user.email.split("@")[1] + acl_conditions.append( + and_( + AclRule.scope_type == AclScopeType.domain, + AclRule.scope_value == domain, + ) + ) + acl_rules = list( + session.execute( + select(AclRule).where( + and_( + AclRule.calendar_id.in_(missing_ids), + AclRule.deleted == False, # noqa: E712 + or_(*acl_conditions), + ) + ) + ) + .scalars() + .all() + ) + # Group by calendar_id, pick highest-priority scope per calendar + scope_priority = { + AclScopeType.user: 3, + AclScopeType.domain: 2, + AclScopeType.default: 1, + } + acl_best_scope: dict[str, AclScopeType] = {} # track scope of best rule + for rule in acl_rules: + existing_scope = acl_best_scope.get(rule.calendar_id) + if existing_scope is None or scope_priority.get( + rule.scope_type, 0 + ) > scope_priority.get(existing_scope, 0): + acl_access[rule.calendar_id] = rule.role + acl_best_scope[rule.calendar_id] = rule.scope_type + + # Determine which calendars are accessible + accessible_cal_ids: list[str] = [] + for original_id, resolved_id in resolved_map.items(): + if resolved_id in accessible_entries or resolved_id in acl_access: + accessible_cal_ids.append(resolved_id) + else: + calendars_result[original_id] = { + "errors": [ + { + "domain": "calendar", + "reason": "notFound", + } + ] + } + + # --- Phase 2: Batch fetch all events across accessible calendars --- + if accessible_cal_ids: + all_events = list( + session.execute( select(Event).where( and_( - Event.calendar_id == resolved_cal_id, + Event.calendar_id.in_(accessible_cal_ids), Event.status != EventStatus.cancelled, Event.transparency == "opaque", or_( @@ -2514,38 +2690,51 @@ def query_free_busy( ), ) ) - ).scalars().all() + ) + .scalars() + .all() + ) - # Build busy periods with timezone conversion if specified + # Group events by calendar_id + events_by_cal: dict[str, list[Event]] = {cid: [] for cid in accessible_cal_ids} + for event in all_events: + if event.calendar_id in events_by_cal: + events_by_cal[event.calendar_id].append(event) + else: + events_by_cal = {} + + # --- Phase 3: Build busy periods --- + for original_id, resolved_id in resolved_map.items(): + if original_id in calendars_result: + # Already set as error + continue + + try: + events = events_by_cal.get(resolved_id, []) busy = [] for event in events: if event.start_datetime and event.end_datetime: start_dt = event.start_datetime end_dt = event.end_datetime - + # Get the event's timezone from the JSONB start/end fields - # The start_datetime/end_datetime columns store local times without offset event_tz_name = None if event.start and isinstance(event.start, dict): event_tz_name = event.start.get("timeZone") - + if event_tz_name and start_dt.tzinfo is None: try: event_tz = ZoneInfo(event_tz_name) - # Interpret the naive datetime in the event's timezone start_dt = start_dt.replace(tzinfo=event_tz) except (KeyError, ValueError): - # Fall back to UTC if timezone is invalid start_dt = start_dt.replace(tzinfo=dt_timezone.utc) elif start_dt.tzinfo is None: - # 
No event timezone, assume UTC start_dt = start_dt.replace(tzinfo=dt_timezone.utc) - - # Same for end datetime + end_tz_name = None if event.end and isinstance(event.end, dict): end_tz_name = event.end.get("timeZone") - + if end_tz_name and end_dt.tzinfo is None: try: end_tz = ZoneInfo(end_tz_name) @@ -2554,9 +2743,8 @@ def query_free_busy( end_dt = end_dt.replace(tzinfo=dt_timezone.utc) elif end_dt.tzinfo is None: end_dt = end_dt.replace(tzinfo=dt_timezone.utc) - + if target_tz: - # Convert to target timezone and format with offset start_dt = start_dt.astimezone(target_tz) end_dt = end_dt.astimezone(target_tz) busy.append( @@ -2566,7 +2754,6 @@ def query_free_busy( } ) else: - # Return in UTC (with Z suffix per Google API) start_utc = start_dt.astimezone(dt_timezone.utc) end_utc = end_dt.astimezone(dt_timezone.utc) busy.append( @@ -2576,11 +2763,11 @@ def query_free_busy( } ) - calendars_result[original_cal_id] = {"busy": busy} + calendars_result[original_id] = {"busy": busy} except Exception as e: - logger.exception("Error querying free/busy for calendar %s", cal_id) - calendars_result[original_cal_id] = { + logger.exception("Error querying free/busy for calendar %s", original_id) + calendars_result[original_id] = { "errors": [ { "domain": "calendar", @@ -2589,6 +2776,11 @@ def query_free_busy( ] } + t_ms = (time.perf_counter() - t_start) * 1000 + if t_ms > 10: + logger.info( + f"[PERF] query_free_busy() time={t_ms:.0f}ms calendars={len(calendar_ids)}" + ) return { "kind": "calendar#freeBusy", "timeMin": time_min, @@ -2607,14 +2799,18 @@ def _get_user_access_role( calendar_id: str, user_id: str, ) -> Optional[AccessRole]: - """Get user's access role to a calendar.""" + """Get user's access role to a calendar. + + Optimized: checks CalendarListEntry first (single query), then falls back + to a single consolidated ACL query covering user/domain/default scopes. 
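    Example (illustrative): if the user has no CalendarListEntry row for the
    calendar but the calendar carries both a default-scope "reader" rule and a
    user-scope "writer" rule for this user, the user scope wins the priority
    comparison (user > domain > default) and AccessRole.writer is returned.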
+ """ user = session.get(User, user_id) if user is None: return None - # Check CalendarListEntry first + # Check CalendarListEntry first (most common path, indexed on user_id) entry = session.execute( - select(CalendarListEntry).where( + select(CalendarListEntry.access_role).where( and_( CalendarListEntry.user_id == user_id, CalendarListEntry.calendar_id == calendar_id, @@ -2624,56 +2820,59 @@ def _get_user_access_role( ).scalar_one_or_none() if entry: - return entry.access_role + return entry + + # Consolidated ACL check: fetch all matching rules in a single query + # instead of 3 separate queries for user/domain/default scopes + acl_conditions = [ + # User scope + and_( + AclRule.scope_type == AclScopeType.user, + AclRule.scope_value == user.email, + ), + # Default scope (public) + and_( + AclRule.scope_type == AclScopeType.default, + ), + ] - # Check ACL rules - # User scope - rule = session.execute( - select(AclRule).where( + # Domain scope (only if email has a domain) + domain = None + if "@" in user.email: + domain = user.email.split("@")[1] + acl_conditions.append( and_( - AclRule.calendar_id == calendar_id, - AclRule.scope_type == AclScopeType.user, - AclRule.scope_value == user.email, - AclRule.deleted == False, # noqa: E712 + AclRule.scope_type == AclScopeType.domain, + AclRule.scope_value == domain, ) ) - ).scalar_one_or_none() - - if rule: - return rule.role - # Domain scope - if "@" in user.email: - domain = user.email.split("@")[1] - rule = session.execute( + rules = list( + session.execute( select(AclRule).where( and_( AclRule.calendar_id == calendar_id, - AclRule.scope_type == AclScopeType.domain, - AclRule.scope_value == domain, AclRule.deleted == False, # noqa: E712 + or_(*acl_conditions), ) ) - ).scalar_one_or_none() - - if rule: - return rule.role - - # Default scope (public) - rule = session.execute( - select(AclRule).where( - and_( - AclRule.calendar_id == calendar_id, - AclRule.scope_type == AclScopeType.default, - AclRule.deleted == False, # noqa: E712 - ) ) - ).scalar_one_or_none() + .scalars() + .all() + ) - if rule: - return rule.role + if not rules: + return None - return None + # Return the highest-privilege role found + # Priority: user scope > domain scope > default scope + scope_priority = { + AclScopeType.user: 3, + AclScopeType.domain: 2, + AclScopeType.default: 1, + } + best_rule = max(rules, key=lambda r: scope_priority.get(r.scope_type, 0)) + return best_rule.role def _check_calendar_access( diff --git a/backend/src/services/calendar/database/schema.py b/backend/src/services/calendar/database/schema.py index dbd277a..c4a4ecf 100644 --- a/backend/src/services/calendar/database/schema.py +++ b/backend/src/services/calendar/database/schema.py @@ -207,9 +207,7 @@ class CalendarListEntry(Base): user_id: Mapped[str] = mapped_column( ForeignKey("calendar_users.id"), nullable=False ) - calendar_id: Mapped[str] = mapped_column( - ForeignKey("calendars.id"), nullable=False - ) + calendar_id: Mapped[str] = mapped_column(ForeignKey("calendars.id"), nullable=False) etag: Mapped[str] = mapped_column(String(100), nullable=False) # Access role @@ -226,12 +224,8 @@ class CalendarListEntry(Base): foreground_color: Mapped[Optional[str]] = mapped_column(String(10), nullable=True) # Visibility settings - hidden: Mapped[bool] = mapped_column( - Boolean, default=False, server_default="false" - ) - selected: Mapped[bool] = mapped_column( - Boolean, default=True, server_default="true" - ) + hidden: Mapped[bool] = mapped_column(Boolean, default=False, server_default="false") + 
selected: Mapped[bool] = mapped_column(Boolean, default=True, server_default="true") primary: Mapped[bool] = mapped_column( Boolean, default=False, server_default="false" ) @@ -276,12 +270,16 @@ class Event(Base): Index("ix_event_ical_uid", "ical_uid"), Index("ix_event_recurring", "recurring_event_id"), Index("ix_event_updated", "updated_at"), + # Composite indexes for common query patterns + Index("ix_event_cal_status_start", "calendar_id", "status", "start_datetime"), + Index( + "ix_event_cal_start_end", "calendar_id", "start_datetime", "end_datetime" + ), + Index("ix_event_cal_updated", "calendar_id", "updated_at"), ) id: Mapped[str] = mapped_column(String(1024), primary_key=True) - calendar_id: Mapped[str] = mapped_column( - ForeignKey("calendars.id"), nullable=False - ) + calendar_id: Mapped[str] = mapped_column(ForeignKey("calendars.id"), nullable=False) etag: Mapped[str] = mapped_column(String(100), nullable=False) # Basic info @@ -312,8 +310,12 @@ class Event(Base): String(255), nullable=True ) # Google Profile IDs (different from internal user_id) - creator_profile_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) - organizer_profile_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + creator_profile_id: Mapped[Optional[str]] = mapped_column( + String(255), nullable=True + ) + organizer_profile_id: Mapped[Optional[str]] = mapped_column( + String(255), nullable=True + ) creator_self: Mapped[bool] = mapped_column( Boolean, default=False, server_default="false" ) @@ -337,7 +339,9 @@ class Event(Base): # Recurrence recurrence: Mapped[Optional[list[str]]] = mapped_column(JSONB, nullable=True) - recurring_event_id: Mapped[Optional[str]] = mapped_column(String(1024), nullable=True) + recurring_event_id: Mapped[Optional[str]] = mapped_column( + String(1024), nullable=True + ) original_start_time: Mapped[Optional[dict[str, Any]]] = mapped_column( JSONB, nullable=True ) @@ -499,7 +503,9 @@ class EventAttendee(Base): default=AttendeeResponseStatus.needsAction, ) comment: Mapped[Optional[str]] = mapped_column(Text, nullable=True) - additional_guests: Mapped[int] = mapped_column(Integer, default=0, server_default="0") + additional_guests: Mapped[int] = mapped_column( + Integer, default=0, server_default="0" + ) # Profile ID (if available) profile_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) @@ -544,14 +550,14 @@ class AclRule(Base): __tablename__ = "calendar_acl_rules" __table_args__ = ( - UniqueConstraint("calendar_id", "scope_type", "scope_value", name="uq_acl_rule"), + UniqueConstraint( + "calendar_id", "scope_type", "scope_value", name="uq_acl_rule" + ), Index("ix_acl_calendar", "calendar_id"), ) id: Mapped[str] = mapped_column(String(255), primary_key=True) - calendar_id: Mapped[str] = mapped_column( - ForeignKey("calendars.id"), nullable=False - ) + calendar_id: Mapped[str] = mapped_column(ForeignKey("calendars.id"), nullable=False) etag: Mapped[str] = mapped_column(String(100), nullable=False) role: Mapped[AccessRole] = mapped_column( @@ -636,7 +642,9 @@ class Channel(Base): token: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) params: Mapped[Optional[dict[str, Any]]] = mapped_column(JSONB, nullable=True) # Whether payload is wanted for notifications - payload: Mapped[bool] = mapped_column(Boolean, default=False, server_default="false") + payload: Mapped[bool] = mapped_column( + Boolean, default=False, server_default="false" + ) # User who created the channel (for ownership validation) user_id: 
Mapped[Optional[str]] = mapped_column(String(255), nullable=True) @@ -654,9 +662,7 @@ class SyncToken(Base): """ __tablename__ = "calendar_sync_tokens" - __table_args__ = ( - Index("ix_sync_token_resource", "resource_type", "resource_id"), - ) + __table_args__ = (Index("ix_sync_token_resource", "resource_type", "resource_id"),) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) token: Mapped[str] = mapped_column(String(255), unique=True, nullable=False) diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 522d084..a855e42 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -31,10 +31,11 @@ @pytest.fixture(scope="session") def db_url(): - """Database URL from environment.""" - url = os.environ.get("DATABASE_URL") - if not url: - pytest.skip("DATABASE_URL not set") + """Database URL from environment, defaults to local postgres.""" + url = os.environ.get( + "DATABASE_URL", + "postgresql://postgres:postgres@localhost:5432/postgres", + ) return url diff --git a/backend/tests/performance/__init__.py b/backend/tests/performance/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/performance/test_box_bench_perf.py b/backend/tests/performance/test_box_bench_perf.py new file mode 100644 index 0000000..723ef4f --- /dev/null +++ b/backend/tests/performance/test_box_bench_perf.py @@ -0,0 +1,922 @@ +""" +Performance tests for Box API - mimicking real box_bench operations. + +These tests create an isolated Box environment (box_default template), +then run the same API call patterns that appear in the box_bench test suite +to measure response times and identify bottlenecks. + +Usage: + # Run from backend/ directory (requires DATABASE_URL in .env or env): + + + + # Run with timing threshold (skip assertions under N ms): + PERF_THRESHOLD_MS=100 pytest tests/performance/test_box_bench_perf.py -v -s + +Environment setup: + 1. The box_default template must be seeded in the database. + 2. Tests use core_isolation_engine.create_environment(template_schema="box_default") + to create an isolated copy of the template. + 3. The impersonate_user_id "27512847635" matches the bench's default user (Admin User). + 4. All environments are auto-cleaned after the test session. 
+""" + +import asyncio +import logging +import os +import time + +import pytest +import pytest_asyncio +from httpx import AsyncClient, ASGITransport +from starlette.applications import Starlette + +from src.services.box.api.routes import routes as box_routes + +logger = logging.getLogger(__name__) + +# Default user from box_bench.json +BOX_IMPERSONATE_USER_ID = "27512847635" + +# Threshold in ms - tests log warnings above this +PERF_THRESHOLD_MS = int(os.environ.get("PERF_THRESHOLD_MS", "500")) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _timed(label: str): + """Context manager that logs elapsed time.""" + + class Timer: + def __init__(self): + self.elapsed_ms = 0.0 + + def __enter__(self): + self._start = time.perf_counter() + return self + + def __exit__(self, *exc): + self.elapsed_ms = (time.perf_counter() - self._start) * 1000 + marker = "SLOW" if self.elapsed_ms > PERF_THRESHOLD_MS else "OK" + logger.info(f"[PERF {marker}] {label}: {self.elapsed_ms:.0f}ms") + + return Timer() + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest_asyncio.fixture +async def box_client( + test_user_id, + core_isolation_engine, + session_manager, + environment_handler, +): + """ + Create an isolated box_default environment and return an AsyncClient + wired to the Box API routes, just like the real bench does. + + The flow mirrors environment.py setup_state: + 1. core_isolation_engine.create_environment(template_schema="box_default", ...) + 2. Wire session + impersonate_user_id into request.state + 3. Mount box routes + """ + env_result = core_isolation_engine.create_environment( + template_schema="box_default", + ttl_seconds=3600, + created_by=test_user_id, + impersonate_user_id=BOX_IMPERSONATE_USER_ID, + ) + + async def add_db_session(request, call_next): + with session_manager.with_session_for_environment( + env_result.environment_id + ) as session: + request.state.db_session = session + request.state.environment_id = env_result.environment_id + request.state.impersonate_user_id = BOX_IMPERSONATE_USER_ID + request.state.impersonate_email = None + response = await call_next(request) + return response + + app = Starlette(routes=box_routes) + app.middleware("http")(add_db_session) + + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + + environment_handler.drop_schema(env_result.schema_name) + + +# --------------------------------------------------------------------------- +# Test: GET /users/me (bench test_1) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_current_user(box_client: AsyncClient): + """GET /users/me — identify the logged-in user.""" + with _timed("GET /users/me") as t: + resp = await box_client.get("/users/me") + + assert resp.status_code == 200 + data = resp.json() + assert data["type"] == "user" + assert data["id"] == BOX_IMPERSONATE_USER_ID + assert t.elapsed_ms < 5000, f"GET /users/me took {t.elapsed_ms:.0f}ms (>5s)" + + +# --------------------------------------------------------------------------- +# Test: GET /search (bench test_2, test_3, test_4, test_5, test_6, ...) 
+# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_search_by_name(box_client: AsyncClient): + """GET /search?query=investments — find folder by name (very common operation).""" + with _timed("GET /search?query=investments") as t: + resp = await box_client.get("/search", params={"query": "investments"}) + + assert resp.status_code == 200 + data = resp.json() + assert data["type"] == "search_results_items" + assert data["total_count"] >= 1 + assert t.elapsed_ms < 5000, f"search 'investments' took {t.elapsed_ms:.0f}ms (>5s)" + + +@pytest.mark.asyncio +async def test_search_fomc(box_client: AsyncClient): + """GET /search?query=fomc — search for FOMC files (bench test_3).""" + with _timed("GET /search?query=fomc") as t: + resp = await box_client.get("/search", params={"query": "fomc"}) + + assert resp.status_code == 200 + data = resp.json() + assert data["total_count"] >= 1 + assert t.elapsed_ms < 5000, f"search 'fomc' took {t.elapsed_ms:.0f}ms (>5s)" + + +@pytest.mark.asyncio +async def test_search_broad_query(box_client: AsyncClient): + """GET /search?query=a — broad search that returns many results (stress test).""" + with _timed("GET /search?query=a (broad)") as t: + resp = await box_client.get("/search", params={"query": "a", "limit": 200}) + + assert resp.status_code == 200 + data = resp.json() + logger.info( + f" Broad search returned {data['total_count']} total, " + f"{len(data['entries'])} entries" + ) + assert t.elapsed_ms < 10000, f"broad search took {t.elapsed_ms:.0f}ms (>10s)" + + +@pytest.mark.asyncio +async def test_search_file_type_filter(box_client: AsyncClient): + """GET /search?query=report&type=file — search with type filter.""" + with _timed("GET /search?query=report&type=file") as t: + resp = await box_client.get( + "/search", params={"query": "report", "type": "file"} + ) + + assert resp.status_code == 200 + data = resp.json() + # All results should be files + for entry in data["entries"]: + assert entry["type"] == "file" + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_search_folder_type_filter(box_client: AsyncClient): + """GET /search?query=macro&type=folder — folder search (bench test_5).""" + with _timed("GET /search?query=macro&type=folder") as t: + resp = await box_client.get( + "/search", params={"query": "macro", "type": "folder"} + ) + + assert resp.status_code == 200 + data = resp.json() + for entry in data["entries"]: + assert entry["type"] == "folder" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: GET /folders/{id} (bench test_9, test_12) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_root_folder(box_client: AsyncClient): + """GET /folders/0 — get root folder (always ID "0").""" + with _timed("GET /folders/0") as t: + resp = await box_client.get("/folders/0") + + assert resp.status_code == 200 + data = resp.json() + assert data["type"] == "folder" + assert data["id"] == "0" + assert "item_collection" in data + assert t.elapsed_ms < 5000, f"GET /folders/0 took {t.elapsed_ms:.0f}ms (>5s)" + + +@pytest.mark.asyncio +async def test_get_investments_folder(box_client: AsyncClient): + """GET /folders/5610825569 — get the investments folder (bench test_9).""" + with _timed("GET /folders/5610825569") as t: + resp = await box_client.get("/folders/5610825569") + + assert resp.status_code == 200 + data = resp.json() + assert 
data["type"] == "folder" + assert data["name"] == "investments" + assert "path_collection" in data + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: GET /folders/{id}/items (bench test_12) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_root_folder_items(box_client: AsyncClient): + """GET /folders/0/items — list items in root folder.""" + with _timed("GET /folders/0/items") as t: + resp = await box_client.get("/folders/0/items") + + assert resp.status_code == 200 + data = resp.json() + assert "entries" in data + assert "total_count" in data + logger.info(f" Root folder has {data['total_count']} items") + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_list_investments_folder_items(box_client: AsyncClient): + """GET /folders/5610825569/items — list items in investments folder (bench test_12).""" + with _timed("GET /folders/5610825569/items") as t: + resp = await box_client.get("/folders/5610825569/items") + + assert resp.status_code == 200 + data = resp.json() + assert "entries" in data + logger.info(f" investments folder has {data['total_count']} items") + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: GET /files/{id} (bench test_4 needs file lookup before comment) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_file_by_id(box_client: AsyncClient): + """GET /files/{file_id} — get a specific file's details.""" + # First find a file via search + search_resp = await box_client.get( + "/search", params={"query": "fomc", "type": "file"} + ) + assert search_resp.status_code == 200 + entries = search_resp.json()["entries"] + if not entries: + pytest.skip("No fomc files found in seed data") + + file_id = entries[0]["id"] + + with _timed(f"GET /files/{file_id}") as t: + resp = await box_client.get(f"/files/{file_id}") + + assert resp.status_code == 200 + data = resp.json() + assert data["type"] == "file" + assert data["id"] == file_id + assert "path_collection" in data + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: POST /comments (bench test_3, test_4, test_10) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_add_comment_to_file(box_client: AsyncClient): + """POST /comments — add a comment to a file (bench test_3, test_4).""" + # Find a file + search_resp = await box_client.get( + "/search", params={"query": "fomc", "type": "file"} + ) + entries = search_resp.json().get("entries", []) + if not entries: + pytest.skip("No fomc files found") + + file_id = entries[0]["id"] + + with _timed(f"POST /comments on file {file_id}") as t: + resp = await box_client.post( + "/comments", + json={ + "item": {"type": "file", "id": file_id}, + "message": "Relevant", + }, + ) + + assert resp.status_code == 201 + data = resp.json() + assert data["type"] == "comment" + assert data["message"] == "Relevant" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: POST /folders (bench test_1, test_2, test_11) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_create_folder_in_root(box_client: AsyncClient): + """POST /folders — create 
folder in root (bench test_1).""" + with _timed("POST /folders (root)") as t: + resp = await box_client.post( + "/folders", + json={"name": "Admin User", "parent": {"id": "0"}}, + ) + + assert resp.status_code == 201 + data = resp.json() + assert data["type"] == "folder" + assert data["name"] == "Admin User" + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_create_folder_in_subfolder(box_client: AsyncClient): + """POST /folders — create folder inside investments (bench test_2).""" + with _timed("POST /folders (investments)") as t: + resp = await box_client.post( + "/folders", + json={"name": "Analysis_2026", "parent": {"id": "5610825569"}}, + ) + + assert resp.status_code == 201 + data = resp.json() + assert data["name"] == "Analysis_2026" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: PUT /folders/{id} (bench test_5, test_8, test_9) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_rename_folder(box_client: AsyncClient): + """PUT /folders/{id} — rename folder (bench test_5).""" + # macroeconomics folder ID from bench + folder_id = "1973339758" + + with _timed(f"PUT /folders/{folder_id}") as t: + resp = await box_client.put( + f"/folders/{folder_id}", + json={"name": "Global Economics"}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["name"] == "Global Economics" + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_update_folder_tags(box_client: AsyncClient): + """PUT /folders/{id} — update tags (bench test_8).""" + folder_id = "5610825569" # investments + + with _timed(f"PUT /folders/{folder_id} (tags)") as t: + resp = await box_client.put( + f"/folders/{folder_id}", + json={"tags": ["finance", "investments", "quarterly"]}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert "finance" in data.get("tags", []) + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: PUT /files/{id} (bench test_6 — move file) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_move_file(box_client: AsyncClient): + """PUT /files/{id} — move file to different folder (bench test_6).""" + file_id = "1421498350" # transport-april-2025-csv.csv + target_folder_id = "5610825569" # investments + + with _timed(f"PUT /files/{file_id} (move)") as t: + resp = await box_client.put( + f"/files/{file_id}", + json={"parent": {"id": target_folder_id}}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["parent"]["id"] == target_folder_id + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: GET /files/{id}/comments (needed for checking existing comments) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_file_comments(box_client: AsyncClient): + """GET /files/{id}/comments — list comments on a file.""" + # Find a file + search_resp = await box_client.get( + "/search", params={"query": "fomc", "type": "file"} + ) + entries = search_resp.json().get("entries", []) + if not entries: + pytest.skip("No fomc files found") + + file_id = entries[0]["id"] + + with _timed(f"GET /files/{file_id}/comments") as t: + resp = await box_client.get(f"/files/{file_id}/comments") + + assert resp.status_code == 200 + data = 
resp.json() + assert "entries" in data + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: POST /tasks (bench test_10) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_create_task(box_client: AsyncClient): + """POST /tasks — create task on a file (bench test_10).""" + # Find a file + search_resp = await box_client.get( + "/search", params={"query": "fomc", "type": "file"} + ) + entries = search_resp.json().get("entries", []) + if not entries: + pytest.skip("No fomc files found") + + file_id = entries[0]["id"] + + with _timed(f"POST /tasks on file {file_id}") as t: + resp = await box_client.post( + "/tasks", + json={ + "item": {"type": "file", "id": file_id}, + "message": "Review content", + "action": "review", + }, + ) + + assert resp.status_code == 201 + data = resp.json() + assert data["type"] == "task" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: Hubs (bench test_7, test_13) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_hubs(box_client: AsyncClient): + """GET /hubs — list all hubs (bench test_13).""" + with _timed("GET /hubs") as t: + resp = await box_client.get("/hubs", headers={"box-version": "2025.0"}) + + assert resp.status_code == 200 + data = resp.json() + assert "entries" in data + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_create_hub(box_client: AsyncClient): + """POST /hubs — create a hub (bench test_7).""" + with _timed("POST /hubs") as t: + resp = await box_client.post( + "/hubs", + json={"title": "Research Center", "description": "Research hub"}, + headers={"box-version": "2025.0"}, + ) + + assert resp.status_code == 201 + data = resp.json() + assert data["title"] == "Research Center" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: Collections (bench test_15+) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_collections(box_client: AsyncClient): + """GET /collections — list available collections.""" + with _timed("GET /collections") as t: + resp = await box_client.get("/collections") + + assert resp.status_code == 200 + data = resp.json() + assert "entries" in data + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: Multi-step flows mimicking real bench scenarios +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_bench_flow_search_then_comment(box_client: AsyncClient): + """ + Mimics bench test_3: Search for 'fomc', then add comment to first result. + Measures the combined latency of a typical 2-step agent operation. 
+ """ + with _timed("FLOW: search + comment") as t_total: + # Step 1: Search + with _timed(" step1: search fomc") as t1: + search_resp = await box_client.get( + "/search", params={"query": "fomc", "type": "file"} + ) + assert search_resp.status_code == 200 + entries = search_resp.json()["entries"] + assert len(entries) >= 1 + + file_id = entries[0]["id"] + + # Step 2: Comment + with _timed(f" step2: comment on {file_id}") as t2: + comment_resp = await box_client.post( + "/comments", + json={"item": {"type": "file", "id": file_id}, "message": "Relevant"}, + ) + assert comment_resp.status_code == 201 + + logger.info( + f" FLOW total={t_total.elapsed_ms:.0f}ms " + f"(search={t1.elapsed_ms:.0f}ms + comment={t2.elapsed_ms:.0f}ms)" + ) + assert t_total.elapsed_ms < 10000 + + +@pytest.mark.asyncio +async def test_bench_flow_search_rename_folder(box_client: AsyncClient): + """ + Mimics bench test_5: Search for folder, then rename it. + """ + with _timed("FLOW: search + rename folder") as t_total: + # Step 1: Search for macroeconomics folder + with _timed(" step1: search macroeconomics") as t1: + search_resp = await box_client.get( + "/search", params={"query": "macroeconomics", "type": "folder"} + ) + assert search_resp.status_code == 200 + entries = search_resp.json()["entries"] + assert len(entries) >= 1 + + folder_id = entries[0]["id"] + + # Step 2: Rename + with _timed(f" step2: rename folder {folder_id}") as t2: + rename_resp = await box_client.put( + f"/folders/{folder_id}", + json={"name": "Global Economics"}, + ) + assert rename_resp.status_code == 200 + + logger.info( + f" FLOW total={t_total.elapsed_ms:.0f}ms " + f"(search={t1.elapsed_ms:.0f}ms + rename={t2.elapsed_ms:.0f}ms)" + ) + assert t_total.elapsed_ms < 10000 + + +@pytest.mark.asyncio +async def test_bench_flow_create_nested_folders_and_move(box_client: AsyncClient): + """ + Mimics bench test_11: Create Project_Beta in root, create Docs inside it, + then move a file into Docs. Measures a 4-step agent operation. + """ + with _timed("FLOW: create nested + search + move") as t_total: + # Step 1: Create Project_Beta in root + with _timed(" step1: create Project_Beta"): + resp1 = await box_client.post( + "/folders", + json={"name": "Project_Beta", "parent": {"id": "0"}}, + ) + assert resp1.status_code == 201 + project_beta_id = resp1.json()["id"] + + # Step 2: Create Docs inside Project_Beta + with _timed(" step2: create Docs"): + resp2 = await box_client.post( + "/folders", + json={"name": "Docs", "parent": {"id": project_beta_id}}, + ) + assert resp2.status_code == 201 + docs_id = resp2.json()["id"] + + # Step 3: Search for the file + with _timed(" step3: search for file"): + search_resp = await box_client.get( + "/search", params={"query": "interviewing tips", "type": "file"} + ) + assert search_resp.status_code == 200 + entries = search_resp.json()["entries"] + assert len(entries) >= 1 + file_id = entries[0]["id"] + + # Step 4: Move file + with _timed(f" step4: move file {file_id} to Docs"): + move_resp = await box_client.put( + f"/files/{file_id}", + json={"parent": {"id": docs_id}}, + ) + assert move_resp.status_code == 200 + + logger.info(f" FLOW total={t_total.elapsed_ms:.0f}ms") + assert t_total.elapsed_ms < 15000 + + +@pytest.mark.asyncio +async def test_bench_flow_count_items_and_update_description(box_client: AsyncClient): + """ + Mimics bench test_12: Count files in investments, set description to the count. 
+ """ + with _timed("FLOW: search + list items + update") as t_total: + # Step 1: Search for investments folder + with _timed(" step1: search investments"): + search_resp = await box_client.get( + "/search", params={"query": "investments", "type": "folder"} + ) + assert search_resp.status_code == 200 + entries = search_resp.json()["entries"] + assert len(entries) >= 1 + folder_id = entries[0]["id"] + + # Step 2: List folder items to count + with _timed(f" step2: list items in {folder_id}"): + items_resp = await box_client.get(f"/folders/{folder_id}/items") + assert items_resp.status_code == 200 + item_count = items_resp.json()["total_count"] + + # Step 3: Update folder description + with _timed(" step3: update description"): + update_resp = await box_client.put( + f"/folders/{folder_id}", + json={"description": str(item_count)}, + ) + assert update_resp.status_code == 200 + + logger.info(f" FLOW total={t_total.elapsed_ms:.0f}ms, items counted={item_count}") + assert t_total.elapsed_ms < 10000 + + +# --------------------------------------------------------------------------- +# Test: Repeated searches (simulates an agent retrying/iterating) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_repeated_searches(box_client: AsyncClient): + """ + Run 10 sequential searches to measure consistency and detect degradation. + """ + queries = [ + "investments", + "fomc", + "report", + "macro", + "argentina", + "transport", + "earnings", + "interview", + "research", + "csv", + ] + + times = [] + for q in queries: + with _timed(f" search '{q}'") as t: + resp = await box_client.get("/search", params={"query": q}) + assert resp.status_code == 200 + times.append(t.elapsed_ms) + + avg_ms = sum(times) / len(times) + p50 = sorted(times)[len(times) // 2] + p99 = sorted(times)[int(len(times) * 0.99)] + max_ms = max(times) + + logger.info( + f" 10 searches: avg={avg_ms:.0f}ms p50={p50:.0f}ms " + f"p99={p99:.0f}ms max={max_ms:.0f}ms" + ) + assert max_ms < 10000, f"Worst search took {max_ms:.0f}ms (>10s)" + + +# --------------------------------------------------------------------------- +# Test: Folder traversal depth (triggers _get_path_collection N+1) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_deep_folder_path_collection(box_client: AsyncClient): + """ + Create 5 nested folders, then GET the deepest one. + This stresses _get_path_collection() which walks parent chain. 
+ """ + parent_id = "0" + folder_ids = [] + + for i in range(5): + resp = await box_client.post( + "/folders", + json={"name": f"depth_{i}", "parent": {"id": parent_id}}, + ) + assert resp.status_code == 201 + parent_id = resp.json()["id"] + folder_ids.append(parent_id) + + # Now GET the deepest folder — this triggers path_collection walk + deepest_id = folder_ids[-1] + with _timed(f"GET /folders/{deepest_id} (depth=5)") as t: + resp = await box_client.get(f"/folders/{deepest_id}") + + assert resp.status_code == 200 + data = resp.json() + path = data.get("path_collection", {}) + logger.info( + f" path_collection depth={path.get('total_count', '?')}, " + f"time={t.elapsed_ms:.0f}ms" + ) + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Tests: Parallel requests (4 concurrent) — simulates real agent bench load +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_parallel_4_searches(box_client: AsyncClient): + """ + Fire 4 search requests in parallel and measure wall-clock time. + This mirrors what happens when multiple agent turns hit the Box API + concurrently during an evaluation run. + """ + queries = ["investments", "fomc", "report", "macro"] + + async def do_search(q: str) -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await box_client.get("/search", params={"query": q}) + elapsed = (time.perf_counter() - t0) * 1000 + return q, elapsed, resp.status_code + + with _timed("PARALLEL: 4 searches") as wall: + results = await asyncio.gather(*(do_search(q) for q in queries)) + + for q, ms, status in results: + logger.info(f" parallel search '{q}': {ms:.0f}ms status={status}") + assert status == 200 + + max_individual = max(ms for _, ms, _ in results) + logger.info( + f" wall_clock={wall.elapsed_ms:.0f}ms max_individual={max_individual:.0f}ms" + ) + assert wall.elapsed_ms < 10000, ( + f"4 parallel searches took {wall.elapsed_ms:.0f}ms wall-clock (>10s)" + ) + + +@pytest.mark.asyncio +async def test_parallel_4_folder_gets(box_client: AsyncClient): + """ + Fire 4 GET /folders requests in parallel with different folder IDs. + """ + folder_ids = ["0", "5610825569", "1973339758", "1173971943"] + + async def do_get(fid: str) -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await box_client.get(f"/folders/{fid}") + elapsed = (time.perf_counter() - t0) * 1000 + return fid, elapsed, resp.status_code + + with _timed("PARALLEL: 4 folder GETs") as wall: + results = await asyncio.gather(*(do_get(f) for f in folder_ids)) + + for fid, ms, status in results: + logger.info(f" parallel GET /folders/{fid}: {ms:.0f}ms status={status}") + assert status == 200 + + max_individual = max(ms for _, ms, _ in results) + logger.info( + f" wall_clock={wall.elapsed_ms:.0f}ms max_individual={max_individual:.0f}ms" + ) + assert wall.elapsed_ms < 10000, ( + f"4 parallel folder GETs took {wall.elapsed_ms:.0f}ms wall-clock (>10s)" + ) + + +@pytest.mark.asyncio +async def test_parallel_4_mixed_operations(box_client: AsyncClient): + """ + Fire 4 different operation types in parallel — search, folder GET, + list folder items, list hubs — simulating a realistic agent burst. 
+ """ + + async def search() -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await box_client.get("/search", params={"query": "fomc"}) + return "search", (time.perf_counter() - t0) * 1000, resp.status_code + + async def get_folder() -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await box_client.get("/folders/0") + return "get_folder", (time.perf_counter() - t0) * 1000, resp.status_code + + async def list_items() -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await box_client.get("/folders/0/items") + return "list_items", (time.perf_counter() - t0) * 1000, resp.status_code + + async def list_hubs() -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await box_client.get("/hubs", headers={"box-version": "2025.0"}) + return "list_hubs", (time.perf_counter() - t0) * 1000, resp.status_code + + with _timed("PARALLEL: 4 mixed ops") as wall: + results = await asyncio.gather( + search(), get_folder(), list_items(), list_hubs() + ) + + for op, ms, status in results: + logger.info(f" parallel {op}: {ms:.0f}ms status={status}") + assert status == 200 + + max_individual = max(ms for _, ms, _ in results) + logger.info( + f" wall_clock={wall.elapsed_ms:.0f}ms max_individual={max_individual:.0f}ms" + ) + assert wall.elapsed_ms < 10000, ( + f"4 parallel mixed ops took {wall.elapsed_ms:.0f}ms wall-clock (>10s)" + ) + + +@pytest.mark.asyncio +async def test_parallel_4_writes(box_client: AsyncClient): + """ + Fire 4 write operations in parallel — create folders simultaneously. + Tests DB write contention under concurrent load. + """ + + async def create_folder(name: str) -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await box_client.post( + "/folders", + json={"name": name, "parent": {"id": "0"}}, + ) + return name, (time.perf_counter() - t0) * 1000, resp.status_code + + names = ["Parallel_A", "Parallel_B", "Parallel_C", "Parallel_D"] + + with _timed("PARALLEL: 4 folder creates") as wall: + results = await asyncio.gather(*(create_folder(n) for n in names)) + + for name, ms, status in results: + logger.info(f" parallel POST /folders '{name}': {ms:.0f}ms status={status}") + assert status == 201 + + max_individual = max(ms for _, ms, _ in results) + logger.info( + f" wall_clock={wall.elapsed_ms:.0f}ms max_individual={max_individual:.0f}ms" + ) + assert wall.elapsed_ms < 10000, ( + f"4 parallel folder creates took {wall.elapsed_ms:.0f}ms wall-clock (>10s)" + ) diff --git a/backend/tests/performance/test_calendar_bench_perf.py b/backend/tests/performance/test_calendar_bench_perf.py new file mode 100644 index 0000000..fe37ec0 --- /dev/null +++ b/backend/tests/performance/test_calendar_bench_perf.py @@ -0,0 +1,1085 @@ +""" +Performance tests for Calendar API - mimicking real calendar_bench operations. + +These tests create an isolated Calendar environment (calendar_default template), +then run the same API call patterns that appear in the calendar_bench test suite +to measure response times and identify bottlenecks. + +Usage: + # Run via docker exec (from ops/ directory): + docker exec ops-backend-1 python -m pytest tests/performance/test_calendar_bench_perf.py -v -s + + # Run with timing threshold (skip assertions under N ms): + docker exec ops-backend-1 sh -c "PERF_THRESHOLD_MS=100 python -m pytest tests/performance/test_calendar_bench_perf.py -v -s" + +Environment setup: + 1. The calendar_default template must be seeded in the database. + 2. 
Tests use core_isolation_engine.create_environment(template_schema="calendar_default") + to create an isolated copy of the template. + 3. The impersonate_user_id "user_agent" matches the bench's default user (test.user@test.com). + 4. All environments are auto-cleaned after the test session. +""" + +import asyncio +import logging +import os +import time + +import pytest +import pytest_asyncio +from httpx import AsyncClient, ASGITransport +from starlette.applications import Starlette + +from src.services.calendar.api import routes as calendar_routes + +logger = logging.getLogger(__name__) + +# Default user from calendar_bench.json +CALENDAR_IMPERSONATE_USER_ID = "user_agent" +CALENDAR_IMPERSONATE_EMAIL = "test.user@test.com" + +# Well-known IDs from calendar_default seed data +PRIMARY_CALENDAR_ID = "test.user@test.com" +HARVEST_CALENDAR_ID = "cal_harvest_schedule" +DUNGEON_MASTERS_CALENDAR_ID = "cal_dungeon_masters" +TIMELINE_ALPHA_CALENDAR_ID = "cal_timeline_alpha" +EVENT_FAILED_ROCKET_ID = "event_failed_rocket" +EVENT_WEED_WARRIOR_ID = "event_weed_warrior" + +# Threshold in ms - tests log warnings above this +PERF_THRESHOLD_MS = int(os.environ.get("PERF_THRESHOLD_MS", "500")) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _timed(label: str): + """Context manager that logs elapsed time.""" + + class Timer: + def __init__(self): + self.elapsed_ms = 0.0 + + def __enter__(self): + self._start = time.perf_counter() + return self + + def __exit__(self, *exc): + self.elapsed_ms = (time.perf_counter() - self._start) * 1000 + marker = "SLOW" if self.elapsed_ms > PERF_THRESHOLD_MS else "OK" + logger.info(f"[PERF {marker}] {label}: {self.elapsed_ms:.0f}ms") + + return Timer() + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def _cal_env( + test_user_id, + core_isolation_engine, + session_manager, + environment_handler, +): + """ + Create ONE isolated calendar_default environment for the entire test session. + Cloning the large calendar_default seed (~12k lines) per test is ~2-3s each; + doing it once drops total wall-clock from ~95s to <15s. + """ + env_result = core_isolation_engine.create_environment( + template_schema="calendar_default", + ttl_seconds=3600, + created_by=test_user_id, + impersonate_user_id=CALENDAR_IMPERSONATE_USER_ID, + impersonate_email=CALENDAR_IMPERSONATE_EMAIL, + ) + yield env_result + environment_handler.drop_schema(env_result.schema_name) + + +@pytest_asyncio.fixture +async def cal_client(_cal_env, session_manager): + """ + Lightweight per-test fixture: reuses the session-scoped environment, + only creates a fresh AsyncClient + Starlette app (sub-millisecond). 
+ """ + env_result = _cal_env + + async def add_db_session(request, call_next): + with session_manager.with_session_for_environment( + env_result.environment_id + ) as session: + request.state.db_session = session + request.state.db = session + request.state.environment_id = env_result.environment_id + request.state.impersonate_user_id = CALENDAR_IMPERSONATE_USER_ID + request.state.impersonate_email = CALENDAR_IMPERSONATE_EMAIL + response = await call_next(request) + return response + + app = Starlette(routes=calendar_routes) + app.middleware("http")(add_db_session) + + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + + +# --------------------------------------------------------------------------- +# Test: GET /users/me/calendarList (bench: calendarList.list) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_calendar_list(cal_client: AsyncClient): + """GET /users/me/calendarList — list user's calendars (very common operation).""" + with _timed("GET /users/me/calendarList") as t: + resp = await cal_client.get("/users/me/calendarList") + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#calendarList" + assert len(data["items"]) >= 1 + logger.info(f" CalendarList returned {len(data['items'])} entries") + assert t.elapsed_ms < 5000, f"calendarList.list took {t.elapsed_ms:.0f}ms (>5s)" + + +# --------------------------------------------------------------------------- +# Test: GET /calendars/{calendarId} (bench: calendars.get) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_primary_calendar(cal_client: AsyncClient): + """GET /calendars/primary — get primary calendar.""" + with _timed("GET /calendars/primary") as t: + resp = await cal_client.get("/calendars/primary") + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#calendar" + assert data["id"] == PRIMARY_CALENDAR_ID + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_get_secondary_calendar(cal_client: AsyncClient): + """GET /calendars/{calendarId} — get Harvest Schedule calendar.""" + with _timed(f"GET /calendars/{HARVEST_CALENDAR_ID}") as t: + resp = await cal_client.get(f"/calendars/{HARVEST_CALENDAR_ID}") + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#calendar" + assert "Harvest" in data["summary"] + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: POST /calendars (bench: calendars.insert — bench test_1, test_2, ...) 
+# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_create_calendar(cal_client: AsyncClient): + """POST /calendars — create a new secondary calendar (bench test_1).""" + with _timed("POST /calendars") as t: + resp = await cal_client.post( + "/calendars", + json={ + "summary": "Cosmic Voyagers HQ", + "description": "Stargazing activities", + "timeZone": "America/Los_Angeles", + }, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#calendar" + assert data["summary"] == "Cosmic Voyagers HQ" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: PATCH /calendars/{calendarId} (bench: calendars.patch) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_patch_calendar(cal_client: AsyncClient): + """PATCH /calendars/{calendarId} — update calendar description (bench test_5).""" + with _timed(f"PATCH /calendars/{HARVEST_CALENDAR_ID}") as t: + resp = await cal_client.patch( + f"/calendars/{HARVEST_CALENDAR_ID}", + json={"description": "Updated harvest calendar"}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["description"] == "Updated harvest calendar" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: GET /calendars/{calendarId}/events (bench: events.list) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_events_primary(cal_client: AsyncClient): + """GET /calendars/primary/events — list events on primary calendar.""" + with _timed("GET /calendars/primary/events") as t: + resp = await cal_client.get( + "/calendars/primary/events", + params={"timeMin": "2018-01-01T00:00:00Z", "maxResults": 250}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#events" + logger.info(f" Primary calendar has {len(data.get('items', []))} events") + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_list_events_secondary(cal_client: AsyncClient): + """GET /calendars/{calendarId}/events — list events on Harvest Schedule.""" + with _timed(f"GET /calendars/{HARVEST_CALENDAR_ID}/events") as t: + resp = await cal_client.get( + f"/calendars/{HARVEST_CALENDAR_ID}/events", + params={"timeMin": "2018-01-01T00:00:00Z"}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#events" + logger.info(f" Harvest calendar has {len(data.get('items', []))} events") + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_list_events_with_search(cal_client: AsyncClient): + """GET /calendars/{calendarId}/events?q=... 
— search events (bench test_4, 5, 6).""" + with _timed("GET events?q=Weed") as t: + resp = await cal_client.get( + f"/calendars/{HARVEST_CALENDAR_ID}/events", + params={"q": "Weed", "timeMin": "2017-01-01T00:00:00Z"}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#events" + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_list_events_single_events(cal_client: AsyncClient): + """GET events?singleEvents=true&orderBy=startTime — expand recurring (common agent pattern).""" + with _timed("GET events?singleEvents=true") as t: + resp = await cal_client.get( + f"/calendars/{PRIMARY_CALENDAR_ID}/events", + params={ + "singleEvents": "true", + "orderBy": "startTime", + "timeMin": "2018-06-01T00:00:00Z", + "timeMax": "2018-07-01T00:00:00Z", + }, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#events" + logger.info(f" singleEvents returned {len(data.get('items', []))} events") + assert t.elapsed_ms < 10000, f"singleEvents took {t.elapsed_ms:.0f}ms (>10s)" + + +# --------------------------------------------------------------------------- +# Test: GET /calendars/{calendarId}/events/{eventId} (bench: events.get) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_event_by_id(cal_client: AsyncClient): + """GET /calendars/{calendarId}/events/{eventId} — get a specific event.""" + with _timed(f"GET events/{EVENT_FAILED_ROCKET_ID}") as t: + resp = await cal_client.get( + f"/calendars/{PRIMARY_CALENDAR_ID}/events/{EVENT_FAILED_ROCKET_ID}" + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#event" + assert data["id"] == EVENT_FAILED_ROCKET_ID + assert "Failed Rocket" in data["summary"] + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: POST /calendars/{calendarId}/events (bench: events.insert — test_1..test_9) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_create_event(cal_client: AsyncClient): + """POST /calendars/{calendarId}/events — create event (bench test_1).""" + with _timed("POST events (basic)") as t: + resp = await cal_client.post( + f"/calendars/{PRIMARY_CALENDAR_ID}/events", + json={ + "summary": "Perseid Meteor Shower Watch Party", + "location": "Hillcrest Observatory Field", + "start": { + "dateTime": "2018-06-24T00:00:00-07:00", + "timeZone": "America/Los_Angeles", + }, + "end": { + "dateTime": "2018-06-24T03:00:00-07:00", + "timeZone": "America/Los_Angeles", + }, + }, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#event" + assert data["summary"] == "Perseid Meteor Shower Watch Party" + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_create_event_with_attendees(cal_client: AsyncClient): + """POST events with attendees (bench test_2, test_6).""" + with _timed("POST events (with attendees)") as t: + resp = await cal_client.post( + f"/calendars/{PRIMARY_CALENDAR_ID}/events", + json={ + "summary": "Telescope Alignment Ceremony", + "start": { + "dateTime": "2018-06-23T19:30:00+03:00", + "timeZone": "Europe/Kyiv", + }, + "end": { + "dateTime": "2018-06-23T21:00:00+03:00", + "timeZone": "Europe/Kyiv", + }, + "attendees": [ + {"email": "oleksandra@test.com"}, + {"email": "yuki@test.com"}, + ], + }, + ) + + assert resp.status_code == 200 + data = resp.json() + assert 
len(data.get("attendees", [])) >= 2 + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: PATCH /calendars/{calendarId}/events/{eventId} (bench: events.patch) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_patch_event(cal_client: AsyncClient): + """PATCH /events/{eventId} — update event details (bench test_1, test_2).""" + with _timed(f"PATCH events/{EVENT_FAILED_ROCKET_ID}") as t: + resp = await cal_client.patch( + f"/calendars/{PRIMARY_CALENDAR_ID}/events/{EVENT_FAILED_ROCKET_ID}", + json={"description": "Updated: SpaceX launch rescheduled"}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["description"] == "Updated: SpaceX launch rescheduled" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: DELETE /calendars/{calendarId}/events/{eventId} (bench: events.delete) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_delete_event(cal_client: AsyncClient): + """DELETE /events/{eventId} — delete event (bench test_1, test_2, test_3).""" + # Create a throwaway event to delete (don't mutate shared seed data) + create_resp = await cal_client.post( + f"/calendars/{PRIMARY_CALENDAR_ID}/events", + json={ + "summary": "Throwaway event for delete test", + "start": {"dateTime": "2018-07-10T10:00:00Z", "timeZone": "UTC"}, + "end": {"dateTime": "2018-07-10T11:00:00Z", "timeZone": "UTC"}, + }, + ) + assert create_resp.status_code == 200 + event_id = create_resp.json()["id"] + + with _timed(f"DELETE events/{event_id}") as t: + resp = await cal_client.delete( + f"/calendars/{PRIMARY_CALENDAR_ID}/events/{event_id}" + ) + + assert resp.status_code == 204 or resp.status_code == 200 + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: POST /freeBusy (bench: freeBusy.query — test_1..test_8) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_freebusy_single_calendar(cal_client: AsyncClient): + """POST /freeBusy — query single calendar (bench test_1).""" + with _timed("POST /freeBusy (1 calendar)") as t: + resp = await cal_client.post( + "/freeBusy", + json={ + "timeMin": "2018-06-23T00:00:00Z", + "timeMax": "2018-06-24T00:00:00Z", + "items": [{"id": "oleksandra@test.com"}], + }, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#freeBusy" + assert "oleksandra@test.com" in data["calendars"] + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_freebusy_multiple_calendars(cal_client: AsyncClient): + """POST /freeBusy — query multiple calendars (bench test_2, test_5).""" + with _timed("POST /freeBusy (3 calendars)") as t: + resp = await cal_client.post( + "/freeBusy", + json={ + "timeMin": "2018-06-18T00:00:00Z", + "timeMax": "2018-06-25T00:00:00Z", + "items": [ + {"id": "kenji@test.com"}, + {"id": "oksana@test.com"}, + {"id": "amara@test.com"}, + ], + }, + ) + + assert resp.status_code == 200 + data = resp.json() + assert len(data["calendars"]) == 3 + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: POST /calendars/{calendarId}/acl (bench: acl.insert — test_1..test_8) +# --------------------------------------------------------------------------- + + 
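+# NOTE: the ACL test below grants access on a calendar it creates itself rather
+# than on the shared primary calendar. The environment is session-scoped (see
+# _cal_env), so write tests follow the same "don't mutate shared seed data"
+# pattern as test_delete_event above.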
+@pytest.mark.asyncio +async def test_create_acl_rule(cal_client: AsyncClient): + """POST /calendars/{calendarId}/acl — grant access (bench test_1).""" + # First create a calendar to grant access to + create_resp = await cal_client.post( + "/calendars", + json={"summary": "ACL Test Calendar"}, + ) + assert create_resp.status_code == 200 + cal_id = create_resp.json()["id"] + + with _timed(f"POST /calendars/{cal_id}/acl") as t: + resp = await cal_client.post( + f"/calendars/{cal_id}/acl", + json={ + "role": "writer", + "scope": { + "type": "user", + "value": "yuki@test.com", + }, + }, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["role"] == "writer" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: GET /calendars/{calendarId}/acl (bench: acl.list) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_acl_rules(cal_client: AsyncClient): + """GET /calendars/{calendarId}/acl — list ACL rules.""" + with _timed(f"GET /calendars/{PRIMARY_CALENDAR_ID}/acl") as t: + resp = await cal_client.get(f"/calendars/{PRIMARY_CALENDAR_ID}/acl") + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#acl" + assert len(data["items"]) >= 1 + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: GET /colors (bench: colors.get) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_colors(cal_client: AsyncClient): + """GET /colors — get color definitions.""" + with _timed("GET /colors") as t: + resp = await cal_client.get("/colors") + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#colors" + assert "calendar" in data + assert "event" in data + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: GET /users/me/settings (bench: settings.list) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_settings(cal_client: AsyncClient): + """GET /users/me/settings — list user settings.""" + with _timed("GET /users/me/settings") as t: + resp = await cal_client.get("/users/me/settings") + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#settings" + assert len(data["items"]) >= 1 + assert t.elapsed_ms < 5000 + + +@pytest.mark.asyncio +async def test_get_setting(cal_client: AsyncClient): + """GET /users/me/settings/{setting} — get a specific setting.""" + with _timed("GET /users/me/settings/timezone") as t: + resp = await cal_client.get("/users/me/settings/timezone") + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#setting" + assert data["id"] == "timezone" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: POST /calendars/{calendarId}/events/quickAdd (bench: events.quickAdd) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_quick_add_event(cal_client: AsyncClient): + """POST /events/quickAdd — quick add via natural language (bench test_2).""" + with _timed("POST events/quickAdd") as t: + resp = await cal_client.post( + f"/calendars/{PRIMARY_CALENDAR_ID}/events/quickAdd", + params={"text": "Starlit 
Tea Ceremony with Akira tomorrow 3pm"}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#event" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: POST /calendars/{calendarId}/events/{eventId}/move (bench: events.move) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_move_event(cal_client: AsyncClient): + """POST /events/{eventId}/move — move event to another calendar (bench test_8).""" + # First create an event to move + create_resp = await cal_client.post( + f"/calendars/{PRIMARY_CALENDAR_ID}/events", + json={ + "summary": "Event To Move", + "start": { + "dateTime": "2018-06-20T10:00:00-07:00", + "timeZone": "America/Los_Angeles", + }, + "end": { + "dateTime": "2018-06-20T11:00:00-07:00", + "timeZone": "America/Los_Angeles", + }, + }, + ) + assert create_resp.status_code == 200 + event_id = create_resp.json()["id"] + + with _timed(f"POST events/{event_id}/move") as t: + resp = await cal_client.post( + f"/calendars/{PRIMARY_CALENDAR_ID}/events/{event_id}/move", + params={"destination": HARVEST_CALENDAR_ID}, + ) + + assert resp.status_code == 200 + data = resp.json() + assert data["kind"] == "calendar#event" + assert t.elapsed_ms < 5000 + + +# --------------------------------------------------------------------------- +# Test: Multi-step flows mimicking real bench scenarios +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_bench_flow_list_calendars_then_create_and_event(cal_client: AsyncClient): + """ + Mimics bench test_1: List calendars, create a new one, add an event to it. + Measures the combined latency of a typical 3-step agent operation. + """ + with _timed("FLOW: list calendars + create + event") as t_total: + # Step 1: List calendars + with _timed(" step1: calendarList.list") as t1: + list_resp = await cal_client.get("/users/me/calendarList") + assert list_resp.status_code == 200 + + # Step 2: Create new calendar + with _timed(" step2: calendars.insert") as t2: + create_resp = await cal_client.post( + "/calendars", + json={"summary": "Flow Test Calendar"}, + ) + assert create_resp.status_code == 200 + new_cal_id = create_resp.json()["id"] + + # Step 3: Create event on new calendar + with _timed(" step3: events.insert") as t3: + event_resp = await cal_client.post( + f"/calendars/{new_cal_id}/events", + json={ + "summary": "Test Event", + "start": { + "dateTime": "2018-06-20T10:00:00Z", + "timeZone": "UTC", + }, + "end": { + "dateTime": "2018-06-20T11:00:00Z", + "timeZone": "UTC", + }, + }, + ) + assert event_resp.status_code == 200 + + logger.info( + f" FLOW total={t_total.elapsed_ms:.0f}ms " + f"(list={t1.elapsed_ms:.0f}ms + create_cal={t2.elapsed_ms:.0f}ms " + f"+ create_event={t3.elapsed_ms:.0f}ms)" + ) + assert t_total.elapsed_ms < 15000 + + +@pytest.mark.asyncio +async def test_bench_flow_freebusy_then_create_event(cal_client: AsyncClient): + """ + Mimics bench test_1/test_2: Check free/busy, then create event at free time. + This is the most common 2-step pattern in the calendar bench. 
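+
+    The flow below uses a fixed start time. A real agent would instead pick a
+    candidate window [slot_start, slot_end) outside the returned busy intervals,
+    roughly (hedged sketch; field names follow the standard freeBusy response
+    shape asserted in test_freebusy_single_calendar):
+
+        busy = fb_resp.json()["calendars"]["oleksandra@test.com"].get("busy", [])
+        free = not any(b["start"] < slot_end and slot_start < b["end"] for b in busy)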
+ """ + with _timed("FLOW: freeBusy + create event") as t_total: + # Step 1: Check free/busy + with _timed(" step1: freeBusy.query") as t1: + fb_resp = await cal_client.post( + "/freeBusy", + json={ + "timeMin": "2018-06-23T00:00:00Z", + "timeMax": "2018-06-24T00:00:00Z", + "items": [{"id": "oleksandra@test.com"}], + }, + ) + assert fb_resp.status_code == 200 + + # Step 2: Create event + with _timed(" step2: events.insert") as t2: + event_resp = await cal_client.post( + f"/calendars/{PRIMARY_CALENDAR_ID}/events", + json={ + "summary": "Telescope Alignment", + "start": { + "dateTime": "2018-06-23T19:30:00+03:00", + "timeZone": "Europe/Kyiv", + }, + "end": { + "dateTime": "2018-06-23T21:00:00+03:00", + "timeZone": "Europe/Kyiv", + }, + }, + ) + assert event_resp.status_code == 200 + + logger.info( + f" FLOW total={t_total.elapsed_ms:.0f}ms " + f"(freeBusy={t1.elapsed_ms:.0f}ms + create={t2.elapsed_ms:.0f}ms)" + ) + assert t_total.elapsed_ms < 10000 + + +@pytest.mark.asyncio +async def test_bench_flow_search_patch_delete(cal_client: AsyncClient): + """ + Mimics bench test_2: List events (search), patch one, delete another. + """ + with _timed("FLOW: search + patch + delete") as t_total: + # Step 1: Search events on harvest calendar + with _timed(" step1: events.list (search)") as t1: + list_resp = await cal_client.get( + f"/calendars/{HARVEST_CALENDAR_ID}/events", + params={"q": "Weed", "timeMin": "2017-01-01T00:00:00Z"}, + ) + assert list_resp.status_code == 200 + items = list_resp.json().get("items", []) + + # Step 2: Patch the failed rocket event (different calendar) + with _timed(" step2: events.patch") as t2: + patch_resp = await cal_client.patch( + f"/calendars/{PRIMARY_CALENDAR_ID}/events/{EVENT_FAILED_ROCKET_ID}", + json={"location": "Cape Canaveral"}, + ) + assert patch_resp.status_code == 200 + + # Step 3: Create then delete a throwaway event (don't mutate seed data) + tmp_resp = await cal_client.post( + f"/calendars/{HARVEST_CALENDAR_ID}/events", + json={ + "summary": "Throwaway for flow delete", + "start": {"dateTime": "2018-07-10T09:00:00Z", "timeZone": "UTC"}, + "end": {"dateTime": "2018-07-10T10:00:00Z", "timeZone": "UTC"}, + }, + ) + assert tmp_resp.status_code == 200 + tmp_event_id = tmp_resp.json()["id"] + + with _timed(" step3: events.delete") as t3: + delete_resp = await cal_client.delete( + f"/calendars/{HARVEST_CALENDAR_ID}/events/{tmp_event_id}" + ) + assert delete_resp.status_code in (200, 204) + + logger.info( + f" FLOW total={t_total.elapsed_ms:.0f}ms " + f"(search={t1.elapsed_ms:.0f}ms + patch={t2.elapsed_ms:.0f}ms " + f"+ delete={t3.elapsed_ms:.0f}ms)" + ) + assert t_total.elapsed_ms < 10000 + + +@pytest.mark.asyncio +async def test_bench_flow_create_cal_acl_events(cal_client: AsyncClient): + """ + Mimics bench test_1 full scenario: Create calendar, grant ACL, create event, + free/busy check, patch event, delete event. A 6-step agent operation. 
+ """ + with _timed( + "FLOW: create_cal + acl + create_event + freebusy + patch + delete" + ) as t_total: + # Step 1: Create calendar + with _timed(" step1: calendars.insert"): + cal_resp = await cal_client.post( + "/calendars", + json={"summary": "Full Flow Calendar"}, + ) + assert cal_resp.status_code == 200 + cal_id = cal_resp.json()["id"] + + # Step 2: Grant ACL + with _timed(" step2: acl.insert"): + acl_resp = await cal_client.post( + f"/calendars/{cal_id}/acl", + json={ + "role": "writer", + "scope": {"type": "user", "value": "yuki@test.com"}, + }, + ) + assert acl_resp.status_code == 200 + + # Step 3: Create event + with _timed(" step3: events.insert"): + event_resp = await cal_client.post( + f"/calendars/{cal_id}/events", + json={ + "summary": "Watch Party", + "start": { + "dateTime": "2018-06-24T00:00:00-07:00", + "timeZone": "America/Los_Angeles", + }, + "end": { + "dateTime": "2018-06-24T03:00:00-07:00", + "timeZone": "America/Los_Angeles", + }, + }, + ) + assert event_resp.status_code == 200 + event_id = event_resp.json()["id"] + + # Step 4: FreeBusy check + with _timed(" step4: freeBusy.query"): + fb_resp = await cal_client.post( + "/freeBusy", + json={ + "timeMin": "2018-06-23T00:00:00Z", + "timeMax": "2018-06-25T00:00:00Z", + "items": [{"id": "oleksandra@test.com"}], + }, + ) + assert fb_resp.status_code == 200 + + # Step 5: Patch event + with _timed(" step5: events.patch"): + patch_resp = await cal_client.patch( + f"/calendars/{cal_id}/events/{event_id}", + json={"location": "Hillcrest Observatory Field"}, + ) + assert patch_resp.status_code == 200 + + # Step 6: Delete the event we just created + with _timed(" step6: events.delete"): + del_resp = await cal_client.delete(f"/calendars/{cal_id}/events/{event_id}") + assert del_resp.status_code in (200, 204) + + logger.info(f" FLOW total={t_total.elapsed_ms:.0f}ms") + assert t_total.elapsed_ms < 20000 + + +# --------------------------------------------------------------------------- +# Test: Repeated event listing (simulates an agent retrying/iterating) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_repeated_event_listings(cal_client: AsyncClient): + """ + Run 10 sequential event list requests to measure consistency. + """ + search_terms = [ + "Meeting", + "Party", + "Review", + "Ritual", + "Workshop", + "Yoga", + "Lunch", + "Shopping", + "Concert", + "Festival", + ] + + times = [] + for term in search_terms: + with _timed(f" events search '{term}'") as t: + resp = await cal_client.get( + f"/calendars/{PRIMARY_CALENDAR_ID}/events", + params={"q": term, "timeMin": "2017-01-01T00:00:00Z"}, + ) + assert resp.status_code == 200 + times.append(t.elapsed_ms) + + avg_ms = sum(times) / len(times) + p50 = sorted(times)[len(times) // 2] + max_ms = max(times) + + logger.info( + f" 10 event searches: avg={avg_ms:.0f}ms p50={p50:.0f}ms max={max_ms:.0f}ms" + ) + assert max_ms < 10000, f"Worst event search took {max_ms:.0f}ms (>10s)" + + +# --------------------------------------------------------------------------- +# Tests: Parallel requests (4 concurrent) — simulates real agent bench load +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_parallel_4_event_listings(cal_client: AsyncClient): + """ + Fire 4 event list requests in parallel and measure wall-clock time. 
+ """ + calendars = [ + PRIMARY_CALENDAR_ID, + HARVEST_CALENDAR_ID, + DUNGEON_MASTERS_CALENDAR_ID, + TIMELINE_ALPHA_CALENDAR_ID, + ] + + async def do_list(cal_id: str) -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await cal_client.get( + f"/calendars/{cal_id}/events", + params={"timeMin": "2018-01-01T00:00:00Z"}, + ) + elapsed = (time.perf_counter() - t0) * 1000 + return cal_id, elapsed, resp.status_code + + with _timed("PARALLEL: 4 event listings") as wall: + results = await asyncio.gather(*(do_list(c) for c in calendars)) + + for cal_id, ms, code in results: + logger.info(f" parallel events.list '{cal_id}': {ms:.0f}ms status={code}") + assert code == 200 + + max_individual = max(ms for _, ms, _ in results) + logger.info( + f" wall_clock={wall.elapsed_ms:.0f}ms max_individual={max_individual:.0f}ms" + ) + assert wall.elapsed_ms < 10000, ( + f"4 parallel event listings took {wall.elapsed_ms:.0f}ms wall-clock (>10s)" + ) + + +@pytest.mark.asyncio +async def test_parallel_4_mixed_operations(cal_client: AsyncClient): + """ + Fire 4 different operation types in parallel — calendarList, events.list, + freeBusy, colors — simulating a realistic agent burst. + """ + + async def list_calendars() -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await cal_client.get("/users/me/calendarList") + return "calendarList", (time.perf_counter() - t0) * 1000, resp.status_code + + async def list_events() -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await cal_client.get( + f"/calendars/{PRIMARY_CALENDAR_ID}/events", + params={"timeMin": "2018-06-01T00:00:00Z"}, + ) + return "events.list", (time.perf_counter() - t0) * 1000, resp.status_code + + async def freebusy() -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await cal_client.post( + "/freeBusy", + json={ + "timeMin": "2018-06-23T00:00:00Z", + "timeMax": "2018-06-24T00:00:00Z", + "items": [{"id": "kenji@test.com"}], + }, + ) + return "freeBusy", (time.perf_counter() - t0) * 1000, resp.status_code + + async def colors() -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await cal_client.get("/colors") + return "colors", (time.perf_counter() - t0) * 1000, resp.status_code + + with _timed("PARALLEL: 4 mixed ops") as wall: + results = await asyncio.gather( + list_calendars(), list_events(), freebusy(), colors() + ) + + for op, ms, code in results: + logger.info(f" parallel {op}: {ms:.0f}ms status={code}") + assert code == 200 + + max_individual = max(ms for _, ms, _ in results) + logger.info( + f" wall_clock={wall.elapsed_ms:.0f}ms max_individual={max_individual:.0f}ms" + ) + assert wall.elapsed_ms < 10000, ( + f"4 parallel mixed ops took {wall.elapsed_ms:.0f}ms wall-clock (>10s)" + ) + + +@pytest.mark.asyncio +async def test_parallel_4_writes(cal_client: AsyncClient): + """ + Fire 4 write operations in parallel — create events simultaneously. + Tests DB write contention under concurrent load. 
+ """ + + async def create_event(name: str) -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await cal_client.post( + f"/calendars/{PRIMARY_CALENDAR_ID}/events", + json={ + "summary": name, + "start": { + "dateTime": "2018-07-01T10:00:00-07:00", + "timeZone": "America/Los_Angeles", + }, + "end": { + "dateTime": "2018-07-01T11:00:00-07:00", + "timeZone": "America/Los_Angeles", + }, + }, + ) + return name, (time.perf_counter() - t0) * 1000, resp.status_code + + names = ["Parallel_A", "Parallel_B", "Parallel_C", "Parallel_D"] + + with _timed("PARALLEL: 4 event creates") as wall: + results = await asyncio.gather(*(create_event(n) for n in names)) + + for name, ms, code in results: + logger.info(f" parallel POST events '{name}': {ms:.0f}ms status={code}") + assert code == 200 + + max_individual = max(ms for _, ms, _ in results) + logger.info( + f" wall_clock={wall.elapsed_ms:.0f}ms max_individual={max_individual:.0f}ms" + ) + assert wall.elapsed_ms < 10000, ( + f"4 parallel event creates took {wall.elapsed_ms:.0f}ms wall-clock (>10s)" + ) + + +@pytest.mark.asyncio +async def test_parallel_4_freebusy_queries(cal_client: AsyncClient): + """ + Fire 4 free/busy queries in parallel with different calendars. + FreeBusy is one of the most common calendar bench operations. + """ + + async def do_freebusy(email: str) -> tuple[str, float, int]: + t0 = time.perf_counter() + resp = await cal_client.post( + "/freeBusy", + json={ + "timeMin": "2018-06-18T00:00:00Z", + "timeMax": "2018-06-25T00:00:00Z", + "items": [{"id": email}], + }, + ) + elapsed = (time.perf_counter() - t0) * 1000 + return email, elapsed, resp.status_code + + emails = [ + "oleksandra@test.com", + "kenji@test.com", + "amara@test.com", + "takeshi@test.com", + ] + + with _timed("PARALLEL: 4 freeBusy queries") as wall: + results = await asyncio.gather(*(do_freebusy(e) for e in emails)) + + for email, ms, code in results: + logger.info(f" parallel freeBusy '{email}': {ms:.0f}ms status={code}") + assert code == 200 + + max_individual = max(ms for _, ms, _ in results) + logger.info( + f" wall_clock={wall.elapsed_ms:.0f}ms max_individual={max_individual:.0f}ms" + ) + assert wall.elapsed_ms < 10000, ( + f"4 parallel freeBusy queries took {wall.elapsed_ms:.0f}ms wall-clock (>10s)" + ) diff --git a/backend/tests/validation/test_box_parity.py b/backend/tests/validation/test_box_parity.py index 6458d07..d049d58 100644 --- a/backend/tests/validation/test_box_parity.py +++ b/backend/tests/validation/test_box_parity.py @@ -243,6 +243,13 @@ def __init__(self, prod_token: str): # Mismatch counter self.mismatch_count = 0 + # Performance tracking: list of (label, prod_ms, replica_ms) + self.perf_records: list[tuple[str, float, float]] = [] + self._last_prod_ms: float = 0.0 + self._last_replica_ms: float = 0.0 + self._last_prod_endpoint: str = "" + self._auto_record_perf: bool = True + def log_mismatch( self, test_name: str, @@ -265,6 +272,61 @@ def log_summary(self): else: logger.warning("Total mismatches: %d", self.mismatch_count) + def print_perf_summary(self): + """Print performance summary for all recorded operations.""" + if not self.perf_records: + return + + print("\n" + "=" * 70) + print("PERFORMANCE SUMMARY") + print("=" * 70) + + prod_times = [r[1] for r in self.perf_records] + replica_times = [r[2] for r in self.perf_records] + + print(f" Total operations timed: {len(self.perf_records)}") + print(f"\n {'':40s} {'Prod':>10s} {'Replica':>10s} {'Delta':>10s}") + print(f" {'-' * 40} {'-' * 10} {'-' * 10} {'-' * 10}") + + # Aggregate 
stats + import statistics + + for label, vals in [("Prod (ms)", prod_times), ("Replica (ms)", replica_times)]: + if vals: + print( + f" {label:40s} min={min(vals):.0f} avg={statistics.mean(vals):.0f} " + f"p50={statistics.median(vals):.0f} p90={sorted(vals)[int(len(vals) * 0.9)]:.0f} " + f"p99={sorted(vals)[min(int(len(vals) * 0.99), len(vals) - 1)]:.0f} " + f"max={max(vals):.0f}" + ) + + # Show slowest 10 operations by replica time + print("\n Top 10 slowest replica operations:") + print(f" {'Operation':50s} {'Prod':>8s} {'Replica':>8s}") + print(f" {'-' * 50} {'-' * 8} {'-' * 8}") + for label, prod_ms, replica_ms in sorted( + self.perf_records, key=lambda x: x[2], reverse=True + )[:10]: + print(f" {label:50s} {prod_ms:7.0f}ms {replica_ms:7.0f}ms") + + # Show operations where replica is significantly slower than prod + slow_ops = [ + (lbl, p, r) for lbl, p, r in self.perf_records if r > p * 2 and r > 100 + ] + if slow_ops: + print("\n Operations where replica > 2x slower than prod (and > 100ms):") + print(f" {'Operation':50s} {'Prod':>8s} {'Replica':>8s} {'Ratio':>6s}") + print(f" {'-' * 50} {'-' * 8} {'-' * 8} {'-' * 6}") + for label, prod_ms, replica_ms in sorted( + slow_ops, key=lambda x: x[2] / max(x[1], 1), reverse=True + ): + ratio = replica_ms / max(prod_ms, 1) + print( + f" {label:50s} {prod_ms:7.0f}ms {replica_ms:7.0f}ms {ratio:5.1f}x" + ) + + print("=" * 70) + # ------------------------------------------------------------------------- # Environment Setup # ------------------------------------------------------------------------- @@ -324,7 +386,8 @@ def api_prod( if files: req_headers.pop("Content-Type", None) - return requests.request( + t0 = time.perf_counter() + resp = requests.request( method, url, json=json, @@ -333,6 +396,9 @@ def api_prod( params=params, headers=req_headers, ) + self._last_prod_ms = (time.perf_counter() - t0) * 1000 + self._last_prod_endpoint = f"{method} /{endpoint.lstrip('/')}" + return resp def api_replica( self, @@ -358,7 +424,8 @@ def api_replica( if files: req_headers.pop("Content-Type", None) - return requests.request( + t0 = time.perf_counter() + resp = requests.request( method, url, json=json, @@ -367,6 +434,26 @@ def api_replica( params=params, headers=req_headers, ) + self._last_replica_ms = (time.perf_counter() - t0) * 1000 + + # Auto-record: every replica call following a prod call forms a pair + if self._auto_record_perf and self._last_prod_endpoint: + self.perf_records.append( + (self._last_prod_endpoint, self._last_prod_ms, self._last_replica_ms) + ) + return resp + + def record_perf(self, label: str): + """Record the last prod/replica call times for the perf summary.""" + prod_ms = getattr(self, "_last_prod_ms", 0.0) + replica_ms = getattr(self, "_last_replica_ms", 0.0) + self.perf_records.append((label, prod_ms, replica_ms)) + + def _perf_tag(self) -> str: + """Return a formatted timing tag from the last prod/replica calls.""" + prod_ms = getattr(self, "_last_prod_ms", 0.0) + replica_ms = getattr(self, "_last_replica_ms", 0.0) + return f"[prod={prod_ms:.0f}ms, replica={replica_ms:.0f}ms]" # ------------------------------------------------------------------------- # File Upload Helpers @@ -496,6 +583,7 @@ def test_operation( prod_call: Callable[[], requests.Response], replica_call: Callable[[], requests.Response], validate_schema: bool = True, + expected_status_code: int | None = None, ) -> bool: """ Test an operation against both APIs. 
@@ -512,9 +600,16 @@ def test_operation( print(f" {name}...", end=" ") try: + # Disable auto-recording; we record manually with the test name + self._auto_record_perf = False prod_resp = prod_call() + prod_ms = self._last_prod_ms replica_resp = replica_call() + replica_ms = self._last_replica_ms + self._auto_record_perf = True + self.perf_records.append((name, prod_ms, replica_ms)) except Exception as e: + self._auto_record_perf = True print(f"EXCEPTION: {e}") self.log_mismatch(name, "exception", {"error": str(e)}) return False @@ -553,7 +648,9 @@ def test_operation( differences = self.compare_shapes(prod_shape, replica_shape, "data") if differences: - print("SCHEMA MISMATCH") + print( + f"SCHEMA MISMATCH [prod={prod_ms:.0f}ms, replica={replica_ms:.0f}ms]" + ) for diff in differences[:3]: print(f" {diff}") if len(differences) > 3: @@ -573,16 +670,18 @@ def test_operation( ) return False - print("PASS") + print(f"PASS [prod={prod_ms:.0f}ms, replica={replica_ms:.0f}ms]") return True elif not prod_ok and not replica_ok: # Both failed - check if error types are similar - print("(both failed)") + print(f"(both failed) [prod={prod_ms:.0f}ms, replica={replica_ms:.0f}ms]") return True else: - print("STATUS MISMATCH") + print( + f"STATUS MISMATCH [prod={prod_ms:.0f}ms, replica={replica_ms:.0f}ms]" + ) print(f" Prod: {prod_resp.status_code}") print(f" Replica: {replica_resp.status_code}") @@ -605,6 +704,7 @@ def test_operation( def setup_test_resources(self): """Create matching test resources in both environments.""" + self._auto_record_perf = False # Don't record setup calls print("\n📦 Setting up test resources...") # Get current user info @@ -665,9 +765,11 @@ def setup_test_resources(self): print(f" ✓ Replica file: {self.replica_file_id}") print() + self._auto_record_perf = True # Re-enable for test operations def cleanup_test_resources(self): """Clean up test resources created during testing.""" + self._auto_record_perf = False # Don't record cleanup calls print("\n🧹 Cleaning up test resources...") # Delete test folders (this also deletes files inside) @@ -3900,6 +4002,9 @@ def run_tests(self): print(f"TOTAL: {total_passed}/{total_tests} tests passed ({pct}%)") print("=" * 70) + # Performance summary + self.print_perf_summary() + # Save mismatch log self.log_summary() diff --git a/backend/utils/seed_box_template.py b/backend/utils/seed_box_template.py index cd51133..b30f2dd 100644 --- a/backend/utils/seed_box_template.py +++ b/backend/utils/seed_box_template.py @@ -62,6 +62,22 @@ def create_tables(conn, schema_name: str): _ = box_schema # Ensure all models are loaded Base.metadata.create_all(conn_with_schema, checkfirst=True) + # Enable pg_trgm for fast ILIKE search and create GIN trigram indexes + conn.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm")) + for tbl, col in [ + ("box_files", "name"), + ("box_files", "description"), + ("box_folders", "name"), + ("box_folders", "description"), + ]: + idx_name = f"ix_{tbl}_{col}_trgm" + conn.execute( + text( + f"CREATE INDEX IF NOT EXISTS {idx_name} " + f"ON {schema_name}.{tbl} USING gin ({col} gin_trgm_ops)" + ) + ) + def _validate_identifier(identifier: str, allowed_set: set[str], label: str) -> str: """Validate that an identifier is in the allowed set to prevent SQL injection.""" @@ -199,6 +215,9 @@ def insert_seed_data(conn, schema_name: str, seed_data: dict) -> SeedStats: # Print summary for file loading stats.print_summary() + # ---- Compute materialized paths for folders and files ---- + _compute_materialized_paths(seed_data) + for 
table_name in TABLE_ORDER: if table_name not in seed_data: continue @@ -229,6 +248,64 @@ def insert_seed_data(conn, schema_name: str, seed_data: dict) -> SeedStats: return stats +def _compute_materialized_paths(seed_data: dict) -> None: + """Compute and set the ``path`` column for box_folders and box_files. + + The path format is a slash-separated list of ancestor folder IDs from + root down to (but not including) the item itself. + + Examples: + Root folder (id=0): path = "/" + Child of root: path = "/0/" + Grandchild (parent=123): path = "/0/123/" + + For files the path represents the ancestor chain of its parent folder, + including the parent folder itself (same as the folder's own path + its id). + """ + folders = seed_data.get("box_folders", []) + if not folders: + return + + # Build lookup: folder_id -> record + folder_by_id: dict[str, dict] = {f["id"]: f for f in folders} + + # Compute path for each folder (memoised) + path_cache: dict[str, str] = {} + + def _folder_path(folder_id: str) -> str: + if folder_id in path_cache: + return path_cache[folder_id] + + rec = folder_by_id.get(folder_id) + if rec is None: + path_cache[folder_id] = "/" + return "/" + + pid = rec.get("parent_id") + if pid is None: + # Root folder + path_cache[folder_id] = "/" + return "/" + + parent_path = _folder_path(pid) + my_path = f"{parent_path}{pid}/" + path_cache[folder_id] = my_path + return my_path + + # Set path on every folder record + for f in folders: + f["path"] = _folder_path(f["id"]) + + # Set path on every file record (path = parent folder's path + parent_id) + for f in seed_data.get("box_files", []): + pid = f.get("parent_id") + if pid: + parent_path = _folder_path(pid) + f["path"] = f"{parent_path}{pid}/" + else: + f["path"] = "/" + + def register_public_template( conn, *, service: str, name: str, location: str, description: str | None = None ): diff --git a/backend/utils/seed_calendar_template.py b/backend/utils/seed_calendar_template.py index 04ac50b..63af358 100644 --- a/backend/utils/seed_calendar_template.py +++ b/backend/utils/seed_calendar_template.py @@ -203,7 +203,9 @@ def main(): # Try backend/seeds/ first (Docker), fall back to repo root (local dev) seeds_dir = Path(__file__).parent.parent / "seeds" / "calendar" if not seeds_dir.exists(): - seeds_dir = Path(__file__).parent.parent.parent / "examples" / "calendar" / "seeds" + seeds_dir = ( + Path(__file__).parent.parent.parent / "examples" / "calendar" / "seeds" + ) # Create empty base template create_template(engine, "calendar_base") @@ -214,7 +216,9 @@ def main(): for seed_file in seed_files: template_name = seed_file.stem create_template(engine, template_name, seed_file) - print(f"\nAll {1 + len(seed_files)} Calendar template(s) created successfully\n") + print( + f"\nAll {1 + len(seed_files)} Calendar template(s) created successfully\n" + ) else: print(f"\nSeeds directory not found: {seeds_dir}") print("Only calendar_base template created.\n") diff --git a/backend/utils/seed_linear_template.py b/backend/utils/seed_linear_template.py index afcd3b7..2dced1a 100644 --- a/backend/utils/seed_linear_template.py +++ b/backend/utils/seed_linear_template.py @@ -245,7 +245,9 @@ def main(): # Try backend/seeds/ first (Docker), fall back to repo root (local dev) seeds_dir = Path(__file__).parent.parent / "seeds" / "linear" if not seeds_dir.exists(): - seeds_dir = Path(__file__).parent.parent.parent / "examples" / "linear" / "seeds" + seeds_dir = ( + Path(__file__).parent.parent.parent / "examples" / "linear" / "seeds" + ) # Create empty base 
template create_template(engine, "linear_base") diff --git a/ops/Makefile b/ops/Makefile index ac3b09f..23ec0fd 100644 --- a/ops/Makefile +++ b/ops/Makefile @@ -22,6 +22,15 @@ test-log: ## Run tests (save to file) docker exec ops-backend-1 sh -c "python -m pytest tests/ -v --tb=long 2>&1 | tee /app/test_results.log" @echo "Results saved to: backend/test_results.log" +test-perf-box: ## Run Box performance tests + docker exec ops-backend-1 python -m pytest tests/performance/test_box_bench_perf.py -v -s + +test-perf-calendar: ## Run Calendar performance tests + docker exec ops-backend-1 python -m pytest tests/performance/test_calendar_bench_perf.py -v -s + +test-perf: ## Run all performance tests + docker exec ops-backend-1 python -m pytest tests/performance/ -v -s + reissue-key: docker exec -w /app ops-backend-1 python utils/reissue_dev_key.py