diff --git a/ai_plans/2026-06-21_backend-metrics-page.md b/ai_plans/2026-06-21_backend-metrics-page.md new file mode 100644 index 000000000..48fad92ac --- /dev/null +++ b/ai_plans/2026-06-21_backend-metrics-page.md @@ -0,0 +1,142 @@ +# Backend web metrics page (tokens / cost / duration / models / modes) + +**Branch:** `feature/web-metrics-page` (stacked off `feature/self-hosted-remote-task-control`) +**Date:** 2026-06-21 + +## Goal + +Add a metrics/analytics page to the self-hosted cloud web view (`self-hosted-cloudapi`) +showing, for the logged-in user, with a period filter: + +- **Tokens** used: input / output / cache-read / cache-write +- **Cost** +- **Session duration** +- **Models** used (dimension) +- **Modes** used (dimension) +- (bonus) **Providers** used + +## Evidence — where the data lives (verified against the live `stork_code` DB) + +The aggregation source is **`telemetry_events`**, NOT `task_messages`. + +- `task_messages` only holds _shared/live_ tasks (22 rows). `api_req_started` JSON + carries tokens/cost but **no model and no mode** (`ClineApiReqInfo` = + `tokensIn/tokensOut/cacheWrites/cacheReads/cost/apiProtocol`). +- `telemetry_events` has 387 rows. The **`LLM Completion`** event + (`TelemetryEventName.LLM_COMPLETION = "LLM Completion"`) carries every dimension: + + ```json + { + "mode": "code", + "apiProvider": "openrouter", + "modelId": "nvidia/nemotron-3-super-120b-a12b:free", + "taskId": "019eeb06-...", + "inputTokens": 27633, + "outputTokens": 1752, + "cacheReadTokens": 0, + "cacheWriteTokens": 0, + "cost": 0 + } + ``` + +- `telemetry_events.user_id` == web-session `user.id` + (both `user_2c8fdf212b024808aa7a1ba1a`) → scope aggregation to + `TelemetryEvent.user_id == user["user_id"]`. `organization_id` is null + (single self-hosted user), so user-scoping is sufficient. +- Properties are stored as **TEXT** (JSON string). 
Tests run on **SQLite** + (no jsonb operators) → aggregate in **Python** after loading rows, mirroring the + existing `_compute_metrics` server-side pattern. Volume is modest. + +## Decisions (confirmed with user) + +- **Session duration** = per-`taskId` span (max−min event ts), summed across tasks; + also surface the task count. +- **Charts**: real charts via a **vendored** library (consistent with + `static/vendor/{marked,purify,socket.io}.min.js` — no CDN). Use **Chart.js** + (single UMD file, no deps): per-day bars (tokens + cost) and doughnuts + (tokens by model / by mode). + +## Changes + +### 1. `src/services/metrics_service.py` (new) + +- `LLM_COMPLETION_EVENT = "LLM Completion"`. +- `PERIODS` map: `today`, `7d`, `30d`, `90d`, `all` → start `datetime` (UTC). + (`today` = start of current UTC day.) +- `async def compute_user_metrics(db, user_id, period) -> dict`: + - Select `TelemetryEvent` where `user_id == user_id`, + `event_type == LLM Completion`, `created_at >= start` (if not `all`), + order by `created_at`. + - Parse `properties` JSON per row; coerce numbers via a local `_num`. + - Accumulate: + - totals: `input/output/cache_read/cache_write` tokens, `cost`, + `completions` (row count). + - `by_model[modelId]`, `by_mode[mode]`, `by_provider[apiProvider]`: + tokens (in+out), cost, count. + - `by_day[YYYY-MM-DD]`: tokens (in+out), cost — for the time series. + - per-`taskId`: first/last ts → duration; sum → `total_duration_ms`, + `task_count`. + - Return a JSON-serializable dict: totals, sorted breakdown lists + (desc by tokens), `by_day` (chronological), `duration_ms`, `task_count`, + `period`, and a `chart` payload (labels + datasets) ready for Chart.js. +- Reuse `_fmt_tokens` / `_fmt_duration` (move them from `web.py` into this + service, or import). Keep formatting helpers shared. + +### 2. `src/routers/web.py` + +- `GET /app/metrics?period=7d`: + - redirect to `/app/login` if no user. + - validate `period` (default `7d`, fall back to `7d` on unknown). 
+ - call `compute_user_metrics`, render `metrics.html` with the dict + + `chart_json = json.dumps(chart_payload)` + the list of period options for the + selector. + +### 3. `src/web/templates/metrics.html` (new, extends `base.html`) + +- Period selector: links `?period=…` styled as a segmented control; active one + highlighted. +- Summary stat cards: total tokens (with in/out/cache breakdown), total cost, + session duration, task count, completion count. +- Three chart canvases: per-day bar (tokens & cost on dual axis) + two doughnuts + (tokens by model, by mode). +- Breakdown tables: by model, by mode, by provider (tokens / cost / count). +- Empty state when no events in the period. +- `{% block scripts %}`: ` +{% if metrics.has_data %} + + +{% endif %} +{% endblock %} diff --git a/self-hosted-cloudapi/src/web/templates/task_detail.html b/self-hosted-cloudapi/src/web/templates/task_detail.html index f33b219c5..7a1f27d18 100644 --- a/self-hosted-cloudapi/src/web/templates/task_detail.html +++ b/self-hosted-cloudapi/src/web/templates/task_detail.html @@ -12,6 +12,7 @@

{{ title }}

{% endif %} + {% if workspace %}
📁 {{ workspace }}
{% endif %} {% if share_url and not live %}
Shared link · read-only
{% endif %} diff --git a/self-hosted-cloudapi/src/web/templates/tasks_list.html b/self-hosted-cloudapi/src/web/templates/tasks_list.html index a161f5106..decc9e73c 100644 --- a/self-hosted-cloudapi/src/web/templates/tasks_list.html +++ b/self-hosted-cloudapi/src/web/templates/tasks_list.html @@ -9,7 +9,10 @@

Your tasks

{{ t.title }} + {% if t.workspace_label %}{{ t.workspace_label }}{% endif %} {{ t.message_count }} message{{ '' if t.message_count == 1 else 's' }} + {% if t.tokens %}{{ t.tokens }} tokens{% endif %} + {% if t.cost %}{{ t.cost }}{% endif %} {% if t.updated_at %}{{ t.updated_at.strftime('%Y-%m-%d %H:%M') }}{% endif %} diff --git a/self-hosted-cloudapi/tests/test_bridge.py b/self-hosted-cloudapi/tests/test_bridge.py index 6be91537c..008f438b2 100644 --- a/self-hosted-cloudapi/tests/test_bridge.py +++ b/self-hosted-cloudapi/tests/test_bridge.py @@ -305,6 +305,70 @@ async def test_task_event_message_relays_and_upserts( assert "hello from the task" in rows[0].message_data +async def test_task_event_stamps_workspace_path_from_registered_instance( + patch_session_factory, db_session, session_factory, stub_emit +): + """The live bridge stamps the registered instance's workspacePath on the task + it creates, so the web view can show which project/worktree it ran in.""" + await _seed_user(db_session, "owner") + + ws = "/home/krzych/Projekty/QUB-IT/Roo-Code" + registry.attach("ext_owner", "extension", "owner") + registry.register_extension("ext_owner", "owner", {"workspacePath": ws}) + + event = { + "taskId": "task-ws", + "type": EVT_MESSAGE, + "message": {"ts": 1, "type": "say", "say": "text", "text": "hi"}, + } + await sio_module.on_task_event("ext_owner", event) + + async with session_factory() as s: + task = (await s.execute(select(Task).where(Task.id == "task-ws"))).scalar_one() + assert task.workspace_path == ws + + +async def test_task_event_backfills_workspace_path_on_legacy_null_task( + patch_session_factory, db_session, session_factory, stub_emit +): + """A task that predates workspace tracking (workspace_path NULL) gets it filled + the first time the bridge reports a path — set once, never overwritten.""" + await _seed_user(db_session, "owner") + db_session.add(Task(id="task-legacy", user_id="owner", workspace_path=None)) + await db_session.commit() + + ws = 
"/home/krzych/Projekty/QUB-IT/Roo-Code" + registry.attach("ext_owner", "extension", "owner") + registry.register_extension("ext_owner", "owner", {"workspacePath": ws}) + + await sio_module.on_task_event( + "ext_owner", + { + "taskId": "task-legacy", + "type": EVT_MESSAGE, + "message": {"ts": 1, "type": "say", "say": "text", "text": "hi"}, + }, + ) + + async with session_factory() as s: + task = (await s.execute(select(Task).where(Task.id == "task-legacy"))).scalar_one() + assert task.workspace_path == ws + + # A later event from a different worktree must NOT move the task. + registry.register_extension("ext_owner", "owner", {"workspacePath": "/some/other/root"}) + await sio_module.on_task_event( + "ext_owner", + { + "taskId": "task-legacy", + "type": EVT_MESSAGE, + "message": {"ts": 2, "type": "say", "say": "text", "text": "more"}, + }, + ) + async with session_factory() as s: + task = (await s.execute(select(Task).where(Task.id == "task-legacy"))).scalar_one() + assert task.workspace_path == ws + + async def test_task_event_message_upsert_is_idempotent_by_ts( patch_session_factory, db_session, session_factory, stub_emit ): diff --git a/self-hosted-cloudapi/tests/test_web_and_share.py b/self-hosted-cloudapi/tests/test_web_and_share.py index d00ac2d69..f5a188b2c 100644 --- a/self-hosted-cloudapi/tests/test_web_and_share.py +++ b/self-hosted-cloudapi/tests/test_web_and_share.py @@ -20,7 +20,10 @@ from src.auth.web_session import get_web_user_optional, WebUser from src.models.user import User from src.models.task import Task, TaskMessage, TaskShare +from src.models.event import TelemetryEvent +from src.realtime.hub import registry from src.services.settings_service import get_extension_settings +from src.services.metrics_service import compute_user_metrics # --- helpers --------------------------------------------------------------- @@ -232,6 +235,50 @@ async def test_backfill_is_idempotent_on_reshare(client, db_session, session_fac assert tasks == 1 +async def 
test_backfill_persists_explicit_workspace_path(client, db_session, session_factory): + """The explicit client `workspacePath` field is stamped on the task, so an + offline share (no live bridge) still records its project/worktree.""" + await _seed_user(db_session) + from src.main import app + + ws = "/home/krzych/Projekty/QUB-IT/Roo-Code-worktree-x" + _override_current_user(app) + files, data = _backfill_files("task-ws-explicit", _msgs()) + data["workspacePath"] = ws + try: + resp = client.post("/api/events/backfill", files=files, data=data) + finally: + app.dependency_overrides.pop(get_current_user, None) + + assert resp.status_code == 200 + async with session_factory() as s: + task = (await s.execute(select(Task).where(Task.id == "task-ws-explicit"))).scalar_one() + assert task.workspace_path == ws + + +async def test_backfill_falls_back_to_registry_workspace_path(client, db_session, session_factory): + """An older client that doesn't send `workspacePath` still gets the project + recorded, sourced from the live registered instance for that user.""" + await _seed_user(db_session) + from src.main import app + + ws = "/home/krzych/Projekty/QUB-IT/Roo-Code" + registry.register_extension("ext_fallback", "user_test", {"workspacePath": ws}) + + _override_current_user(app) + files, data = _backfill_files("task-ws-fallback", _msgs()) # no workspacePath field + try: + resp = client.post("/api/events/backfill", files=files, data=data) + finally: + app.dependency_overrides.pop(get_current_user, None) + registry.detach("ext_fallback") + + assert resp.status_code == 200 + async with session_factory() as s: + task = (await s.execute(select(Task).where(Task.id == "task-ws-fallback"))).scalar_one() + assert task.workspace_path == ws + + # --- Web: /app requires a session ------------------------------------------ @@ -260,6 +307,98 @@ async def test_app_lists_owned_tasks(client, db_session, session_factory): assert "Build me a feature" in resp.text +async def 
test_app_list_and_detail_show_workspace(client, db_session, session_factory): + """The list shows the worktree basename (full path on hover); the detail header + shows the full path.""" + await _seed_user(db_session) + ws = "/home/krzych/Projekty/QUB-IT/Roo-Code-worktree-alpha" + async with session_factory() as s: + s.add(Task(id="task-ws-view", user_id="user_test", workspace_path=ws)) + s.add(TaskMessage(task_id="task-ws-view", message_data=json.dumps(_msgs()[0]))) + await s.commit() + + from src.main import app + + _override_web_user(app) + try: + list_resp = client.get("/app") + detail_resp = client.get("/app/tasks/task-ws-view") + finally: + app.dependency_overrides.pop(get_web_user_optional, None) + + assert list_resp.status_code == 200 + # Basename badge, full path as the hover title. + assert "Roo-Code-worktree-alpha" in list_resp.text + assert f'title="{ws}"' in list_resp.text + + assert detail_resp.status_code == 200 + assert ws in detail_resp.text + + +async def test_app_list_without_workspace_renders_cleanly(client, db_session, session_factory): + """A task with no workspace_path (legacy / bridge-off share) renders without a + project badge and does not error.""" + await _seed_user(db_session) + async with session_factory() as s: + s.add(Task(id="task-no-ws", user_id="user_test", workspace_path=None)) + s.add(TaskMessage(task_id="task-no-ws", message_data=json.dumps(_msgs()[0]))) + await s.commit() + + from src.main import app + + _override_web_user(app) + try: + resp = client.get("/app") + finally: + app.dependency_overrides.pop(get_web_user_optional, None) + + assert resp.status_code == 200 + assert "badge-workspace" not in resp.text + + +async def test_app_list_shows_cost_and_tokens(client, db_session, session_factory): + await _seed_user(db_session) + # Two api_req messages 65s apart so duration spans the whole conversation. 
+ first = {"ts": 1000, "type": "say", "say": "text", "text": "Build me a feature"} + api_req = { + "ts": 66000, + "type": "say", + "say": "api_req_started", + "text": json.dumps( + { + "tokensIn": 96941, + "tokensOut": 3365, + "cacheWrites": 1200, + "cacheReads": 8400, + "cost": 0.1234, + } + ), + } + async with session_factory() as s: + s.add(Task(id="task-metrics", user_id="user_test")) + s.add(TaskMessage(task_id="task-metrics", message_data=json.dumps(first))) + s.add(TaskMessage(task_id="task-metrics", message_data=json.dumps(api_req))) + await s.commit() + + from src.main import app + + _override_web_user(app) + try: + resp = client.get("/app") + finally: + app.dependency_overrides.pop(get_web_user_optional, None) + + assert resp.status_code == 200 + # 96941 + 3365 = 100306 → "100.3k tokens"; cost rendered to 4 dp. + assert "100.3k tokens" in resp.text + assert "$0.1234" in resp.text + # Hover tooltip breakdown: in/out, cache, session duration, cost. + assert "↑ In: 96,941" in resp.text + assert "↓ Out: 3,365" in resp.text + assert "1,200 write / 8,400 read" in resp.text + assert "⏱ Session: 1m 5s" in resp.text + + # --- Web: task detail enforces ownership ----------------------------------- @@ -571,3 +710,167 @@ async def test_shared_nonowner_stays_readonly( body = resp.text assert 'id="live-controls"' not in body assert "/static/live.js" not in body + + +# --- Metrics page ---------------------------------------------------------- + + +def _llm_event( + user_id="user_test", + *, + model="modelX", + mode="code", + provider="openrouter", + task_id="task-a", + tin=1000, + tout=200, + cread=0, + cwrite=0, + cost=0.01, + created_at=None, +): + """Build an ``LLM Completion`` telemetry row mirroring the extension payload.""" + from datetime import datetime, timezone + + props = { + "mode": mode, + "apiProvider": provider, + "modelId": model, + "taskId": task_id, + "inputTokens": tin, + "outputTokens": tout, + "cacheReadTokens": cread, + "cacheWriteTokens": cwrite, 
+ "cost": cost, + } + return TelemetryEvent( + user_id=user_id, + organization_id=None, + event_type="LLM Completion", + properties=json.dumps(props), + created_at=created_at or datetime.now(timezone.utc), + ) + + +async def test_metrics_redirects_to_login_without_session(client): + resp = client.get("/app/metrics", follow_redirects=False) + assert resp.status_code == 303 + assert resp.headers["location"] == "/app/login" + + +async def test_compute_user_metrics_aggregates_dimensions(db_session): + """Totals, breakdowns and per-task duration aggregate from LLM Completion events.""" + from datetime import datetime, timezone, timedelta + + await _seed_user(db_session) + base = datetime(2026, 6, 21, 12, 0, tzinfo=timezone.utc) + db_session.add_all( + [ + _llm_event(model="gpt-a", mode="code", task_id="t1", tin=1000, tout=200, + cwrite=50, cread=10, cost=0.02, created_at=base), + _llm_event(model="gpt-a", mode="code", task_id="t1", tin=500, tout=100, + cost=0.01, created_at=base + timedelta(minutes=5)), + _llm_event(model="llama-b", mode="architect", provider="openai", + task_id="t2", tin=300, tout=50, cost=0.0, created_at=base), + ] + ) + await db_session.commit() + + m = await compute_user_metrics(db_session, "user_test", period="all") + + assert m["totals"]["input"] == 1800 + assert m["totals"]["output"] == 350 + assert m["totals"]["cache_write"] == 50 + assert m["totals"]["cache_read"] == 10 + assert m["totals"]["total_tokens"] == 2150 + assert abs(m["totals"]["cost"] - 0.03) < 1e-9 + assert m["totals"]["completions"] == 3 + + # Two tasks; t1 spans 5 minutes, t2 is a single event (0 span). + assert m["task_count"] == 2 + assert m["duration_ms"] == 5 * 60 * 1000 + + # Models sorted desc by tokens: gpt-a (1800) before llama-b (350). 
+ names = [r["name"] for r in m["by_model"]] + assert names == ["gpt-a", "llama-b"] + assert m["by_model"][0]["count"] == 2 + modes = {r["name"] for r in m["by_mode"]} + assert modes == {"code", "architect"} + providers = {r["name"] for r in m["by_provider"]} + assert providers == {"openrouter", "openai"} + + +async def test_compute_user_metrics_period_filters_old_events(db_session): + from datetime import datetime, timezone, timedelta + + await _seed_user(db_session) + now = datetime.now(timezone.utc) + db_session.add_all( + [ + _llm_event(task_id="recent", tin=100, tout=10, created_at=now), + _llm_event(task_id="old", tin=9999, tout=9999, + created_at=now - timedelta(days=40)), + ] + ) + await db_session.commit() + + m = await compute_user_metrics(db_session, "user_test", period="7d") + assert m["totals"]["completions"] == 1 + assert m["totals"]["input"] == 100 + + +async def test_compute_user_metrics_scopes_to_user(db_session): + await _seed_user(db_session) + await _seed_user(db_session, user_id="other", email="o@example.com") + db_session.add_all( + [ + _llm_event(user_id="user_test", tin=100, tout=10), + _llm_event(user_id="other", tin=5000, tout=5000), + ] + ) + await db_session.commit() + + m = await compute_user_metrics(db_session, "user_test", period="all") + assert m["totals"]["input"] == 100 + assert m["totals"]["completions"] == 1 + + +async def test_metrics_page_renders_dimensions(client, db_session, session_factory): + await _seed_user(db_session) + async with session_factory() as s: + s.add(_llm_event(model="nvidia/nemotron", mode="orchestrator", + provider="openrouter", tin=96941, tout=3365, cost=0.1234)) + await s.commit() + + from src.main import app + + _override_web_user(app) + try: + resp = client.get("/app/metrics?period=all") + finally: + app.dependency_overrides.pop(get_web_user_optional, None) + + assert resp.status_code == 200 + body = resp.text + assert "nvidia/nemotron" in body + assert "orchestrator" in body + assert "$0.1234" in body 
+ # Chart payload + library are wired when there is data. + assert "/static/vendor/chart.umd.min.js" in body + assert 'id="metrics-data"' in body + + +async def test_metrics_page_empty_state(client, db_session): + await _seed_user(db_session) + from src.main import app + + _override_web_user(app) + try: + resp = client.get("/app/metrics") + finally: + app.dependency_overrides.pop(get_web_user_optional, None) + + assert resp.status_code == 200 + assert "No usage recorded" in resp.text + # No chart library loaded when there is nothing to plot. + assert "/static/vendor/chart.umd.min.js" not in resp.text diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 52dbb3e73..af4b0c8b6 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -3355,6 +3355,16 @@ export class ClineProvider return this.currentWorkspacePath || getWorkspacePath() } + /** + * Worktree root sent with backfill uploads so the cloud web view can attribute + * an offline task to its project. Implements TelemetryPropertiesProvider; kept + * out of the per-event telemetry properties to avoid leaking an absolute path + * into every event. + */ + public getTelemetryWorkspacePath(): string | undefined { + return this.cwd || undefined + } + /** * Delegate parent task and open child task. *