From c750bc8b64cee0ec3b0a916e922ba5cd220cc31a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Dre=C5=BCewski?= Date: Mon, 22 Jun 2026 11:09:03 +0200 Subject: [PATCH 1/3] fix(cloudapi): send public Host on Authentik back-channel calls; bundle self-hosted stack Primary fix ----------- OAuth callback returned 502 after Authentik login. Root cause: Authentik routes to its /application/o/* endpoints by HTTP Host header and rejects hosts with an underscore (auth_server is not a valid RFC-1123 hostname) with a 404, so back-channel token exchange to http://auth_server:9000 failed and the API rendered its own 502 page. The api now connects to AUTHENTIK_INTERNAL_URL for networking but presents the front-channel host (host of AUTHENTIK_BASE_URL, e.g. localhost:9000 or auth.tumblecode.dev) as the Host header on token/userinfo/discovery calls. Topology-independent: works for the bundled dev stack and a public app.tumblecode.dev deployment alike. - config/auth.py: get_back_channel_host_header() - src/auth/authentik.py: _back_channel_headers() on the 3 back-channel calls - tests/test_back_channel_host.py: header value + attachment on all calls - README.md / .env.example: Host-override rationale + production block Verified live against the running Authentik: old Host (auth_server:9000) -> 404; new Host (localhost:9000) + real client_secret + fake code -> 400 invalid_grant (request reaches the endpoint, client auth passes). Also includes the in-progress self-hosted stack snapshot ------------------------------------------------------- Bundled Authentik (server/worker/Postgres/Redis) + blueprint, Dockerfile entrypoint with db_bootstrap (FRESH/LEGACY/MANAGED), Makefile, web view tweaks, and the accompanying ai_plans docs. 
--- ...6-21_web-task-summary-strip-env-details.md | 75 +++++++++ ...-22_authentik-group-gate-and-app-rename.md | 57 +++++++ ...22_cloudapi-authentik-back-channel-host.md | 81 ++++++++++ .../2026-06-22_dockerize-cloud-backend.md | 52 ++++++ ...-06-22_fix-fresh-db-bootstrap-crashloop.md | 87 ++++++++++ .../2026-06-22_full-stack-docker-compose.md | 94 +++++++++++ ...name-stork-code-to-tumble-code-selfhost.md | 40 +++++ self-hosted-cloudapi/.dockerignore | 2 + self-hosted-cloudapi/.env.example | 151 ++++++++++++++++++ self-hosted-cloudapi/.gitignore | 2 + self-hosted-cloudapi/Dockerfile | 6 +- self-hosted-cloudapi/Makefile | 65 ++++++++ self-hosted-cloudapi/README.md | 113 ++++++++++--- .../authentik/blueprints/tumble-code.yaml | 89 +++++++++++ self-hosted-cloudapi/config/auth.py | 84 ++++++++-- self-hosted-cloudapi/config/settings.py | 20 ++- self-hosted-cloudapi/docker-compose.yml | 130 +++++++++++++-- self-hosted-cloudapi/docker-entrypoint.sh | 32 ++++ self-hosted-cloudapi/src/auth/authentik.py | 28 +++- self-hosted-cloudapi/src/db_bootstrap.py | 69 ++++++++ self-hosted-cloudapi/src/routers/web.py | 44 ++++- self-hosted-cloudapi/src/web/static/app.css | 39 ++++- self-hosted-cloudapi/src/web/static/render.js | 31 +++- .../tests/test_back_channel_host.py | 101 ++++++++++++ .../tests/test_web_and_share.py | 41 +++++ 25 files changed, 1471 insertions(+), 62 deletions(-) create mode 100644 ai_plans/2026-06-21_web-task-summary-strip-env-details.md create mode 100644 ai_plans/2026-06-22_authentik-group-gate-and-app-rename.md create mode 100644 ai_plans/2026-06-22_cloudapi-authentik-back-channel-host.md create mode 100644 ai_plans/2026-06-22_dockerize-cloud-backend.md create mode 100644 ai_plans/2026-06-22_fix-fresh-db-bootstrap-crashloop.md create mode 100644 ai_plans/2026-06-22_full-stack-docker-compose.md create mode 100644 ai_plans/2026-06-22_rename-stork-code-to-tumble-code-selfhost.md create mode 100644 self-hosted-cloudapi/.env.example create mode 100644 
self-hosted-cloudapi/.gitignore create mode 100644 self-hosted-cloudapi/Makefile create mode 100644 self-hosted-cloudapi/authentik/blueprints/tumble-code.yaml create mode 100644 self-hosted-cloudapi/docker-entrypoint.sh create mode 100644 self-hosted-cloudapi/src/db_bootstrap.py create mode 100644 self-hosted-cloudapi/tests/test_back_channel_host.py diff --git a/ai_plans/2026-06-21_web-task-summary-strip-env-details.md b/ai_plans/2026-06-21_web-task-summary-strip-env-details.md new file mode 100644 index 0000000000..9810af2bd8 --- /dev/null +++ b/ai_plans/2026-06-21_web-task-summary-strip-env-details.md @@ -0,0 +1,75 @@ +# Web task summary: show strictly the user query, fold the environment block + +Date: 2026-06-21 +Area: self-hosted-cloudapi web view (task list + task detail) + +## Problem + +The task summary/title in the cloud web view shows machine-generated framing the +user never typed β€” e.g. "Current Mode / code". Root cause, proven from real data: + +The first user turn that reaches the cloud arrives in Roo Code's **API-prompt +form**, not the clean UI text. `api_conversation_history.json` first user message: + +``` + +uruchom wszystkie testy w langgrapha + +# VSCode Visible Files +... +# Current Mode +code +πŸ’» Code +unsloth/GLM-5.2-GGUF:UD-Q3_K_XL +... + +``` + +`_derive_title()` ([routers/web.py](../self-hosted-cloudapi/src/routers/web.py)) +takes the first text-bearing message verbatim, so the `` wrapper and +the `` block (mode, open tabs, file tree, cost…) bleed into +the title. The same raw text renders in the conversation body with no way to +separate the human query from the machine appendix. + +## Fix + +Treat the wrapped form as what it is: human query + machine appendix. + +### Backend β€” `_derive_title` (routers/web.py) + +Add `_strip_task_wrappers(text)`: + +1. Remove `…` (also the trailing, + unclosed case). +2. Unwrap the human message tag β€” `` / `` / `` β€” to + its inner content. +3. 
Plain text (already clean) passes through unchanged. + +`_derive_title` runs each candidate message through it before taking the first +non-empty line. Covers both the task list and the detail-page `

`. + +### Frontend — render.js conversation body + +Add `userContentHtml(text)`: split off the `<environment_details>` block, unwrap +the message tag, render the clean query as markdown, and append the environment +block as a **collapsed `<details>
`** ("Environment details") so the full original +is one click away β€” satisfying "unfold to full length". Applied to the text / +user_feedback / user_feedback_diff rows. No tags present β†’ identical to today. + +### CSS β€” app.css + +Minimal styling for `details.env-details` (muted summary, monospace body). + +## Tests + +`tests/test_web_and_share.py`: a backfill whose first message is the wrapped +API-form turn β€” assert the rendered list/detail title is the bare query +("uruchom wszystkie testy w langgrapha"), with no `environment_details` / `Current +Mode` / `` leakage. + +## Out of scope + +- Message role classification (the initial task currently renders under the + "Assistant" label) β€” separate concern, not touched here. +- Title length cap stays at 100 chars; the full prompt is now visible in the + conversation body. diff --git a/ai_plans/2026-06-22_authentik-group-gate-and-app-rename.md b/ai_plans/2026-06-22_authentik-group-gate-and-app-rename.md new file mode 100644 index 0000000000..90d7e362c1 --- /dev/null +++ b/ai_plans/2026-06-22_authentik-group-gate-and-app-rename.md @@ -0,0 +1,57 @@ +# Authentik: gate Tumble Code by a group + rename the application + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/authentik/blueprints/` + +## Goal + +Two changes to the auto-provisioned Authentik blueprint, applied cleanly on a +fresh `docker compose up` (user will drop all `./.vol/*` first): + +1. Provision a **group** so access to Tumble Code is controlled by group + membership β€” add a user to the group β†’ they can sign in to Tumble Code. +2. **Rename** the application's display name from `Stork Code` β†’ `Tumble Code`. + +## Background (verified against Authentik docs) + +- Application access in Authentik is governed by **policy bindings** on the + application. A binding whose `group` field is set is a plain _group-membership_ + check β€” no separate policy object needed. 
+- **Default behaviour:** an application with _no_ bindings is open to everyone. + The moment one group binding is added, access is restricted to that group. +- **Superusers are not exempt** from application access bindings (superuser grants + _admin_ access, not _application_ access). The bootstrap `akadmin` account β€” the + one used to sign in during the extension OAuth flow β€” must therefore be a member + of the group, or it gets locked out of its own app. The blueprint adds `akadmin` + to the group on creation to prevent this. + +Source: Authentik blueprint Models + Bindings overview docs. + +## Changes (single file: `stork-code.yaml`) + +Internal IDs stay (`slug: stork-code`, `client_id`, provider name) β€” these are +referenced by the api's `AUTHENTIK_APP_SLUG` / `AUTHENTIK_CLIENT_ID` and must not +change. Only the public display string changes, per the rebrand principle. + +1. **New group entry** (`authentik_core.group`), id `tumble-code-group`, + name `Tumble Code Users`, with `akadmin` added as a member via + `!Find [authentik_core.user, [username, akadmin]]`. +2. **Application name** `Stork Code` β†’ `Tumble Code`. +3. **New policy binding** (`authentik_policies.policybinding`) targeting the + application (`!KeyOf stork-code-application`) with `group` + (`!KeyOf tumble-code-group`), `order: 0`, `enabled: true` β€” this is what turns + on the group gate. + +## How to use after `docker compose up` + +- Sign in to Authentik admin as `akadmin` (already in the group β†’ can use Tumble + Code immediately). +- To grant another person access: Directory β†’ Groups β†’ **Tumble Code Users** β†’ + add the user. No blueprint edit needed. + +## Not changed / why + +- Slug, client id/secret, provider name, `AUTHENTIK_APP_SLUG` β€” internal IDs the + api builds endpoints from; renaming them would be a wider, riskier change and + isn't what was asked. +- Blueprint filename kept as `stork-code.yaml` (internal). 
diff --git a/ai_plans/2026-06-22_cloudapi-authentik-back-channel-host.md b/ai_plans/2026-06-22_cloudapi-authentik-back-channel-host.md new file mode 100644 index 0000000000..561f4f76db --- /dev/null +++ b/ai_plans/2026-06-22_cloudapi-authentik-back-channel-host.md @@ -0,0 +1,81 @@ +# Fix Authentik back-channel 502 on OAuth callback (public-address ready) + +Branch: `fix/cloudapi-authentik-back-channel-host` + +## Symptom + +After logging in to the bundled Authentik, the browser lands on +`GET http://localhost:8085/auth/clerk/callback?code=...&state=...` with +**502 Bad Gateway**. + +## Root cause (proven, not inferred) + +The "502" is **not** a reverse-proxy error — it is the cloud API's own error +page, returned at [browser.py:267](../self-hosted-cloudapi/src/routers/browser.py#L267) +when the back-channel token exchange to Authentik throws. + +Evidence chain, gathered against the running stack: + +1. `api` container log: + `Token exchange failed: Client error '404 Not Found' for url 'http://auth_server:9000/application/o/token/'` +2. Every Authentik `/application/o/*` route 404s on the back-channel, while + `/-/health/live/` returns 200 — so the container is reachable, the routes are not. +3. The only variable is the HTTP `Host` header. Probing the same URL: + - `Host: auth_server:9000` → **404** + - `Host: localhost` / `localhost:9000` / `auth.tumblecode.dev` / `evil.example.com` → **200** +4. Narrowed to the underscore: `under_score.example.com` → 404, `auth-server:9000` → 200. + +**Authentik (Django) resolves the brand — and therefore serves its OAuth/OIDC +routes — from the `Host` header, and rejects hosts containing an underscore +(`auth_server` is not a valid RFC-1123 hostname) with a 404.** The compose +service is named `auth_server`, so the back-channel URL `http://auth_server:9000` +makes httpx send `Host: auth_server:9000` → 404 → token exchange fails → 502 page. 
+The browser flow works only because the front-channel host (`localhost:9000`) is valid. + +The discovery doc's `issuer` merely echoes the request Host, and the token's real +`iss` is fixed at front-channel authorize time, so the topology-independent fix is +to make the back-channel present the **public front-channel host** as `Host`. + +## Fix + +Connect to the internal service name (for DNS) but send the front-channel host +(host of `AUTHENTIK_BASE_URL`) as `Host` on every server-to-server call. Works +identically for dev (`localhost:9000`) and prod (`auth.tumblecode.dev`). + +- [config/auth.py](../self-hosted-cloudapi/config/auth.py): add + `get_back_channel_host_header()` → returns `urlsplit(authentik_base_url).netloc` + when `authentik_internal_url` is set, else `None`. +- [src/auth/authentik.py](../self-hosted-cloudapi/src/auth/authentik.py): add + `_back_channel_headers()` and apply it to `exchange_code_for_tokens`, + `get_userinfo`, `get_openid_configuration`. +- [.env.example](../self-hosted-cloudapi/.env.example): document the Host behaviour + and a full `app.tumblecode.dev` production block. +- [tests/test_back_channel_host.py](../self-hosted-cloudapi/tests/test_back_channel_host.py): + lock in the header value and that it is attached to all three calls. + +No compose change needed: the Host override neutralises the underscore, so +`AUTHENTIK_INTERNAL_URL=http://auth_server:9000` stays valid. 
+ +## Production (app.tumblecode.dev) + +``` +API_BASE_URL=https://app.tumblecode.dev +AUTHENTIK_BASE_URL=https://auth.tumblecode.dev # front-channel host β†’ sent as Host +AUTHENTIK_INTERNAL_URL=http://auth_server:9000 # back-channel (in-cluster) +AUTHENTIK_REDIRECT_URI=https://app.tumblecode.dev/auth/clerk/callback +CORS_ORIGINS=https://app.tumblecode.dev +AUTHENTIK_CLIENT_SECRET= # provider is confidential +``` + +The provider `client_type` is `confidential`, so a matching `client_secret` is +mandatory in production (the bundled stack already shares one via env). The api +will send `Host: auth.tumblecode.dev` on back-channel calls. + +## Verification + +- Unit: `pytest tests/test_back_channel_host.py` + auth suites β†’ 22 passed. +- Live, against the running Authentik (simulating the patched code path): + - old (`Host: auth_server:9000`) β†’ **404** + - new (`Host: localhost:9000`) + real client_secret + fake code β†’ **400 `invalid_grant`** + β€” i.e. the request now reaches the token endpoint, client auth passes, only the + fake code is rejected. A real authorization code will succeed. diff --git a/ai_plans/2026-06-22_dockerize-cloud-backend.md b/ai_plans/2026-06-22_dockerize-cloud-backend.md new file mode 100644 index 0000000000..f54dc86886 --- /dev/null +++ b/ai_plans/2026-06-22_dockerize-cloud-backend.md @@ -0,0 +1,52 @@ +# Dockerize the self-hosted cloud backend + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/` + +## Goal + +Be able to run the self-hosted cloud API in a container. + +## Finding + +A `Dockerfile`, `.dockerignore`, and `docker-compose.yml` already existed and were +committed, but **the image did not build**. Proven by `docker build`: + +``` +OSError: Readme file does not exist: README.md +ERROR: process "/bin/sh -c uv sync --frozen --no-dev" did not complete successfully +``` + +### Root cause + +- `pyproject.toml` declares `readme = "README.md"` under `[project]`. 
+- The final `RUN uv sync --frozen --no-dev` installs the project itself, so hatchling + reads project metadata and requires `README.md` to be present. +- `.dockerignore` excluded `*.md` (and `README.md`), so the file was not in the build + context β†’ metadata validation fails. +- The earlier `uv sync ... --no-install-project` passes because it runs before + `COPY . .` and does not build the project, so it never touches the README. + +## Fix + +One line in `.dockerignore`: keep `README.md` in the build context while still +ignoring other markdown. + +``` +*.md +!README.md +``` + +## Verification + +1. `docker build -t roo-cloud-api:test .` β€” succeeds (was failing before). +2. `docker run ... uv run uvicorn src.main:app` β€” app imports cleanly through + uvicorn and reaches Pydantic settings validation; only stops on missing required + Authentik env vars, which `docker-compose.yml` supplies. Confirms the Python + entrypoint, dependency set, and app module are all sound in the image. + +## Notes / possible follow-ups (not done) + +- Container runs as root; a non-root `USER` could be added for hardening. +- A `HEALTHCHECK` and multi-stage build (smaller runtime image) are reasonable + future improvements but were out of scope for "make it build and run". diff --git a/ai_plans/2026-06-22_fix-fresh-db-bootstrap-crashloop.md b/ai_plans/2026-06-22_fix-fresh-db-bootstrap-crashloop.md new file mode 100644 index 0000000000..a33e75e48f --- /dev/null +++ b/ai_plans/2026-06-22_fix-fresh-db-bootstrap-crashloop.md @@ -0,0 +1,87 @@ +# Fix: api container crash-loop on a fresh database (Docker bring-up) + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/` + +## Symptom + +After `docker compose up -d`, every service is healthy **except `api`**, which is +stuck `Restarting (1)`. The backend is unreachable on `:8085`. 
Logs show: + +``` +sqlalchemy.exc.ProgrammingError: (...asyncpg...UndefinedTableError): +relation "authentik_state_store" does not exist +[SQL: ALTER TABLE authentik_state_store ALTER COLUMN created_at TYPE TIMESTAMP WITH TIME ZONE] +``` + +## Root cause (proven, not assumed) + +The Dockerfile `CMD` runs `alembic upgrade head` **before** the app starts. +On a fresh `./.vol/postgres` volume that migration chain cannot build a schema: + +- `a1b2c3d4e5f6_baseline.py` β€” `upgrade()` is `pass`. Creates **no tables**. Its + own docstring says it represents a pre-existing `create_all`'d DB you are meant + to `alembic stamp`. +- `b2c3d4e5f6a7_datetime_timezone.py` β€” immediately `ALTER`s `authentik_state_store` + (and `users`, `sessions`, …), tables that were never created β†’ **crash**. +- `c3d4…`, `d4e5…`, `e5f6…` β€” all evolution-only (`add_column`, `create_index`). + +The only thing that _creates_ tables is `Base.metadata.create_all` β€” in the app +lifespan ([src/main.py:30](../self-hosted-cloudapi/src/main.py#L30)), with the +ORM models as the single source of truth. But the app never starts, because +alembic crashes first in the `&&` chain. + +So: **alembic-first ordering + a no-op baseline = a fresh DB can never bootstrap.** + +Note a tempting non-fix: making the baseline `create_all`. That breaks too β€” +`create_all` produces the **head** schema, so the later `add_column` migrations +(`task_message_ts`, `task.workspace_path`) would then fail with _column already +exists_. The migrations are evolution steps for a _pre-head_ schema; they must not +be replayed against a freshly created head schema. 
+ +## Fix + +Replace the blind `alembic upgrade head` with a small startup reconciler that +matches the project's actual design (models = source of truth; migrations = how +_existing_ deployments evolve): + +- **Fresh DB** (no `users` table): `Base.metadata.create_all` builds the current + schema, then `alembic stamp head` records every migration as already applied + (without running the evolution steps). +- **Legacy DB** (tables exist, no `alembic_version`): follow the baseline's + documented path β€” `alembic stamp a1b2c3d4e5f6` then `alembic upgrade head` β€” + so an old pre-tz schema gets evolved. +- **Managed DB** (`alembic_version` present): `alembic upgrade head` as normal. + +### Files + +- `src/db_bootstrap.py` (new) β€” async probe of the live DB; prints + `FRESH` / `LEGACY` / `MANAGED` and runs `create_all` in the `FRESH` case. Uses + the same engine/models as the app, so there is one schema source of truth. +- `docker-entrypoint.sh` (new) β€” runs the probe, dispatches the correct alembic + command per state, then `exec`s uvicorn. +- `Dockerfile` β€” `CMD` now runs `docker-entrypoint.sh` (copied + `chmod +x`). + +The app lifespan keeps its own idempotent `create_all` (harmless no-op once the +entrypoint has built the schema), so running the app outside Docker is unchanged. + +## Verification + +1. `docker compose down` + remove `./.vol/postgres` β†’ truly fresh DB. +2. `docker compose up -d` β†’ `api` reaches healthy/running, not restarting. +3. `docker compose logs api` shows `DB state: FRESH`, the stamp, and + `Application startup complete` β€” no `UndefinedTableError`. +4. `curl -fsS localhost:${PORT:-8085}/health` (or `/`) returns 200. +5. `docker compose exec api uv run alembic current` shows head + (`e5f6a7b8c9d0`), proving alembic and the schema agree. +6. Restart `api` β†’ `DB state: MANAGED`, `upgrade head` no-op, still healthy + (idempotency). +7. `uv run pytest` stays green (entrypoint is Docker-only; no app code path + changed). 
+ +## Risks / follow-ups + +- The `LEGACY` branch assumes a pre-tz schema (the baseline's documented + assumption). A legacy DB that was `create_all`'d at _head_ and never stamped + would fail `upgrade` on the `add_column` steps β€” but that is the pre-existing + documented contract, not introduced here, and not the Docker path. diff --git a/ai_plans/2026-06-22_full-stack-docker-compose.md b/ai_plans/2026-06-22_full-stack-docker-compose.md new file mode 100644 index 0000000000..8506909596 --- /dev/null +++ b/ai_plans/2026-06-22_full-stack-docker-compose.md @@ -0,0 +1,94 @@ +# Full self-hosted stack in one docker-compose (API + Authentik) + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/` + +## Goal + +Make the whole self-hosted cloud backend runnable with a single +`docker compose up` β€” including Authentik and its database β€” instead of running +Authentik separately (it lived in `/opt/docker/llm/docker-compose.yaml`). + +## What was done + +### 1. Merged the Authentik stack into the cloudapi compose + +`docker-compose.yml` now defines `api`, `postgres` (API DB), and the bundled +Authentik: `auth_db`, `auth_redis`, `auth_server`, `auth_worker` β€” adapted from +the proven `/opt/docker/llm/docker-compose.yaml`. Changes vs. the source: + +- Bind mounts under a local `./.vol/` folder (`postgres`, `auth/postgres`, + `auth/redis`, `auth/data`, `auth/templates`, `auth/certs`) β€” mirroring the + proven `/opt/docker/llm` layout. `.vol/` is git- and docker-ignored. + Authentik mount paths kept as the known-good `/data`, `/templates`, `/certs`. +- Blueprint bind-mount `./authentik/blueprints:/blueprints/custom:ro` on server + - worker; the worker auto-applies it. +- `api.depends_on` waits for `postgres` healthy **and** `auth_server` healthy + (added Authentik's `ak healthcheck`). +- Dropped the host publish of the API `postgres` (was `5432:5432`) β€” nothing on + the host needs it and `5432` collides with the local voicebot-database. 
It + stays reachable in-network as `postgres:5432`. `auth_db` keeps `5544:5432`. +- Shared Authentik env via a YAML anchor (`&authentik_env` / `*authentik_env`). + +### 2. Fixed the OAuth split-horizon (root cause) + +`authentik_base_url` was used for both browser redirects and server-side httpx +calls. In one compose those need different hostnames: + +- browser β†’ `http://localhost:9000` +- api container β†’ `http://auth_server:9000` (its own localhost is not Authentik) + +**Proof it's safe:** the API mints its own `iss="rcc"` JWTs +([src/auth/static_token.py:21](../self-hosted-cloudapi/src/auth/static_token.py#L21)) +and never validates Authentik's issuer against a fixed host, so a split hostname +does not break token validation. + +**Fix (backward compatible):** + +- `config/settings.py`: new optional `authentik_internal_url`. +- `config/auth.py`: `_front_channel_base()` (authorize, end-session, issuer) uses + `authentik_base_url`; `_back_channel_base()` (token, userinfo, jwks, discovery) + uses `authentik_internal_url or authentik_base_url`. + +When `authentik_internal_url` is unset (every pre-existing deployment), behaviour +is identical to before. + +### 3. Auto-provision the OAuth2 provider/app via blueprint + +`authentik/blueprints/stork-code.yaml` creates the `stork-code` OAuth2 provider +(confidential, `client_id`/`client_secret`/redirect URI read from env via `!Env`, +scopes openid/email/profile via `!Find`, default authorization/invalidation flows +and self-signed signing key via `!Find`) and the bound application with +`slug: stork-code`. The api and the blueprint read the **same** +`AUTHENTIK_CLIENT_ID/SECRET`, so they stay in sync from one source of truth. No +manual Authentik clicking. + +Schema authored against the pinned `AUTHENTIK_TAG=2026.2.2`; it is the +version-sensitive piece (redirect_uris + property_mappings format) and is flagged +as such in the README and the blueprint header. + +### 4. 
`.env.example` + `README.md` + +Added the Authentik-stack knobs (`AUTH_PG_PASS`, `AUTHENTIK_SECRET_KEY`, +`PG_DB/PG_USER`, `AUTHENTIK_TAG`, `COMPOSE_PORT_HTTP/HTTPS`, bootstrap admin) and +`AUTHENTIK_INTERNAL_URL`, with generation hints. README rewritten to the +one-command flow + a service/port table + front/back-channel and blueprint +troubleshooting notes. + +## Verification + +- Config getters: with `AUTHENTIK_INTERNAL_URL` set, `get_authentik_token_url()` + uses `auth_server` while `get_authentik_authorize_url()` uses `localhost`; unset + β†’ both fall back to base. (see Verification run below) +- `uv run pytest` β€” existing suite stays green. +- `docker compose config` parses; `docker compose up -d` β†’ all services healthy, + `docker compose logs auth_worker` shows the blueprint applied; api back-channel + reaches `http://auth_server:9000/.../.well-known/openid-configuration`; + end-to-end sign-in works (browser β†’ localhost:9000 β†’ callback β†’ session). + +## Risks / follow-ups + +- Blueprint schema may need a tweak for a different `AUTHENTIK_TAG`; the worker + log / _System β†’ Blueprints_ surfaces it immediately. +- TLS / production domains handled by the existing `API_BASE_URL` / + `AUTHENTIK_BASE_URL` knobs; the split-URL change makes the domain case work too. diff --git a/ai_plans/2026-06-22_rename-stork-code-to-tumble-code-selfhost.md b/ai_plans/2026-06-22_rename-stork-code-to-tumble-code-selfhost.md new file mode 100644 index 0000000000..9824c5c43d --- /dev/null +++ b/ai_plans/2026-06-22_rename-stork-code-to-tumble-code-selfhost.md @@ -0,0 +1,40 @@ +# Rename `stork-code` β†’ `tumble-code` in self-hosted cloud + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/` only + +## Goal + +Align the self-hosted cloud stack's Authentik app identity with the +Roo Code β†’ Tumble Code rebrand. Rename the public-facing Authentik slug / +client id / application / blueprint name from `stork-code` to `tumble-code`. 
+ +## What changes + +Replace the **hyphenated** string `stork-code` β†’ `tumble-code` everywhere: + +- `authentik/blueprints/stork-code.yaml` β†’ renamed to `tumble-code.yaml`; + internal ids (`stork-code-provider`, `stork-code-application`), names, slugs, + `client_id` default, and the `!KeyOf` references all become `tumble-code*`. +- `.env`, `.env.example`, `.env.backup` β€” `AUTHENTIK_APP_SLUG`, + `AUTHENTIK_CLIENT_ID`, and the blueprint-path comment. +- `docker-compose.yml` β€” `AUTHENTIK_APP_SLUG` / `AUTHENTIK_CLIENT_ID` defaults. +- `README.md` β€” blueprint filename references. +- `config/settings.py` β€” `authentik_app_slug` Field default. + +## What does NOT change + +- `.env.backup:13` `DATABASE_URL=...@localhost:5544/stork_code` β€” the **DB name** + (underscore) points at the real existing Postgres database on the host. + Renaming the string without renaming the DB would break the connection, so + it's left as-is. Not a "stork-code" app mention. +- The blueprint is bind-mounted by directory (`./authentik/blueprints:/blueprints/custom`), + so renaming the file does not affect compose wiring. + +## Note for operators + +After this change, the Authentik OAuth2 provider/application slug becomes +`tumble-code`. The extension's OAuth client config (`AUTHENTIK_CLIENT_ID`) and +any existing Authentik state must use the new slug; a fresh blueprint apply +creates the new app. An already-provisioned `stork-code` app in a running +Authentik will need re-provisioning or manual slug update. 
diff --git a/self-hosted-cloudapi/.dockerignore b/self-hosted-cloudapi/.dockerignore index 8781ecf403..234e68d203 100644 --- a/self-hosted-cloudapi/.dockerignore +++ b/self-hosted-cloudapi/.dockerignore @@ -6,5 +6,7 @@ __pycache__ .env .env.example *.md +!README.md tests/ .vscode/ +.vol/ diff --git a/self-hosted-cloudapi/.env.example b/self-hosted-cloudapi/.env.example new file mode 100644 index 0000000000..c9342bc4cc --- /dev/null +++ b/self-hosted-cloudapi/.env.example @@ -0,0 +1,151 @@ +# ============================================================================ +# Self-Hosted Roo Code Cloud API β€” environment template +# Copy to `.env` and fill in real values. Anything left blank uses the default +# from config/settings.py (where one exists). +# ============================================================================ + +# --- Core (required) ------------------------------------------------------- + +# PostgreSQL DSN. For Docker Compose this is set automatically inside the +# `api` container. For local `uv run uvicorn ...` development, point at a +# Postgres instance you control. SQLite (`sqlite+aiosqlite:///./dev.db`) +# also works for quick smoke tests. +DATABASE_URL=postgresql://roo:password@localhost:5432/roo_cloud + +# Random secret used for signing internal artifacts (state tokens, etc.). +# Generate with: `python -c "import secrets; print(secrets.token_urlsafe(48))"` +SECRET_KEY=change-me-to-a-random-secret-key + +# Public URL of THIS API. Used in OAuth redirect construction and logging. +API_BASE_URL=http://localhost:8085 + +# Port the server listens on (default 8085). +PORT=8085 + +# --- JWT signing ----------------------------------------------------------- + +# HS256 (shared secret) is simpler; RS256 (asymmetric) is recommended for +# multi-service deployments. +JWT_ALGORITHM=HS256 + +# HS256: shared secret used for both signing and verification. 
+# Generate with: `python -c "import secrets; print(secrets.token_urlsafe(48))"` +JWT_SECRET=change-me-to-a-random-jwt-secret + +# RS256 only: paths to PEM keys. Leave commented if using HS256. +# JWT_PRIVATE_KEY=/path/to/private.pem +# JWT_PUBLIC_KEY=/path/to/public.pem + +# --- Authentik OAuth (required) ------------------------------------------- + +# Browser-facing (front-channel) URL of Authentik β€” the URL your browser is +# redirected to for login. With the bundled stack this is the published port. +AUTHENTIK_BASE_URL=http://localhost:9000 + +# Internal (back-channel) Authentik URL the API uses for server-to-server calls +# (token/userinfo/jwks/discovery). In docker-compose the api container reaches +# Authentik by service name, NOT via localhost. Leave UNSET for a single-host +# (non-compose) deployment β€” it then falls back to AUTHENTIK_BASE_URL. +# +# NOTE: Authentik routes by HTTP Host header, and the api always presents the +# *front-channel* host (host of AUTHENTIK_BASE_URL) as Host on these calls. That +# is why the underscore in `auth_server` is fine here: the connection uses the +# service name for DNS, but the Host sent is taken from AUTHENTIK_BASE_URL. +AUTHENTIK_INTERNAL_URL=http://auth_server:9000 + +# Authentik Application slug β€” used to build /application/o//* URLs. +# Must match the application slug created by the blueprint. +AUTHENTIK_APP_SLUG=tumble-code + +# OAuth2 client credentials. With the bundled stack these are the source of +# truth: the Authentik blueprint provisions the provider to MATCH these values, +# so pick any strong random secret up front (the api and Authentik share them). +# AUTHENTIK_CLIENT_SECRET: openssl rand -hex 32 +AUTHENTIK_CLIENT_ID=tumble-code +AUTHENTIK_CLIENT_SECRET= + +# Must match the redirect URI registered in Authentik exactly (the blueprint +# registers this value). Default flow expects `{API_BASE_URL}/auth/clerk/callback`. 
+AUTHENTIK_REDIRECT_URI=http://localhost:8085/auth/clerk/callback + +# --- Production (public address) example ----------------------------------- +# For a public deployment where the API is served at https://app.tumblecode.dev +# and Authentik at https://auth.tumblecode.dev, set: +# +# API_BASE_URL=https://app.tumblecode.dev +# AUTHENTIK_BASE_URL=https://auth.tumblecode.dev # front-channel (browser) +# AUTHENTIK_INTERNAL_URL=http://auth_server:9000 # back-channel (in-cluster) +# AUTHENTIK_REDIRECT_URI=https://app.tumblecode.dev/auth/clerk/callback +# CORS_ORIGINS=https://app.tumblecode.dev +# AUTHENTIK_CLIENT_SECRET= # REQUIRED: client is confidential +# +# The api sends `Host: auth.tumblecode.dev` (from AUTHENTIK_BASE_URL) on every +# back-channel call, so Authentik resolves the brand correctly even though the +# connection targets the internal service name. + +# --- CORS ------------------------------------------------------------------ + +# Comma-separated list, JSON array, or `*` for development. +CORS_ORIGINS=* + +# --- LLM proxy ------------------------------------------------------------- + +# Provider used when a model id has no provider/* prefix. +DEFAULT_LLM_PROVIDER=openai + +# Upstream API keys. Only fill in the ones you actually proxy to. +OPENAI_API_KEY= +ANTHROPIC_API_KEY= +GOOGLE_API_KEY= +XAI_API_KEY= + +# --- Marketplace ----------------------------------------------------------- + +# `yaml` reads from MARKETPLACE_YAML_DIR; `database` reads from the DB. +MARKETPLACE_SOURCE=yaml +MARKETPLACE_YAML_DIR=./config/marketplace + +# --- Optional features ---------------------------------------------------- + +# When false, /api/extension/credit-balance returns {balance: 0}. +CREDIT_SYSTEM_ENABLED=false + +# When false, /api/extension/bridge/config returns 404. +BRIDGE_ENABLED=true + +# When false, telemetry endpoints accept-and-ignore. +TELEMETRY_ENABLED=true + +# slowapi-based per-IP rate limiting on all routes. 
+RATE_LIMIT_ENABLED=true +RATE_LIMIT_REQUESTS_PER_MINUTE=60 + +# ============================================================================ +# Bundled Authentik stack (docker compose only) +# These configure the Authentik server/worker + their Postgres/Redis that come +# up alongside the API. Not used for local `uv run` development. +# ============================================================================ + +# Authentik image tag. Keep in sync with the blueprint schema (see +# authentik/blueprints/tumble-code.yaml). +AUTHENTIK_TAG=2026.2.2 + +# Authentik's own Postgres (separate from the API's DB). +PG_DB=authentik +PG_USER=authentik +# REQUIRED — Authentik refuses to start without it. Generate: openssl rand -hex 32 +AUTH_PG_PASS= + +# REQUIRED — Authentik secret key (cookie/session signing). openssl rand -base64 60 +AUTHENTIK_SECRET_KEY= + +# Published Authentik ports (host side). Must line up with AUTHENTIK_BASE_URL. +COMPOSE_PORT_HTTP=9000 +COMPOSE_PORT_HTTPS=9443 + +# First-run admin (akadmin). This is the account you log into the Authentik UI +# with AND sign in as during the extension OAuth flow. Set a password to enable +# non-interactive bootstrap. Generate token: openssl rand -hex 32 +AUTHENTIK_BOOTSTRAP_PASSWORD= +AUTHENTIK_BOOTSTRAP_EMAIL=admin@example.com +AUTHENTIK_BOOTSTRAP_TOKEN= diff --git a/self-hosted-cloudapi/.gitignore b/self-hosted-cloudapi/.gitignore new file mode 100644 index 0000000000..642e9c2020 --- /dev/null +++ b/self-hosted-cloudapi/.gitignore @@ -0,0 +1,2 @@ +# Local bind-mount data for docker-compose (Postgres, Redis, Authentik state) +.vol/ diff --git a/self-hosted-cloudapi/Dockerfile b/self-hosted-cloudapi/Dockerfile index 6a6ad1912a..f5798a3624 100644 --- a/self-hosted-cloudapi/Dockerfile +++ b/self-hosted-cloudapi/Dockerfile @@ -15,6 +15,8 @@ RUN uv sync --frozen --no-dev --no-install-project # Copy application COPY . .
RUN uv sync --frozen --no-dev +RUN chmod +x docker-entrypoint.sh -# Run Alembic migrations on startup, then start the app -CMD ["sh", "-c", "uv run alembic upgrade head && uv run uvicorn src.main:app --host 0.0.0.0 --port ${PORT:-8085}"] +# Reconcile the schema (create_all + stamp on a fresh DB, upgrade on a managed +# one β€” see docker-entrypoint.sh / src/db_bootstrap.py), then start the app. +CMD ["./docker-entrypoint.sh"] diff --git a/self-hosted-cloudapi/Makefile b/self-hosted-cloudapi/Makefile new file mode 100644 index 0000000000..22bbc14386 --- /dev/null +++ b/self-hosted-cloudapi/Makefile @@ -0,0 +1,65 @@ +# Self-Hosted Roo Code Cloud API β€” developer tasks +# +# Run `make help` to list targets. Local targets use uv; the docker-* targets +# wrap docker compose. + +# Allow overriding host/port without editing this file: `make dev PORT=8000` +HOST ?= 0.0.0.0 +PORT ?= 8085 + +.DEFAULT_GOAL := help + +.PHONY: help install dev run migrate revision downgrade test test-cov lint \ + fmt clean docker-build docker-up docker-down docker-logs docker-migrate + +help: ## Show this help + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) \ + | sort \ + | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-16s\033[0m %s\n", $$1, $$2}' + +## --- Local (uv) --------------------------------------------------------------- + +install: ## Install all dependencies (incl. 
dev) into .venv + uv sync --extra dev + +dev: ## Run the API with autoreload (local) + uv run uvicorn src.main:app --reload --host $(HOST) --port $(PORT) + +run: ## Run the API without autoreload (local) + uv run uvicorn src.main:app --host $(HOST) --port $(PORT) + +migrate: ## Apply all pending DB migrations + uv run alembic upgrade head + +revision: ## Create a new autogenerated migration: make revision m="message" + uv run alembic revision --autogenerate -m "$(m)" + +downgrade: ## Roll back the most recent migration + uv run alembic downgrade -1 + +test: ## Run the test suite + uv run pytest + +test-cov: ## Run tests with a coverage report + uv run pytest --cov=src --cov-report=term-missing + +clean: ## Remove caches and build artifacts + find . -type d -name __pycache__ -prune -exec rm -rf {} + + rm -rf .pytest_cache .coverage htmlcov + +## --- Docker ------------------------------------------------------------------- + +docker-build: ## Build the API image + docker compose build + +docker-up: ## Start API + Postgres in the background + docker compose up -d + +docker-down: ## Stop and remove the containers + docker compose down + +docker-logs: ## Follow the API container logs + docker compose logs -f api + +docker-migrate: ## Run migrations inside the running API container + docker compose exec api uv run alembic upgrade head diff --git a/self-hosted-cloudapi/README.md b/self-hosted-cloudapi/README.md index 2f0b772e67..891a8330e1 100644 --- a/self-hosted-cloudapi/README.md +++ b/self-hosted-cloudapi/README.md @@ -4,37 +4,92 @@ A self-hosted replacement for the Roo Code Cloud API, compatible with the existi ## Quick Start -### Prerequisites +### Running the full stack with Docker Compose (recommended) -- Python 3.12+ -- [uv](https://docs.astral.sh/uv/getting-started/installation/) (Python package manager) -- PostgreSQL 16+ -- Authentik (for OAuth authentication) -- Docker & Docker Compose (optional, for containerized deployment) - -### Environment Setup - -1. 
Copy `.env.example` to `.env` and fill in the required values: +`docker compose up` brings up **everything**: this API and its Postgres, plus a +bundled **Authentik** (server, worker, Postgres, Redis). The Authentik OAuth2 +provider and application are **auto-provisioned from a blueprint** +([`authentik/blueprints/tumble-code.yaml`](authentik/blueprints/tumble-code.yaml)), +so there is no manual Authentik OAuth setup. ```bash cp .env.example .env -``` -2. Key environment variables: - - `DATABASE_URL`: PostgreSQL connection string - - `AUTHENTIK_BASE_URL`: Your Authentik instance URL - - `AUTHENTIK_CLIENT_ID`: OAuth2 client ID from Authentik - - `API_BASE_URL`: Public URL of this API server +# Fill in the REQUIRED secrets in .env: +# SECRET_KEY, JWT_SECRET β€” openssl rand -hex 32 +# AUTHENTIK_CLIENT_SECRET β€” openssl rand -hex 32 (shared with the blueprint) +# AUTH_PG_PASS β€” openssl rand -hex 32 (Authentik's DB) +# AUTHENTIK_SECRET_KEY β€” openssl rand -base64 60 +# AUTHENTIK_BOOTSTRAP_PASSWORD β€” the akadmin password you'll log in with -### Running with Docker Compose +docker compose up -d --build +``` + +Services and ports: + +| Service | URL / port | Purpose | +| ------------- | --------------------- | ------------------------------------ | +| `api` | http://localhost:8085 | The cloud API the extension talks to | +| `auth_server` | http://localhost:9000 | Authentik (login UI + OAuth) | +| `postgres` | in-network only | API database | +| `auth_db` | localhost:5544 | Authentik database | + +Log in to Authentik at http://localhost:9000 with `akadmin` / +`AUTHENTIK_BOOTSTRAP_PASSWORD`. The same account is what you sign in with during +the extension's OAuth flow. + +> **Front-channel vs back-channel URLs.** `AUTHENTIK_BASE_URL` +> (`http://localhost:9000`) is what the _browser_ is redirected to; +> `AUTHENTIK_INTERNAL_URL` (`http://auth_server:9000`) is what the _api container_ +> uses for server-to-server calls (token/userinfo/jwks). 
Both are preset in +> `docker-compose.yml` — only change them if you front Authentik with a real +> domain/reverse proxy (then point `AUTHENTIK_BASE_URL` at the public domain and +> leave `AUTHENTIK_INTERNAL_URL` as the in-network service URL). +> +> **Why the api overrides the `Host` header on back-channel calls.** Authentik +> resolves a request's _brand_ — and therefore serves its `/application/o/*` +> routes — from the HTTP `Host` header, and rejects hosts containing an +> underscore (`auth_server` is not a valid RFC-1123 hostname) with a **404**. +> So the api connects to `AUTHENTIK_INTERNAL_URL` for networking but sends the +> _front-channel_ host (the host of `AUTHENTIK_BASE_URL`, e.g. `localhost:9000` +> or `auth.tumblecode.dev`) as `Host`. This is automatic — you don't configure +> it — and it is why the underscore in the default `auth_server` service name is +> harmless. If back-channel token exchange ever 404s, this is the mechanism to +> look at (see [`config/auth.py`](config/auth.py) → `get_back_channel_host_header`). + +#### Production example (public address) + +For a public deployment where the API is served at `https://app.tumblecode.dev` +and Authentik at `https://auth.tumblecode.dev`, set in `.env`: ```bash -docker-compose up -d +API_BASE_URL=https://app.tumblecode.dev +AUTHENTIK_BASE_URL=https://auth.tumblecode.dev # front-channel; also sent as Host on back-channel +AUTHENTIK_INTERNAL_URL=http://auth_server:9000 # back-channel (in-cluster service name) +AUTHENTIK_REDIRECT_URI=https://app.tumblecode.dev/auth/clerk/callback +CORS_ORIGINS=https://app.tumblecode.dev +AUTHENTIK_CLIENT_SECRET= # REQUIRED: the provider is confidential ``` -### Running Locally +The api sends `Host: auth.tumblecode.dev` (taken from `AUTHENTIK_BASE_URL`) on +every back-channel call, so Authentik resolves the brand correctly even though +the connection targets the internal service name.
The provider's `client_type` +is `confidential`, so a matching `AUTHENTIK_CLIENT_SECRET` is mandatory. + +> **Blueprint troubleshooting.** The provider/app are created by the worker on +> first boot. Check it applied with `docker compose logs auth_worker | grep -i +blueprint`, or in the Authentik UI under **System → Blueprints**. The blueprint +> schema is Authentik-version-sensitive; if it errors, adjust +> `authentik/blueprints/tumble-code.yaml` for your `AUTHENTIK_TAG`. + +### Running the API locally (without Docker) + +Requires Python 3.12+, [uv](https://docs.astral.sh/uv/getting-started/installation/), +a PostgreSQL 16+ you control, and an Authentik instance. ```bash +cp .env.example .env # set DATABASE_URL + the AUTHENTIK_* values + # Install dependencies uv sync @@ -42,9 +97,15 @@ uv sync uv run alembic upgrade head # Start the server -uv run uvicorn src.main:app --reload --host 0.0.0.0 --port 8000 +uv run uvicorn src.main:app --reload --host 0.0.0.0 --port 8085 ``` +For a non-compose deployment, leave `AUTHENTIK_INTERNAL_URL` unset — it falls +back to `AUTHENTIK_BASE_URL`. + +A [`Makefile`](Makefile) wraps these commands (`make help`, `make dev`, +`make docker-up`, …). + ## Configuring the Roo Code Extension In VS Code, open Settings (`Ctrl+,` / `Cmd+,`) and search for `roo-cline` to configure these settings: @@ -96,6 +157,18 @@ In VS Code, open Settings (`Ctrl+,` / `Cmd+,`) and search for `roo-cline` to con - Verify the Authentik redirect URI is set to `{API_BASE_URL}/auth/clerk/callback` - Check the API server logs for errors during the token exchange or user creation +**`502 Bad Gateway` on `/auth/clerk/callback` right after Authentik login:** + +- This is the API's own error page, returned when the **back-channel token + exchange** to Authentik fails — not a reverse-proxy error. +- Check the API logs: `docker compose logs api | grep -i "token exchange"`.
+ A `404 Not Found` for `…/application/o/token/` means Authentik rejected the + request's `Host`. The api derives that `Host` from `AUTHENTIK_BASE_URL`, so + ensure it is a valid hostname (no underscores) and points at the host your + Authentik brand serves. See _Why the api overrides the `Host` header_ above. +- A `400 invalid_client` instead means `AUTHENTIK_CLIENT_SECRET` is missing or + does not match the value the blueprint provisioned (the provider is confidential). + ## Authentik Setup 1. Deploy Authentik with Docker Compose diff --git a/self-hosted-cloudapi/authentik/blueprints/tumble-code.yaml b/self-hosted-cloudapi/authentik/blueprints/tumble-code.yaml new file mode 100644 index 0000000000..ea0d4dc7ca --- /dev/null +++ b/self-hosted-cloudapi/authentik/blueprints/tumble-code.yaml @@ -0,0 +1,89 @@ +# yaml-language-server: $schema=https://goauthentik.io/blueprints/schema.json +# +# Auto-provisions the OAuth2 provider + application the cloud API signs in +# against. Applied automatically by the auth_worker on startup (mounted at +# /blueprints/custom). State is visible in the Authentik admin under +# System -> Blueprints. +# +# Credentials and the redirect URI are read from the container environment +# (set in docker-compose.yml / .env), so the api and Authentik stay in sync +# from a single source of truth. The application slug MUST equal the api's +# AUTHENTIK_APP_SLUG (default: tumble-code), since app-specific endpoints +# (jwks, end-session, discovery) are built from it. +# +# NOTE: the oauth2provider schema (notably redirect_uris and property_mappings) +# is Authentik-version-sensitive. This is authored for the pinned AUTHENTIK_TAG +# (2026.2.2). If the worker logs a blueprint error, this file is the thing to +# adjust. +version: 1 +metadata: + name: tumble-code OAuth2 provider + application + labels: + blueprints.goauthentik.io/instantiate: "true" +entries: + # 1. 
OAuth2 / OpenID provider + - model: authentik_providers_oauth2.oauth2provider + state: present + id: tumble-code-provider + identifiers: + name: tumble-code + attrs: + name: tumble-code + client_type: confidential + client_id: !Env [AUTHENTIK_CLIENT_ID, tumble-code] + client_secret: !Env [AUTHENTIK_CLIENT_SECRET, ""] + authorization_flow: + !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]] + invalidation_flow: + !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]] + signing_key: + !Find [authentik_crypto.certificatekeypair, [name, "authentik Self-signed Certificate"]] + redirect_uris: + - matching_mode: strict + url: !Env [AUTHENTIK_REDIRECT_URI, "http://localhost:8085/auth/clerk/callback"] + property_mappings: + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]] + + # 2. Access group. Add users to this group (Directory -> Groups -> "Tumble + # Code Users") to let them sign in to Tumble Code; the binding in entry 4 + # gates the application on membership. akadmin is added here so the bootstrap + # superuser (the account used for the extension OAuth flow) is not locked out + # of its own app β€” superuser status does NOT bypass application bindings. + - model: authentik_core.group + state: present + id: tumble-code-group + identifiers: + name: Tumble Code Users + attrs: + name: Tumble Code Users + users: + - !Find [authentik_core.user, [username, akadmin]] + + # 3. Application bound to the provider. slug must match AUTHENTIK_APP_SLUG. + - model: authentik_core.application + state: present + id: tumble-code-application + identifiers: + slug: tumble-code + attrs: + name: Tumble Code + slug: tumble-code + provider: !KeyOf tumble-code-provider + policy_engine_mode: any + + # 4. Group gate: bind the group to the application. 
With this binding present, + # only members of "Tumble Code Users" can access the application (an app with + # no bindings is open to everyone; the first binding restricts it). + - model: authentik_policies.policybinding + state: present + identifiers: + target: !KeyOf tumble-code-application + group: !KeyOf tumble-code-group + order: 0 + attrs: + target: !KeyOf tumble-code-application + group: !KeyOf tumble-code-group + order: 0 + enabled: true diff --git a/self-hosted-cloudapi/config/auth.py b/self-hosted-cloudapi/config/auth.py index ca0d456c5a..dae8c1d356 100644 --- a/self-hosted-cloudapi/config/auth.py +++ b/self-hosted-cloudapi/config/auth.py @@ -1,38 +1,94 @@ -"""Auth-related configuration helpers.""" +"""Auth-related configuration helpers. + +Authentik is reached over two channels that may need different hostnames: + +* **front-channel** β€” URLs the *browser* is redirected to (`authorize`, + `end-session`). These must be publicly reachable, e.g. ``http://localhost:9000``. +* **back-channel** β€” URLs the *api server* fetches over httpx (`token`, + `userinfo`, `jwks`, discovery). Inside a single docker-compose these must use + the compose service name (e.g. ``http://auth_server:9000``) because the api + container's own ``localhost`` is not Authentik. + +``settings.authentik_internal_url`` configures the back-channel base; when unset +it falls back to ``authentik_base_url`` so single-host deployments are unchanged. + +Brand / Host header +------------------- +Authentik resolves a request's *brand* β€” and therefore serves its +``/application/o/*`` routes β€” from the HTTP ``Host`` header. The back-channel +base is an in-network service name (e.g. ``http://auth_server:9000`` in the +bundled compose stack), so httpx would send ``Host: auth_server:9000``. Authentik +(Django) rejects that with **404 on every application route** because the +underscore makes ``auth_server`` an invalid RFC-1123 hostname. 
The browser flow +works only because the front-channel host (``localhost:9000`` in dev, +``auth.tumblecode.dev`` in production) is valid. + +So back-channel calls must connect to the service name (for DNS) but present the +public front-channel host as ``Host`` β€” see ``get_back_channel_host_header``. +""" + +from typing import Optional +from urllib.parse import urlsplit from config.settings import settings +def _front_channel_base() -> str: + """Base URL for endpoints the browser is redirected to.""" + return settings.authentik_base_url + + +def _back_channel_base() -> str: + """Base URL for endpoints the api server fetches itself.""" + return settings.authentik_internal_url or settings.authentik_base_url + + +def get_back_channel_host_header() -> Optional[str]: + """``Host`` header to send on back-channel (server-to-server) requests. + + Returns the public *front-channel* host (host[:port] of + ``authentik_base_url`` β€” e.g. ``auth.tumblecode.dev`` or ``localhost:9000``) + whenever a distinct internal URL is configured, so Authentik resolves the + correct brand instead of 404-ing on the internal service name. + + Returns ``None`` when no internal URL is set (front == back channel); httpx's + default ``Host`` already matches, so no override is needed. 
+ """ + if not settings.authentik_internal_url: + return None + return urlsplit(settings.authentik_base_url).netloc or None + + def get_authentik_authorize_url() -> str: - """Get the Authentik authorization endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/authorize/" + """Get the Authentik authorization endpoint URL (front-channel / browser).""" + return f"{_front_channel_base()}/application/o/authorize/" def get_authentik_token_url() -> str: - """Get the Authentik token endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/token/" + """Get the Authentik token endpoint URL (back-channel / server).""" + return f"{_back_channel_base()}/application/o/token/" def get_authentik_userinfo_url() -> str: - """Get the Authentik userinfo endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/userinfo/" + """Get the Authentik userinfo endpoint URL (back-channel / server).""" + return f"{_back_channel_base()}/application/o/userinfo/" def get_authentik_issuer_url() -> str: """Get the Authentik issuer URL.""" - return f"{settings.authentik_base_url}/application/o/{settings.authentik_app_slug}/" + return f"{_front_channel_base()}/application/o/{settings.authentik_app_slug}/" def get_authentik_end_session_url() -> str: - """Get the Authentik end-session (logout) endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/{settings.authentik_app_slug}/end-session/" + """Get the Authentik end-session (logout) endpoint URL (front-channel / browser).""" + return f"{_front_channel_base()}/application/o/{settings.authentik_app_slug}/end-session/" def get_authentik_jwks_url() -> str: - """Get the Authentik JWKS endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/{settings.authentik_app_slug}/jwks/" + """Get the Authentik JWKS endpoint URL (back-channel / server).""" + return f"{_back_channel_base()}/application/o/{settings.authentik_app_slug}/jwks/" def get_authentik_discovery_url() -> str: - """Get 
the Authentik OpenID discovery document URL.""" - return f"{settings.authentik_base_url}/application/o/{settings.authentik_app_slug}/.well-known/openid-configuration" + """Get the Authentik OpenID discovery document URL (back-channel / server).""" + return f"{_back_channel_base()}/application/o/{settings.authentik_app_slug}/.well-known/openid-configuration" diff --git a/self-hosted-cloudapi/config/settings.py b/self-hosted-cloudapi/config/settings.py index 34a02243e4..132ead6345 100644 --- a/self-hosted-cloudapi/config/settings.py +++ b/self-hosted-cloudapi/config/settings.py @@ -10,7 +10,12 @@ class Settings(BaseSettings): """Roo Cloud API settings.""" - model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8") + # extra="ignore": the same .env is shared with docker-compose and carries + # infra-only keys (COMPOSE_PORT_*, AUTHENTIK_BOOTSTRAP_*, AUTH_PG_PASS, …) + # that this app doesn't define. Ignore them instead of failing to start. + model_config = SettingsConfigDict( + env_file=".env", env_file_encoding="utf-8", extra="ignore" + ) # Core database_url: str = Field(..., description="PostgreSQL connection string") @@ -25,8 +30,17 @@ class Settings(BaseSettings): jwt_secret: Optional[str] = None # Authentik OAuth - authentik_base_url: str = Field(..., description="Authentik instance URL") - authentik_app_slug: str = Field("stork-code", description="Authentik application slug for app-specific endpoints") + authentik_base_url: str = Field(..., description="Authentik instance URL (browser-facing / front-channel)") + # Internal (container-network) Authentik URL for server-to-server calls + # (token/userinfo/discovery/jwks). In a single docker-compose the api + # container cannot reach the browser-facing `localhost:9000` β€” it must use + # the compose service name (e.g. http://auth_server:9000). Falls back to + # authentik_base_url when unset, so existing single-host deployments are + # unaffected. 
+ authentik_internal_url: Optional[str] = Field( + None, description="Internal Authentik URL for back-channel calls; falls back to authentik_base_url" + ) + authentik_app_slug: str = Field("tumble-code", description="Authentik application slug for app-specific endpoints") authentik_client_id: str = Field(..., description="OAuth2 client ID") authentik_client_secret: Optional[str] = None authentik_redirect_uri: str = Field(..., description="OAuth2 redirect URI") diff --git a/self-hosted-cloudapi/docker-compose.yml b/self-hosted-cloudapi/docker-compose.yml index 7993f70dd9..5951d52427 100644 --- a/self-hosted-cloudapi/docker-compose.yml +++ b/self-hosted-cloudapi/docker-compose.yml @@ -1,6 +1,11 @@ -version: "3.8" - +# Full self-hosted stack: cloud API + its Postgres, plus a bundled Authentik +# (server, worker, Postgres, Redis). `docker compose up -d` brings up everything +# and the Authentik OAuth2 provider/application is auto-provisioned from the +# blueprint in ./authentik/blueprints, so no manual Authentik clicking is needed. +# +# Reference for the Authentik services: /opt/docker/llm/docker-compose.yaml services: + # --- Cloud API ------------------------------------------------------------ api: build: . ports: @@ -11,12 +16,18 @@ services: SECRET_KEY: ${SECRET_KEY:-change-me-to-a-random-secret-key} JWT_ALGORITHM: ${JWT_ALGORITHM:-HS256} JWT_SECRET: ${JWT_SECRET:-change-me-to-a-random-jwt-secret} + # Browser-facing Authentik URL (front-channel: authorize / logout redirects). AUTHENTIK_BASE_URL: ${AUTHENTIK_BASE_URL:-http://localhost:9000} - AUTHENTIK_APP_SLUG: ${AUTHENTIK_APP_SLUG:-stork-code} - AUTHENTIK_CLIENT_ID: ${AUTHENTIK_CLIENT_ID:-your-client-id} + # In-network Authentik URL (back-channel: token / userinfo / jwks / discovery). + # The api container's own localhost is not Authentik, so it must reach the + # auth_server service by name. 
+ AUTHENTIK_INTERNAL_URL: ${AUTHENTIK_INTERNAL_URL:-http://auth_server:9000} + AUTHENTIK_APP_SLUG: ${AUTHENTIK_APP_SLUG:-tumble-code} + # Same credentials the blueprint provisions into Authentik. + AUTHENTIK_CLIENT_ID: ${AUTHENTIK_CLIENT_ID:-tumble-code} AUTHENTIK_CLIENT_SECRET: ${AUTHENTIK_CLIENT_SECRET:-} - AUTHENTIK_REDIRECT_URI: ${AUTHENTIK_REDIRECT_URI:-https://roo.example.com/auth/clerk/callback} - API_BASE_URL: ${API_BASE_URL:-https://roo.example.com} + AUTHENTIK_REDIRECT_URI: ${AUTHENTIK_REDIRECT_URI:-http://localhost:8085/auth/clerk/callback} + API_BASE_URL: ${API_BASE_URL:-http://localhost:8085} CORS_ORIGINS: ${CORS_ORIGINS:-*} DEFAULT_LLM_PROVIDER: ${DEFAULT_LLM_PROVIDER:-openai} OPENAI_API_KEY: ${OPENAI_API_KEY:-} @@ -33,23 +44,120 @@ services: depends_on: postgres: condition: service_healthy + auth_server: + condition: service_healthy restart: unless-stopped postgres: image: postgres:16-alpine - ports: - - "5432:5432" + # Not published to the host: nothing outside the compose network needs it, + # and 5432 collides with other local Postgres instances. Reachable in-network + # as postgres:5432. environment: POSTGRES_USER: roo POSTGRES_PASSWORD: password POSTGRES_DB: roo_cloud volumes: - - postgres_data:/var/lib/postgresql/data + - ./.vol/postgres:/var/lib/postgresql/data healthcheck: test: ["CMD-SHELL", "pg_isready -U roo"] interval: 5s timeout: 5s retries: 5 + restart: unless-stopped + + # --- Authentik ------------------------------------------------------------ + auth_db: + image: docker.io/library/postgres:16-alpine + environment: + POSTGRES_DB: ${PG_DB:-authentik} + POSTGRES_USER: ${PG_USER:-authentik} + POSTGRES_PASSWORD: ${AUTH_PG_PASS:?database password required} + healthcheck: + test: ["CMD-SHELL", "pg_isready -d $${POSTGRES_DB} -U $${POSTGRES_USER}"] + interval: 30s + timeout: 5s + retries: 5 + start_period: 20s + ports: + # Host port remapped 5432 -> 5544 to avoid colliding with other local + # Postgres instances. 
Internal port stays 5432 (auth_server/worker reach it + # as auth_db:5432). Override AUTH_DB_PORT to coexist with another stack. + - "${AUTH_DB_PORT:-5544}:5432" + volumes: + - ./.vol/auth/postgres:/var/lib/postgresql/data + restart: unless-stopped + + auth_redis: + image: docker.io/library/redis:alpine + command: --save 60 1 --loglevel warning + healthcheck: + test: ["CMD-SHELL", "redis-cli ping | grep PONG"] + interval: 30s + timeout: 5s + retries: 5 + start_period: 10s + volumes: + - ./.vol/auth/redis:/data + restart: unless-stopped + + auth_server: + image: ${AUTHENTIK_IMAGE:-ghcr.io/goauthentik/server}:${AUTHENTIK_TAG:-2026.2.2} + command: server + environment: &authentik_env + AUTHENTIK_POSTGRESQL__HOST: auth_db + AUTHENTIK_POSTGRESQL__NAME: ${PG_DB:-authentik} + AUTHENTIK_POSTGRESQL__USER: ${PG_USER:-authentik} + AUTHENTIK_POSTGRESQL__PASSWORD: ${AUTH_PG_PASS} + AUTHENTIK_REDIS__HOST: auth_redis + AUTHENTIK_SECRET_KEY: ${AUTHENTIK_SECRET_KEY:?secret key required} + # First-run admin (akadmin). Lets you log in to the Authentik UI and is the + # account you sign in with during the extension OAuth flow. + AUTHENTIK_BOOTSTRAP_PASSWORD: ${AUTHENTIK_BOOTSTRAP_PASSWORD:-} + AUTHENTIK_BOOTSTRAP_EMAIL: ${AUTHENTIK_BOOTSTRAP_EMAIL:-admin@example.com} + AUTHENTIK_BOOTSTRAP_TOKEN: ${AUTHENTIK_BOOTSTRAP_TOKEN:-} + # Consumed by the blueprint's !Env tags to provision the OAuth2 provider. 
+ AUTHENTIK_CLIENT_ID: ${AUTHENTIK_CLIENT_ID:-tumble-code} + AUTHENTIK_CLIENT_SECRET: ${AUTHENTIK_CLIENT_SECRET:-} + AUTHENTIK_REDIRECT_URI: ${AUTHENTIK_REDIRECT_URI:-http://localhost:8085/auth/clerk/callback} + healthcheck: + test: ["CMD", "ak", "healthcheck"] + start_period: 60s + interval: 30s + timeout: 30s + retries: 8 + ports: + - "${COMPOSE_PORT_HTTP:-9000}:9000" + - "${COMPOSE_PORT_HTTPS:-9443}:9443" + shm_size: 512mb + volumes: + - ./.vol/auth/data:/data + - ./.vol/auth/templates:/templates + - ./authentik/blueprints:/blueprints/custom:ro + depends_on: + auth_db: + condition: service_healthy + auth_redis: + condition: service_healthy + restart: unless-stopped -volumes: - postgres_data: + auth_worker: + image: ${AUTHENTIK_IMAGE:-ghcr.io/goauthentik/server}:${AUTHENTIK_TAG:-2026.2.2} + command: worker + # The worker is what applies blueprints, so it needs the same env (incl. the + # AUTHENTIK_CLIENT_* values the blueprint reads via !Env). + environment: *authentik_env + user: root + shm_size: 512mb + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ./.vol/auth/data:/data + - ./.vol/auth/certs:/certs + - ./.vol/auth/templates:/templates + - ./authentik/blueprints:/blueprints/custom:ro + depends_on: + auth_db: + condition: service_healthy + auth_redis: + condition: service_healthy + restart: unless-stopped diff --git a/self-hosted-cloudapi/docker-entrypoint.sh b/self-hosted-cloudapi/docker-entrypoint.sh new file mode 100644 index 0000000000..d30760d687 --- /dev/null +++ b/self-hosted-cloudapi/docker-entrypoint.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# Reconcile the database schema with Alembic, then start the API. +# +# The schema is defined by the ORM models (create_all), while the migration +# chain only *evolves* existing deployments. So how we bring Alembic in sync +# depends on what state the database is in β€” see src/db_bootstrap.py. 
+set -e + +STATE="$(uv run python -m src.db_bootstrap)" +echo "DB state: ${STATE}" + +case "${STATE}" in + FRESH) + # create_all already built the head schema; just record migrations as applied. + uv run alembic stamp head + ;; + LEGACY) + # Pre-Alembic database (built by an older create_all): adopt the baseline, + # then run the evolution migrations. + uv run alembic stamp a1b2c3d4e5f6 + uv run alembic upgrade head + ;; + MANAGED) + uv run alembic upgrade head + ;; + *) + echo "Unexpected DB state: '${STATE}'" >&2 + exit 1 + ;; +esac + +exec uv run uvicorn src.main:app --host 0.0.0.0 --port "${PORT:-8085}" diff --git a/self-hosted-cloudapi/src/auth/authentik.py b/self-hosted-cloudapi/src/auth/authentik.py index 57fea94fa4..b77d1d0931 100644 --- a/self-hosted-cloudapi/src/auth/authentik.py +++ b/self-hosted-cloudapi/src/auth/authentik.py @@ -16,9 +16,24 @@ get_authentik_end_session_url, get_authentik_jwks_url, get_authentik_discovery_url, + get_back_channel_host_header, ) +def _back_channel_headers(extra: Optional[Dict[str, str]] = None) -> Dict[str, str]: + """Headers for server-to-server Authentik calls, including the brand ``Host``. + + Authentik routes to its OAuth/OIDC endpoints by Host header, so back-channel + requests (which connect to the internal service name) must present the public + front-channel host or Authentik 404s. See ``config.auth`` for the full why. 
+ """ + headers: Dict[str, str] = dict(extra or {}) + host = get_back_channel_host_header() + if host: + headers["Host"] = host + return headers + + def generate_pkce_pair() -> tuple[str, str]: """Generate a PKCE code verifier and code challenge.""" code_verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).decode().rstrip("=") @@ -66,7 +81,9 @@ async def exchange_code_for_tokens( response = await client.post( get_authentik_token_url(), data=token_data, - headers={"Content-Type": "application/x-www-form-urlencoded"}, + headers=_back_channel_headers( + {"Content-Type": "application/x-www-form-urlencoded"} + ), ) response.raise_for_status() return response.json() @@ -77,7 +94,9 @@ async def get_userinfo(access_token: str) -> Dict[str, Any]: async with httpx.AsyncClient() as client: response = await client.get( get_authentik_userinfo_url(), - headers={"Authorization": f"Bearer {access_token}"}, + headers=_back_channel_headers( + {"Authorization": f"Bearer {access_token}"} + ), ) response.raise_for_status() return response.json() @@ -86,6 +105,9 @@ async def get_userinfo(access_token: str) -> Dict[str, Any]: async def get_openid_configuration() -> Dict[str, Any]: """Fetch the OpenID Connect discovery document from Authentik.""" async with httpx.AsyncClient() as client: - response = await client.get(get_authentik_discovery_url()) + response = await client.get( + get_authentik_discovery_url(), + headers=_back_channel_headers(), + ) response.raise_for_status() return response.json() diff --git a/self-hosted-cloudapi/src/db_bootstrap.py b/self-hosted-cloudapi/src/db_bootstrap.py new file mode 100644 index 0000000000..463ab40084 --- /dev/null +++ b/self-hosted-cloudapi/src/db_bootstrap.py @@ -0,0 +1,69 @@ +"""Startup schema reconciler β€” classify the database and seed a fresh one. + +Why this exists: the migration chain's baseline (a1b2c3d4e5f6) is a no-op and the +later migrations are evolution-only (ALTER/ADD COLUMN). 
The schema is actually +built by ``Base.metadata.create_all`` from the ORM models β€” the single source of +truth. So a *fresh* database cannot be bootstrapped by ``alembic upgrade head``. + +This module probes the live DB and prints one of: + + FRESH no application tables -> we create_all here; caller should `stamp head` + LEGACY app tables, no alembic -> caller should `stamp baseline && upgrade head` + MANAGED app tables + alembic -> caller should `upgrade head` + +The presence of *application tables* (not the alembic_version table) is the real +signal. A database with an ``alembic_version`` row but no app tables is a failed +bootstrap β€” the previous, broken `alembic upgrade head` stamped the no-op baseline +and then crashed on the first ALTER. We treat that as FRESH so it self-heals: +``create_all`` builds the schema and ``stamp head`` overwrites the stale version. + +Only ``FRESH`` performs DDL (create_all); the alembic step is left to the +entrypoint so its output is logged like any other migration run. +""" + +import asyncio + +from sqlalchemy import inspect +from sqlalchemy.ext.asyncio import AsyncEngine + +from src.database import Base +import src.models # noqa: F401 -- registers every table on Base.metadata + + +async def classify_and_seed(engine: AsyncEngine) -> str: + """Classify ``engine``'s database; create_all when it has no app tables.""" + async with engine.begin() as conn: + def probe(sync_conn): + insp = inspect(sync_conn) + return insp.has_table("alembic_version"), insp.has_table("users") + + has_alembic, has_app_tables = await conn.run_sync(probe) + + if has_app_tables: + return "MANAGED" if has_alembic else "LEGACY" + + # No app tables: either a brand-new DB or a failed prior bootstrap that + # left only a stale alembic_version. Build the schema from the models; + # the entrypoint then `stamp head` (overwriting any stale version row). 
+ await conn.run_sync(Base.metadata.create_all) + return "FRESH" + + +def main() -> None: + # Imported lazily so importing this module (e.g. for tests) doesn't construct + # the app engine / require full settings. + from src.database import engine + + async def _run() -> str: + try: + return await classify_and_seed(engine) + finally: + # Dispose within the same loop the connections were opened on; + # disposing from a second asyncio.run() raises "Event loop is closed". + await engine.dispose() + + print(asyncio.run(_run())) + + +if __name__ == "__main__": + main() diff --git a/self-hosted-cloudapi/src/routers/web.py b/self-hosted-cloudapi/src/routers/web.py index 04b1ad90d1..8b1b63bcf5 100644 --- a/self-hosted-cloudapi/src/routers/web.py +++ b/self-hosted-cloudapi/src/routers/web.py @@ -12,6 +12,7 @@ import json import logging +import re from pathlib import Path from typing import Optional @@ -61,6 +62,30 @@ def _asset_version() -> str: # Message says/asks whose text is the most representative task title. _TITLE_MAX = 100 +# Roo Code's first user turn can reach the cloud in API-prompt form: the typed +# text wrapped in //, trailed by a machine-built +# block (current mode, open tabs, file tree, cost…). None +# of the environment block is the user's query, so strip it before deriving a +# title. Match the trailing/unclosed case too (the block is always last). +_ENV_DETAILS_RE = re.compile(r".*?(?:|\Z)", re.DOTALL) +_MSG_WRAPPER_RE = re.compile(r"<(user_message|task|feedback)>(.*?)", re.DOTALL) + + +def _strip_task_wrappers(text: str) -> str: + """Reduce a raw conversation message to the human-authored query. + + Drops the machine ```` appendix and unwraps the + ````/````/```` tag to its inner content. Plain + text (already clean) passes through unchanged. 
+ """ + if not text: + return "" + cleaned = _ENV_DETAILS_RE.sub("", text) + match = _MSG_WRAPPER_RE.search(cleaned) + if match: + cleaned = match.group(2) + return cleaned.strip() + def _workspace_label(path: str | None) -> str | None: """Compact project/worktree name for a badge: the last path segment. @@ -79,13 +104,22 @@ def _workspace_label(path: str | None) -> str | None: def _derive_title(messages: list[dict]) -> str: - """Pick a human-readable title from the conversation (first text-bearing msg).""" + """Pick a human-readable title from the conversation (first text-bearing msg). + + The first candidate is unwrapped to the user's query (machine framing such as + ```` is dropped) so the title reflects what the user + actually typed, not the current mode/file tree the extension appended. + """ for msg in messages: text = (msg.get("text") or "").strip() - if text and not text.startswith("{"): - first_line = text.splitlines()[0].strip() - if first_line: - return first_line[:_TITLE_MAX] + ("…" if len(first_line) > _TITLE_MAX else "") + if not text or text.startswith("{"): + continue + query = _strip_task_wrappers(text) + if not query: + continue + first_line = query.splitlines()[0].strip() + if first_line: + return first_line[:_TITLE_MAX] + ("…" if len(first_line) > _TITLE_MAX else "") return "Untitled task" diff --git a/self-hosted-cloudapi/src/web/static/app.css b/self-hosted-cloudapi/src/web/static/app.css index 08d1e6a830..4b97286c88 100644 --- a/self-hosted-cloudapi/src/web/static/app.css +++ b/self-hosted-cloudapi/src/web/static/app.css @@ -448,6 +448,24 @@ details[open] summary { margin-bottom: 0.5rem; } +/* Machine environment_details appendix on a user prompt β€” collapsed by default, + unfolds to the full original text. 
*/ +details.env-details { + margin-top: 0.5rem; + border-top: 1px dashed var(--border); + padding-top: 0.4rem; +} +details.env-details > summary { + font-size: 0.78rem; + text-transform: uppercase; + letter-spacing: 0.03em; +} +details.env-details > pre { + margin-top: 0.4rem; + max-height: 22rem; + overflow: auto; +} + /* Foldable rows: the <summary> is the header — one collapsible line. */ .msg.foldable > details > summary.msg-head { list-style: none; @@ -752,7 +770,7 @@ details[open] summary { .breakdown-grid { display: grid; - grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); + grid-template-columns: 1fr; gap: 0.85rem; } .breakdown-card { @@ -763,6 +781,7 @@ details[open] summary { } table.breakdown { width: 100%; + table-layout: fixed; border-collapse: collapse; font-size: 0.85rem; } @@ -775,6 +794,23 @@ table.breakdown th { letter-spacing: 0.03em; padding: 0.25rem 0.4rem; border-bottom: 1px solid var(--border); + white-space: nowrap; +} +/* Name column flexes; numeric columns get fixed widths so values never clip.
*/ +table.breakdown th:nth-child(2), +table.breakdown td:nth-child(2) { + width: 5rem; +} +table.breakdown th:nth-child(3), +table.breakdown td:nth-child(3) { + width: 5.5rem; +} +table.breakdown th:nth-child(4), +table.breakdown td:nth-child(4) { + width: 3.5rem; +} +table.breakdown th:nth-child(n + 2) { + text-align: right; } table.breakdown td { padding: 0.3rem 0.4rem; @@ -785,7 +821,6 @@ table.breakdown tr:last-child td { } .bd-name { font-family: var(--mono); - max-width: 160px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; diff --git a/self-hosted-cloudapi/src/web/static/render.js b/self-hosted-cloudapi/src/web/static/render.js index bb2726cff7..6aa7d220d8 100644 --- a/self-hosted-cloudapi/src/web/static/render.js +++ b/self-hosted-cloudapi/src/web/static/render.js @@ -17,6 +17,33 @@ return DOMPurify.sanitize(marked.parse(String(text))) } + // Roo Code's first user turn (and resumed turns) can arrive wrapped: the typed + // text inside //, trailed by a machine-built + // block (current mode, open tabs, file tree, cost…). + // Render the human query; tuck the environment block into a collapsed fold so + // the full original is still one click away. Plain text passes through as-is. + function userContentHtml(text) { + if (!text) return "" + let body = String(text) + let env = "" + const envMatch = body.match(/([\s\S]*?)(?:<\/environment_details>|$)/) + if (envMatch) { + env = envMatch[1].trim() + body = body.slice(0, envMatch.index) + body.slice(envMatch.index + envMatch[0].length) + } + const wrap = body.match(/<(user_message|task|feedback)>([\s\S]*?)<\/\1>/) + if (wrap) body = wrap[2] + let html = md(body.trim()) + if (env) { + html += + '
<details class="env-details"><summary>Environment details</summary>' +
+				"<pre>" +
+				escapeHtml(env) +
+				"</pre></details>
" + } + return html + } + function escapeHtml(s) { return String(s == null ? "" : s) .replace(/&/g, "&") @@ -70,7 +97,7 @@ switch (kind) { case "user_feedback": case "user_feedback_diff": - return { role: "user", label: "You", icon: "\u{1F464}", body: md(m.text) } + return { role: "user", label: "You", icon: "\u{1F464}", body: userContentHtml(m.text) } case "text": if (!m.text && !(m.images && m.images.length)) return null @@ -78,7 +105,7 @@ role: "assistant", label: "Assistant", icon: "\u{1F916}", - body: md(m.text) + images(m), + body: userContentHtml(m.text) + images(m), fold: true, activity: "Responding…", } diff --git a/self-hosted-cloudapi/tests/test_back_channel_host.py b/self-hosted-cloudapi/tests/test_back_channel_host.py new file mode 100644 index 0000000000..57607f1687 --- /dev/null +++ b/self-hosted-cloudapi/tests/test_back_channel_host.py @@ -0,0 +1,101 @@ +"""Back-channel Host header behaviour. + +Authentik routes to its OAuth/OIDC endpoints by HTTP Host header and 404s on an +invalid host (e.g. the compose service name `auth_server`, whose underscore is an +invalid RFC-1123 hostname). The api therefore presents the public front-channel +host (host of AUTHENTIK_BASE_URL) on every server-to-server call. These tests +lock that in so the OAuth callback can't silently regress to a 502. 
+""" + +import pytest + +import config.auth as auth_cfg +from config.auth import get_back_channel_host_header +from config.settings import settings +import src.auth.authentik as authentik + + +def test_host_header_is_front_channel_when_internal_url_set(monkeypatch): + monkeypatch.setattr(settings, "authentik_base_url", "https://auth.tumblecode.dev") + monkeypatch.setattr(settings, "authentik_internal_url", "http://auth_server:9000") + + host = get_back_channel_host_header() + + assert host == "auth.tumblecode.dev" + assert "_" not in host # the bug: underscore hosts get 404'd by Authentik + + +def test_host_header_keeps_port_for_dev_stack(monkeypatch): + monkeypatch.setattr(settings, "authentik_base_url", "http://localhost:9000") + monkeypatch.setattr(settings, "authentik_internal_url", "http://auth_server:9000") + + assert get_back_channel_host_header() == "localhost:9000" + + +def test_host_header_none_for_single_host(monkeypatch): + # No internal URL → front == back channel → httpx's default Host is correct.
+ monkeypatch.setattr(settings, "authentik_internal_url", None) + + assert get_back_channel_host_header() is None + + +class _FakeResp: + def __init__(self, data): + self._data = data + + def raise_for_status(self): + return None + + def json(self): + return self._data + + +class _CapturingClient: + """Stand-in for httpx.AsyncClient that records the headers it was called with.""" + + last_headers: dict = {} + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + return False + + async def post(self, url, data=None, headers=None): + _CapturingClient.last_headers = headers or {} + return _FakeResp({"access_token": "fake"}) + + async def get(self, url, headers=None): + _CapturingClient.last_headers = headers or {} + return _FakeResp({"sub": "fake"}) + + +@pytest.fixture +def capture_httpx(monkeypatch): + monkeypatch.setattr(authentik.httpx, "AsyncClient", _CapturingClient) + monkeypatch.setattr(settings, "authentik_base_url", "https://auth.tumblecode.dev") + monkeypatch.setattr(settings, "authentik_internal_url", "http://auth_server:9000") + return _CapturingClient + + +async def test_token_exchange_sends_brand_host(capture_httpx): + await authentik.exchange_code_for_tokens("code", "verifier") + + headers = capture_httpx.last_headers + assert headers["Host"] == "auth.tumblecode.dev" + # Existing content-type header is preserved alongside the injected Host. 
+ assert headers["Content-Type"] == "application/x-www-form-urlencoded" + + +async def test_userinfo_sends_brand_host(capture_httpx): + await authentik.get_userinfo("access-token") + + headers = capture_httpx.last_headers + assert headers["Host"] == "auth.tumblecode.dev" + assert headers["Authorization"] == "Bearer access-token" + + +async def test_discovery_sends_brand_host(capture_httpx): + await authentik.get_openid_configuration() + + assert capture_httpx.last_headers["Host"] == "auth.tumblecode.dev" diff --git a/self-hosted-cloudapi/tests/test_web_and_share.py b/self-hosted-cloudapi/tests/test_web_and_share.py index f5a188b2cc..d8808fa5f7 100644 --- a/self-hosted-cloudapi/tests/test_web_and_share.py +++ b/self-hosted-cloudapi/tests/test_web_and_share.py @@ -307,6 +307,47 @@ async def test_app_lists_owned_tasks(client, db_session, session_factory): assert "Build me a feature" in resp.text +async def test_title_strips_environment_details_wrapper(client, db_session, session_factory): + """A first turn in Roo Code's API-prompt form (typed text wrapped in + , trailed by a machine block) yields a + title of just the user's query β€” no mode/file-tree leakage.""" + await _seed_user(db_session) + wrapped = ( + "\n" + "uruchom wszystkie testy w langgrapha\n" + " \n" + "# VSCode Visible Files\n.roo/rules/rules.md\n\n" + "# Current Mode\ncode\nπŸ’» Code\n" + "" + ) + async with session_factory() as s: + s.add(Task(id="task-wrapped", user_id="user_test")) + s.add( + TaskMessage( + task_id="task-wrapped", + message_data=json.dumps({"ts": 1, "type": "say", "say": "text", "text": wrapped}), + ) + ) + await s.commit() + + from src.main import app + + _override_web_user(app) + try: + list_resp = client.get("/app") + detail_resp = client.get("/app/tasks/task-wrapped") + finally: + app.dependency_overrides.pop(get_web_user_optional, None) + + assert list_resp.status_code == 200 + assert "uruchom wszystkie testy w langgrapha" in list_resp.text + # The machine framing must not 
bleed into the title. + for leak in ("environment_details", "Current Mode", "", ""): + assert leak not in list_resp.text + assert detail_resp.status_code == 200 + assert "uruchom wszystkie testy w langgrapha" in detail_resp.text + + async def test_app_list_and_detail_show_workspace(client, db_session, session_factory): """The list shows the worktree basename (full path on hover); the detail header shows the full path.""" From 80ef25fd0699a52b15c3e71baf2763017bddf335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Dre=C5=BCewski?= Date: Mon, 22 Jun 2026 12:50:52 +0200 Subject: [PATCH 2/3] fix(cloud): share always uploads full local task history MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Share only backfilled the full task.json on a 404 (TaskNotFoundError), assuming any existing server row was complete. When the live bridge had created a partial row (it connected mid-task and captured only later messages, while the offline-run opening turns were never uploaded), share returned 200, backfill was skipped, and the partial copy was shared β€” the web view showed tokens/cost (from api_req_started rows) but "Untitled task" and no conversation. Backfill the authoritative full local history before sharing whenever messages are available; keep the 404 path as a best-effort retry. backfill_messages replaces the task's stored rows, so this is idempotent. 
--- ...6-06-22_share-always-backfill-full-task.md | 133 ++++++++++++++++++ packages/cloud/src/CloudService.ts | 17 ++- .../cloud/src/__tests__/CloudService.test.ts | 15 +- 3 files changed, 159 insertions(+), 6 deletions(-) create mode 100644 ai_plans/2026-06-22_share-always-backfill-full-task.md diff --git a/ai_plans/2026-06-22_share-always-backfill-full-task.md b/ai_plans/2026-06-22_share-always-backfill-full-task.md new file mode 100644 index 0000000000..c6148b1bb9 --- /dev/null +++ b/ai_plans/2026-06-22_share-always-backfill-full-task.md @@ -0,0 +1,133 @@ +# Share always uploads the full local task history + +**Date:** 2026-06-22 +**Branch:** `fix/share-always-backfill-full-task` (stack off current `fix/cloudapi-authentik-back-channel-host`) +**Status:** proposed + +## Symptom (user report) + +> I ran my task prior to the backend running and shared the task afterwards. +> All I can see on the web is "Untitled task", tokens in/out and cost — not the +> conversation. I want the task synced to the backend _in whole_ in such cases. + +## Root cause (code-traced) + +The shared/web task views are rendered **entirely from the `task_messages` +table**: + +- title → `_derive_title(messages)` — `web.py:106` +- tokens/cost → `_compute_metrics(messages)` (client-side in `render.js`, same + source) — `web.py:131` +- conversation body → `messages_json` — `web.py:419`, `web.py:427` + +`task_messages` has exactly two writers: + +1. `backfill_messages(...)` — uploads the full `task.json` and **replaces** all + rows for the task (`telemetry_service.py:42`). Triggered by the extension via + `POST /api/events/backfill`. +2. `upsert_task_message(...)` — the live remote-control **bridge** persisting one + streamed message at a time (`sio.py:187`, `telemetry_service.py:88`). + +Plain telemetry events (`POST /api/events`) only write the **`telemetry_events`** +table — never `task_messages` (`telemetry_service.py:24`).
+ +### Why "Untitled task" + tokens but no conversation + +`_derive_title` skips any message whose `text` starts with `{` (JSON), i.e. +`api_req_started` rows (`web.py:115`); `_compute_metrics` reads exactly those +`api_req_started` rows for tokens/cost. So the stored set contained +`api_req_started` rows (→ metrics) but **not** the user's text turn (→ title +falls back to "Untitled task", and the body is near-empty). A _full_ backfill +always carries the opening user message → would produce a real title. Therefore +**backfill never ran**: share returned HTTP 200 because a (partial) task row +already existed, so the `TaskNotFoundError` branch was never entered. + +### The flawed gate + +`CloudService.shareTask` (`packages/cloud/src/CloudService.ts:315`) backfills the +full local history **only** inside `catch (TaskNotFoundError)` — i.e. only when +the server has _no_ row at all: + +```ts +try { + return await this.shareService!.shareTask(taskId, visibility) +} catch (error) { + if (error instanceof TaskNotFoundError && clineMessages) { + await this.telemetryClient!.backfillMessages(clineMessages, taskId) + return await this.shareService!.shareTask(taskId, visibility) + } + throw error +} +``` + +When a **partial** row already exists (the bridge connected mid-task and captured +only the later messages, while the offline-run opening turns were never +uploaded), share succeeds, backfill is skipped, and the partial copy is what gets +shared. `CloudService.test.ts:511` ("without retry when successful") codifies the +current assumption that a successful share needs no backfill. + +Inferred (not observed in a live DB): the specific reason the server copy was +partial is the bridge timing above. The fix is independent of that cause — it +uploads the authoritative full local history regardless of why the server copy +was incomplete.
+ +## Fix + +The extension holds the **authoritative, complete** history for its own task +(`provider.getCurrentTask().clineMessages`). `backfill_messages` is idempotent β€” +it deletes and re-inserts the task's rows β€” so it is safe to call on every share. + +Change `CloudService.shareTask` to **backfill the full local history first** +(when messages are available), then share. Keep the `TaskNotFoundError` retry as +a fallback, since `backfillMessages` swallows its own network errors and may have +silently no-op'd: + +```ts +public async shareTask(taskId, visibility = "organization", clineMessages?) { + this.ensureInitialized() + + // The extension is the source of truth for its own task. The server copy may + // be absent (task ran while the backend was unreachable) or partial (the live + // bridge connected mid-task and only captured later messages). Upload the full + // local history before sharing so the shared view shows the whole conversation + // and a real title β€” not just the api_req_started fragments. backfillMessages + // replaces the task's stored rows, so this is safe on every share. + if (clineMessages?.length) { + await this.telemetryClient!.backfillMessages(clineMessages, taskId) + } + + try { + return await this.shareService!.shareTask(taskId, visibility) + } catch (error) { + if (error instanceof TaskNotFoundError && clineMessages?.length) { + // backfill above is best-effort (it swallows network errors); retry once. + await this.telemetryClient!.backfillMessages(clineMessages, taskId) + return await this.shareService!.shareTask(taskId, visibility) + } + throw error + } +} +``` + +Behavior when `clineMessages` is not provided (programmatic callers) is unchanged: +no up-front backfill, original 404 path applies. + +## Files + +- `packages/cloud/src/CloudService.ts` β€” reorder backfill to run before share. 
+- `packages/cloud/src/__tests__/CloudService.test.ts` β€” update the + "successful share" case to expect one up-front `backfillMessages` call; keep + the 404-retry and no-messages cases (adjust call counts). + +## Trade-off + +Every explicit share now uploads the full `task.json` once, even when the server +already had it via live streaming. Acceptable for a user-initiated action, and +the only way to guarantee completeness without an extra "what does the server +have?" round-trip. Correctness over a micro-optimization. + +## Verification + +- Unit: `pnpm --filter @roo-code/cloud test` (CloudService share suite). +- Manual: run a task with the backend down, start the backend, share β†’ shared + page shows full conversation + real title (not "Untitled task"). diff --git a/packages/cloud/src/CloudService.ts b/packages/cloud/src/CloudService.ts index 43f52d4b18..a444fd2411 100644 --- a/packages/cloud/src/CloudService.ts +++ b/packages/cloud/src/CloudService.ts @@ -319,11 +319,24 @@ export class CloudService extends EventEmitter implements Di ) { this.ensureInitialized() + // The extension is the source of truth for its own task. The server copy may + // be absent (the task ran while the backend was unreachable) or partial (the + // live bridge connected mid-task and only captured the later messages, while + // the opening turns were never uploaded). Upload the full local history before + // sharing so the shared view shows the whole conversation and a real title β€” + // not just the api_req_started fragments that yield "Untitled task" + metrics. + // backfillMessages replaces the task's stored rows, so this is safe to run on + // every share. + if (clineMessages?.length) { + await this.telemetryClient!.backfillMessages(clineMessages, taskId) + } + try { return await this.shareService!.shareTask(taskId, visibility) } catch (error) { - if (error instanceof TaskNotFoundError && clineMessages) { - // Backfill messages and retry. 
+ if (error instanceof TaskNotFoundError && clineMessages?.length) { + // The up-front backfill is best-effort (it swallows network errors), so + // a TaskNotFoundError here means it silently no-op'd β€” retry once. await this.telemetryClient!.backfillMessages(clineMessages, taskId) return await this.shareService!.shareTask(taskId, visibility) } diff --git a/packages/cloud/src/__tests__/CloudService.test.ts b/packages/cloud/src/__tests__/CloudService.test.ts index 8c557ae7ad..846a37fed4 100644 --- a/packages/cloud/src/__tests__/CloudService.test.ts +++ b/packages/cloud/src/__tests__/CloudService.test.ts @@ -508,7 +508,7 @@ describe("CloudService", () => { cloudService = await CloudService.createInstance(mockContext) }) - it("should call shareTask without retry when successful", async () => { + it("should backfill the full local history up front, then share once", async () => { const taskId = "test-task-id" const visibility = "organization" const clineMessages: ClineMessage[] = [ @@ -528,9 +528,13 @@ describe("CloudService", () => { const result = await cloudService.shareTask(taskId, visibility, clineMessages) + // The extension is the source of truth: it uploads the full local history + // before sharing so a partial/missing server copy can't leak into the + // shared view. One backfill, one (successful) share β€” no 404 retry. 
+ expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledTimes(1) + expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledWith(clineMessages, taskId) expect(mockShareService.shareTask).toHaveBeenCalledTimes(1) expect(mockShareService.shareTask).toHaveBeenCalledWith(taskId, visibility) - expect(mockTelemetryClient.backfillMessages).not.toHaveBeenCalled() expect(result).toEqual(expectedResult) }) @@ -561,7 +565,9 @@ describe("CloudService", () => { expect(mockShareService.shareTask).toHaveBeenCalledTimes(2) expect(mockShareService.shareTask).toHaveBeenNthCalledWith(1, taskId, visibility) expect(mockShareService.shareTask).toHaveBeenNthCalledWith(2, taskId, visibility) - expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledTimes(1) + // Once up front, once more on the 404 fallback (the up-front backfill is + // best-effort and may have silently failed on a network error). + expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledTimes(2) expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledWith(clineMessages, taskId) expect(result).toEqual(expectedResult) }) @@ -596,8 +602,9 @@ describe("CloudService", () => { await expect(cloudService.shareTask(taskId, visibility, clineMessages)).rejects.toThrow(genericError) + // The up-front backfill still runs, but a non-404 error is not retried. + expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledTimes(1) expect(mockShareService.shareTask).toHaveBeenCalledTimes(1) - expect(mockTelemetryClient.backfillMessages).not.toHaveBeenCalled() }) it("should work with default parameters", async () => { From 37a1455eaabab0cd195980d75e869ffe780f078a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Dre=C5=BCewski?= Date: Mon, 22 Jun 2026 17:45:47 +0200 Subject: [PATCH 3/3] fix(cloud): attribute live tasks to the originating window's worktree The cloud web view labels a task's worktree from task.workspace_path, stamped once and never overwritten. 
For live (bridge) tasks the first stamp read the path from a per-user registry that tracks only one extension instance ('newest wins'), and the message event didn't carry which window produced it. With multiple windows under one account, a task could be stamped with another window's worktree. Carry the originating window's workspacePath on each Message bridge event and prefer it over the registry singleton on the backend, mirroring the share/backfill path. --- ...fix-worktree-misattribution-live-bridge.md | 68 +++++++++++++++++++ .../cloud/src/bridge/BridgeOrchestrator.ts | 4 ++ .../__tests__/BridgeOrchestrator.test.ts | 3 + packages/types/src/cloud.ts | 6 ++ self-hosted-cloudapi/src/realtime/sio.py | 11 +-- self-hosted-cloudapi/tests/test_bridge.py | 33 +++++++++ 6 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 ai_plans/2026-06-22_fix-worktree-misattribution-live-bridge.md diff --git a/ai_plans/2026-06-22_fix-worktree-misattribution-live-bridge.md b/ai_plans/2026-06-22_fix-worktree-misattribution-live-bridge.md new file mode 100644 index 0000000000..9c859c369e --- /dev/null +++ b/ai_plans/2026-06-22_fix-worktree-misattribution-live-bridge.md @@ -0,0 +1,68 @@ +# Fix: live task attributed to the wrong worktree in the cloud web view + +Date: 2026-06-22 +Branch: fix/share-always-backfill-full-task (stacked on the share/backfill work) + +## Symptom + +Ran a task in the `lids-uniform-api` window; the cloud web view labelled its +worktree as `septicoBackend` (a different project that was also open). + +## Root cause (proven, not assumed) + +1. The web "worktree" badge is just the last path segment of + `task.workspace_path` — `_workspace_label()` in + `self-hosted-cloudapi/src/routers/web.py:90`. + +2. `task.workspace_path` is stamped **once and never overwritten** — + `_stamp_workspace_path()` in + `self-hosted-cloudapi/src/services/telemetry_service.py:9-21`. First non-empty + value wins (a task "never moves workspaces"). + +3.
The bridge is default-ON, so for a live task the **first** stamp happens while + messages stream, in `on_task_event` → `upsert_task_message` + (`self-hosted-cloudapi/src/realtime/sio.py:187-196`). There the + `workspace_path` is read from `registry.instance(user_id)["workspacePath"]`. + +4. The registry tracks **at most one extension instance per `user_id`** — "the + most recently registered wins" (`self-hosted-cloudapi/src/realtime/hub.py:7-8, +31-32, 56-57`). The bridge captures `workspacePath` once at start + (`src/extension/bridge.ts:93`) and sends it in `register` + (`packages/cloud/src/bridge/BridgeOrchestrator.ts:123-130`). + +5. The live `message` event carries only `taskId` + `message` — **not** the + worktree that produced it (`BridgeOrchestrator.ts:147-155`). The backend has + to infer it from the global, user-keyed registry. + +=> With two windows open under one cloud account, both bridges register under the +same `user_id`; the registry holds only whichever registered/reconnected last. A +task streamed from window A is stamped with window B's path. The sticky stamp +then blocks the later (correct) share/backfill value from fixing it. + +## Fix + +Make the worktree root travel **with** each task event. Every window's bridge +already knows its own correct `workspacePath` (its own `BridgeOrchestrator` +instance), so: + +- `packages/types/src/cloud.ts` — add optional `workspacePath` to the `Message` + `taskBridgeEvent` schema (optional => older clients still validate). +- `packages/cloud/src/bridge/BridgeOrchestrator.ts` — include + `workspacePath: this.options.workspacePath` when emitting the `Message` event. +- `self-hosted-cloudapi/src/realtime/sio.py` — prefer the event's + `workspacePath`, falling back to the registry instance for older clients. + Mirrors what `events.py` backfill already does (explicit field, registry + fallback). + +## Scope / non-goals + +- Stamping stays once-only & sticky — correct semantics.
Already-mis-stamped + rows do **not** self-heal; this only prevents future mis-attribution. +- No change to the single-window case (event value == register value). + +## Tests + +- TS: `BridgeOrchestrator` forwards `workspacePath` on the `Message` event. +- PY: an event whose `workspacePath` differs from the registered instance stamps + the **event's** value (precedence); the registry fallback still works when the + event omits it (existing test). diff --git a/packages/cloud/src/bridge/BridgeOrchestrator.ts b/packages/cloud/src/bridge/BridgeOrchestrator.ts index 603cb54a25..61170ed22d 100644 --- a/packages/cloud/src/bridge/BridgeOrchestrator.ts +++ b/packages/cloud/src/bridge/BridgeOrchestrator.ts @@ -151,6 +151,10 @@ export class BridgeOrchestrator { taskId: payload.taskId, action: payload.action ?? "", message: payload.message, + // Stamp this window's worktree root on the event so the backend + // attributes the task to the project it actually ran in, instead of + // the user-keyed registry singleton (wrong with multiple windows). + workspacePath: this.options.workspacePath, }) } const onState: BusListener = (...args) => void this.pushInstanceState(args[0] as string) diff --git a/packages/cloud/src/bridge/__tests__/BridgeOrchestrator.test.ts b/packages/cloud/src/bridge/__tests__/BridgeOrchestrator.test.ts index 886682335e..7cb462b11f 100644 --- a/packages/cloud/src/bridge/__tests__/BridgeOrchestrator.test.ts +++ b/packages/cloud/src/bridge/__tests__/BridgeOrchestrator.test.ts @@ -123,6 +123,9 @@ describe("BridgeOrchestrator", () => { type: TaskBridgeEventName.Message, taskId: "task-9", action: "created", + // Each window stamps its own worktree root so the backend can attribute + // the task correctly even when several windows share one cloud account. 
+ workspacePath: "/work", }) }) diff --git a/packages/types/src/cloud.ts b/packages/types/src/cloud.ts index 0edd3bf4c3..8c7c1fa1f1 100644 --- a/packages/types/src/cloud.ts +++ b/packages/types/src/cloud.ts @@ -462,6 +462,12 @@ export const taskBridgeEventSchema = z.discriminatedUnion("type", [ taskId: z.string(), action: z.string(), message: clineMessageSchema, + // Worktree root of the window that produced this message. Carried per-event + // so the backend attributes a live task to the project it actually ran in, + // rather than the user-keyed registry singleton (which holds only the most + // recently registered window when several share one cloud account). + // Optional so older extension clients still validate. + workspacePath: z.string().optional(), }), z.object({ type: z.literal(TaskBridgeEventName.TaskModeSwitched), diff --git a/self-hosted-cloudapi/src/realtime/sio.py b/self-hosted-cloudapi/src/realtime/sio.py index bc7188da6f..7f1b06b5a5 100644 --- a/self-hosted-cloudapi/src/realtime/sio.py +++ b/self-hosted-cloudapi/src/realtime/sio.py @@ -185,10 +185,13 @@ async def on_task_event(sid, data): registry.update_instance_state(user_id, data) if evt_type == EVT_MESSAGE and isinstance(data.get("message"), dict): - # The registered instance carries the worktree root the bridge is - # attached to; stamp it on the task so the web view can show the project. - instance = registry.instance(user_id) or {} - workspace_path = instance.get("workspacePath") + # Worktree root: prefer the value the originating window stamped on the + # event — correct even when several windows share one cloud account, since + # the registry tracks only one instance per user. Fall back to the + # registered instance for older clients that don't send it. 
+ workspace_path = data.get("workspacePath") or ( + registry.instance(user_id) or {} + ).get("workspacePath") try: async with async_session_factory() as db: await upsert_task_message( diff --git a/self-hosted-cloudapi/tests/test_bridge.py b/self-hosted-cloudapi/tests/test_bridge.py index 008f438b2c..5b7188a137 100644 --- a/self-hosted-cloudapi/tests/test_bridge.py +++ b/self-hosted-cloudapi/tests/test_bridge.py @@ -328,6 +328,39 @@ async def test_task_event_stamps_workspace_path_from_registered_instance( assert task.workspace_path == ws +async def test_task_event_prefers_event_workspace_path_over_registry( + patch_session_factory, db_session, session_factory, stub_emit +): + """When several windows share one cloud account the registry holds only the + most recently registered window's path. The originating window stamps its own + worktree root on the event, which must take precedence so the task is + attributed to the project it actually ran in.""" + await _seed_user(db_session, "owner") + + # Registry points at a *different* window (the last to register). + registry.attach("ext_owner", "extension", "owner") + registry.register_extension( + "ext_owner", "owner", {"workspacePath": "/home/krzych/Projekty/septicoBackend"} + ) + + event_ws = "/home/krzych/Projekty/lids-uniform-api" + await sio_module.on_task_event( + "ext_owner", + { + "taskId": "task-evt-ws", + "type": EVT_MESSAGE, + "workspacePath": event_ws, + "message": {"ts": 1, "type": "say", "say": "text", "text": "hi"}, + }, + ) + + async with session_factory() as s: + task = ( + await s.execute(select(Task).where(Task.id == "task-evt-ws")) + ).scalar_one() + assert task.workspace_path == event_ws + + async def test_task_event_backfills_workspace_path_on_legacy_null_task( patch_session_factory, db_session, session_factory, stub_emit ):