From c750bc8b64cee0ec3b0a916e922ba5cd220cc31a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Dre=C5=BCewski?= Date: Mon, 22 Jun 2026 11:09:03 +0200 Subject: [PATCH 1/2] fix(cloudapi): send public Host on Authentik back-channel calls; bundle self-hosted stack Primary fix ----------- OAuth callback returned 502 after Authentik login. Root cause: Authentik routes to its /application/o/* endpoints by HTTP Host header and rejects hosts with an underscore (auth_server is not a valid RFC-1123 hostname) with a 404, so back-channel token exchange to http://auth_server:9000 failed and the API rendered its own 502 page. The api now connects to AUTHENTIK_INTERNAL_URL for networking but presents the front-channel host (host of AUTHENTIK_BASE_URL, e.g. localhost:9000 or auth.tumblecode.dev) as the Host header on token/userinfo/discovery calls. Topology-independent: works for the bundled dev stack and a public app.tumblecode.dev deployment alike. - config/auth.py: get_back_channel_host_header() - src/auth/authentik.py: _back_channel_headers() on the 3 back-channel calls - tests/test_back_channel_host.py: header value + attachment on all calls - README.md / .env.example: Host-override rationale + production block Verified live against the running Authentik: old Host (auth_server:9000) -> 404; new Host (localhost:9000) + real client_secret + fake code -> 400 invalid_grant (request reaches the endpoint, client auth passes). Also includes the in-progress self-hosted stack snapshot ------------------------------------------------------- Bundled Authentik (server/worker/Postgres/Redis) + blueprint, Dockerfile entrypoint with db_bootstrap (FRESH/LEGACY/MANAGED), Makefile, web view tweaks, and the accompanying ai_plans docs. 
--- ...6-21_web-task-summary-strip-env-details.md | 75 +++++++++ ...-22_authentik-group-gate-and-app-rename.md | 57 +++++++ ...22_cloudapi-authentik-back-channel-host.md | 81 ++++++++++ .../2026-06-22_dockerize-cloud-backend.md | 52 ++++++ ...-06-22_fix-fresh-db-bootstrap-crashloop.md | 87 ++++++++++ .../2026-06-22_full-stack-docker-compose.md | 94 +++++++++++ ...name-stork-code-to-tumble-code-selfhost.md | 40 +++++ self-hosted-cloudapi/.dockerignore | 2 + self-hosted-cloudapi/.env.example | 151 ++++++++++++++++++ self-hosted-cloudapi/.gitignore | 2 + self-hosted-cloudapi/Dockerfile | 6 +- self-hosted-cloudapi/Makefile | 65 ++++++++ self-hosted-cloudapi/README.md | 113 ++++++++++--- .../authentik/blueprints/tumble-code.yaml | 89 +++++++++++ self-hosted-cloudapi/config/auth.py | 84 ++++++++-- self-hosted-cloudapi/config/settings.py | 20 ++- self-hosted-cloudapi/docker-compose.yml | 130 +++++++++++++-- self-hosted-cloudapi/docker-entrypoint.sh | 32 ++++ self-hosted-cloudapi/src/auth/authentik.py | 28 +++- self-hosted-cloudapi/src/db_bootstrap.py | 69 ++++++++ self-hosted-cloudapi/src/routers/web.py | 44 ++++- self-hosted-cloudapi/src/web/static/app.css | 39 ++++- self-hosted-cloudapi/src/web/static/render.js | 31 +++- .../tests/test_back_channel_host.py | 101 ++++++++++++ .../tests/test_web_and_share.py | 41 +++++ 25 files changed, 1471 insertions(+), 62 deletions(-) create mode 100644 ai_plans/2026-06-21_web-task-summary-strip-env-details.md create mode 100644 ai_plans/2026-06-22_authentik-group-gate-and-app-rename.md create mode 100644 ai_plans/2026-06-22_cloudapi-authentik-back-channel-host.md create mode 100644 ai_plans/2026-06-22_dockerize-cloud-backend.md create mode 100644 ai_plans/2026-06-22_fix-fresh-db-bootstrap-crashloop.md create mode 100644 ai_plans/2026-06-22_full-stack-docker-compose.md create mode 100644 ai_plans/2026-06-22_rename-stork-code-to-tumble-code-selfhost.md create mode 100644 self-hosted-cloudapi/.env.example create mode 100644 
self-hosted-cloudapi/.gitignore create mode 100644 self-hosted-cloudapi/Makefile create mode 100644 self-hosted-cloudapi/authentik/blueprints/tumble-code.yaml create mode 100644 self-hosted-cloudapi/docker-entrypoint.sh create mode 100644 self-hosted-cloudapi/src/db_bootstrap.py create mode 100644 self-hosted-cloudapi/tests/test_back_channel_host.py diff --git a/ai_plans/2026-06-21_web-task-summary-strip-env-details.md b/ai_plans/2026-06-21_web-task-summary-strip-env-details.md new file mode 100644 index 0000000000..9810af2bd8 --- /dev/null +++ b/ai_plans/2026-06-21_web-task-summary-strip-env-details.md @@ -0,0 +1,75 @@ +# Web task summary: show strictly the user query, fold the environment block + +Date: 2026-06-21 +Area: self-hosted-cloudapi web view (task list + task detail) + +## Problem + +The task summary/title in the cloud web view shows machine-generated framing the +user never typed β€” e.g. "Current Mode / code". Root cause, proven from real data: + +The first user turn that reaches the cloud arrives in Roo Code's **API-prompt +form**, not the clean UI text. `api_conversation_history.json` first user message: + +``` + +uruchom wszystkie testy w langgrapha + +# VSCode Visible Files +... +# Current Mode +code +πŸ’» Code +unsloth/GLM-5.2-GGUF:UD-Q3_K_XL +... + +``` + +`_derive_title()` ([routers/web.py](../self-hosted-cloudapi/src/routers/web.py)) +takes the first text-bearing message verbatim, so the `` wrapper and +the `` block (mode, open tabs, file tree, cost…) bleed into +the title. The same raw text renders in the conversation body with no way to +separate the human query from the machine appendix. + +## Fix + +Treat the wrapped form as what it is: human query + machine appendix. + +### Backend β€” `_derive_title` (routers/web.py) + +Add `_strip_task_wrappers(text)`: + +1. Remove `…` (also the trailing, + unclosed case). +2. Unwrap the human message tag β€” `` / `` / `` β€” to + its inner content. +3. 
Plain text (already clean) passes through unchanged. + +`_derive_title` runs each candidate message through it before taking the first +non-empty line. Covers both the task list and the detail-page `

`. + +### Frontend — render.js conversation body + +Add `userContentHtml(text)`: split off the `<environment_details>` block, unwrap +the message tag, render the clean query as markdown, and append the environment +block as a **collapsed `<details>
`** ("Environment details") so the full original +is one click away — satisfying "unfold to full length". Applied to the text / +user_feedback / user_feedback_diff rows. No tags present → identical to today. + +### CSS — app.css + +Minimal styling for `details.env-details` (muted summary, monospace body). + +## Tests + +`tests/test_web_and_share.py`: a backfill whose first message is the wrapped +API-form turn — assert the rendered list/detail title is the bare query +("uruchom wszystkie testy w langgrapha"), with no `environment_details` / `Current +Mode` / `<task>` leakage. + +## Out of scope + +- Message role classification (the initial task currently renders under the + "Assistant" label) — separate concern, not touched here. +- Title length cap stays at 100 chars; the full prompt is now visible in the + conversation body. diff --git a/ai_plans/2026-06-22_authentik-group-gate-and-app-rename.md b/ai_plans/2026-06-22_authentik-group-gate-and-app-rename.md new file mode 100644 index 0000000000..90d7e362c1 --- /dev/null +++ b/ai_plans/2026-06-22_authentik-group-gate-and-app-rename.md @@ -0,0 +1,57 @@ +# Authentik: gate Tumble Code by a group + rename the application + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/authentik/blueprints/` + +## Goal + +Two changes to the auto-provisioned Authentik blueprint, applied cleanly on a +fresh `docker compose up` (user will drop all `./.vol/*` first): + +1. Provision a **group** so access to Tumble Code is controlled by group + membership — add a user to the group → they can sign in to Tumble Code. +2. **Rename** the application's display name from `Stork Code` → `Tumble Code`. + +## Background (verified against Authentik docs) + +- Application access in Authentik is governed by **policy bindings** on the + application. A binding whose `group` field is set is a plain _group-membership_ + check — no separate policy object needed. 
+- **Default behaviour:** an application with _no_ bindings is open to everyone. + The moment one group binding is added, access is restricted to that group. +- **Superusers are not exempt** from application access bindings (superuser grants + _admin_ access, not _application_ access). The bootstrap `akadmin` account β€” the + one used to sign in during the extension OAuth flow β€” must therefore be a member + of the group, or it gets locked out of its own app. The blueprint adds `akadmin` + to the group on creation to prevent this. + +Source: Authentik blueprint Models + Bindings overview docs. + +## Changes (single file: `stork-code.yaml`) + +Internal IDs stay (`slug: stork-code`, `client_id`, provider name) β€” these are +referenced by the api's `AUTHENTIK_APP_SLUG` / `AUTHENTIK_CLIENT_ID` and must not +change. Only the public display string changes, per the rebrand principle. + +1. **New group entry** (`authentik_core.group`), id `tumble-code-group`, + name `Tumble Code Users`, with `akadmin` added as a member via + `!Find [authentik_core.user, [username, akadmin]]`. +2. **Application name** `Stork Code` β†’ `Tumble Code`. +3. **New policy binding** (`authentik_policies.policybinding`) targeting the + application (`!KeyOf stork-code-application`) with `group` + (`!KeyOf tumble-code-group`), `order: 0`, `enabled: true` β€” this is what turns + on the group gate. + +## How to use after `docker compose up` + +- Sign in to Authentik admin as `akadmin` (already in the group β†’ can use Tumble + Code immediately). +- To grant another person access: Directory β†’ Groups β†’ **Tumble Code Users** β†’ + add the user. No blueprint edit needed. + +## Not changed / why + +- Slug, client id/secret, provider name, `AUTHENTIK_APP_SLUG` β€” internal IDs the + api builds endpoints from; renaming them would be a wider, riskier change and + isn't what was asked. +- Blueprint filename kept as `stork-code.yaml` (internal). 
diff --git a/ai_plans/2026-06-22_cloudapi-authentik-back-channel-host.md b/ai_plans/2026-06-22_cloudapi-authentik-back-channel-host.md new file mode 100644 index 0000000000..561f4f76db --- /dev/null +++ b/ai_plans/2026-06-22_cloudapi-authentik-back-channel-host.md @@ -0,0 +1,81 @@ +# Fix Authentik back-channel 502 on OAuth callback (public-address ready) + +Branch: `fix/cloudapi-authentik-back-channel-host` + +## Symptom + +After logging in to the bundled Authentik, the browser lands on +`GET http://localhost:8085/auth/clerk/callback?code=...&state=...` with +**502 Bad Gateway**. + +## Root cause (proven, not inferred) + +The "502" is **not** a reverse-proxy error β€” it is the cloud API's own error +page, returned at [browser.py:267](../self-hosted-cloudapi/src/routers/browser.py#L267) +when the back-channel token exchange to Authentik throws. + +Evidence chain, gathered against the running stack: + +1. `api` container log: + `Token exchange failed: Client error '404 Not Found' for url 'http://auth_server:9000/application/o/token/'` +2. Every Authentik `/application/o/*` route 404s on the back-channel, while + `/-/health/live/` returns 200 β€” so the container is reachable, the routes are not. +3. The only variable is the HTTP `Host` header. Probing the same URL: + - `Host: auth_server:9000` β†’ **404** + - `Host: localhost` / `localhost:9000` / `auth.tumblecode.dev` / `evil.example.com` β†’ **200** +4. Narrowed to the underscore: `under_score.example.com` β†’ 404, `auth-server:9000` β†’ 200. + +**Authentik (Django) resolves the brand β€” and therefore serves its OAuth/OIDC +routes β€” from the `Host` header, and rejects hosts containing an underscore +(`auth_server` is not a valid RFC-1123 hostname) with a 404.** The compose +service is named `auth_server`, so the back-channel URL `http://auth_server:9000` +makes httpx send `Host: auth_server:9000` β†’ 404 β†’ token exchange fails β†’ 502 page. 
+The browser flow works only because the front-channel host (`localhost:9000`) is valid. + +The discovery doc's `issuer` merely echoes the request Host, and the token's real +`iss` is fixed at front-channel authorize time, so the topology-independent fix is +to make the back-channel present the **public front-channel host** as `Host`. + +## Fix + +Connect to the internal service name (for DNS) but send the front-channel host +(host of `AUTHENTIK_BASE_URL`) as `Host` on every server-to-server call. Works +identically for dev (`localhost:9000`) and prod (`auth.tumblecode.dev`). + +- [config/auth.py](../self-hosted-cloudapi/config/auth.py): add + `get_back_channel_host_header()` β†’ returns `urlsplit(authentik_base_url).netloc` + when `authentik_internal_url` is set, else `None`. +- [src/auth/authentik.py](../self-hosted-cloudapi/src/auth/authentik.py): add + `_back_channel_headers()` and apply it to `exchange_code_for_tokens`, + `get_userinfo`, `get_openid_configuration`. +- [.env.example](../self-hosted-cloudapi/.env.example): document the Host behaviour + and a full `app.tumblecode.dev` production block. +- [tests/test_back_channel_host.py](../self-hosted-cloudapi/tests/test_back_channel_host.py): + lock in the header value and that it is attached to all three calls. + +No compose change needed: the Host override neutralises the underscore, so +`AUTHENTIK_INTERNAL_URL=http://auth_server:9000` stays valid. 
+ +## Production (app.tumblecode.dev) + +``` +API_BASE_URL=https://app.tumblecode.dev +AUTHENTIK_BASE_URL=https://auth.tumblecode.dev # front-channel host β†’ sent as Host +AUTHENTIK_INTERNAL_URL=http://auth_server:9000 # back-channel (in-cluster) +AUTHENTIK_REDIRECT_URI=https://app.tumblecode.dev/auth/clerk/callback +CORS_ORIGINS=https://app.tumblecode.dev +AUTHENTIK_CLIENT_SECRET= # provider is confidential +``` + +The provider `client_type` is `confidential`, so a matching `client_secret` is +mandatory in production (the bundled stack already shares one via env). The api +will send `Host: auth.tumblecode.dev` on back-channel calls. + +## Verification + +- Unit: `pytest tests/test_back_channel_host.py` + auth suites β†’ 22 passed. +- Live, against the running Authentik (simulating the patched code path): + - old (`Host: auth_server:9000`) β†’ **404** + - new (`Host: localhost:9000`) + real client_secret + fake code β†’ **400 `invalid_grant`** + β€” i.e. the request now reaches the token endpoint, client auth passes, only the + fake code is rejected. A real authorization code will succeed. diff --git a/ai_plans/2026-06-22_dockerize-cloud-backend.md b/ai_plans/2026-06-22_dockerize-cloud-backend.md new file mode 100644 index 0000000000..f54dc86886 --- /dev/null +++ b/ai_plans/2026-06-22_dockerize-cloud-backend.md @@ -0,0 +1,52 @@ +# Dockerize the self-hosted cloud backend + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/` + +## Goal + +Be able to run the self-hosted cloud API in a container. + +## Finding + +A `Dockerfile`, `.dockerignore`, and `docker-compose.yml` already existed and were +committed, but **the image did not build**. Proven by `docker build`: + +``` +OSError: Readme file does not exist: README.md +ERROR: process "/bin/sh -c uv sync --frozen --no-dev" did not complete successfully +``` + +### Root cause + +- `pyproject.toml` declares `readme = "README.md"` under `[project]`. 
+- The final `RUN uv sync --frozen --no-dev` installs the project itself, so hatchling + reads project metadata and requires `README.md` to be present. +- `.dockerignore` excluded `*.md` (and `README.md`), so the file was not in the build + context β†’ metadata validation fails. +- The earlier `uv sync ... --no-install-project` passes because it runs before + `COPY . .` and does not build the project, so it never touches the README. + +## Fix + +One line in `.dockerignore`: keep `README.md` in the build context while still +ignoring other markdown. + +``` +*.md +!README.md +``` + +## Verification + +1. `docker build -t roo-cloud-api:test .` β€” succeeds (was failing before). +2. `docker run ... uv run uvicorn src.main:app` β€” app imports cleanly through + uvicorn and reaches Pydantic settings validation; only stops on missing required + Authentik env vars, which `docker-compose.yml` supplies. Confirms the Python + entrypoint, dependency set, and app module are all sound in the image. + +## Notes / possible follow-ups (not done) + +- Container runs as root; a non-root `USER` could be added for hardening. +- A `HEALTHCHECK` and multi-stage build (smaller runtime image) are reasonable + future improvements but were out of scope for "make it build and run". diff --git a/ai_plans/2026-06-22_fix-fresh-db-bootstrap-crashloop.md b/ai_plans/2026-06-22_fix-fresh-db-bootstrap-crashloop.md new file mode 100644 index 0000000000..a33e75e48f --- /dev/null +++ b/ai_plans/2026-06-22_fix-fresh-db-bootstrap-crashloop.md @@ -0,0 +1,87 @@ +# Fix: api container crash-loop on a fresh database (Docker bring-up) + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/` + +## Symptom + +After `docker compose up -d`, every service is healthy **except `api`**, which is +stuck `Restarting (1)`. The backend is unreachable on `:8085`. 
Logs show: + +``` +sqlalchemy.exc.ProgrammingError: (...asyncpg...UndefinedTableError): +relation "authentik_state_store" does not exist +[SQL: ALTER TABLE authentik_state_store ALTER COLUMN created_at TYPE TIMESTAMP WITH TIME ZONE] +``` + +## Root cause (proven, not assumed) + +The Dockerfile `CMD` runs `alembic upgrade head` **before** the app starts. +On a fresh `./.vol/postgres` volume that migration chain cannot build a schema: + +- `a1b2c3d4e5f6_baseline.py` β€” `upgrade()` is `pass`. Creates **no tables**. Its + own docstring says it represents a pre-existing `create_all`'d DB you are meant + to `alembic stamp`. +- `b2c3d4e5f6a7_datetime_timezone.py` β€” immediately `ALTER`s `authentik_state_store` + (and `users`, `sessions`, …), tables that were never created β†’ **crash**. +- `c3d4…`, `d4e5…`, `e5f6…` β€” all evolution-only (`add_column`, `create_index`). + +The only thing that _creates_ tables is `Base.metadata.create_all` β€” in the app +lifespan ([src/main.py:30](../self-hosted-cloudapi/src/main.py#L30)), with the +ORM models as the single source of truth. But the app never starts, because +alembic crashes first in the `&&` chain. + +So: **alembic-first ordering + a no-op baseline = a fresh DB can never bootstrap.** + +Note a tempting non-fix: making the baseline `create_all`. That breaks too β€” +`create_all` produces the **head** schema, so the later `add_column` migrations +(`task_message_ts`, `task.workspace_path`) would then fail with _column already +exists_. The migrations are evolution steps for a _pre-head_ schema; they must not +be replayed against a freshly created head schema. 
+ +## Fix + +Replace the blind `alembic upgrade head` with a small startup reconciler that +matches the project's actual design (models = source of truth; migrations = how +_existing_ deployments evolve): + +- **Fresh DB** (no `users` table): `Base.metadata.create_all` builds the current + schema, then `alembic stamp head` records every migration as already applied + (without running the evolution steps). +- **Legacy DB** (tables exist, no `alembic_version`): follow the baseline's + documented path β€” `alembic stamp a1b2c3d4e5f6` then `alembic upgrade head` β€” + so an old pre-tz schema gets evolved. +- **Managed DB** (`alembic_version` present): `alembic upgrade head` as normal. + +### Files + +- `src/db_bootstrap.py` (new) β€” async probe of the live DB; prints + `FRESH` / `LEGACY` / `MANAGED` and runs `create_all` in the `FRESH` case. Uses + the same engine/models as the app, so there is one schema source of truth. +- `docker-entrypoint.sh` (new) β€” runs the probe, dispatches the correct alembic + command per state, then `exec`s uvicorn. +- `Dockerfile` β€” `CMD` now runs `docker-entrypoint.sh` (copied + `chmod +x`). + +The app lifespan keeps its own idempotent `create_all` (harmless no-op once the +entrypoint has built the schema), so running the app outside Docker is unchanged. + +## Verification + +1. `docker compose down` + remove `./.vol/postgres` β†’ truly fresh DB. +2. `docker compose up -d` β†’ `api` reaches healthy/running, not restarting. +3. `docker compose logs api` shows `DB state: FRESH`, the stamp, and + `Application startup complete` β€” no `UndefinedTableError`. +4. `curl -fsS localhost:${PORT:-8085}/health` (or `/`) returns 200. +5. `docker compose exec api uv run alembic current` shows head + (`e5f6a7b8c9d0`), proving alembic and the schema agree. +6. Restart `api` β†’ `DB state: MANAGED`, `upgrade head` no-op, still healthy + (idempotency). +7. `uv run pytest` stays green (entrypoint is Docker-only; no app code path + changed). 
+ +## Risks / follow-ups + +- The `LEGACY` branch assumes a pre-tz schema (the baseline's documented + assumption). A legacy DB that was `create_all`'d at _head_ and never stamped + would fail `upgrade` on the `add_column` steps β€” but that is the pre-existing + documented contract, not introduced here, and not the Docker path. diff --git a/ai_plans/2026-06-22_full-stack-docker-compose.md b/ai_plans/2026-06-22_full-stack-docker-compose.md new file mode 100644 index 0000000000..8506909596 --- /dev/null +++ b/ai_plans/2026-06-22_full-stack-docker-compose.md @@ -0,0 +1,94 @@ +# Full self-hosted stack in one docker-compose (API + Authentik) + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/` + +## Goal + +Make the whole self-hosted cloud backend runnable with a single +`docker compose up` β€” including Authentik and its database β€” instead of running +Authentik separately (it lived in `/opt/docker/llm/docker-compose.yaml`). + +## What was done + +### 1. Merged the Authentik stack into the cloudapi compose + +`docker-compose.yml` now defines `api`, `postgres` (API DB), and the bundled +Authentik: `auth_db`, `auth_redis`, `auth_server`, `auth_worker` β€” adapted from +the proven `/opt/docker/llm/docker-compose.yaml`. Changes vs. the source: + +- Bind mounts under a local `./.vol/` folder (`postgres`, `auth/postgres`, + `auth/redis`, `auth/data`, `auth/templates`, `auth/certs`) β€” mirroring the + proven `/opt/docker/llm` layout. `.vol/` is git- and docker-ignored. + Authentik mount paths kept as the known-good `/data`, `/templates`, `/certs`. +- Blueprint bind-mount `./authentik/blueprints:/blueprints/custom:ro` on server + - worker; the worker auto-applies it. +- `api.depends_on` waits for `postgres` healthy **and** `auth_server` healthy + (added Authentik's `ak healthcheck`). +- Dropped the host publish of the API `postgres` (was `5432:5432`) β€” nothing on + the host needs it and `5432` collides with the local voicebot-database. 
It + stays reachable in-network as `postgres:5432`. `auth_db` keeps `5544:5432`. +- Shared Authentik env via a YAML anchor (`&authentik_env` / `*authentik_env`). + +### 2. Fixed the OAuth split-horizon (root cause) + +`authentik_base_url` was used for both browser redirects and server-side httpx +calls. In one compose those need different hostnames: + +- browser β†’ `http://localhost:9000` +- api container β†’ `http://auth_server:9000` (its own localhost is not Authentik) + +**Proof it's safe:** the API mints its own `iss="rcc"` JWTs +([src/auth/static_token.py:21](../self-hosted-cloudapi/src/auth/static_token.py#L21)) +and never validates Authentik's issuer against a fixed host, so a split hostname +does not break token validation. + +**Fix (backward compatible):** + +- `config/settings.py`: new optional `authentik_internal_url`. +- `config/auth.py`: `_front_channel_base()` (authorize, end-session, issuer) uses + `authentik_base_url`; `_back_channel_base()` (token, userinfo, jwks, discovery) + uses `authentik_internal_url or authentik_base_url`. + +When `authentik_internal_url` is unset (every pre-existing deployment), behaviour +is identical to before. + +### 3. Auto-provision the OAuth2 provider/app via blueprint + +`authentik/blueprints/stork-code.yaml` creates the `stork-code` OAuth2 provider +(confidential, `client_id`/`client_secret`/redirect URI read from env via `!Env`, +scopes openid/email/profile via `!Find`, default authorization/invalidation flows +and self-signed signing key via `!Find`) and the bound application with +`slug: stork-code`. The api and the blueprint read the **same** +`AUTHENTIK_CLIENT_ID/SECRET`, so they stay in sync from one source of truth. No +manual Authentik clicking. + +Schema authored against the pinned `AUTHENTIK_TAG=2026.2.2`; it is the +version-sensitive piece (redirect_uris + property_mappings format) and is flagged +as such in the README and the blueprint header. + +### 4. 
`.env.example` + `README.md` + +Added the Authentik-stack knobs (`AUTH_PG_PASS`, `AUTHENTIK_SECRET_KEY`, +`PG_DB/PG_USER`, `AUTHENTIK_TAG`, `COMPOSE_PORT_HTTP/HTTPS`, bootstrap admin) and +`AUTHENTIK_INTERNAL_URL`, with generation hints. README rewritten to the +one-command flow + a service/port table + front/back-channel and blueprint +troubleshooting notes. + +## Verification + +- Config getters: with `AUTHENTIK_INTERNAL_URL` set, `get_authentik_token_url()` + uses `auth_server` while `get_authentik_authorize_url()` uses `localhost`; unset + β†’ both fall back to base. (see Verification run below) +- `uv run pytest` β€” existing suite stays green. +- `docker compose config` parses; `docker compose up -d` β†’ all services healthy, + `docker compose logs auth_worker` shows the blueprint applied; api back-channel + reaches `http://auth_server:9000/.../.well-known/openid-configuration`; + end-to-end sign-in works (browser β†’ localhost:9000 β†’ callback β†’ session). + +## Risks / follow-ups + +- Blueprint schema may need a tweak for a different `AUTHENTIK_TAG`; the worker + log / _System β†’ Blueprints_ surfaces it immediately. +- TLS / production domains handled by the existing `API_BASE_URL` / + `AUTHENTIK_BASE_URL` knobs; the split-URL change makes the domain case work too. diff --git a/ai_plans/2026-06-22_rename-stork-code-to-tumble-code-selfhost.md b/ai_plans/2026-06-22_rename-stork-code-to-tumble-code-selfhost.md new file mode 100644 index 0000000000..9824c5c43d --- /dev/null +++ b/ai_plans/2026-06-22_rename-stork-code-to-tumble-code-selfhost.md @@ -0,0 +1,40 @@ +# Rename `stork-code` β†’ `tumble-code` in self-hosted cloud + +**Date:** 2026-06-22 +**Scope:** `self-hosted-cloudapi/` only + +## Goal + +Align the self-hosted cloud stack's Authentik app identity with the +Roo Code β†’ Tumble Code rebrand. Rename the public-facing Authentik slug / +client id / application / blueprint name from `stork-code` to `tumble-code`. 
+ +## What changes + +Replace the **hyphenated** string `stork-code` β†’ `tumble-code` everywhere: + +- `authentik/blueprints/stork-code.yaml` β†’ renamed to `tumble-code.yaml`; + internal ids (`stork-code-provider`, `stork-code-application`), names, slugs, + `client_id` default, and the `!KeyOf` references all become `tumble-code*`. +- `.env`, `.env.example`, `.env.backup` β€” `AUTHENTIK_APP_SLUG`, + `AUTHENTIK_CLIENT_ID`, and the blueprint-path comment. +- `docker-compose.yml` β€” `AUTHENTIK_APP_SLUG` / `AUTHENTIK_CLIENT_ID` defaults. +- `README.md` β€” blueprint filename references. +- `config/settings.py` β€” `authentik_app_slug` Field default. + +## What does NOT change + +- `.env.backup:13` `DATABASE_URL=...@localhost:5544/stork_code` β€” the **DB name** + (underscore) points at the real existing Postgres database on the host. + Renaming the string without renaming the DB would break the connection, so + it's left as-is. Not a "stork-code" app mention. +- The blueprint is bind-mounted by directory (`./authentik/blueprints:/blueprints/custom`), + so renaming the file does not affect compose wiring. + +## Note for operators + +After this change, the Authentik OAuth2 provider/application slug becomes +`tumble-code`. The extension's OAuth client config (`AUTHENTIK_CLIENT_ID`) and +any existing Authentik state must use the new slug; a fresh blueprint apply +creates the new app. An already-provisioned `stork-code` app in a running +Authentik will need re-provisioning or manual slug update. 
diff --git a/self-hosted-cloudapi/.dockerignore b/self-hosted-cloudapi/.dockerignore index 8781ecf403..234e68d203 100644 --- a/self-hosted-cloudapi/.dockerignore +++ b/self-hosted-cloudapi/.dockerignore @@ -6,5 +6,7 @@ __pycache__ .env .env.example *.md +!README.md tests/ .vscode/ +.vol/ diff --git a/self-hosted-cloudapi/.env.example b/self-hosted-cloudapi/.env.example new file mode 100644 index 0000000000..c9342bc4cc --- /dev/null +++ b/self-hosted-cloudapi/.env.example @@ -0,0 +1,151 @@ +# ============================================================================ +# Self-Hosted Roo Code Cloud API β€” environment template +# Copy to `.env` and fill in real values. Anything left blank uses the default +# from config/settings.py (where one exists). +# ============================================================================ + +# --- Core (required) ------------------------------------------------------- + +# PostgreSQL DSN. For Docker Compose this is set automatically inside the +# `api` container. For local `uv run uvicorn ...` development, point at a +# Postgres instance you control. SQLite (`sqlite+aiosqlite:///./dev.db`) +# also works for quick smoke tests. +DATABASE_URL=postgresql://roo:password@localhost:5432/roo_cloud + +# Random secret used for signing internal artifacts (state tokens, etc.). +# Generate with: `python -c "import secrets; print(secrets.token_urlsafe(48))"` +SECRET_KEY=change-me-to-a-random-secret-key + +# Public URL of THIS API. Used in OAuth redirect construction and logging. +API_BASE_URL=http://localhost:8085 + +# Port the server listens on (default 8085). +PORT=8085 + +# --- JWT signing ----------------------------------------------------------- + +# HS256 (shared secret) is simpler; RS256 (asymmetric) is recommended for +# multi-service deployments. +JWT_ALGORITHM=HS256 + +# HS256: shared secret used for both signing and verification. 
+# Generate with: `python -c "import secrets; print(secrets.token_urlsafe(48))"` +JWT_SECRET=change-me-to-a-random-jwt-secret + +# RS256 only: paths to PEM keys. Leave commented if using HS256. +# JWT_PRIVATE_KEY=/path/to/private.pem +# JWT_PUBLIC_KEY=/path/to/public.pem + +# --- Authentik OAuth (required) ------------------------------------------- + +# Browser-facing (front-channel) URL of Authentik β€” the URL your browser is +# redirected to for login. With the bundled stack this is the published port. +AUTHENTIK_BASE_URL=http://localhost:9000 + +# Internal (back-channel) Authentik URL the API uses for server-to-server calls +# (token/userinfo/jwks/discovery). In docker-compose the api container reaches +# Authentik by service name, NOT via localhost. Leave UNSET for a single-host +# (non-compose) deployment β€” it then falls back to AUTHENTIK_BASE_URL. +# +# NOTE: Authentik routes by HTTP Host header, and the api always presents the +# *front-channel* host (host of AUTHENTIK_BASE_URL) as Host on these calls. That +# is why the underscore in `auth_server` is fine here: the connection uses the +# service name for DNS, but the Host sent is taken from AUTHENTIK_BASE_URL. +AUTHENTIK_INTERNAL_URL=http://auth_server:9000 + +# Authentik Application slug β€” used to build /application/o//* URLs. +# Must match the application slug created by the blueprint. +AUTHENTIK_APP_SLUG=tumble-code + +# OAuth2 client credentials. With the bundled stack these are the source of +# truth: the Authentik blueprint provisions the provider to MATCH these values, +# so pick any strong random secret up front (the api and Authentik share them). +# AUTHENTIK_CLIENT_SECRET: openssl rand -hex 32 +AUTHENTIK_CLIENT_ID=tumble-code +AUTHENTIK_CLIENT_SECRET= + +# Must match the redirect URI registered in Authentik exactly (the blueprint +# registers this value). Default flow expects `{API_BASE_URL}/auth/clerk/callback`. 
+AUTHENTIK_REDIRECT_URI=http://localhost:8085/auth/clerk/callback + +# --- Production (public address) example ----------------------------------- +# For a public deployment where the API is served at https://app.tumblecode.dev +# and Authentik at https://auth.tumblecode.dev, set: +# +# API_BASE_URL=https://app.tumblecode.dev +# AUTHENTIK_BASE_URL=https://auth.tumblecode.dev # front-channel (browser) +# AUTHENTIK_INTERNAL_URL=http://auth_server:9000 # back-channel (in-cluster) +# AUTHENTIK_REDIRECT_URI=https://app.tumblecode.dev/auth/clerk/callback +# CORS_ORIGINS=https://app.tumblecode.dev +# AUTHENTIK_CLIENT_SECRET= # REQUIRED: client is confidential +# +# The api sends `Host: auth.tumblecode.dev` (from AUTHENTIK_BASE_URL) on every +# back-channel call, so Authentik resolves the brand correctly even though the +# connection targets the internal service name. + +# --- CORS ------------------------------------------------------------------ + +# Comma-separated list, JSON array, or `*` for development. +CORS_ORIGINS=* + +# --- LLM proxy ------------------------------------------------------------- + +# Provider used when a model id has no provider/* prefix. +DEFAULT_LLM_PROVIDER=openai + +# Upstream API keys. Only fill in the ones you actually proxy to. +OPENAI_API_KEY= +ANTHROPIC_API_KEY= +GOOGLE_API_KEY= +XAI_API_KEY= + +# --- Marketplace ----------------------------------------------------------- + +# `yaml` reads from MARKETPLACE_YAML_DIR; `database` reads from the DB. +MARKETPLACE_SOURCE=yaml +MARKETPLACE_YAML_DIR=./config/marketplace + +# --- Optional features ---------------------------------------------------- + +# When false, /api/extension/credit-balance returns {balance: 0}. +CREDIT_SYSTEM_ENABLED=false + +# When false, /api/extension/bridge/config returns 404. +BRIDGE_ENABLED=true + +# When false, telemetry endpoints accept-and-ignore. +TELEMETRY_ENABLED=true + +# slowapi-based per-IP rate limiting on all routes. 
+RATE_LIMIT_ENABLED=true +RATE_LIMIT_REQUESTS_PER_MINUTE=60 + +# ============================================================================ +# Bundled Authentik stack (docker compose only) +# These configure the Authentik server/worker + their Postgres/Redis that come +# up alongside the API. Not used for local `uv run` development. +# ============================================================================ + +# Authentik image tag. Keep in sync with the blueprint schema (see +# authentik/blueprints/tumble-code.yaml). +AUTHENTIK_TAG=2026.2.2 + +# Authentik's own Postgres (separate from the API's DB). +PG_DB=authentik +PG_USER=authentik +# REQUIRED — Authentik refuses to start without it. Generate: openssl rand -hex 32 +AUTH_PG_PASS= + +# REQUIRED — Authentik secret key (cookie/session signing). openssl rand -base64 60 +AUTHENTIK_SECRET_KEY= + +# Published Authentik ports (host side). Must line up with AUTHENTIK_BASE_URL. +COMPOSE_PORT_HTTP=9000 +COMPOSE_PORT_HTTPS=9443 + +# First-run admin (akadmin). This is the account you log into the Authentik UI +# with AND sign in as during the extension OAuth flow. Set a password to enable +# non-interactive bootstrap. Generate token: openssl rand -hex 32 +AUTHENTIK_BOOTSTRAP_PASSWORD= +AUTHENTIK_BOOTSTRAP_EMAIL=admin@example.com +AUTHENTIK_BOOTSTRAP_TOKEN= diff --git a/self-hosted-cloudapi/.gitignore b/self-hosted-cloudapi/.gitignore new file mode 100644 index 0000000000..642e9c2020 --- /dev/null +++ b/self-hosted-cloudapi/.gitignore @@ -0,0 +1,2 @@ +# Local bind-mount data for docker-compose (Postgres, Redis, Authentik state) +.vol/ diff --git a/self-hosted-cloudapi/Dockerfile b/self-hosted-cloudapi/Dockerfile index 6a6ad1912a..f5798a3624 100644 --- a/self-hosted-cloudapi/Dockerfile +++ b/self-hosted-cloudapi/Dockerfile @@ -15,6 +15,8 @@ RUN uv sync --frozen --no-dev --no-install-project # Copy application COPY . . 
RUN uv sync --frozen --no-dev +RUN chmod +x docker-entrypoint.sh -# Run Alembic migrations on startup, then start the app -CMD ["sh", "-c", "uv run alembic upgrade head && uv run uvicorn src.main:app --host 0.0.0.0 --port ${PORT:-8085}"] +# Reconcile the schema (create_all + stamp on a fresh DB, upgrade on a managed +# one β€” see docker-entrypoint.sh / src/db_bootstrap.py), then start the app. +CMD ["./docker-entrypoint.sh"] diff --git a/self-hosted-cloudapi/Makefile b/self-hosted-cloudapi/Makefile new file mode 100644 index 0000000000..22bbc14386 --- /dev/null +++ b/self-hosted-cloudapi/Makefile @@ -0,0 +1,65 @@ +# Self-Hosted Roo Code Cloud API β€” developer tasks +# +# Run `make help` to list targets. Local targets use uv; the docker-* targets +# wrap docker compose. + +# Allow overriding host/port without editing this file: `make dev PORT=8000` +HOST ?= 0.0.0.0 +PORT ?= 8085 + +.DEFAULT_GOAL := help + +.PHONY: help install dev run migrate revision downgrade test test-cov lint \ + fmt clean docker-build docker-up docker-down docker-logs docker-migrate + +help: ## Show this help + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) \ + | sort \ + | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-16s\033[0m %s\n", $$1, $$2}' + +## --- Local (uv) --------------------------------------------------------------- + +install: ## Install all dependencies (incl. 
dev) into .venv + uv sync --extra dev + +dev: ## Run the API with autoreload (local) + uv run uvicorn src.main:app --reload --host $(HOST) --port $(PORT) + +run: ## Run the API without autoreload (local) + uv run uvicorn src.main:app --host $(HOST) --port $(PORT) + +migrate: ## Apply all pending DB migrations + uv run alembic upgrade head + +revision: ## Create a new autogenerated migration: make revision m="message" + uv run alembic revision --autogenerate -m "$(m)" + +downgrade: ## Roll back the most recent migration + uv run alembic downgrade -1 + +test: ## Run the test suite + uv run pytest + +test-cov: ## Run tests with a coverage report + uv run pytest --cov=src --cov-report=term-missing + +clean: ## Remove caches and build artifacts + find . -type d -name __pycache__ -prune -exec rm -rf {} + + rm -rf .pytest_cache .coverage htmlcov + +## --- Docker ------------------------------------------------------------------- + +docker-build: ## Build the API image + docker compose build + +docker-up: ## Start API + Postgres in the background + docker compose up -d + +docker-down: ## Stop and remove the containers + docker compose down + +docker-logs: ## Follow the API container logs + docker compose logs -f api + +docker-migrate: ## Run migrations inside the running API container + docker compose exec api uv run alembic upgrade head diff --git a/self-hosted-cloudapi/README.md b/self-hosted-cloudapi/README.md index 2f0b772e67..891a8330e1 100644 --- a/self-hosted-cloudapi/README.md +++ b/self-hosted-cloudapi/README.md @@ -4,37 +4,92 @@ A self-hosted replacement for the Roo Code Cloud API, compatible with the existi ## Quick Start -### Prerequisites +### Running the full stack with Docker Compose (recommended) -- Python 3.12+ -- [uv](https://docs.astral.sh/uv/getting-started/installation/) (Python package manager) -- PostgreSQL 16+ -- Authentik (for OAuth authentication) -- Docker & Docker Compose (optional, for containerized deployment) - -### Environment Setup - -1. 
Copy `.env.example` to `.env` and fill in the required values: +`docker compose up` brings up **everything**: this API and its Postgres, plus a +bundled **Authentik** (server, worker, Postgres, Redis). The Authentik OAuth2 +provider and application are **auto-provisioned from a blueprint** +([`authentik/blueprints/tumble-code.yaml`](authentik/blueprints/tumble-code.yaml)), +so there is no manual Authentik OAuth setup. ```bash cp .env.example .env -``` -2. Key environment variables: - - `DATABASE_URL`: PostgreSQL connection string - - `AUTHENTIK_BASE_URL`: Your Authentik instance URL - - `AUTHENTIK_CLIENT_ID`: OAuth2 client ID from Authentik - - `API_BASE_URL`: Public URL of this API server +# Fill in the REQUIRED secrets in .env: +# SECRET_KEY, JWT_SECRET β€” openssl rand -hex 32 +# AUTHENTIK_CLIENT_SECRET β€” openssl rand -hex 32 (shared with the blueprint) +# AUTH_PG_PASS β€” openssl rand -hex 32 (Authentik's DB) +# AUTHENTIK_SECRET_KEY β€” openssl rand -base64 60 +# AUTHENTIK_BOOTSTRAP_PASSWORD β€” the akadmin password you'll log in with -### Running with Docker Compose +docker compose up -d --build +``` + +Services and ports: + +| Service | URL / port | Purpose | +| ------------- | --------------------- | ------------------------------------ | +| `api` | http://localhost:8085 | The cloud API the extension talks to | +| `auth_server` | http://localhost:9000 | Authentik (login UI + OAuth) | +| `postgres` | in-network only | API database | +| `auth_db` | localhost:5544 | Authentik database | + +Log in to Authentik at http://localhost:9000 with `akadmin` / +`AUTHENTIK_BOOTSTRAP_PASSWORD`. The same account is what you sign in with during +the extension's OAuth flow. + +> **Front-channel vs back-channel URLs.** `AUTHENTIK_BASE_URL` +> (`http://localhost:9000`) is what the _browser_ is redirected to; +> `AUTHENTIK_INTERNAL_URL` (`http://auth_server:9000`) is what the _api container_ +> uses for server-to-server calls (token/userinfo/jwks). 
Both are preset in +> `docker-compose.yml` β€” only change them if you front Authentik with a real +> domain/reverse proxy (then point `AUTHENTIK_BASE_URL` at the public domain and +> leave `AUTHENTIK_INTERNAL_URL` as the in-network service URL). +> +> **Why the api overrides the `Host` header on back-channel calls.** Authentik +> resolves a request's _brand_ β€” and therefore serves its `/application/o/*` +> routes β€” from the HTTP `Host` header, and rejects hosts containing an +> underscore (`auth_server` is not a valid RFC-1123 hostname) with a **404**. +> So the api connects to `AUTHENTIK_INTERNAL_URL` for networking but sends the +> _front-channel_ host (the host of `AUTHENTIK_BASE_URL`, e.g. `localhost:9000` +> or `auth.tumblecode.dev`) as `Host`. This is automatic β€” you don't configure +> it β€” and it is why the underscore in the default `auth_server` service name is +> harmless. If back-channel token exchange ever 404s, this is the mechanism to +> look at (see [`config/auth.py`](config/auth.py) β†’ `get_back_channel_host_header`). + +#### Production example (public address) + +For a public deployment where the API is served at `https://app.tumblecode.dev` +and Authentik at `https://auth.tumblecode.dev`, set in `.env`: ```bash -docker-compose up -d +API_BASE_URL=https://app.tumblecode.dev +AUTHENTIK_BASE_URL=https://auth.tumblecode.dev # front-channel; also sent as Host on back-channel +AUTHENTIK_INTERNAL_URL=http://auth_server:9000 # back-channel (in-cluster service name) +AUTHENTIK_REDIRECT_URI=https://app.tumblecode.dev/auth/clerk/callback +CORS_ORIGINS=https://app.tumblecode.dev +AUTHENTIK_CLIENT_SECRET= # REQUIRED: the provider is confidential ``` -### Running Locally +The api sends `Host: auth.tumblecode.dev` (taken from `AUTHENTIK_BASE_URL`) on +every back-channel call, so Authentik resolves the brand correctly even though +the connection targets the internal service name. 
The provider's `client_type` +is `confidential`, so a matching `AUTHENTIK_CLIENT_SECRET` is mandatory. + +> **Blueprint troubleshooting.** The provider/app are created by the worker on +> first boot. Check it applied with `docker compose logs auth_worker | grep -i +blueprint`, or in the Authentik UI under **System β†’ Blueprints**. The blueprint +> schema is Authentik-version-sensitive; if it errors, adjust +> `authentik/blueprints/tumble-code.yaml` for your `AUTHENTIK_TAG`. + +### Running the API locally (without Docker) + +Requires Python 3.12+, [uv](https://docs.astral.sh/uv/getting-started/installation/), +a PostgreSQL 16+ you control, and an Authentik instance. ```bash +cp .env.example .env # set DATABASE_URL + the AUTHENTIK_* values + # Install dependencies uv sync @@ -42,9 +97,15 @@ uv sync uv run alembic upgrade head # Start the server -uv run uvicorn src.main:app --reload --host 0.0.0.0 --port 8000 +uv run uvicorn src.main:app --reload --host 0.0.0.0 --port 8085 ``` +For a non-compose deployment, leave `AUTHENTIK_INTERNAL_URL` unset β€” it falls +back to `AUTHENTIK_BASE_URL`. + +A [`Makefile`](Makefile) wraps these commands (`make help`, `make dev`, +`make docker-up`, …). + ## Configuring the Roo Code Extension In VS Code, open Settings (`Ctrl+,` / `Cmd+,`) and search for `roo-cline` to configure these settings: @@ -96,6 +157,18 @@ In VS Code, open Settings (`Ctrl+,` / `Cmd+,`) and search for `roo-cline` to con - Verify the Authentik redirect URI is set to `{API_BASE_URL}/auth/clerk/callback` - Check the API server logs for errors during the token exchange or user creation +**`502 Bad Gateway` on `/auth/clerk/callback` right after Authentik login:** + +- This is the API's own error page, returned when the **back-channel token + exchange** to Authentik fails β€” not a reverse-proxy error. +- Check the API logs: `docker compose logs api | grep -i "token exchange"`. 
+ A `404 Not Found` for `…/application/o/token/` means Authentik rejected the + request's `Host`. The api derives that `Host` from `AUTHENTIK_BASE_URL`, so + ensure it is a valid hostname (no underscores) and points at the host your + Authentik brand serves. See _Why the api overrides the `Host` header_ above. +- A `400 invalid_client` instead means `AUTHENTIK_CLIENT_SECRET` is missing or + does not match the value the blueprint provisioned (the provider is confidential). + ## Authentik Setup 1. Deploy Authentik with Docker Compose diff --git a/self-hosted-cloudapi/authentik/blueprints/tumble-code.yaml b/self-hosted-cloudapi/authentik/blueprints/tumble-code.yaml new file mode 100644 index 0000000000..ea0d4dc7ca --- /dev/null +++ b/self-hosted-cloudapi/authentik/blueprints/tumble-code.yaml @@ -0,0 +1,89 @@ +# yaml-language-server: $schema=https://goauthentik.io/blueprints/schema.json +# +# Auto-provisions the OAuth2 provider + application the cloud API signs in +# against. Applied automatically by the auth_worker on startup (mounted at +# /blueprints/custom). State is visible in the Authentik admin under +# System -> Blueprints. +# +# Credentials and the redirect URI are read from the container environment +# (set in docker-compose.yml / .env), so the api and Authentik stay in sync +# from a single source of truth. The application slug MUST equal the api's +# AUTHENTIK_APP_SLUG (default: tumble-code), since app-specific endpoints +# (jwks, end-session, discovery) are built from it. +# +# NOTE: the oauth2provider schema (notably redirect_uris and property_mappings) +# is Authentik-version-sensitive. This is authored for the pinned AUTHENTIK_TAG +# (2026.2.2). If the worker logs a blueprint error, this file is the thing to +# adjust. +version: 1 +metadata: + name: tumble-code OAuth2 provider + application + labels: + blueprints.goauthentik.io/instantiate: "true" +entries: + # 1. 
OAuth2 / OpenID provider + - model: authentik_providers_oauth2.oauth2provider + state: present + id: tumble-code-provider + identifiers: + name: tumble-code + attrs: + name: tumble-code + client_type: confidential + client_id: !Env [AUTHENTIK_CLIENT_ID, tumble-code] + client_secret: !Env [AUTHENTIK_CLIENT_SECRET, ""] + authorization_flow: + !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]] + invalidation_flow: + !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]] + signing_key: + !Find [authentik_crypto.certificatekeypair, [name, "authentik Self-signed Certificate"]] + redirect_uris: + - matching_mode: strict + url: !Env [AUTHENTIK_REDIRECT_URI, "http://localhost:8085/auth/clerk/callback"] + property_mappings: + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]] + + # 2. Access group. Add users to this group (Directory -> Groups -> "Tumble + # Code Users") to let them sign in to Tumble Code; the binding in entry 4 + # gates the application on membership. akadmin is added here so the bootstrap + # superuser (the account used for the extension OAuth flow) is not locked out + # of its own app β€” superuser status does NOT bypass application bindings. + - model: authentik_core.group + state: present + id: tumble-code-group + identifiers: + name: Tumble Code Users + attrs: + name: Tumble Code Users + users: + - !Find [authentik_core.user, [username, akadmin]] + + # 3. Application bound to the provider. slug must match AUTHENTIK_APP_SLUG. + - model: authentik_core.application + state: present + id: tumble-code-application + identifiers: + slug: tumble-code + attrs: + name: Tumble Code + slug: tumble-code + provider: !KeyOf tumble-code-provider + policy_engine_mode: any + + # 4. Group gate: bind the group to the application. 
With this binding present, + # only members of "Tumble Code Users" can access the application (an app with + # no bindings is open to everyone; the first binding restricts it). + - model: authentik_policies.policybinding + state: present + identifiers: + target: !KeyOf tumble-code-application + group: !KeyOf tumble-code-group + order: 0 + attrs: + target: !KeyOf tumble-code-application + group: !KeyOf tumble-code-group + order: 0 + enabled: true diff --git a/self-hosted-cloudapi/config/auth.py b/self-hosted-cloudapi/config/auth.py index ca0d456c5a..dae8c1d356 100644 --- a/self-hosted-cloudapi/config/auth.py +++ b/self-hosted-cloudapi/config/auth.py @@ -1,38 +1,94 @@ -"""Auth-related configuration helpers.""" +"""Auth-related configuration helpers. + +Authentik is reached over two channels that may need different hostnames: + +* **front-channel** β€” URLs the *browser* is redirected to (`authorize`, + `end-session`). These must be publicly reachable, e.g. ``http://localhost:9000``. +* **back-channel** β€” URLs the *api server* fetches over httpx (`token`, + `userinfo`, `jwks`, discovery). Inside a single docker-compose these must use + the compose service name (e.g. ``http://auth_server:9000``) because the api + container's own ``localhost`` is not Authentik. + +``settings.authentik_internal_url`` configures the back-channel base; when unset +it falls back to ``authentik_base_url`` so single-host deployments are unchanged. + +Brand / Host header +------------------- +Authentik resolves a request's *brand* β€” and therefore serves its +``/application/o/*`` routes β€” from the HTTP ``Host`` header. The back-channel +base is an in-network service name (e.g. ``http://auth_server:9000`` in the +bundled compose stack), so httpx would send ``Host: auth_server:9000``. Authentik +(Django) rejects that with **404 on every application route** because the +underscore makes ``auth_server`` an invalid RFC-1123 hostname. 
The browser flow +works only because the front-channel host (``localhost:9000`` in dev, +``auth.tumblecode.dev`` in production) is valid. + +So back-channel calls must connect to the service name (for DNS) but present the +public front-channel host as ``Host`` β€” see ``get_back_channel_host_header``. +""" + +from typing import Optional +from urllib.parse import urlsplit from config.settings import settings +def _front_channel_base() -> str: + """Base URL for endpoints the browser is redirected to.""" + return settings.authentik_base_url + + +def _back_channel_base() -> str: + """Base URL for endpoints the api server fetches itself.""" + return settings.authentik_internal_url or settings.authentik_base_url + + +def get_back_channel_host_header() -> Optional[str]: + """``Host`` header to send on back-channel (server-to-server) requests. + + Returns the public *front-channel* host (host[:port] of + ``authentik_base_url`` β€” e.g. ``auth.tumblecode.dev`` or ``localhost:9000``) + whenever a distinct internal URL is configured, so Authentik resolves the + correct brand instead of 404-ing on the internal service name. + + Returns ``None`` when no internal URL is set (front == back channel); httpx's + default ``Host`` already matches, so no override is needed. 
+ """ + if not settings.authentik_internal_url: + return None + return urlsplit(settings.authentik_base_url).netloc or None + + def get_authentik_authorize_url() -> str: - """Get the Authentik authorization endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/authorize/" + """Get the Authentik authorization endpoint URL (front-channel / browser).""" + return f"{_front_channel_base()}/application/o/authorize/" def get_authentik_token_url() -> str: - """Get the Authentik token endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/token/" + """Get the Authentik token endpoint URL (back-channel / server).""" + return f"{_back_channel_base()}/application/o/token/" def get_authentik_userinfo_url() -> str: - """Get the Authentik userinfo endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/userinfo/" + """Get the Authentik userinfo endpoint URL (back-channel / server).""" + return f"{_back_channel_base()}/application/o/userinfo/" def get_authentik_issuer_url() -> str: """Get the Authentik issuer URL.""" - return f"{settings.authentik_base_url}/application/o/{settings.authentik_app_slug}/" + return f"{_front_channel_base()}/application/o/{settings.authentik_app_slug}/" def get_authentik_end_session_url() -> str: - """Get the Authentik end-session (logout) endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/{settings.authentik_app_slug}/end-session/" + """Get the Authentik end-session (logout) endpoint URL (front-channel / browser).""" + return f"{_front_channel_base()}/application/o/{settings.authentik_app_slug}/end-session/" def get_authentik_jwks_url() -> str: - """Get the Authentik JWKS endpoint URL.""" - return f"{settings.authentik_base_url}/application/o/{settings.authentik_app_slug}/jwks/" + """Get the Authentik JWKS endpoint URL (back-channel / server).""" + return f"{_back_channel_base()}/application/o/{settings.authentik_app_slug}/jwks/" def get_authentik_discovery_url() -> str: - """Get 
the Authentik OpenID discovery document URL.""" - return f"{settings.authentik_base_url}/application/o/{settings.authentik_app_slug}/.well-known/openid-configuration" + """Get the Authentik OpenID discovery document URL (back-channel / server).""" + return f"{_back_channel_base()}/application/o/{settings.authentik_app_slug}/.well-known/openid-configuration" diff --git a/self-hosted-cloudapi/config/settings.py b/self-hosted-cloudapi/config/settings.py index 34a02243e4..132ead6345 100644 --- a/self-hosted-cloudapi/config/settings.py +++ b/self-hosted-cloudapi/config/settings.py @@ -10,7 +10,12 @@ class Settings(BaseSettings): """Roo Cloud API settings.""" - model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8") + # extra="ignore": the same .env is shared with docker-compose and carries + # infra-only keys (COMPOSE_PORT_*, AUTHENTIK_BOOTSTRAP_*, AUTH_PG_PASS, …) + # that this app doesn't define. Ignore them instead of failing to start. + model_config = SettingsConfigDict( + env_file=".env", env_file_encoding="utf-8", extra="ignore" + ) # Core database_url: str = Field(..., description="PostgreSQL connection string") @@ -25,8 +30,17 @@ class Settings(BaseSettings): jwt_secret: Optional[str] = None # Authentik OAuth - authentik_base_url: str = Field(..., description="Authentik instance URL") - authentik_app_slug: str = Field("stork-code", description="Authentik application slug for app-specific endpoints") + authentik_base_url: str = Field(..., description="Authentik instance URL (browser-facing / front-channel)") + # Internal (container-network) Authentik URL for server-to-server calls + # (token/userinfo/discovery/jwks). In a single docker-compose the api + # container cannot reach the browser-facing `localhost:9000` β€” it must use + # the compose service name (e.g. http://auth_server:9000). Falls back to + # authentik_base_url when unset, so existing single-host deployments are + # unaffected. 
+ authentik_internal_url: Optional[str] = Field( + None, description="Internal Authentik URL for back-channel calls; falls back to authentik_base_url" + ) + authentik_app_slug: str = Field("tumble-code", description="Authentik application slug for app-specific endpoints") authentik_client_id: str = Field(..., description="OAuth2 client ID") authentik_client_secret: Optional[str] = None authentik_redirect_uri: str = Field(..., description="OAuth2 redirect URI") diff --git a/self-hosted-cloudapi/docker-compose.yml b/self-hosted-cloudapi/docker-compose.yml index 7993f70dd9..5951d52427 100644 --- a/self-hosted-cloudapi/docker-compose.yml +++ b/self-hosted-cloudapi/docker-compose.yml @@ -1,6 +1,11 @@ -version: "3.8" - +# Full self-hosted stack: cloud API + its Postgres, plus a bundled Authentik +# (server, worker, Postgres, Redis). `docker compose up -d` brings up everything +# and the Authentik OAuth2 provider/application is auto-provisioned from the +# blueprint in ./authentik/blueprints, so no manual Authentik clicking is needed. +# +# Reference for the Authentik services: /opt/docker/llm/docker-compose.yaml services: + # --- Cloud API ------------------------------------------------------------ api: build: . ports: @@ -11,12 +16,18 @@ services: SECRET_KEY: ${SECRET_KEY:-change-me-to-a-random-secret-key} JWT_ALGORITHM: ${JWT_ALGORITHM:-HS256} JWT_SECRET: ${JWT_SECRET:-change-me-to-a-random-jwt-secret} + # Browser-facing Authentik URL (front-channel: authorize / logout redirects). AUTHENTIK_BASE_URL: ${AUTHENTIK_BASE_URL:-http://localhost:9000} - AUTHENTIK_APP_SLUG: ${AUTHENTIK_APP_SLUG:-stork-code} - AUTHENTIK_CLIENT_ID: ${AUTHENTIK_CLIENT_ID:-your-client-id} + # In-network Authentik URL (back-channel: token / userinfo / jwks / discovery). + # The api container's own localhost is not Authentik, so it must reach the + # auth_server service by name. 
+ AUTHENTIK_INTERNAL_URL: ${AUTHENTIK_INTERNAL_URL:-http://auth_server:9000} + AUTHENTIK_APP_SLUG: ${AUTHENTIK_APP_SLUG:-tumble-code} + # Same credentials the blueprint provisions into Authentik. + AUTHENTIK_CLIENT_ID: ${AUTHENTIK_CLIENT_ID:-tumble-code} AUTHENTIK_CLIENT_SECRET: ${AUTHENTIK_CLIENT_SECRET:-} - AUTHENTIK_REDIRECT_URI: ${AUTHENTIK_REDIRECT_URI:-https://roo.example.com/auth/clerk/callback} - API_BASE_URL: ${API_BASE_URL:-https://roo.example.com} + AUTHENTIK_REDIRECT_URI: ${AUTHENTIK_REDIRECT_URI:-http://localhost:8085/auth/clerk/callback} + API_BASE_URL: ${API_BASE_URL:-http://localhost:8085} CORS_ORIGINS: ${CORS_ORIGINS:-*} DEFAULT_LLM_PROVIDER: ${DEFAULT_LLM_PROVIDER:-openai} OPENAI_API_KEY: ${OPENAI_API_KEY:-} @@ -33,23 +44,120 @@ services: depends_on: postgres: condition: service_healthy + auth_server: + condition: service_healthy restart: unless-stopped postgres: image: postgres:16-alpine - ports: - - "5432:5432" + # Not published to the host: nothing outside the compose network needs it, + # and 5432 collides with other local Postgres instances. Reachable in-network + # as postgres:5432. environment: POSTGRES_USER: roo POSTGRES_PASSWORD: password POSTGRES_DB: roo_cloud volumes: - - postgres_data:/var/lib/postgresql/data + - ./.vol/postgres:/var/lib/postgresql/data healthcheck: test: ["CMD-SHELL", "pg_isready -U roo"] interval: 5s timeout: 5s retries: 5 + restart: unless-stopped + + # --- Authentik ------------------------------------------------------------ + auth_db: + image: docker.io/library/postgres:16-alpine + environment: + POSTGRES_DB: ${PG_DB:-authentik} + POSTGRES_USER: ${PG_USER:-authentik} + POSTGRES_PASSWORD: ${AUTH_PG_PASS:?database password required} + healthcheck: + test: ["CMD-SHELL", "pg_isready -d $${POSTGRES_DB} -U $${POSTGRES_USER}"] + interval: 30s + timeout: 5s + retries: 5 + start_period: 20s + ports: + # Host port remapped 5432 -> 5544 to avoid colliding with other local + # Postgres instances. 
Internal port stays 5432 (auth_server/worker reach it + # as auth_db:5432). Override AUTH_DB_PORT to coexist with another stack. + - "${AUTH_DB_PORT:-5544}:5432" + volumes: + - ./.vol/auth/postgres:/var/lib/postgresql/data + restart: unless-stopped + + auth_redis: + image: docker.io/library/redis:alpine + command: --save 60 1 --loglevel warning + healthcheck: + test: ["CMD-SHELL", "redis-cli ping | grep PONG"] + interval: 30s + timeout: 5s + retries: 5 + start_period: 10s + volumes: + - ./.vol/auth/redis:/data + restart: unless-stopped + + auth_server: + image: ${AUTHENTIK_IMAGE:-ghcr.io/goauthentik/server}:${AUTHENTIK_TAG:-2026.2.2} + command: server + environment: &authentik_env + AUTHENTIK_POSTGRESQL__HOST: auth_db + AUTHENTIK_POSTGRESQL__NAME: ${PG_DB:-authentik} + AUTHENTIK_POSTGRESQL__USER: ${PG_USER:-authentik} + AUTHENTIK_POSTGRESQL__PASSWORD: ${AUTH_PG_PASS} + AUTHENTIK_REDIS__HOST: auth_redis + AUTHENTIK_SECRET_KEY: ${AUTHENTIK_SECRET_KEY:?secret key required} + # First-run admin (akadmin). Lets you log in to the Authentik UI and is the + # account you sign in with during the extension OAuth flow. + AUTHENTIK_BOOTSTRAP_PASSWORD: ${AUTHENTIK_BOOTSTRAP_PASSWORD:-} + AUTHENTIK_BOOTSTRAP_EMAIL: ${AUTHENTIK_BOOTSTRAP_EMAIL:-admin@example.com} + AUTHENTIK_BOOTSTRAP_TOKEN: ${AUTHENTIK_BOOTSTRAP_TOKEN:-} + # Consumed by the blueprint's !Env tags to provision the OAuth2 provider. 
+ AUTHENTIK_CLIENT_ID: ${AUTHENTIK_CLIENT_ID:-tumble-code} + AUTHENTIK_CLIENT_SECRET: ${AUTHENTIK_CLIENT_SECRET:-} + AUTHENTIK_REDIRECT_URI: ${AUTHENTIK_REDIRECT_URI:-http://localhost:8085/auth/clerk/callback} + healthcheck: + test: ["CMD", "ak", "healthcheck"] + start_period: 60s + interval: 30s + timeout: 30s + retries: 8 + ports: + - "${COMPOSE_PORT_HTTP:-9000}:9000" + - "${COMPOSE_PORT_HTTPS:-9443}:9443" + shm_size: 512mb + volumes: + - ./.vol/auth/data:/data + - ./.vol/auth/templates:/templates + - ./authentik/blueprints:/blueprints/custom:ro + depends_on: + auth_db: + condition: service_healthy + auth_redis: + condition: service_healthy + restart: unless-stopped -volumes: - postgres_data: + auth_worker: + image: ${AUTHENTIK_IMAGE:-ghcr.io/goauthentik/server}:${AUTHENTIK_TAG:-2026.2.2} + command: worker + # The worker is what applies blueprints, so it needs the same env (incl. the + # AUTHENTIK_CLIENT_* values the blueprint reads via !Env). + environment: *authentik_env + user: root + shm_size: 512mb + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ./.vol/auth/data:/data + - ./.vol/auth/certs:/certs + - ./.vol/auth/templates:/templates + - ./authentik/blueprints:/blueprints/custom:ro + depends_on: + auth_db: + condition: service_healthy + auth_redis: + condition: service_healthy + restart: unless-stopped diff --git a/self-hosted-cloudapi/docker-entrypoint.sh b/self-hosted-cloudapi/docker-entrypoint.sh new file mode 100644 index 0000000000..d30760d687 --- /dev/null +++ b/self-hosted-cloudapi/docker-entrypoint.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# Reconcile the database schema with Alembic, then start the API. +# +# The schema is defined by the ORM models (create_all), while the migration +# chain only *evolves* existing deployments. So how we bring Alembic in sync +# depends on what state the database is in β€” see src/db_bootstrap.py. 
+set -e + +STATE="$(uv run python -m src.db_bootstrap)" +echo "DB state: ${STATE}" + +case "${STATE}" in + FRESH) + # create_all already built the head schema; just record migrations as applied. + uv run alembic stamp head + ;; + LEGACY) + # Pre-Alembic database (built by an older create_all): adopt the baseline, + # then run the evolution migrations. + uv run alembic stamp a1b2c3d4e5f6 + uv run alembic upgrade head + ;; + MANAGED) + uv run alembic upgrade head + ;; + *) + echo "Unexpected DB state: '${STATE}'" >&2 + exit 1 + ;; +esac + +exec uv run uvicorn src.main:app --host 0.0.0.0 --port "${PORT:-8085}" diff --git a/self-hosted-cloudapi/src/auth/authentik.py b/self-hosted-cloudapi/src/auth/authentik.py index 57fea94fa4..b77d1d0931 100644 --- a/self-hosted-cloudapi/src/auth/authentik.py +++ b/self-hosted-cloudapi/src/auth/authentik.py @@ -16,9 +16,24 @@ get_authentik_end_session_url, get_authentik_jwks_url, get_authentik_discovery_url, + get_back_channel_host_header, ) +def _back_channel_headers(extra: Optional[Dict[str, str]] = None) -> Dict[str, str]: + """Headers for server-to-server Authentik calls, including the brand ``Host``. + + Authentik routes to its OAuth/OIDC endpoints by Host header, so back-channel + requests (which connect to the internal service name) must present the public + front-channel host or Authentik 404s. See ``config.auth`` for the full why. 
+ """ + headers: Dict[str, str] = dict(extra or {}) + host = get_back_channel_host_header() + if host: + headers["Host"] = host + return headers + + def generate_pkce_pair() -> tuple[str, str]: """Generate a PKCE code verifier and code challenge.""" code_verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).decode().rstrip("=") @@ -66,7 +81,9 @@ async def exchange_code_for_tokens( response = await client.post( get_authentik_token_url(), data=token_data, - headers={"Content-Type": "application/x-www-form-urlencoded"}, + headers=_back_channel_headers( + {"Content-Type": "application/x-www-form-urlencoded"} + ), ) response.raise_for_status() return response.json() @@ -77,7 +94,9 @@ async def get_userinfo(access_token: str) -> Dict[str, Any]: async with httpx.AsyncClient() as client: response = await client.get( get_authentik_userinfo_url(), - headers={"Authorization": f"Bearer {access_token}"}, + headers=_back_channel_headers( + {"Authorization": f"Bearer {access_token}"} + ), ) response.raise_for_status() return response.json() @@ -86,6 +105,9 @@ async def get_userinfo(access_token: str) -> Dict[str, Any]: async def get_openid_configuration() -> Dict[str, Any]: """Fetch the OpenID Connect discovery document from Authentik.""" async with httpx.AsyncClient() as client: - response = await client.get(get_authentik_discovery_url()) + response = await client.get( + get_authentik_discovery_url(), + headers=_back_channel_headers(), + ) response.raise_for_status() return response.json() diff --git a/self-hosted-cloudapi/src/db_bootstrap.py b/self-hosted-cloudapi/src/db_bootstrap.py new file mode 100644 index 0000000000..463ab40084 --- /dev/null +++ b/self-hosted-cloudapi/src/db_bootstrap.py @@ -0,0 +1,69 @@ +"""Startup schema reconciler β€” classify the database and seed a fresh one. + +Why this exists: the migration chain's baseline (a1b2c3d4e5f6) is a no-op and the +later migrations are evolution-only (ALTER/ADD COLUMN). 
The schema is actually +built by ``Base.metadata.create_all`` from the ORM models β€” the single source of +truth. So a *fresh* database cannot be bootstrapped by ``alembic upgrade head``. + +This module probes the live DB and prints one of: + + FRESH no application tables -> we create_all here; caller should `stamp head` + LEGACY app tables, no alembic -> caller should `stamp baseline && upgrade head` + MANAGED app tables + alembic -> caller should `upgrade head` + +The presence of *application tables* (not the alembic_version table) is the real +signal. A database with an ``alembic_version`` row but no app tables is a failed +bootstrap β€” the previous, broken `alembic upgrade head` stamped the no-op baseline +and then crashed on the first ALTER. We treat that as FRESH so it self-heals: +``create_all`` builds the schema and ``stamp head`` overwrites the stale version. + +Only ``FRESH`` performs DDL (create_all); the alembic step is left to the +entrypoint so its output is logged like any other migration run. +""" + +import asyncio + +from sqlalchemy import inspect +from sqlalchemy.ext.asyncio import AsyncEngine + +from src.database import Base +import src.models # noqa: F401 -- registers every table on Base.metadata + + +async def classify_and_seed(engine: AsyncEngine) -> str: + """Classify ``engine``'s database; create_all when it has no app tables.""" + async with engine.begin() as conn: + def probe(sync_conn): + insp = inspect(sync_conn) + return insp.has_table("alembic_version"), insp.has_table("users") + + has_alembic, has_app_tables = await conn.run_sync(probe) + + if has_app_tables: + return "MANAGED" if has_alembic else "LEGACY" + + # No app tables: either a brand-new DB or a failed prior bootstrap that + # left only a stale alembic_version. Build the schema from the models; + # the entrypoint then `stamp head` (overwriting any stale version row). 
+ await conn.run_sync(Base.metadata.create_all) + return "FRESH" + + +def main() -> None: + # Imported lazily so importing this module (e.g. for tests) doesn't construct + # the app engine / require full settings. + from src.database import engine + + async def _run() -> str: + try: + return await classify_and_seed(engine) + finally: + # Dispose within the same loop the connections were opened on; + # disposing from a second asyncio.run() raises "Event loop is closed". + await engine.dispose() + + print(asyncio.run(_run())) + + +if __name__ == "__main__": + main() diff --git a/self-hosted-cloudapi/src/routers/web.py b/self-hosted-cloudapi/src/routers/web.py index 04b1ad90d1..8b1b63bcf5 100644 --- a/self-hosted-cloudapi/src/routers/web.py +++ b/self-hosted-cloudapi/src/routers/web.py @@ -12,6 +12,7 @@ import json import logging +import re from pathlib import Path from typing import Optional @@ -61,6 +62,30 @@ def _asset_version() -> str: # Message says/asks whose text is the most representative task title. _TITLE_MAX = 100 +# Roo Code's first user turn can reach the cloud in API-prompt form: the typed +# text wrapped in //, trailed by a machine-built +# block (current mode, open tabs, file tree, cost…). None +# of the environment block is the user's query, so strip it before deriving a +# title. Match the trailing/unclosed case too (the block is always last). +_ENV_DETAILS_RE = re.compile(r".*?(?:|\Z)", re.DOTALL) +_MSG_WRAPPER_RE = re.compile(r"<(user_message|task|feedback)>(.*?)", re.DOTALL) + + +def _strip_task_wrappers(text: str) -> str: + """Reduce a raw conversation message to the human-authored query. + + Drops the machine ```` appendix and unwraps the + ````/````/```` tag to its inner content. Plain + text (already clean) passes through unchanged. 
+ """ + if not text: + return "" + cleaned = _ENV_DETAILS_RE.sub("", text) + match = _MSG_WRAPPER_RE.search(cleaned) + if match: + cleaned = match.group(2) + return cleaned.strip() + def _workspace_label(path: str | None) -> str | None: """Compact project/worktree name for a badge: the last path segment. @@ -79,13 +104,22 @@ def _workspace_label(path: str | None) -> str | None: def _derive_title(messages: list[dict]) -> str: - """Pick a human-readable title from the conversation (first text-bearing msg).""" + """Pick a human-readable title from the conversation (first text-bearing msg). + + The first candidate is unwrapped to the user's query (machine framing such as + ```` is dropped) so the title reflects what the user + actually typed, not the current mode/file tree the extension appended. + """ for msg in messages: text = (msg.get("text") or "").strip() - if text and not text.startswith("{"): - first_line = text.splitlines()[0].strip() - if first_line: - return first_line[:_TITLE_MAX] + ("…" if len(first_line) > _TITLE_MAX else "") + if not text or text.startswith("{"): + continue + query = _strip_task_wrappers(text) + if not query: + continue + first_line = query.splitlines()[0].strip() + if first_line: + return first_line[:_TITLE_MAX] + ("…" if len(first_line) > _TITLE_MAX else "") return "Untitled task" diff --git a/self-hosted-cloudapi/src/web/static/app.css b/self-hosted-cloudapi/src/web/static/app.css index 08d1e6a830..4b97286c88 100644 --- a/self-hosted-cloudapi/src/web/static/app.css +++ b/self-hosted-cloudapi/src/web/static/app.css @@ -448,6 +448,24 @@ details[open] summary { margin-bottom: 0.5rem; } +/* Machine environment_details appendix on a user prompt β€” collapsed by default, + unfolds to the full original text. 
*/ +details.env-details { + margin-top: 0.5rem; + border-top: 1px dashed var(--border); + padding-top: 0.4rem; +} +details.env-details > summary { + font-size: 0.78rem; + text-transform: uppercase; + letter-spacing: 0.03em; +} +details.env-details > pre { + margin-top: 0.4rem; + max-height: 22rem; + overflow: auto; +} + /* Foldable rows: the is the header β€” one collapsible line. */ .msg.foldable > details > summary.msg-head { list-style: none; @@ -752,7 +770,7 @@ details[open] summary { .breakdown-grid { display: grid; - grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); + grid-template-columns: 1fr; gap: 0.85rem; } .breakdown-card { @@ -763,6 +781,7 @@ details[open] summary { } table.breakdown { width: 100%; + table-layout: fixed; border-collapse: collapse; font-size: 0.85rem; } @@ -775,6 +794,23 @@ table.breakdown th { letter-spacing: 0.03em; padding: 0.25rem 0.4rem; border-bottom: 1px solid var(--border); + white-space: nowrap; +} +/* Name column flexes; numeric columns get fixed widths so values never clip. 
*/ +table.breakdown th:nth-child(2), +table.breakdown td:nth-child(2) { + width: 5rem; +} +table.breakdown th:nth-child(3), +table.breakdown td:nth-child(3) { + width: 5.5rem; +} +table.breakdown th:nth-child(4), +table.breakdown td:nth-child(4) { + width: 3.5rem; +} +table.breakdown th:nth-child(n + 2) { + text-align: right; } table.breakdown td { padding: 0.3rem 0.4rem; @@ -785,7 +821,6 @@ table.breakdown tr:last-child td { } .bd-name { font-family: var(--mono); - max-width: 160px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; diff --git a/self-hosted-cloudapi/src/web/static/render.js b/self-hosted-cloudapi/src/web/static/render.js index bb2726cff7..6aa7d220d8 100644 --- a/self-hosted-cloudapi/src/web/static/render.js +++ b/self-hosted-cloudapi/src/web/static/render.js @@ -17,6 +17,33 @@ return DOMPurify.sanitize(marked.parse(String(text))) } + // Roo Code's first user turn (and resumed turns) can arrive wrapped: the typed + // text inside //, trailed by a machine-built + // block (current mode, open tabs, file tree, cost…). + // Render the human query; tuck the environment block into a collapsed fold so + // the full original is still one click away. Plain text passes through as-is. + function userContentHtml(text) { + if (!text) return "" + let body = String(text) + let env = "" + const envMatch = body.match(/([\s\S]*?)(?:<\/environment_details>|$)/) + if (envMatch) { + env = envMatch[1].trim() + body = body.slice(0, envMatch.index) + body.slice(envMatch.index + envMatch[0].length) + } + const wrap = body.match(/<(user_message|task|feedback)>([\s\S]*?)<\/\1>/) + if (wrap) body = wrap[2] + let html = md(body.trim()) + if (env) { + html += + '
Environment details' + + "
" +
+				escapeHtml(env) +
+				"
" + } + return html + } + function escapeHtml(s) { return String(s == null ? "" : s) .replace(/&/g, "&") @@ -70,7 +97,7 @@ switch (kind) { case "user_feedback": case "user_feedback_diff": - return { role: "user", label: "You", icon: "\u{1F464}", body: md(m.text) } + return { role: "user", label: "You", icon: "\u{1F464}", body: userContentHtml(m.text) } case "text": if (!m.text && !(m.images && m.images.length)) return null @@ -78,7 +105,7 @@ role: "assistant", label: "Assistant", icon: "\u{1F916}", - body: md(m.text) + images(m), + body: userContentHtml(m.text) + images(m), fold: true, activity: "Responding…", } diff --git a/self-hosted-cloudapi/tests/test_back_channel_host.py b/self-hosted-cloudapi/tests/test_back_channel_host.py new file mode 100644 index 0000000000..57607f1687 --- /dev/null +++ b/self-hosted-cloudapi/tests/test_back_channel_host.py @@ -0,0 +1,101 @@ +"""Back-channel Host header behaviour. + +Authentik routes to its OAuth/OIDC endpoints by HTTP Host header and 404s on an +invalid host (e.g. the compose service name `auth_server`, whose underscore is an +invalid RFC-1123 hostname). The api therefore presents the public front-channel +host (host of AUTHENTIK_BASE_URL) on every server-to-server call. These tests +lock that in so the OAuth callback can't silently regress to a 502. 
+""" + +import pytest + +import config.auth as auth_cfg +from config.auth import get_back_channel_host_header +from config.settings import settings +import src.auth.authentik as authentik + + +def test_host_header_is_front_channel_when_internal_url_set(monkeypatch): + monkeypatch.setattr(settings, "authentik_base_url", "https://auth.tumblecode.dev") + monkeypatch.setattr(settings, "authentik_internal_url", "http://auth_server:9000") + + host = get_back_channel_host_header() + + assert host == "auth.tumblecode.dev" + assert "_" not in host # the bug: underscore hosts get 404'd by Authentik + + +def test_host_header_keeps_port_for_dev_stack(monkeypatch): + monkeypatch.setattr(settings, "authentik_base_url", "http://localhost:9000") + monkeypatch.setattr(settings, "authentik_internal_url", "http://auth_server:9000") + + assert get_back_channel_host_header() == "localhost:9000" + + +def test_host_header_none_for_single_host(monkeypatch): + # No internal URL β†’ front == back channel β†’ httpx's default Host is correct. 
+ monkeypatch.setattr(settings, "authentik_internal_url", None) + + assert get_back_channel_host_header() is None + + +class _FakeResp: + def __init__(self, data): + self._data = data + + def raise_for_status(self): + return None + + def json(self): + return self._data + + +class _CapturingClient: + """Stand-in for httpx.AsyncClient that records the headers it was called with.""" + + last_headers: dict = {} + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + return False + + async def post(self, url, data=None, headers=None): + _CapturingClient.last_headers = headers or {} + return _FakeResp({"access_token": "fake"}) + + async def get(self, url, headers=None): + _CapturingClient.last_headers = headers or {} + return _FakeResp({"sub": "fake"}) + + +@pytest.fixture +def capture_httpx(monkeypatch): + monkeypatch.setattr(authentik.httpx, "AsyncClient", _CapturingClient) + monkeypatch.setattr(settings, "authentik_base_url", "https://auth.tumblecode.dev") + monkeypatch.setattr(settings, "authentik_internal_url", "http://auth_server:9000") + return _CapturingClient + + +async def test_token_exchange_sends_brand_host(capture_httpx): + await authentik.exchange_code_for_tokens("code", "verifier") + + headers = capture_httpx.last_headers + assert headers["Host"] == "auth.tumblecode.dev" + # Existing content-type header is preserved alongside the injected Host. 
+ assert headers["Content-Type"] == "application/x-www-form-urlencoded" + + +async def test_userinfo_sends_brand_host(capture_httpx): + await authentik.get_userinfo("access-token") + + headers = capture_httpx.last_headers + assert headers["Host"] == "auth.tumblecode.dev" + assert headers["Authorization"] == "Bearer access-token" + + +async def test_discovery_sends_brand_host(capture_httpx): + await authentik.get_openid_configuration() + + assert capture_httpx.last_headers["Host"] == "auth.tumblecode.dev" diff --git a/self-hosted-cloudapi/tests/test_web_and_share.py b/self-hosted-cloudapi/tests/test_web_and_share.py index f5a188b2cc..d8808fa5f7 100644 --- a/self-hosted-cloudapi/tests/test_web_and_share.py +++ b/self-hosted-cloudapi/tests/test_web_and_share.py @@ -307,6 +307,47 @@ async def test_app_lists_owned_tasks(client, db_session, session_factory): assert "Build me a feature" in resp.text +async def test_title_strips_environment_details_wrapper(client, db_session, session_factory): + """A first turn in Roo Code's API-prompt form (typed text wrapped in + , trailed by a machine block) yields a + title of just the user's query β€” no mode/file-tree leakage.""" + await _seed_user(db_session) + wrapped = ( + "\n" + "uruchom wszystkie testy w langgrapha\n" + " \n" + "# VSCode Visible Files\n.roo/rules/rules.md\n\n" + "# Current Mode\ncode\nπŸ’» Code\n" + "" + ) + async with session_factory() as s: + s.add(Task(id="task-wrapped", user_id="user_test")) + s.add( + TaskMessage( + task_id="task-wrapped", + message_data=json.dumps({"ts": 1, "type": "say", "say": "text", "text": wrapped}), + ) + ) + await s.commit() + + from src.main import app + + _override_web_user(app) + try: + list_resp = client.get("/app") + detail_resp = client.get("/app/tasks/task-wrapped") + finally: + app.dependency_overrides.pop(get_web_user_optional, None) + + assert list_resp.status_code == 200 + assert "uruchom wszystkie testy w langgrapha" in list_resp.text + # The machine framing must not 
bleed into the title. + for leak in ("environment_details", "Current Mode", "", ""): + assert leak not in list_resp.text + assert detail_resp.status_code == 200 + assert "uruchom wszystkie testy w langgrapha" in detail_resp.text + + async def test_app_list_and_detail_show_workspace(client, db_session, session_factory): """The list shows the worktree basename (full path on hover); the detail header shows the full path.""" From 80ef25fd0699a52b15c3e71baf2763017bddf335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Dre=C5=BCewski?= Date: Mon, 22 Jun 2026 12:50:52 +0200 Subject: [PATCH 2/2] fix(cloud): share always uploads full local task history MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Share only backfilled the full task.json on a 404 (TaskNotFoundError), assuming any existing server row was complete. When the live bridge had created a partial row (it connected mid-task and captured only later messages, while the offline-run opening turns were never uploaded), share returned 200, backfill was skipped, and the partial copy was shared β€” the web view showed tokens/cost (from api_req_started rows) but "Untitled task" and no conversation. Backfill the authoritative full local history before sharing whenever messages are available; keep the 404 path as a best-effort retry. backfill_messages replaces the task's stored rows, so this is idempotent. 
--- ...6-06-22_share-always-backfill-full-task.md | 133 ++++++++++++++++++ packages/cloud/src/CloudService.ts | 17 ++- .../cloud/src/__tests__/CloudService.test.ts | 15 +- 3 files changed, 159 insertions(+), 6 deletions(-) create mode 100644 ai_plans/2026-06-22_share-always-backfill-full-task.md diff --git a/ai_plans/2026-06-22_share-always-backfill-full-task.md b/ai_plans/2026-06-22_share-always-backfill-full-task.md new file mode 100644 index 0000000000..c6148b1bb9 --- /dev/null +++ b/ai_plans/2026-06-22_share-always-backfill-full-task.md @@ -0,0 +1,133 @@ +# Share always uploads the full local task history + +**Date:** 2026-06-22 +**Branch:** `fix/share-always-backfill-full-task` (stack off current `fix/cloudapi-authentik-back-channel-host`) +**Status:** proposed + +## Symptom (user report) + +> I ran my task prior to the backend running and shared the task afterwards. +> All I can see on the web is "Untitled task", tokens in/out and cost β€” not the +> conversation. I want the task synced to the backend _in whole_ in such cases. + +## Root cause (code-traced) + +The shared/web task views are rendered **entirely from the `task_messages` +table**: + +- title β†’ `_derive_title(messages)` β€” `web.py:106` +- tokens/cost β†’ `_compute_metrics(messages)` (client-side in `render.js`, same + source) β€” `web.py:131` +- conversation body β†’ `messages_json` β€” `web.py:419`, `web.py:427` + +`task_messages` has exactly two writers: + +1. `backfill_messages(...)` β€” uploads the full `task.json` and **replaces** all + rows for the task (`telemetry_service.py:42`). Triggered by the extension via + `POST /api/events/backfill`. +2. `upsert_task_message(...)` β€” the live remote-control **bridge** persisting one + streamed message at a time (`sio.py:187`, `telemetry_service.py:88`). + +Plain telemetry events (`POST /api/events`) only write the **`telemetry_events`** +table β€” never `task_messages` (`telemetry_service.py:24`). 
+ +### Why "Untitled task" + tokens but no conversation + +`_derive_title` skips any message whose `text` starts with `{` (JSON), i.e. +`api_req_started` rows (`web.py:115`); `_compute_metrics` reads exactly those +`api_req_started` rows for tokens/cost. So the stored set contained +`api_req_started` rows (→ metrics) but **not** the user's text turn (→ title +falls back to "Untitled task", and the body is near-empty). A _full_ backfill +always carries the opening user message → would produce a real title. Therefore +**backfill never ran**: share returned HTTP 200 because a (partial) task row +already existed, so the `TaskNotFoundError` branch was never entered. + +### The flawed gate + +`CloudService.shareTask` (`packages/cloud/src/CloudService.ts:315`) backfills the +full local history **only** inside `catch (TaskNotFoundError)` — i.e. only when +the server has _no_ row at all: + +```ts +try { + return await this.shareService!.shareTask(taskId, visibility) +} catch (error) { + if (error instanceof TaskNotFoundError && clineMessages) { + await this.telemetryClient!.backfillMessages(clineMessages, taskId) + return await this.shareService!.shareTask(taskId, visibility) + } + throw error +} +``` + +When a **partial** row already exists (the bridge connected mid-task and captured +only the later messages, while the offline-run opening turns were never +uploaded), share succeeds, backfill is skipped, and the partial copy is what gets +shared. `CloudService.test.ts:511` ("without retry when successful") codifies the +current assumption that a successful share needs no backfill. + +Inferred (not observed in a live DB): the specific reason the server copy was +partial is the bridge timing above. The fix is independent of that cause — it +uploads the authoritative full local history regardless of why the server copy +was incomplete.
+ +## Fix + +The extension holds the **authoritative, complete** history for its own task +(`provider.getCurrentTask().clineMessages`). `backfill_messages` is idempotent — +it deletes and re-inserts the task's rows — so it is safe to call on every share. + +Change `CloudService.shareTask` to **backfill the full local history first** +(when messages are available), then share. Keep the `TaskNotFoundError` retry as +a fallback, since `backfillMessages` swallows its own network errors and may have +silently no-op'd: + +```ts +public async shareTask(taskId, visibility = "organization", clineMessages?) { + this.ensureInitialized() + + // The extension is the source of truth for its own task. The server copy may + // be absent (task ran while the backend was unreachable) or partial (the live + // bridge connected mid-task and only captured later messages). Upload the full + // local history before sharing so the shared view shows the whole conversation + // and a real title — not just the api_req_started fragments. backfillMessages + // replaces the task's stored rows, so this is safe on every share. + if (clineMessages?.length) { + await this.telemetryClient!.backfillMessages(clineMessages, taskId) + } + + try { + return await this.shareService!.shareTask(taskId, visibility) + } catch (error) { + if (error instanceof TaskNotFoundError && clineMessages?.length) { + // backfill above is best-effort (it swallows network errors); retry once. + await this.telemetryClient!.backfillMessages(clineMessages, taskId) + return await this.shareService!.shareTask(taskId, visibility) + } + throw error + } +} +``` + +Behavior when `clineMessages` is not provided (programmatic callers) is unchanged: +no up-front backfill, original 404 path applies. + +## Files + +- `packages/cloud/src/CloudService.ts` — reorder backfill to run before share.
+- `packages/cloud/src/__tests__/CloudService.test.ts` β€” update the + "successful share" case to expect one up-front `backfillMessages` call; keep + the 404-retry and no-messages cases (adjust call counts). + +## Trade-off + +Every explicit share now uploads the full `task.json` once, even when the server +already had it via live streaming. Acceptable for a user-initiated action, and +the only way to guarantee completeness without an extra "what does the server +have?" round-trip. Correctness over a micro-optimization. + +## Verification + +- Unit: `pnpm --filter @roo-code/cloud test` (CloudService share suite). +- Manual: run a task with the backend down, start the backend, share β†’ shared + page shows full conversation + real title (not "Untitled task"). diff --git a/packages/cloud/src/CloudService.ts b/packages/cloud/src/CloudService.ts index 43f52d4b18..a444fd2411 100644 --- a/packages/cloud/src/CloudService.ts +++ b/packages/cloud/src/CloudService.ts @@ -319,11 +319,24 @@ export class CloudService extends EventEmitter implements Di ) { this.ensureInitialized() + // The extension is the source of truth for its own task. The server copy may + // be absent (the task ran while the backend was unreachable) or partial (the + // live bridge connected mid-task and only captured the later messages, while + // the opening turns were never uploaded). Upload the full local history before + // sharing so the shared view shows the whole conversation and a real title β€” + // not just the api_req_started fragments that yield "Untitled task" + metrics. + // backfillMessages replaces the task's stored rows, so this is safe to run on + // every share. + if (clineMessages?.length) { + await this.telemetryClient!.backfillMessages(clineMessages, taskId) + } + try { return await this.shareService!.shareTask(taskId, visibility) } catch (error) { - if (error instanceof TaskNotFoundError && clineMessages) { - // Backfill messages and retry. 
+ if (error instanceof TaskNotFoundError && clineMessages?.length) { + // The up-front backfill is best-effort (it swallows network errors), so + // a TaskNotFoundError here means it silently no-op'd β€” retry once. await this.telemetryClient!.backfillMessages(clineMessages, taskId) return await this.shareService!.shareTask(taskId, visibility) } diff --git a/packages/cloud/src/__tests__/CloudService.test.ts b/packages/cloud/src/__tests__/CloudService.test.ts index 8c557ae7ad..846a37fed4 100644 --- a/packages/cloud/src/__tests__/CloudService.test.ts +++ b/packages/cloud/src/__tests__/CloudService.test.ts @@ -508,7 +508,7 @@ describe("CloudService", () => { cloudService = await CloudService.createInstance(mockContext) }) - it("should call shareTask without retry when successful", async () => { + it("should backfill the full local history up front, then share once", async () => { const taskId = "test-task-id" const visibility = "organization" const clineMessages: ClineMessage[] = [ @@ -528,9 +528,13 @@ describe("CloudService", () => { const result = await cloudService.shareTask(taskId, visibility, clineMessages) + // The extension is the source of truth: it uploads the full local history + // before sharing so a partial/missing server copy can't leak into the + // shared view. One backfill, one (successful) share β€” no 404 retry. 
+ expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledTimes(1) + expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledWith(clineMessages, taskId) expect(mockShareService.shareTask).toHaveBeenCalledTimes(1) expect(mockShareService.shareTask).toHaveBeenCalledWith(taskId, visibility) - expect(mockTelemetryClient.backfillMessages).not.toHaveBeenCalled() expect(result).toEqual(expectedResult) }) @@ -561,7 +565,9 @@ describe("CloudService", () => { expect(mockShareService.shareTask).toHaveBeenCalledTimes(2) expect(mockShareService.shareTask).toHaveBeenNthCalledWith(1, taskId, visibility) expect(mockShareService.shareTask).toHaveBeenNthCalledWith(2, taskId, visibility) - expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledTimes(1) + // Once up front, once more on the 404 fallback (the up-front backfill is + // best-effort and may have silently failed on a network error). + expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledTimes(2) expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledWith(clineMessages, taskId) expect(result).toEqual(expectedResult) }) @@ -596,8 +602,9 @@ describe("CloudService", () => { await expect(cloudService.shareTask(taskId, visibility, clineMessages)).rejects.toThrow(genericError) + // The up-front backfill still runs, but a non-404 error is not retried. + expect(mockTelemetryClient.backfillMessages).toHaveBeenCalledTimes(1) expect(mockShareService.shareTask).toHaveBeenCalledTimes(1) - expect(mockTelemetryClient.backfillMessages).not.toHaveBeenCalled() }) it("should work with default parameters", async () => {