diff --git a/.gitignore b/.gitignore index bd146c5..a507cef 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,7 @@ go-webservice/go-webservice go-jwks/go-jwks go-jwks-multi/go-jwks-multi go-oidc/go-oidc +kong-mcp/mcp-authgate +kong-mcp/mcp-authgate-linux +kong-mcp/kong-mcp .env diff --git a/README.md b/README.md index 38d041f..38cd530 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,11 @@ Multi-language usage examples for AuthGate authentication (Go, Python, Bash). | [go-jwks](go-jwks/) | API protection (offline) | JWKS public-key validation | Go | Go 1.25+ | | [go-jwks-multi](go-jwks-multi/) | API protection (N iss) | JWKS validation (multi) | Go | Go 1.25+ | | [go-oidc](go-oidc/) | Web login (no SDK) | Auth Code (coreos/go-oidc) | Go | Go 1.25+ | +| [kong-mcp](kong-mcp/) | MCP gateway (Kong) | PKCE entry + JWKS validation | Go | Go 1.25+, Kong | ## Environment Setup -All examples require `AUTHGATE_URL` and `CLIENT_ID`. M2M examples additionally require `CLIENT_SECRET`. +All examples except [kong-mcp](kong-mcp/) require `AUTHGATE_URL` and `CLIENT_ID`. M2M examples additionally require `CLIENT_SECRET`. The kong-mcp gateway reads no environment variables — configure it via the plugin block in [`kong-mcp/kong.yml`](kong-mcp/kong.yml) (`issuer`, `gateway_origin`, `jwks_uri`, ...). Set via environment variables: @@ -40,7 +41,7 @@ CLIENT_ID=your-client-id CLIENT_SECRET=your-client-secret # M2M only ``` -All examples automatically load `.env` if present. Environment variables take precedence over `.env` values. +All env-configured examples automatically load `.env` if present (kong-mcp does not read `.env`). Environment variables take precedence over `.env` values. ## Interactive CLI Authentication @@ -138,6 +139,23 @@ go run main.go # then open http://localhost:8080/ ``` +## MCP OAuth Gateway (Kong) + +A Kong [go-pdk](https://github.com/Kong/go-pdk) plugin that puts one OAuth front +door in front of every MCP server. 
Internal MCP services stop accepting +hand-written PATs and instead require an AuthGate-issued access token. Kong does +not run the OAuth flow — it only advertises the entry point (`401` + +`WWW-Authenticate` → RFC 9728 Protected Resource Metadata) and validates the +returned token offline with **RS256 + JWKS** (alg-confusion blocked), then +forwards it to the MCP backend. The MCP client runs Auth Code + PKCE against +AuthGate itself. One plugin config covers all MCP servers. + +```bash +cd kong-mcp +go mod tidy && go build -o mcp-authgate . # build the plugin +docker compose up --build # demo stack: Kong + stub MCP upstreams +``` + ## OAuth 2.0 Flows - **Authorization Code + PKCE** — Browser-based login, most secure for CLI tools on machines with a browser. The client opens a browser, the user authenticates, and a code is exchanged for tokens. diff --git a/go-jwks-multi/testissuer/main.go b/go-jwks-multi/testissuer/main.go index f500aa7..0e78572 100644 --- a/go-jwks-multi/testissuer/main.go +++ b/go-jwks-multi/testissuer/main.go @@ -162,6 +162,10 @@ func (i *issuer) sign(w http.ResponseWriter, r *http.Request) { sub := def(q.Get("sub"), "test-user-1") scope := def(q.Get("scope"), "email profile") clientID := def(q.Get("client_id"), "test-client") + // AuthGate stamps a "type" claim on its tokens (access vs refresh); default + // to "access" so minted tokens pass resource servers that require it, and + // allow ?type=refresh to mint a non-access token for rejection tests. 
+ tokenType := def(q.Get("type"), "access") domain := q.Get("domain") sa := q.Get("sa") project := q.Get("project") @@ -181,6 +185,7 @@ func (i *issuer) sign(w http.ResponseWriter, r *http.Request) { "exp": now.Add(time.Duration(ttlSec) * time.Second).Unix(), "client_id": clientID, "scope": scope, + "type": tokenType, } // Server-attested claims are only set when explicitly requested, so you // can mint "missing claim" tokens to exercise the resource server's diff --git a/kong-mcp/.dockerignore b/kong-mcp/.dockerignore new file mode 100644 index 0000000..4af8d61 --- /dev/null +++ b/kong-mcp/.dockerignore @@ -0,0 +1,9 @@ +# Keep the image build context to the Go sources: editing docs or the +# runtime-mounted kong.yml must not invalidate the build cache. +*.md +kong.yml +docker-compose.yml +Dockerfile +.dockerignore +mcp-authgate +kong-mcp diff --git a/kong-mcp/Dockerfile b/kong-mcp/Dockerfile new file mode 100644 index 0000000..e6935f9 --- /dev/null +++ b/kong-mcp/Dockerfile @@ -0,0 +1,28 @@ +# Build the Go plugin as a standalone pluginserver binary, then bake it into the +# Kong image. go-pdk plugins are ordinary executables that speak the Kong +# pluginserver RPC protocol over stdio — no cgo, no .so loading. +# +# Pin the binary to the FINAL image's architecture. Under BuildKit, TARGETOS / +# TARGETARCH are populated (e.g. by `docker build --platform`) and the binary +# cross-compiles to that target, matching the kong image it lands in. Under the +# classic builder they are empty, and an empty GOOS/GOARCH means "use the native +# default" — which is also the daemon's arch, so both stages still agree. Either +# way the binary's arch matches the kong container. A mismatch would make the +# pluginserver fail with `exec format error` and emit *nothing* on stdout, and +# Kong would die at init with `failed decoding plugin info: Expected value but +# found T_END at character 1` (an empty `-dump`). 
+FROM golang:1.26 AS build +ARG TARGETOS +ARG TARGETARCH +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \ + go build -trimpath -ldflags="-s -w" -o /mcp-authgate . + +FROM kong:3.9 +COPY --from=build /mcp-authgate /usr/local/bin/mcp-authgate +USER kong +HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \ + CMD kong health diff --git a/kong-mcp/HANDS-ON.zh-TW.md b/kong-mcp/HANDS-ON.zh-TW.md new file mode 100644 index 0000000..f9a0dd9 --- /dev/null +++ b/kong-mcp/HANDS-ON.zh-TW.md @@ -0,0 +1,517 @@ +# kong-mcp 安裝與驗證手冊(macOS 實機操作版) + +這份手冊帶你在 **macOS** 上,從零把 `mcp-authgate` plugin 跑起來,並用一個本機 +測試簽發者(test issuer)完整走完 MCP OAuth 握手,逐列驗證安全性質——**不需要 +真的 AuthGate**。 + +> 本手冊的每個指令都在 Apple Silicon(M1 Max / arm64)+ colima + Docker 24 +> 上實機跑過。Intel Mac 請把 `GOARCH=arm64` 改成 `GOARCH=amd64`。 + +產品說明、設定欄位、設計理由請看 [README.md](README.md) / [README.zh-TW.md](README.zh-TW.md)。 +這裡只談「怎麼一步步跑起來並驗證」。 + +--- + +## 0. 前置需求 + +| 工具 | 確認指令 | 備註 | +| -------------- | ---------------------------------------------------- | ------------------------------------------------------------ | +| Go 1.25.10+ | `go version` | 編譯 plugin 與 test issuer(`go.mod` 的 `go` 指令為 1.25.10)| +| Docker | `docker version` | Docker Desktop、colima、OrbStack 皆可 | +| Docker Compose | `docker compose version` 或 `docker-compose version` | v2 即可。本機若只有獨立版 `docker-compose`,下面指令照用即可 | +| curl / openssl | 內建 | 驗證用 | +| python3 | 內建 | 只用來把 JSON 印得好看,非必要 | + +> **colima 使用者**:先確認 daemon 起來了(`colima status`,沒有就 `colima start`)。 +> 本手冊用到的 `host.docker.internal`(容器連回 macOS host)在 colima / 一般 +> dockerd 上預設沒有,靠 compose 檔裡的 `extra_hosts: host.docker.internal:host-gateway` +> 補上——已經幫你寫好,不用改。 + +切到範例目錄: + +```bash +cd kong-mcp +``` + +--- + +## 1. 
為什麼有「本機版」compose 檔 + +倉庫附的 [`docker-compose.yml`](docker-compose.yml) 走 [`Dockerfile`](Dockerfile), +**在 Docker 內** `go mod download` 再編譯 plugin。這在一般網路沒問題,但若你在 +**有 TLS 攔截 proxy 的公司網路**(例如憑證被替換),Docker build 會卡在: + +```bash +go: github.com/Kong/go-pdk@v0.11.0: ... tls: failed to verify certificate: +x509: certificate signed by unknown authority +``` + +因為 BuildKit 容器內不帶你 macOS 的企業根憑證。為了繞過這點、也讓驗證更快,本 +手冊改用 **本機交叉編譯 + 把 binary 掛進現成 `kong:3.9` image** 的方式,對應檔案: + +- [`docker-compose.local.yml`](docker-compose.local.yml):掛載本機編好的 binary, + 並補上 `host.docker.internal` 與 test-issuer 設定。 +- [`kong.local.yml`](kong.local.yml):把 plugin 指向本機 test issuer 的設定 + (正式設定請看 [`kong.yml`](kong.yml) 的 placeholder)。 + +> 公司網路沒有攔截 proxy 的話,你也可以直接 `docker compose up --build` 走原始 +> 流程,跳過第 2 步的交叉編譯;但仍需要第 3 步的 test issuer 才能驗證 token。 + +--- + +## 2. 編譯 plugin + +go-pdk plugin 是一支普通的執行檔(講 pluginserver RPC,無 cgo、無 `.so`)。 + +**本機自用 / 手動 smoke test:** + +```bash +go mod tidy +go build -o mcp-authgate . +./mcp-authgate -dump | head # 應印出 plugin schema(JSON) +``` + +**給 Linux 容器掛載用(本手冊主線)——交叉編譯:** + +```bash +# Apple Silicon: +CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o mcp-authgate-linux . +# Intel Mac:把 arm64 換成 amd64 +file mcp-authgate-linux # 應顯示 ELF 64-bit ... ARM aarch64(或 x86-64) +``` + +> `GOARCH` 要對齊 **Docker VM 的架構**,不是你 shell 的架構。Apple Silicon 上的 +> colima / Docker Desktop 預設跑 arm64 VM,所以用 `arm64`。 + +--- + +## 3. 啟動本機 test issuer(假 AuthGate) + +倉庫附了一個本機簽發者 [`../go-jwks-multi/testissuer`](../go-jwks-multi/testissuer): +啟動時產生一對 RSA-2048 金鑰,提供 OIDC discovery、JWKS,以及一個 `/sign` 端點 +讓你任意鑄造 RS256 JWT。**它會幫你簽任何東西,純測試用,只綁 loopback、絕不可 +對外。** 鑄出的 token 預設帶 `type=access`(plugin 只接受 access token);想看 +plugin 擋掉非 access token,在 `/sign` 加 `&type=refresh` 重打 Row 3 會拿到 401。 + +開一個**新終端機分頁**,讓它在前景跑: + +```bash +cd ../go-jwks-multi +go run ./testissuer +``` + +看到這樣就成功了(`auth-a` 在 `:9001`,`auth-b` 在 `:9002`): + +``` +issuer "auth-a" on http://127.0.0.1:9001 (kid=auth-a-...) 
+issuer "auth-b" on http://127.0.0.1:9002 (kid=auth-b-...) +``` + +快速確認 JWKS 出得來: + +```bash +curl -s http://127.0.0.1:9001/jwks.json | head -c 120; echo +``` + +> **issuer 與 jwks_uri 的關鍵差異**(已寫進 `kong.local.yml`): +> - `issuer`:拿來跟 token 的 `iss` claim **逐字元比對**。test issuer 鑄的 token +>   `iss` 是 `http://127.0.0.1:9001`,所以設定也填這個。 +> - `jwks_uri`:由 **Kong 容器**去抓金鑰,所以要填**容器連得到 host** 的位址—— +>   `http://host.docker.internal:9001/jwks.json`,不能用 `127.0.0.1`(那是容器自己)。 + +--- + +## 4. 啟動 Kong demo stack + +回到 `kong-mcp` 目錄,用本機版 compose 啟動(DB-less Kong + 兩個 stub MCP upstream): + +```bash +cd ../kong-mcp +docker-compose -f docker-compose.local.yml up -d +# 若你的 Docker 有 compose v2 子指令,等價於: +# docker compose -f docker-compose.local.yml up -d +``` + +確認三個容器都 Up: + +```bash +docker-compose -f docker-compose.local.yml ps +``` + +- proxy(MCP 流量入口):`http://localhost:8000` +- admin API:**只綁容器內 loopback、不對外發布**(未認證、可整份換掉設定)。需要 +  除錯時在 kong 容器內執行,例如 `docker-compose -f docker-compose.local.yml exec kong curl http://127.0.0.1:8001/...`。 + +看 plugin 有沒有正常掛載: + +```bash +docker-compose -f docker-compose.local.yml logs kong | grep -i pluginserver +# 應看到 "loading protocol ProtoBuf:1 for plugin mcp-authgate" +``` + +--- + +## 5. 
驗證矩陣(逐列實測) + +設一個方便的變數: + +```bash +GW=http://localhost:8000 +``` + +### Row 1 — 沒帶 token → 401 挑戰 + +```bash +curl -si $GW/mcp/gitea | sed -n '1p;/WWW-Authenticate/p' +``` + +預期: + +``` +HTTP/1.1 401 Unauthorized +WWW-Authenticate: Bearer resource_metadata="http://localhost:8000/.well-known/oauth-protected-resource/mcp/gitea" +``` + +### Row 2 — PRM 文件(Protected Resource Metadata, RFC 9728) + +```bash +curl -s $GW/.well-known/oauth-protected-resource/mcp/gitea +``` + +預期(含 `resource`、`authorization_servers`、`scopes_supported`): + +```json +{"authorization_servers":["http://127.0.0.1:9001"],"bearer_methods_supported":["header"],"resource":"http://localhost:8000/mcp/gitea","scopes_supported":["mcp:gitea"]} +``` + +### Row 3 — 有效 token → 轉發到 upstream(200) + +向 test issuer 鑄一顆帶正確 scope 的 token,再打 gitea 路由: + +```bash +GOOD=$(curl -s 'http://127.0.0.1:9001/sign?scope=mcp:gitea&sub=alice') +curl -si $GW/mcp/gitea -H "Authorization: Bearer $GOOD" | sed -n '1p;$p' +``` + +預期: + +``` +HTTP/1.1 200 OK +hello from mcp-gitea +``` + +### Row 4 — 過期 token → 401 + +`kong.local.yml` 設了 `leeway_seconds: 60`(容忍 60 秒時鐘誤差),所以要真的過期 +**得超過 ttl + 60 秒**。鑄一顆 5 秒 token,等 70 秒: + +```bash +EXP=$(curl -s 'http://127.0.0.1:9001/sign?scope=mcp:gitea&ttl=5') +sleep 70 +curl -si $GW/mcp/gitea -H "Authorization: Bearer $EXP" | sed -n '1p;$p' +``` + +預期: + +``` +HTTP/1.1 401 Unauthorized +{"error":"invalid_token","error_description":"invalid or expired access token"} +``` + +### Row 5a — 缺少必要 scope → 403 + +```bash +NOSCOPE=$(curl -s 'http://127.0.0.1:9001/sign?scope=email&sub=alice') +curl -si $GW/mcp/gitea -H "Authorization: Bearer $NOSCOPE" | sed -n '1p;/WWW-Authenticate/p' +``` + +預期(challenge 帶 `insufficient_scope` + 缺的 scope): + +``` +HTTP/1.1 403 Forbidden +WWW-Authenticate: Bearer resource_metadata="...", error="insufficient_scope", scope="mcp:gitea" +``` + +### Row 5b — audience 不符 → 401(安全關鍵) + +`mcp-sentry` 路由在 `kong.local.yml` 設了 `require_audience: true`,預期 `aud` 等於 +`gateway_origin + 
resource_path`=`http://localhost:8000/mcp/sentry`。 + +先用**錯的 aud**(test issuer 預設 `aud` 是 `https://api.example.com`): + +```bash +BADAUD=$(curl -s 'http://127.0.0.1:9001/sign?scope=mcp:sentry&sub=alice') +curl -si $GW/mcp/sentry -H "Authorization: Bearer $BADAUD" | sed -n '1p;$p' +# 預期 401 invalid_token(aud 不符) +``` + +再用**正確的 aud**: + +```bash +GOODAUD=$(curl -sG 'http://127.0.0.1:9001/sign' \ + --data-urlencode 'scope=mcp:sentry' \ + --data-urlencode 'aud=http://localhost:8000/mcp/sentry') +curl -si $GW/mcp/sentry -H "Authorization: Bearer $GOODAUD" | sed -n '1p;$p' +# 預期 200 hello from mcp-sentry +``` + +### Row 5c — HS256 偽造(alg confusion)→ 401(安全關鍵) + +plugin 把接受的演算法 pin 在 `RS256/384/512`,任何 `HS*` 一律拒絕——擋掉「拿 +RSA 公鑰當 HMAC 密鑰簽 HS256」的經典偽造。用 openssl 手刻一顆 HS256: + +```bash +header=$(printf '{"alg":"HS256","typ":"JWT","kid":"x"}' | openssl base64 -A | tr '+/' '-_' | tr -d '=') +payload=$(printf '{"iss":"http://127.0.0.1:9001","scope":"mcp:gitea","exp":9999999999,"sub":"attacker"}' | openssl base64 -A | tr '+/' '-_' | tr -d '=') +sig=$(printf '%s.%s' "$header" "$payload" | openssl dgst -sha256 -hmac "secret" -binary | openssl base64 -A | tr '+/' '-_' | tr -d '=') +HS="$header.$payload.$sig" +curl -si $GW/mcp/gitea -H "Authorization: Bearer $HS" | sed -n '1p;$p' +``` + +預期: + +``` +HTTP/1.1 401 Unauthorized +{"error":"invalid_token","error_description":"invalid or expired access token"} +``` + +--- + +## 6. 驗證最近修掉的三個安全問題 + +### 6a. 
偽造身分 header 會被覆寫(trust-header smuggling) + +plugin 在轉發前會**先清掉** client 自帶的 `X-MCP-Subject` / `X-MCP-Scope`,再填入 +**token 裡驗證過的** `sub` / `scope`。後端被告知「無條件信任這兩個 header」,所以 +這道清除是身分不被偽造的關鍵。 + +stub 的 `http-echo` upstream 不會回放 header,要看到效果,臨時把 gitea upstream +換成會回放 header 的 echo 服務: + +```bash +# 1) 在 Kong 的網路上起一個 header echo 容器 +NET=$(docker inspect "$(docker-compose -f docker-compose.local.yml ps -q kong)" --format '{{range $k,$v := .NetworkSettings.Networks}}{{$k}}{{end}}') +docker run -d --rm --name mcp-echo --network "$NET" mendhak/http-https-echo:31 + +# 2) 暫時把 kong.local.yml 的 gitea upstream 指到 echo,重建 kong +cp kong.local.yml /tmp/kong.local.yml.bak +sed -i '' 's#url: http://mcp-gitea:3000#url: http://mcp-echo:8080#' kong.local.yml +docker-compose -f docker-compose.local.yml up -d --force-recreate kong +sleep 8 + +# 3) 帶有效 token,同時偽造 X-MCP-Subject / X-MCP-Scope +GOOD=$(curl -s 'http://127.0.0.1:9001/sign?scope=mcp:gitea&sub=alice@corp') +curl -s $GW/mcp/gitea \ + -H "Authorization: Bearer $GOOD" \ + -H "X-MCP-Subject: attacker@evil" \ + -H "X-MCP-Scope: admin:everything" \ + | python3 -c "import sys,json; h=json.load(sys.stdin)['headers']; print('x-mcp-subject ->', h.get('x-mcp-subject')); print('x-mcp-scope ->', h.get('x-mcp-scope'))" +``` + +預期——偽造值被丟棄,換成 token 裡的真實身分: + +``` +x-mcp-subject -> alice@corp +x-mcp-scope -> mcp:gitea +``` + +還原設定、清掉 echo 容器: + +```bash +cp /tmp/kong.local.yml.bak kong.local.yml +docker rm -f mcp-echo +docker-compose -f docker-compose.local.yml up -d --force-recreate kong +``` + +### 6b. 
重複 Authorization header → 400 + +```bash +GOOD=$(curl -s 'http://127.0.0.1:9001/sign?scope=mcp:gitea&sub=alice') +curl -si $GW/mcp/gitea \ + -H "Authorization: Bearer $GOOD" \ + -H "Authorization: token stolen-pat" | sed -n '1p' +# 預期 HTTP/1.1 400 Bad Request +``` + +> **觀察到的細節**:在 Kong 前面,底層 nginx 會在 plugin 執行**之前**就以 400 +> 擋掉重複的 `Authorization` header(回應是 Kong 的通用 `{"message":"Bad request"}`、 +> 帶 `Connection: close`、沒有 `WWW-Authenticate`)。plugin 內的多值檢查是 +> **深度防禦**——在非 Kong 前置或 nginx 行為改變時才會由 plugin 自己回 +> `400 invalid_request`。兩種情況都不會把未驗證的第二組憑證轉發出去。 + +### 6c. 垃圾 token → 401(不是 5xx) + +```bash +curl -si $GW/mcp/gitea -H "Authorization: Bearer not.a.jwt" | sed -n '1p;$p' +# 預期 401 invalid_token +``` + +--- + +## 7. JWKS 失效時的行為(503,不是 401) + +金鑰抓不到是 **gateway 端**的事,plugin 回 `503 temporarily_unavailable`(而非 +401),這樣 spec-compliant 的 client 不會誤以為「token 壞了」去重跑整套 OAuth。 +模擬:把 test issuer 關掉,再用一顆**沒被 cache 過的新 kid**……實務上 keyfunc 會 +快取已抓到的金鑰,最直接的觀察是「placeholder 設定(`auth.example.com` 無 JWKS) +下打 Row 3 會得到 503」。在本機 test issuer 流程中,停掉 issuer 後既有金鑰仍可用, +屬於正常的高可用設計(已抓到的金鑰每小時背景刷新、抓取逾時上限 10 秒、失敗的抓取 +永不被 cache)。 + +--- + +## 8. 
接真 AuthGate(替換掉 test issuer) + +前面 §3–§7 用本機 test issuer 把流程跑通。要接你**自己架的 AuthGate**,把 token +來源從 test issuer 換成真的 AuthGate 即可——倉庫附了一組現成檔案: + +- [`kong.authgate.yml`](kong.authgate.yml):指向真 AuthGate 的 plugin 設定。 +- [`docker-compose.authgate.yml`](docker-compose.authgate.yml):掛上面那份設定 + + 本機 binary + `host.docker.internal`。 + +以下以 AuthGate 跑在 macOS host 的 `http://localhost:8080` 為例。 + +### 8.1 先從 AuthGate 的 discovery 抓真實值 + +**別用猜的**——`issuer` / `jwks_uri` 一律以 discovery 文件為準: + +```bash +curl -s http://localhost:8080/.well-known/openid-configuration \ + | python3 -m json.tool | grep -iE '"issuer"|jwks_uri|token_endpoint|scopes_supported' +``` + +範例輸出(你的可能不同,以實際為準): + +``` +"issuer": "http://localhost:8080", +"jwks_uri": "http://localhost:8080/.well-known/jwks.json", +"token_endpoint": "http://localhost:8080/oauth/token", +"scopes_supported": ["openid", "profile", "email"], +``` + +### 8.2 唯一的雷:`localhost` vs `host.docker.internal` + +換成 `http://localhost:8080`(純 HTTP)少了自簽 TLS 與 `.local` DNS 兩個麻煩,但 +**還剩一個**:Kong 在容器裡,容器的 `localhost` 是它自己,不是 macOS host。所以: + +| 欄位 | 值 | 為什麼 | +| --- | --- | --- | +| `issuer` | `http://localhost:8080` | 拿來跟 token 的 `iss` **逐字元比對**(plugin 不連它,只比字串) | +| `jwks_uri` | `http://host.docker.internal:8080/.well-known/jwks.json` | 由 **Kong 容器**去抓,要填容器連得到 host 的位址 | +| `gateway_origin` | `http://localhost:8000` | 不變(這是 Kong proxy,給 host 端 client / 組 PRM URL 用) | + +`kong.authgate.yml` 已經照這樣寫好;`docker-compose.authgate.yml` 也已含 +`extra_hosts: host.docker.internal:host-gateway`,**都不用改**。把 `discovery` 抓到的 +`issuer` / `jwks_uri` 主機名對齊你的環境即可。 + +### 8.3 ⚠️ scope:AuthGate 沒發的 scope 一定 403 + +上面 `scopes_supported` 只有 `openid profile email`,**沒有 `mcp:gitea`**。若 plugin +設 `required_scopes: [mcp:gitea]`,再有效的 token 也會 `403 insufficient_scope`。 +`kong.authgate.yml` 已避開這點:gitea 路由 `required_scopes: []`(不檢查),sentry +路由用 `email`(AuthGate 真的會發)示範強制。要照產品語意用 `mcp:gitea`,得先去 +AuthGate 端註冊並發給該 client。 + +### 8.4 切換到 AuthGate stack + +```bash +cd ../kong-mcp 
+docker-compose -f docker-compose.local.yml down # 停掉 test-issuer stack(同一個 :8000) +docker-compose -f docker-compose.authgate.yml up -d +docker-compose -f docker-compose.authgate.yml logs kong | grep -i pluginserver # 確認 plugin 載入 +``` + +### 8.5 不用帳密就能驗「JWKS 連線通不通」 + +接真 AuthGate 最常見的失敗是 `jwks_uri` 容器連不到 → 所有 token 變 `503`。有個小 +技巧能**不用任何憑證**就分辨「連線問題」還是「token 問題」:拿一顆**格式正確但簽章 +對不上**的 token(例如本機 test issuer 簽的)丟進去—— + +```bash +# test issuer 仍在 :9001 的話: +WELLFORMED=$(curl -s 'http://127.0.0.1:9001/sign?scope=email&sub=probe') +curl -si http://localhost:8000/mcp/gitea -H "Authorization: Bearer $WELLFORMED" | sed -n '1p;$p' +``` + +- 回 **`401 invalid_token`** → plugin 成功抓到 AuthGate 的 JWKS(只是 kid 對不上)→ +  **連線 OK** ✅ +- 回 **`503 temporarily_unavailable`** → JWKS 抓不到 → 檢查 `jwks_uri` 是不是用了 +  `host.docker.internal`、AuthGate 有沒有在跑。 + +### 8.6 拿真 token 跑 Row 3(需要 client 憑證) + +token 來源從 test issuer 的 `/sign` 換成走 OAuth。倉庫的 +[`../go-jwks/get-token.sh`](../go-jwks/get-token.sh) 走 client_credentials,免瀏覽器、 +最適合貼進 `$GOOD`: + +```bash +cd ../go-jwks +# 先 --decode 確認:alg=RS256、iss==你的 issuer、scope 有 email +# (把 your-client-id / your-client-secret 換成 AuthGate 發給你的 client 憑證) +ISSUER_URL=http://localhost:8080 CLIENT_ID=your-client-id CLIENT_SECRET=your-client-secret \ +  bash get-token.sh --scope "email" --decode + +GOOD=$(ISSUER_URL=http://localhost:8080 CLIENT_ID=your-client-id CLIENT_SECRET=your-client-secret \ +  bash get-token.sh --scope "email") +curl -si http://localhost:8000/mcp/gitea -H "Authorization: Bearer $GOOD" | sed -n '1p;$p' # 預期 200 +curl -si http://localhost:8000/mcp/sentry -H "Authorization: Bearer $GOOD" | sed -n '1p;$p' # 預期 200(帶 email scope) +``` + +需要帶真實使用者身分(`sub`)的 token 就改用 [`../bash-cli/main.sh`](../bash-cli) 或 +`../go-cli`(Auth Code / Device Code),拿到的 access token 一樣貼進 `$GOOD`。 + +> **preflight 三項**(README 也有,最容易漏): +> 1. 用的是 **access token**、不是 `id_token`——`--decode` 後 header `alg` 必須是 +>    `RS256`、`kid` 對得上 JWKS。 +> 2. token 的 `iss` 與設定的 `issuer` **逐字元相同**(差一個結尾 `/` 就 401)。 +> 3. 
`aud`:先 `require_audience: false`。要開之前先 `--decode` 看 token 的 `aud` +> 到底是什麼,再決定 `audience` 要不要覆寫。 + +§5 的其他列(Row 1/2 挑戰與 PRM、Row 5c HS256 偽造、§6 的 6a/6b/6c)與 token 來源 +無關,照跑即可——只有「需要有效 token」的 Row 3/4/5a/5b 改用上面的真 token。 + +--- + +## 9. 收尾與清理 + +```bash +# 停掉 Kong demo stack(依你用的那一份) +docker-compose -f docker-compose.local.yml down # test issuer 版 +docker-compose -f docker-compose.authgate.yml down # 真 AuthGate 版 + +# 停掉 test issuer:回到它的終端機分頁按 Ctrl-C + +# 移除編譯產物(已被 .gitignore 忽略) +rm -f mcp-authgate mcp-authgate-linux +``` + +--- + +## 10. 常見問題(macOS) + +| 症狀 | 原因 / 解法 | +| ------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `docker compose up --build` 卡在 `go mod download` 的 x509 憑證錯誤 | 公司網路 TLS 攔截,BuildKit 容器內缺企業根憑證。改走本手冊第 2+4 步的本機交叉編譯 + `docker-compose.local.yml`。 | +| Row 3 一直 `503 temporarily_unavailable` | Kong 容器抓不到 `jwks_uri`。確認 issuer 在跑,且 `jwks_uri` 用 `host.docker.internal`(不是 `127.0.0.1` / `localhost`),且 compose 檔有 `extra_hosts: host.docker.internal:host-gateway`。用 §8.5 的無帳密技巧分辨連線問題。 | +| Row 3 變成 `401 invalid_token` | `issuer` 設定值與 token 的 `iss` 不一致(差一個結尾斜線也會錯)。`--decode` 看 token 的 `iss`,設定的 `issuer` 要逐字元相同。 | +| 接真 AuthGate,有效 token 卻 `403 insufficient_scope` | `required_scopes` 要求了 AuthGate 沒發的 scope(例如 `mcp:gitea`,但 AuthGate 只有 `openid profile email`)。改成 AuthGate 真的會發的 scope,或在 AuthGate 端註冊該 scope。見 §8.3。 | +| `exec format error` / plugin 起不來 | 交叉編譯的 `GOARCH` 跟 Docker VM 架構不符。Apple Silicon 用 `arm64`、Intel 用 `amd64`。 | +| Kong 啟動就掛在 `failed decoding plugin info: Expected value but found T_END at character 1` | Kong 跑 `QUERY_CMD`(`mcp-authgate -dump`)拿到**空 stdout**。先驗 binary:`docker-compose -f docker-compose.local.yml run --rm --entrypoint /usr/local/bin/mcp-authgate kong -dump` 應印出 `{"Protocol":"ProtoBuf:1",...}`。空白 / `exec format 
error` = binary 與 kong 容器架構錯位(見上一列)。本機掛載版:用對的 `GOARCH` 重新交叉編譯(見第 2 步)再 `docker-compose -f docker-compose.local.yml up -d --force-recreate kong`;走 `Dockerfile` build 版:架構由 build arg `TARGETARCH`(`docker build --platform` 帶入)釘住,重建用 `docker compose build --no-cache kong && docker compose up -d --force-recreate`。 | +| `docker compose` 說 unknown command | 你的環境只有獨立版 `docker-compose`。把指令裡的 `docker compose` 換成 `docker-compose` 即可(功能相同)。 | +| 改了 `kong.*.yml` 沒生效 | DB-less Kong 在啟動時讀設定。改完要 `docker-compose -f up -d --force-recreate kong`。 | + +--- + +## 附錄:驗證結果速查 + +| # | 測試 | 指令重點 | 預期 | +| --- | ------------------------- | ------------------------------------------------------------ | ----------------------------------------------------------- | +| 1 | 未認證挑戰 | `curl -si $GW/mcp/gitea` | 401 + `WWW-Authenticate` | +| 2 | PRM 文件 | `curl -s $GW/.well-known/oauth-protected-resource/mcp/gitea` | JSON(resource / authorization_servers / scopes_supported) | +| 3 | 有效 token | `Bearer $GOOD` | 200,轉發 upstream | +| 4 | 過期 token(>ttl+leeway) | `Bearer $EXP`(等 70s) | 401 invalid_token | +| 5a | 缺 scope | `Bearer $NOSCOPE` | 403 insufficient_scope | +| 5b | audience 不符 / 相符 | sentry 路由(require_audience) | 401 / 200 | +| 5c | HS256 偽造 | 手刻 HS256 | 401(alg confusion 被擋) | +| 6a | 偽造身分 header | 同時帶 `X-MCP-Subject: attacker` | 被覆寫成 token 的 `sub` | +| 6b | 重複 Authorization | 兩個 `Authorization` header | 400 | +| 6c | 垃圾 token | `Bearer not.a.jwt` | 401 | diff --git a/kong-mcp/README.md b/kong-mcp/README.md new file mode 100644 index 0000000..689c46a --- /dev/null +++ b/kong-mcp/README.md @@ -0,0 +1,214 @@ +# kong-mcp — Unified MCP OAuth front door (Kong + AuthGate) + +> 繁體中文版本請見 [README.zh-TW.md](README.zh-TW.md) + +A Kong [go-pdk](https://github.com/Kong/go-pdk) plugin that puts **one OAuth +front door in front of every MCP server**. 
Internal MCP services already sit +behind [Kong](https://github.com/Kong/kong); this plugin makes them stop +accepting hand-written PATs and instead require an AuthGate-issued OAuth access +token — validated locally with **RS256 + JWKS**, then forwarded to the MCP +backend. + +```mermaid +graph LR + client["MCP client<br/>(runs PKCE itself)"] + kong["Kong<br/>+ mcp-authgate plugin"] + authgate["AuthGate<br/>Authorization Server"] + mcp["MCP server(s)<br/>gitea / sentry"] + + client <-->|"MCP requests<br/>+ 401 challenge / PRM"| kong + kong -->|"forward + X-MCP-Subject / X-MCP-Scope"| mcp + client -->|"Auth Code + PKCE<br/>/authorize · /token"| authgate + kong -.->|"JWKS fetch<br/>(cached, auto-rotated)"| authgate +``` + +**Kong does not run the OAuth flow.** It only _advertises where the flow is_ +(steps ②③) and _verifies the token that comes back_ (step ⑤). The MCP client +runs Auth Code + PKCE against AuthGate by itself. One plugin config covers all MCP +servers — attach it to each service with a different `resource_path`. + +## The handshake + +This is the MCP authorization handshake (the 2025-06 MCP spec on top of RFC 9728 +Protected Resource Metadata and RFC 6750 bearer tokens). The numbers map to the +`main.go` comments: + +```mermaid +sequenceDiagram + participant C as MCP client + participant K as Kong + mcp-authgate + participant A as AuthGate + participant M as MCP server + + C->>K: GET /mcp/gitea (no token) + K-->>C: ② 401 + WWW-Authenticate:<br/>Bearer resource_metadata="‹PRM URL›" + C->>K: GET /.well-known/oauth-protected-resource/mcp/gitea + K-->>C: ③ 200 Protected Resource Metadata<br/>(authorization_servers, scopes) + C->>A: Auth Code + PKCE (/authorize, /token) + A-->>C: RS256 access token + K-)A: fetch JWKS (cached / auto-rotated) + C->>K: GET /mcp/gitea + Bearer ‹jwt› + Note over K: ⑤ verify sig(JWKS) + iss + exp + type=access<br/>
+ scope (+ aud when require_audience) + K->>M: forward + X-MCP-Subject / X-MCP-Scope + M-->>K: 200 + K-->>C: 200 +``` + +| Step | Who | What happens | +| ---- | ----------------- | -------------------------------------------------------------------------------------------------------------------------------- | +| ② | Kong → client | Request with no/invalid token → `401` + `WWW-Authenticate: Bearer resource_metadata="‹PRM URL›"` | +| ③ | Kong → client | Client fetches `‹PRM URL›` → plugin serves Protected Resource Metadata (which AuthGate, which scopes) | +| — | client ↔ AuthGate | Client discovers AuthGate from the metadata and runs **Auth Code + PKCE** to get an access token | +| ⑤ | Kong | Client retries with `Authorization: Bearer ‹jwt›` → plugin verifies **sig (JWKS) + iss + exp + `type=access` + scope** (and **aud** only when `require_audience` is on) → forwards upstream | + +## Why RS256 + JWKS (not HS256) + +- **No shared secret on the gateway.** With HS256 the gateway would have to hold +  AuthGate's signing secret — putting a forge-anything key on the edge. With +  RS256 + JWKS, Kong only ever sees the **public** key. +- **Zero-touch key rotation.** Rotate keys in AuthGate's JWKS; Kong picks them up +  automatically (keyfunc background refresh). No Kong config change. +- **Alg-confusion is blocked.** The plugin pins accepted algorithms to +  `RS256/RS384/RS512` and refuses `HS*`. This defeats the classic +  forgery where an attacker signs HS256 using the RSA _public_ key as the HMAC +  secret. (Validation matrix row 5, last item, tests exactly this.) + +The verification engine is [`MicahParks/keyfunc`](https://github.com/MicahParks/keyfunc), +which handles JWKS fetch, in-memory cache, background rotation, and rate-limited +refetch on an unknown `kid` — the parts that are easy to get wrong by hand in Lua. + +## Configuration reference + +One plugin instance per MCP resource. See `kong.yml` for full examples. 
+ +| Field | Required | Description | +| ------------------ | -------- | ---------------------------------------------------------------------------------------------- | +| `issuer` | ✅ | AuthGate base URL. Must equal the token's `iss` claim byte-for-byte. | +| `gateway_origin` | ✅ | Externally reachable Kong origin, e.g. `https://gw.example.com`. Used to build the PRM URL. | +| `resource_path` | ✅ | This resource's path, e.g. `/mcp/gitea`. | +| `jwks_uri` | ✅ | AuthGate JWKS endpoint (RS256). Accepted algs are always pinned to the RS family. | +| `required_scopes` | | All listed scopes must be present in the token's `scope`, else `403 insufficient_scope`. | +| `audience` | | Expected `aud` for **token validation only**. Defaults to `gateway_origin + resource_path`. The PRM `resource` always stays the canonical URL (RFC 9728 §3.3), so set this only when AuthGate emits a fixed non-URL `aud`. | +| `require_audience` | | Enforce `aud` only when `true`. Keep `false` until AuthGate emits a per-resource `aud` — but see the replay warning below. When enabling it, set `audience` to AuthGate's actual `aud` string (the default `gateway_origin + resource_path` is an exact, scheme/slash-sensitive match and will `401` every token if it differs). | +| `leeway_seconds` | | Clock-skew tolerance for `exp`/`nbf`. Recommend `60`. Must be ≥ 0. | + +Only tokens with `type=access` are accepted; AuthGate refresh tokens (same key, +`iss`, `aud`, and `scope`, differing only by `type` and a longer `exp`) are +rejected with `401 invalid_token`. + +> go-pdk schemas can't mark fields required, so the four required fields are +> validated on the first request instead — a missing one fails every request +> with `500 server_error` and a critical log line, not a silent misbehavior. + +> **Routing gotcha.** Each Kong route must match **both** `resource_path` and its +> PRM path (`/.well-known/oauth-protected-resource` + `resource_path`). 
Otherwise Kong has no route to +> hand the client's step ③ lookup to and the plugin never serves the metadata. +> See the `paths:` lists in `kong.yml`. + +> **Cross-resource replay warning.** With `require_audience: false` (the shipped +> default), `aud` is **not** checked, so the only thing distinguishing one MCP +> resource from another is `scope`. A token minted with multiple scopes (e.g. +> `mcp:gitea mcp:sentry`) is accepted at **every** resource whose scope it +> carries, and because the raw bearer is forwarded upstream unchanged, a backend +> that receives it can replay it against a sibling resource. Bind tokens to a +> resource with `aud` (turn on `require_audience` once AuthGate emits a +> per-resource `aud`) before treating resources as isolated. + +## 1. Build the plugin + +go-pdk plugins are ordinary executables that speak the pluginserver RPC protocol +— no cgo, no `.so`. A full `go build` needs network access for the go-pdk +protobuf transitive deps; run it locally: + +```bash +cd kong-mcp +go mod tidy && go build -o mcp-authgate . +``` + +## 2. Wire it into Kong + +Register the plugin and point the pluginserver at the binary (env vars, shown in +`docker-compose.yml`): + +```bash +KONG_PLUGINS=bundled,mcp-authgate +KONG_PLUGINSERVER_NAMES=mcp-authgate +KONG_PLUGINSERVER_MCP_AUTHGATE_START_CMD=/usr/local/bin/mcp-authgate +KONG_PLUGINSERVER_MCP_AUTHGATE_QUERY_CMD=/usr/local/bin/mcp-authgate -dump +``` + +## 3. Run the demo stack + +```bash +cd kong-mcp +docker compose up --build +``` + +This starts DB-less Kong (proxy on `:8000`; the unauthenticated admin API is +bound to container-loopback and not published — see `docker-compose.yml`) with +two stub MCP upstreams. Edit `kong.yml` so `issuer` / `gateway_origin` / +`jwks_uri` point at your real AuthGate before expecting tokens to validate. + +## 4. Validation matrix + +After `docker compose up`, exercise the handshake. Replace `$GW` with +`http://localhost:8000` for the demo (or your `gateway_origin`). 
+ +> Rows 1–2 work against the stub demo as shipped. Rows 3–5b need real tokens: +> point `issuer` / `jwks_uri` in `kong.yml` at an AuthGate first (with the +> placeholder config they fail with `503 temporarily_unavailable`, since +> `auth.example.com` has no JWKS to fetch). Row 5c is the exception — an HS256 +> forgery is rejected with `401 invalid_token` *before* any JWKS fetch (the alg +> is pinned first), so it returns `401` even against the placeholder config. + +| # | Test | Command | Expect | +| --- | ---------------------------- | --------------------------------------------------------------------------------------- | ----------------------------------------------------------------- | +| 1 | Unauthenticated → challenge | `curl -i $GW/mcp/gitea` | `401` + `WWW-Authenticate: Bearer resource_metadata="…"` | +| 2 | PRM document served | `curl -s $GW/.well-known/oauth-protected-resource/mcp/gitea` | JSON with `resource`, `authorization_servers`, `scopes_supported` | +| 3 | Valid token → forwarded | `curl -i $GW/mcp/gitea -H "Authorization: Bearer $GOOD"` | `200` from the MCP upstream | +| 4 | Expired token | `curl -i $GW/mcp/gitea -H "Authorization: Bearer $EXPIRED"` | `401 invalid_token` | +| 5a | Missing scope | token without `required_scopes` → `curl -i $GW/mcp/gitea -H "Authorization: Bearer $X"` | `403 insufficient_scope` | +| 5b | **Cross-audience** | token issued for a different resource, with `require_audience: true` | `401 invalid_token` (aud mismatch) | +| 5c | **HS256 forgery (key bits)** | forge an HS256 token using the RSA public key as the HMAC secret | `401 invalid_token` — **must be rejected** (alg confusion) | + +Rows **5b** and **5c** are the security-critical ones — run them before going +live. + +## AuthGate-side preflight + +Before this works end-to-end, confirm three things on AuthGate (decode a real +**access token**, not just the `id_token`): + +1. 
**JWKS resolves.** `GET /.well-known/openid-configuration` → its + `jwks_uri` returns a non-empty `keys` array. +2. **Access tokens are RS256.** Decode an actual access token; its header `alg` + is `RS256` (not `HS256`) and its `kid` matches a key in the JWKS. AuthGate's + default is often `JWT_SECRET` (HS256) — make sure you've moved **access + tokens** (not only `id_token`) to asymmetric signing. +3. **Issuer matches.** The token's `iss` equals the plugin's `issuer` config, + byte-for-byte (mind the trailing slash). + +## Operational notes + +- **JWKS endpoint must be highly available.** If the **initial** fetch fails, + token requests get `503 temporarily_unavailable` (not `401`, so clients don't + re-run OAuth) and it is retried on the next request — a failed initial fetch is + never cached. Fetch waits are capped at 10s and run under a per-URI lock, so a + slow AuthGate can't stall traffic for other resources. Caveat: once keys are + cached, a token whose `kid` is unknown returns `401 invalid_token` (offline + validation can't tell "key rotated in mid-outage" from "forged kid"), and an + hourly refresh that pulls a JWKS containing one malformed key can drop the + cached keys until a clean refresh. Keep the JWKS valid and overlap keys + generously during rotation. +- **Browser-based MCP clients need CORS.** A CORS preflight (`OPTIONS`, no + `Authorization`) is answered with the `401` challenge; put Kong's `cors` + plugin on the route if web-hosted clients must reach the gateway. +- **Overlap keys during rotation.** Keep the old and new keys in the JWKS + together for a window so in-flight tokens aren't killed mid-rotation. +- **Keep access-token TTLs short.** Like any offline validation, a revoked token + stays valid until its `exp` — minutes, not hours. 
+- **The bearer token is forwarded upstream unchanged.** Kong adds `X-MCP-Subject` + / `X-MCP-Scope` but does **not** strip or exchange the `Authorization` header, + so each MCP backend receives a live, replayable token. Trust your MCP backends + accordingly, and bind tokens with `aud` (see the replay warning above) so a + backend can't reuse one against a sibling resource. diff --git a/kong-mcp/README.zh-TW.md b/kong-mcp/README.zh-TW.md new file mode 100644 index 0000000..40d17e0 --- /dev/null +++ b/kong-mcp/README.zh-TW.md @@ -0,0 +1,199 @@ +# kong-mcp — MCP 統一 OAuth 入口(Kong + AuthGate) + +> English version: [README.md](README.md) + +一個 Kong [go-pdk](https://github.com/Kong/go-pdk) plugin,在**所有 MCP server +前面架起單一的 OAuth 入口**。公司內部的 MCP 服務本來就掛在 +[Kong](https://github.com/Kong/kong) 後面;這個 plugin 讓它們不再接受各自手填的 +PAT,改成要求 AuthGate 簽發的 OAuth access token——在本地用 **RS256 + JWKS** +驗證後,再把 token 往後面的 MCP server 送。 + +```mermaid +graph LR + client["MCP client
(自己跑 PKCE)"] + kong["Kong<br/>+ mcp-authgate plugin"] + authgate["AuthGate<br/>授權伺服器"] + mcp["MCP server(s)<br/>gitea / sentry"] + + client <-->|"MCP 請求<br/>+ 401 挑戰 / PRM"| kong + kong -->|"放行 + X-MCP-Subject / X-MCP-Scope"| mcp + client -->|"Auth Code + PKCE<br/>/authorize · /token"| authgate + kong -.->|"抓 JWKS
(快取、自動輪替)"| authgate +``` + +**Kong 不跑 OAuth 流程。** 它只負責_告訴 client 流程在哪裡_(步驟 ②③),以及 +_驗證跑完流程後拿回來的 token_(步驟 ⑤)。MCP client 自己對 AuthGate 跑 Auth Code + PKCE。 +一套 plugin 設定就能同時罩住所有 MCP server——對每個 service 掛上去、各自填不同的 +`resource_path` 即可。 + +## 認證握手流程 + +這是 MCP 的授權握手(2025-06 MCP spec,建構在 RFC 9728 Protected Resource +Metadata 與 RFC 6750 bearer token 之上)。編號對應 `main.go` 裡的註解: + +```mermaid +sequenceDiagram + participant C as MCP client + participant K as Kong + mcp-authgate + participant A as AuthGate + participant M as MCP server + + C->>K: GET /mcp/gitea(未帶 token) + K-->>C: ② 401 + WWW-Authenticate:
Bearer resource_metadata="‹PRM URL›" + C->>K: GET /.well-known/oauth-protected-resource/mcp/gitea + K-->>C: ③ 200 Protected Resource Metadata<br/>(authorization_servers、scopes) + C->>A: Auth Code + PKCE(/authorize、/token) + A-->>C: RS256 access token + K-)A: 抓 JWKS(快取/自動輪替) + C->>K: GET /mcp/gitea + Bearer ‹jwt› + Note over K: ⑤ 驗 簽章(JWKS) + iss + exp + type=access
+ scope(aud 僅在 require_audience 時) + K->>M: 放行 + X-MCP-Subject / X-MCP-Scope + M-->>K: 200 + K-->>C: 200 +``` + +| 步驟 | 由誰 | 發生什麼事 | +| ---- | ----------------- | -------------------------------------------------------------------------------------------------------------- | +| ② | Kong → client | 沒帶 / 帶錯 token 的請求 → `401` + `WWW-Authenticate: Bearer resource_metadata="<PRM URL>"` | +| ③ | Kong → client | client 去抓 `<PRM URL>` → plugin 回傳 Protected Resource Metadata(要用哪個 AuthGate、要哪些 scope) | +| — | client ↔ AuthGate | client 從 metadata 找到 AuthGate,自己跑 **Auth Code + PKCE** 換 access token | +| ⑤ | Kong | client 帶 `Authorization: Bearer <jwt>` 重試 → plugin 驗 **簽章(JWKS) + iss + exp + `type=access` + scope**(**aud** 僅在 `require_audience` 開啟時驗)→ 放行往後送 | + +## 為什麼選 RS256 + JWKS(不是 HS256) + +- **gateway 上不放共享密鑰。** 用 HS256 的話,gateway 得持有 AuthGate 的簽章密鑰 + ——等於把一把「能偽造任何 token」的鑰匙擺在最外緣。RS256 + JWKS 之下,Kong 永遠 + 只摸得到**公鑰**。 +- **金鑰輪替零接觸。** 在 AuthGate 的 JWKS 換金鑰,Kong 會自動接手(keyfunc 背景 + 輪替),不用改 Kong 設定。 +- **擋掉 alg-confusion。** plugin 把接受的演算法鎖死在 + `RS256/RS384/RS512`、拒絕 `HS*`。這擋掉了最經典的偽造手法:攻擊者拿 RSA **公鑰** + 當 HMAC 金鑰去簽 HS256。(驗證矩陣第 5 列最後一項就是專門測這個。) + +驗證引擎是 [`MicahParks/keyfunc`](https://github.com/MicahParks/keyfunc),它把 +JWKS 的抓取、記憶體快取、背景輪替、未知 `kid` 的限流補抓全包好了——這些正是用 Lua +自己刻最容易出錯的部分。 + +## 設定參數 + +每個 MCP 資源對應一個 plugin 實例。完整範例見 `kong.yml`。 + +| 參數 | 必填 | 說明 | +| ------------------ | ---- | ----------------------------------------------------------------------------------- | +| `issuer` | ✅ | AuthGate base URL,必須與 token 的 `iss` claim 逐字元相符。 | +| `gateway_origin` | ✅ | 對外可達的 Kong origin,例如 `https://gw.example.com`,用來組出 PRM URL。 | +| `resource_path` | ✅ | 此資源的路徑,例如 `/mcp/gitea`。 | +| `jwks_uri` | ✅ | AuthGate JWKS endpoint(RS256)。接受的演算法固定鎖在 RS 家族。 | +| `required_scopes` | | token 的 `scope` 必須包含全部所列項目,否則 `403 insufficient_scope`。 | +| `audience` | | **只影響 token 的 `aud` 驗證**,預設為 `gateway_origin + resource_path`。PRM 的 `resource` 永遠維持 canonical URL(RFC 9728 §3.3),只有在 AuthGate 發固定的非 URL `aud` 時才需要設。 | +|
`require_audience` | | 設 `true` 才強制檢查 `aud`。AuthGate 還沒發出 per-resource `aud` 前先維持 `false`——但請看下方的重放警告。開啟時要把 `audience` 設成 AuthGate 實際的 `aud` 字串(預設的 `gateway_origin + resource_path` 是逐字元、區分 scheme/斜線的精確比對,不符就會把每顆 token 都 `401`)。 | +| `leeway_seconds` | | `exp`/`nbf` 的時鐘偏移容忍秒數,建議 `60`。必須 ≥ 0。 | + +只接受 `type=access` 的 token;AuthGate 的 refresh token(金鑰、`iss`、`aud`、 +`scope` 都相同,只有 `type` 與較長的 `exp` 不同)會被回 `401 invalid_token` 拒絕。 + +> go-pdk 產生的 schema 無法標記必填欄位,所以四個必填欄位改在第一個請求時驗證—— +> 缺欄位時所有請求都會回 `500 server_error` 並寫一行 critical log,而不是默默地 +> 行為異常。 + +> **路由陷阱。** 每條 Kong route 必須**同時**匹配 `resource_path` 與其 PRM 路徑 +> (`/.well-known/oauth-protected-resource` + `resource_path`)。否則 client 在步驟 ③ 來抓 metadata 時 +> Kong 沒有對應 route 可交給 plugin,plugin 就不會回傳 metadata。請看 `kong.yml` +> 裡每條 route 的 `paths:` 清單。 + +> **跨資源重放警告。** 在 `require_audience: false`(出廠預設)下不會檢查 `aud`, +> 所以區分不同 MCP 資源的只剩 `scope`。一顆帶多個 scope 的 token(例如 +> `mcp:gitea mcp:sentry`)會在**每個**它帶有對應 scope 的資源上都被接受;又因為原始 +> bearer 會原封不動往後送,收到它的後端可以拿去重放到另一個資源。把資源視為彼此隔離 +> 之前,請先用 `aud` 綁定 token(等 AuthGate 發出 per-resource `aud` 後開啟 +> `require_audience`)。 + +## 1. 編譯 plugin + +go-pdk plugin 是會講 pluginserver RPC 協定的一般執行檔——不用 cgo、也不是 `.so`。 +完整 `go build` 需要網路(go-pdk 的 protobuf 相依),請在本機跑: + +```bash +cd kong-mcp +go mod tidy && go build -o mcp-authgate . +``` + +## 2. 接進 Kong + +註冊 plugin,並把 pluginserver 指向 binary(環境變數,見 `docker-compose.yml`): + +```bash +KONG_PLUGINS=bundled,mcp-authgate +KONG_PLUGINSERVER_NAMES=mcp-authgate +KONG_PLUGINSERVER_MCP_AUTHGATE_START_CMD=/usr/local/bin/mcp-authgate +KONG_PLUGINSERVER_MCP_AUTHGATE_QUERY_CMD=/usr/local/bin/mcp-authgate -dump +``` + +## 3. 啟動示範環境 + +```bash +cd kong-mcp +docker compose up --build +``` + +這會啟動 DB-less 的 Kong(proxy 在 `:8000`;未認證的 admin API 綁在 container 的 +loopback、不對外發布——見 `docker-compose.yml`)加兩個假的 MCP upstream。在期待 +token 能通過驗證之前,請先改 `kong.yml` 讓 `issuer` / `gateway_origin` / +`jwks_uri` 指向你們真正的 AuthGate。 + +## 4. 
驗證矩陣 + +`docker compose up` 後實際跑一遍握手。示範環境把 `$GW` 換成 +`http://localhost:8000`(或你的 `gateway_origin`)。 + +> 第 1–2 列用內建的 stub demo 就能跑。第 3–5b 列需要真的 token:先把 `kong.yml` +> 的 `issuer` / `jwks_uri` 指向 AuthGate(用預設的 placeholder 設定會回 +> `503 temporarily_unavailable`,因為 `auth.example.com` 抓不到 JWKS)。第 5c 列是 +> 例外——HS256 偽造會在抓 JWKS *之前* 就先被擋(演算法先被鎖定),回 +> `401 invalid_token`,所以即使用 placeholder 設定也是 `401`。 + +| # | 測試項目 | 指令 | 預期結果 | +| --- | -------------------------- | --------------------------------------------------------------------------------------- | --------------------------------------------------------------- | +| 1 | 未認證 → 挑戰 | `curl -i $GW/mcp/gitea` | `401` + `WWW-Authenticate: Bearer resource_metadata="…"` | +| 2 | 回傳 PRM 文件 | `curl -s $GW/.well-known/oauth-protected-resource/mcp/gitea` | JSON 含 `resource`、`authorization_servers`、`scopes_supported` | +| 3 | 有效 token → 放行 | `curl -i $GW/mcp/gitea -H "Authorization: Bearer $GOOD"` | MCP upstream 回 `200` | +| 4 | 過期 token | `curl -i $GW/mcp/gitea -H "Authorization: Bearer $EXPIRED"` | `401 invalid_token` | +| 5a | 缺少 scope | 沒有 `required_scopes` 的 token → `curl -i $GW/mcp/gitea -H "Authorization: Bearer $X"` | `403 insufficient_scope` | +| 5b | **跨 audience** | 為另一個資源簽發的 token,且 `require_audience: true` | `401 invalid_token`(aud 不符) | +| 5c | **HS256 偽造(金鑰位元)** | 拿 RSA 公鑰當 HMAC 金鑰偽造一顆 HS256 token | `401 invalid_token` — **必須被擋**(alg confusion) | + +第 **5b** 與 **5c** 列是安全關鍵——上線前務必跑過。 + +## AuthGate 端動手前確認 + +要端到端跑通之前,先在 AuthGate 確認三件事(解一顆實際的 **access token**,不是只看 +`id_token`): + +1. **JWKS 取得出 keys。** `GET /.well-known/openid-configuration` → + 其 `jwks_uri` 回傳非空的 `keys` 陣列。 +2. **access token 是 RS256 簽。** 解一顆實際的 access token,header 的 `alg` 是 + `RS256`(不是 `HS256`),且 `kid` 對得到 JWKS 裡某把 key。AuthGate 預設常是 + `JWT_SECRET`(HS256)——請確認你們真的已經把 **access token**(不只 `id_token`) + 切到非對稱簽。 +3. 
**iss 一致。** token 的 `iss` 與 plugin 設定的 `issuer` 逐字元相符(注意結尾斜線)。 + +## 維運注意事項 + +- **JWKS endpoint 要高可用。** **初次**抓取失敗時,帶 token 的請求會回 + `503 temporarily_unavailable`(不是 `401`,client 不會誤以為要重跑 OAuth), + 下一個請求會重試——初次抓失敗的結果不會被快取。抓取等待上限 10 秒,且在 per-URI + 鎖下進行,慢的 AuthGate 不會卡住其他資源的流量。注意:key 一旦進了快取,帶未知 + `kid` 的 token 會回 `401 invalid_token`(離線驗證分不出「輪替期間剛換上的 key」與 + 「偽造的 kid」),而且每小時的更新若抓到含一把壞 key 的 JWKS,可能把已快取的 key + 清掉直到下一次乾淨的更新。輪替時請保持 JWKS 有效,並讓新舊 key 充分並存。 +- **瀏覽器端的 MCP client 需要 CORS。** CORS preflight(`OPTIONS`、不帶 + `Authorization`)會被回 `401` 挑戰;若有 web client 要直連 gateway,請在 + route 上掛 Kong 的 `cors` plugin。 +- **輪替時讓新舊 key 並存。** 讓舊 key 與新 key 在 JWKS 並存一段 overlap 時間, + 在途 token 才不會被誤殺。 +- **access token TTL 設短。** 跟所有離線驗證一樣,被撤銷的 token 會一直有效到它的 + `exp`——以分鐘計、不要以小時計。 +- **bearer token 會原封不動往後送。** Kong 會加上 `X-MCP-Subject` / `X-MCP-Scope`, + 但**不會**移除或換掉 `Authorization` header,所以每個 MCP 後端都會拿到一顆可重放的 + 有效 token。請據此信任你的 MCP 後端,並用 `aud` 綁定 token(見上方的重放警告), + 讓後端無法拿它去重放到另一個資源。 diff --git a/kong-mcp/docker-compose.authgate.yml b/kong-mcp/docker-compose.authgate.yml new file mode 100644 index 0000000..9524dcd --- /dev/null +++ b/kong-mcp/docker-compose.authgate.yml @@ -0,0 +1,47 @@ +# Like docker-compose.local.yml, but mounts kong.authgate.yml (wired to a real +# AuthGate on the host at http://localhost:8080) instead of the test-issuer +# config. Mounts a locally cross-compiled binary, so build it first: +# +# CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o mcp-authgate-linux . +# docker-compose -f docker-compose.authgate.yml up -d +# +# (GOARCH=amd64 on Intel Macs / x86_64 Docker VMs.) 
+services: + kong: + image: kong:3.9 + ports: + - "8000:8000" # proxy (MCP traffic enters here) + depends_on: + - mcp-gitea + - mcp-sentry + environment: + KONG_DATABASE: "off" + KONG_DECLARATIVE_CONFIG: /kong/kong.yml + KONG_PROXY_LISTEN: "0.0.0.0:8000" + # The admin API is unauthenticated and can replace the whole gateway + # config (POST /config) even DB-less, so bind it to the container's + # loopback only: unreachable from the sibling MCP upstream containers and + # not published to the host. Reach it for debugging via + # `docker exec curl http://127.0.0.1:8001/...`. + KONG_ADMIN_LISTEN: "127.0.0.1:8001" + + # Register the Go plugin and wire the pluginserver to its binary. + KONG_PLUGINS: "bundled,mcp-authgate" + KONG_PLUGINSERVER_NAMES: "mcp-authgate" + KONG_PLUGINSERVER_MCP_AUTHGATE_START_CMD: "/usr/local/bin/mcp-authgate" + KONG_PLUGINSERVER_MCP_AUTHGATE_QUERY_CMD: "/usr/local/bin/mcp-authgate -dump" + extra_hosts: + # Lets the plugin reach AuthGate running on the macOS host at + # host.docker.internal:8080 (jwks_uri in kong.authgate.yml). + - "host.docker.internal:host-gateway" + volumes: + - ./kong.authgate.yml:/kong/kong.yml:ro + - ./mcp-authgate-linux:/usr/local/bin/mcp-authgate:ro + + # Stand-in MCP upstreams so routes resolve. Replace with your real MCP servers. + mcp-gitea: + image: hashicorp/http-echo + command: ["-text", "hello from mcp-gitea", "-listen", ":3000"] + mcp-sentry: + image: hashicorp/http-echo + command: ["-text", "hello from mcp-sentry", "-listen", ":3000"] diff --git a/kong-mcp/docker-compose.local.yml b/kong-mcp/docker-compose.local.yml new file mode 100644 index 0000000..152a5c4 --- /dev/null +++ b/kong-mcp/docker-compose.local.yml @@ -0,0 +1,48 @@ +# Local-build variant of docker-compose.yml: instead of building the plugin +# inside Docker (blocked when the network has a TLS-intercepting proxy), it +# mounts a locally cross-compiled binary into the stock kong:3.9 image. 
+# +# CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o mcp-authgate-linux . +# docker-compose -f docker-compose.local.yml up -d +# +# (GOARCH=amd64 on Intel Macs / x86_64 Docker VMs.) +services: + kong: + image: kong:3.9 + ports: + - "8000:8000" # proxy (MCP traffic enters here) + depends_on: + - mcp-gitea + - mcp-sentry + environment: + KONG_DATABASE: "off" + KONG_DECLARATIVE_CONFIG: /kong/kong.yml + KONG_PROXY_LISTEN: "0.0.0.0:8000" + # The admin API is unauthenticated and can replace the whole gateway + # config (POST /config) even DB-less, so bind it to the container's + # loopback only: unreachable from the sibling MCP upstream containers and + # not published to the host. Reach it for debugging via + # `docker exec curl http://127.0.0.1:8001/...`. + KONG_ADMIN_LISTEN: "127.0.0.1:8001" + + # Register the Go plugin and wire the pluginserver to its binary. + KONG_PLUGINS: "bundled,mcp-authgate" + KONG_PLUGINSERVER_NAMES: "mcp-authgate" + KONG_PLUGINSERVER_MCP_AUTHGATE_START_CMD: "/usr/local/bin/mcp-authgate" + KONG_PLUGINSERVER_MCP_AUTHGATE_QUERY_CMD: "/usr/local/bin/mcp-authgate -dump" + extra_hosts: + # Lets the plugin reach a test issuer running on the macOS host at + # host.docker.internal (jwks_uri in kong.local.yml). Native on Docker + # Desktop; this line makes it work on colima/plain dockerd too. + - "host.docker.internal:host-gateway" + volumes: + - ./kong.local.yml:/kong/kong.yml:ro + - ./mcp-authgate-linux:/usr/local/bin/mcp-authgate:ro + + # Stand-in MCP upstreams so routes resolve. Replace with your real MCP servers. 
+ mcp-gitea: + image: hashicorp/http-echo + command: ["-text", "hello from mcp-gitea", "-listen", ":3000"] + mcp-sentry: + image: hashicorp/http-echo + command: ["-text", "hello from mcp-sentry", "-listen", ":3000"] diff --git a/kong-mcp/docker-compose.yml b/kong-mcp/docker-compose.yml new file mode 100644 index 0000000..19e6469 --- /dev/null +++ b/kong-mcp/docker-compose.yml @@ -0,0 +1,43 @@ +# Minimal DB-less Kong running the mcp-authgate Go plugin. +# +# docker compose up --build +# +# Then exercise the handshake against http://localhost:8000 (see README §4). +# Point `issuer` / `gateway_origin` / `jwks_uri` in kong.yml at your real +# AuthGate before expecting tokens to validate. +services: + kong: + build: . + ports: + - "8000:8000" # proxy (MCP traffic enters here) + depends_on: + - mcp-gitea + - mcp-sentry + environment: + KONG_DATABASE: "off" + KONG_DECLARATIVE_CONFIG: /kong/kong.yml + KONG_PROXY_LISTEN: "0.0.0.0:8000" + # The admin API is unauthenticated and can replace the whole gateway + # config (POST /config) even DB-less, so bind it to the container's + # loopback only: unreachable from other compose containers (the MCP + # upstreams here) and not published to the host. The demo loads its + # config from KONG_DECLARATIVE_CONFIG, so the admin API isn't needed to + # run it; bind it to 0.0.0.0 (and publish the port) only for ad-hoc + # debugging on a trusted network. + KONG_ADMIN_LISTEN: "127.0.0.1:8001" + + # Register the Go plugin and wire the pluginserver to its binary. + KONG_PLUGINS: "bundled,mcp-authgate" + KONG_PLUGINSERVER_NAMES: "mcp-authgate" + KONG_PLUGINSERVER_MCP_AUTHGATE_START_CMD: "/usr/local/bin/mcp-authgate" + KONG_PLUGINSERVER_MCP_AUTHGATE_QUERY_CMD: "/usr/local/bin/mcp-authgate -dump" + volumes: + - ./kong.yml:/kong/kong.yml:ro + + # Stand-in MCP upstreams so routes resolve. Replace with your real MCP servers. 
+ mcp-gitea: + image: hashicorp/http-echo + command: ["-text", "hello from mcp-gitea", "-listen", ":3000"] + mcp-sentry: + image: hashicorp/http-echo + command: ["-text", "hello from mcp-sentry", "-listen", ":3000"] diff --git a/kong-mcp/go.mod b/kong-mcp/go.mod new file mode 100644 index 0000000..0c63bb6 --- /dev/null +++ b/kong-mcp/go.mod @@ -0,0 +1,16 @@ +module github.com/go-authgate/examples/kong-mcp + +go 1.25.10 + +require ( + github.com/Kong/go-pdk v0.11.0 + github.com/MicahParks/jwkset v0.8.0 + github.com/MicahParks/keyfunc/v3 v3.6.1 + github.com/golang-jwt/jwt/v5 v5.3.0 + golang.org/x/time v0.9.0 +) + +require ( + github.com/ugorji/go/codec v1.2.12 // indirect + google.golang.org/protobuf v1.33.0 // indirect +) diff --git a/kong-mcp/go.sum b/kong-mcp/go.sum new file mode 100644 index 0000000..41b0db2 --- /dev/null +++ b/kong-mcp/go.sum @@ -0,0 +1,26 @@ +github.com/Kong/go-pdk v0.11.0 h1:kq+73rs82EWN9psS1uA6N5Q2e1j00E6CqGOyYyuZwq8= +github.com/Kong/go-pdk v0.11.0/go.mod h1:a45ch8JrWiKe69++FuNuWCT3TrpWNHmJLho0Js/m3Bg= +github.com/MicahParks/jwkset v0.8.0 h1:jHtclI38Gibmu17XMI6+6/UB59srp58pQVxePHRK5o8= +github.com/MicahParks/jwkset v0.8.0/go.mod h1:fVrj6TmG1aKlJEeceAz7JsXGTXEn72zP1px3us53JrA= +github.com/MicahParks/keyfunc/v3 v3.6.1 h1:A8A5zGZ8XmRyxizSY7s5FLY/aSplrnEBLCOrC0D1ojM= +github.com/MicahParks/keyfunc/v3 v3.6.1/go.mod h1:y6Ed3dMgNKTcpxbaQHD8mmrYDUZWJAxteddA6OQj+ag= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= +github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= +github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= +golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/kong-mcp/kong.authgate.yml b/kong-mcp/kong.authgate.yml new file mode 100644 index 0000000..48981be --- /dev/null +++ b/kong-mcp/kong.authgate.yml @@ -0,0 +1,64 @@ +# Local config wired to a REAL AuthGate running on the macOS host at +# http://localhost:8080. Use with docker-compose.authgate.yml. +# +# Differs from kong.local.yml (which targets the bundled test issuer) only in +# where the tokens come from — see HANDS-ON.zh-TW.md §10. +# +# The localhost-vs-container split (the one gotcha plain HTTP doesn't remove): +# - issuer: matches the token's `iss` claim byte-for-byte. AuthGate's +# discovery reports issuer "http://localhost:8080", so that's the value. 
+# - jwks_uri: fetched BY KONG (inside the container), so it must be a host +# address the container can reach — host.docker.internal:8080, NOT +# localhost:8080 (which, inside the container, is the container itself). +# - gateway_origin: the externally reachable Kong proxy, unchanged. +_format_version: "3.0" + +services: + # gitea route — frictionless: no scope requirement, so ANY valid AuthGate + # access token is forwarded. AuthGate's scopes_supported is only + # `openid profile email` (no `mcp:gitea`), so leaving required_scopes empty here + # avoids a guaranteed 403. Register `mcp:gitea` on AuthGate and add it back + # to enforce it. + - name: mcp-gitea + url: http://mcp-gitea:3000 + routes: + - name: mcp-gitea + paths: + - /mcp/gitea + - /.well-known/oauth-protected-resource/mcp/gitea + strip_path: true + plugins: + - name: mcp-authgate + config: + issuer: http://localhost:8080 + gateway_origin: http://localhost:8000 + resource_path: /mcp/gitea + jwks_uri: http://host.docker.internal:8080/.well-known/jwks.json + required_scopes: [] + # Enable only after decoding a real access token and confirming its + # `aud` == gateway_origin + resource_path; otherwise every token 401s. + require_audience: false + leeway_seconds: 60 + + # sentry route — demonstrates scope enforcement using `email`, a scope + # AuthGate actually issues. A token requested with `--scope "email"` passes; + # one without it gets 403 insufficient_scope.
+ - name: mcp-sentry + url: http://mcp-sentry:3000 + routes: + - name: mcp-sentry + paths: + - /mcp/sentry + - /.well-known/oauth-protected-resource/mcp/sentry + strip_path: true + plugins: + - name: mcp-authgate + config: + issuer: http://localhost:8080 + gateway_origin: http://localhost:8000 + resource_path: /mcp/sentry + jwks_uri: http://host.docker.internal:8080/.well-known/jwks.json + required_scopes: + - email + require_audience: false + leeway_seconds: 60 diff --git a/kong-mcp/kong.local.yml b/kong-mcp/kong.local.yml new file mode 100644 index 0000000..7af1e61 --- /dev/null +++ b/kong-mcp/kong.local.yml @@ -0,0 +1,54 @@ +# Local hands-on config — wires the plugin to the bundled test issuer +# (../go-jwks-multi/testissuer) instead of a real AuthGate, so the whole +# handshake can be exercised end-to-end on a laptop. Used by +# docker-compose.local.yml. See HANDS-ON.zh-TW.md. +# +# Key trick for the issuer/jwks split: +# - issuer: must match the token's `iss` claim byte-for-byte. The test +# issuer mints `iss: http://127.0.0.1:9001`, so that's what we compare to. +# - jwks_uri: Kong (in the container) fetches keys from here, so it must be +# reachable FROM the container — host.docker.internal:9001, not 127.0.0.1. +_format_version: "3.0" + +services: + - name: mcp-gitea + url: http://mcp-gitea:3000 + routes: + - name: mcp-gitea + paths: + - /mcp/gitea + - /.well-known/oauth-protected-resource/mcp/gitea + strip_path: true + plugins: + - name: mcp-authgate + config: + issuer: http://127.0.0.1:9001 + gateway_origin: http://localhost:8000 + resource_path: /mcp/gitea + jwks_uri: http://host.docker.internal:9001/jwks.json + required_scopes: + - mcp:gitea + require_audience: false + leeway_seconds: 60 + + # Same plugin, but with require_audience: true so the cross-audience + # security row (5b) can be exercised. 
+ - name: mcp-sentry + url: http://mcp-sentry:3000 + routes: + - name: mcp-sentry + paths: + - /mcp/sentry + - /.well-known/oauth-protected-resource/mcp/sentry + strip_path: true + plugins: + - name: mcp-authgate + config: + issuer: http://127.0.0.1:9001 + gateway_origin: http://localhost:8000 + resource_path: /mcp/sentry + jwks_uri: http://host.docker.internal:9001/jwks.json + required_scopes: + - mcp:sentry + require_audience: true + leeway_seconds: 60 diff --git a/kong-mcp/kong.yml b/kong-mcp/kong.yml new file mode 100644 index 0000000..6773b77 --- /dev/null +++ b/kong-mcp/kong.yml @@ -0,0 +1,56 @@ +# Kong DB-less declarative config — one mcp-authgate plugin per MCP server. +# +# The same plugin protects every MCP service; only `resource_path`, +# `required_scopes`, and the upstream `url` change. Swap the example.com +# hosts for your AuthGate and gateway origins before loading. +# +# IMPORTANT: each route must ALSO match its Protected Resource Metadata path +# (/.well-known/oauth-protected-resource + resource_path), otherwise Kong has +# no route to hand the client's PRM lookup to and the plugin never runs. +_format_version: "3.0" + +services: + - name: mcp-gitea + url: http://mcp-gitea:3000 + routes: + - name: mcp-gitea + paths: + - /mcp/gitea + - /.well-known/oauth-protected-resource/mcp/gitea + # Kong's default, spelled out because it decides what the upstream + # sees: true -> the MCP server serves "/"; set false if it expects + # the original /mcp/gitea path. + strip_path: true + plugins: + - name: mcp-authgate + config: + issuer: https://auth.example.com + gateway_origin: https://gw.example.com + resource_path: /mcp/gitea + jwks_uri: https://auth.example.com/.well-known/jwks.json + required_scopes: + - mcp:gitea + # Flip to true once AuthGate emits a per-resource `aud` and the + # access token's aud == gateway_origin + resource_path. 
+ require_audience: false + leeway_seconds: 60 + + - name: mcp-sentry + url: http://mcp-sentry:3000 + routes: + - name: mcp-sentry + paths: + - /mcp/sentry + - /.well-known/oauth-protected-resource/mcp/sentry + strip_path: true + plugins: + - name: mcp-authgate + config: + issuer: https://auth.example.com + gateway_origin: https://gw.example.com + resource_path: /mcp/sentry + jwks_uri: https://auth.example.com/.well-known/jwks.json + required_scopes: + - mcp:sentry + require_audience: false + leeway_seconds: 60 diff --git a/kong-mcp/main.go b/kong-mcp/main.go new file mode 100644 index 0000000..0eb5af2 --- /dev/null +++ b/kong-mcp/main.go @@ -0,0 +1,476 @@ +// Package main: Kong (go-pdk) plugin — unified MCP OAuth front door (steps 2/3/5) +// in front of any number of MCP servers, backed by AuthGate and verifying tokens +// with RS256 + JWKS. +// +// The MCP authorization handshake (2025-06 spec, building on RFC 9728 / RFC 6750): +// +// (2) 401 + WWW-Authenticate: Bearer resource_metadata="<PRM URL>" +// — tell an unauthenticated client *where the flow lives*, not how to run it. +// (3) GET /.well-known/oauth-protected-resource/<resource path> +// — serve Protected Resource Metadata (RFC 9728): which AuthGate to use, +// which scopes, how to present the token. +// (5) verify the RS256 access token against AuthGate's JWKS +// (signature + iss + exp + type, plus scope when required_scopes is set and +// aud only when require_audience is on), then forward upstream to the MCP server. +// +// Kong never runs the OAuth flow. The MCP client drives Auth Code + PKCE against +// AuthGate itself; Kong only advertises the entry point and validates what comes +// back. One plugin config protects one MCP resource; attach it to as many +// services as you have MCP servers. +// +// Accepted algorithms are pinned to the RS family, so a token signed HS256 with +// the RSA *public* key (the classic alg-confusion forgery) is rejected.
JWKS +// fetch / cache / background rotation / rate-limited refetch on an unknown kid +// are handled by MicahParks/keyfunc + jwkset, configured to fail fast: a failed +// initial fetch surfaces as 503 instead of being cached as an empty key set +// (once keys are cached, an unknown kid is a 401 — see Access), and the fetch +// runs under a per-URI lock so a slow AuthGate cannot stall the whole gateway. +package main + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log/slog" + "net/url" + "os" + "slices" + "strings" + "sync" + "time" + + "github.com/Kong/go-pdk" + "github.com/Kong/go-pdk/server" + "github.com/MicahParks/jwkset" + "github.com/MicahParks/keyfunc/v3" + "github.com/golang-jwt/jwt/v5" + "golang.org/x/time/rate" +) + +var ( + Version = "0.3.0" + Priority = 1000 +) + +const ( + // wellKnownPrefix is fixed by RFC 9728 — clients derive it themselves, so + // it is not configurable. + wellKnownPrefix = "/.well-known/oauth-protected-resource" + // jwksHTTPTimeout caps every JWKS fetch and unknown-kid refetch wait; the + // library default of one minute would let a slow AuthGate stall requests. + jwksHTTPTimeout = 10 * time.Second +) + +// rsMethods pins accepted algorithms to the RS family: never accept HS* when +// expecting RS -> no alg confusion. +var rsMethods = []string{"RS256", "RS384", "RS512"} + +// Config is the plugin schema (one instance per MCP resource/service). +type Config struct { + Issuer string `json:"issuer"` // AuthGate base URL == token iss + GatewayOrigin string `json:"gateway_origin"` // externally reachable Kong origin + ResourcePath string `json:"resource_path"` // e.g. 
/mcp/gitea + Audience string `json:"audience"` // expected aud; default GatewayOrigin+ResourcePath + RequiredScopes []string `json:"required_scopes"` // all must be present + JWKSURI string `json:"jwks_uri"` // AuthGate JWKS endpoint (RS256) + RequireAudience bool `json:"require_audience"` // false until AuthGate emits per-resource aud + LeewaySeconds int `json:"leeway_seconds"` // clock-skew tolerance for exp/nbf + + // derived once per instance — Access runs per request, config never changes + setupOnce sync.Once + setupErr error + parser *jwt.Parser + prmPath string // wellKnownPrefix + ResourcePath + bearerMeta string // WWW-Authenticate challenge pointing at this resource's PRM + requiredScopeStr string // RequiredScopes joined with spaces (for the 403 challenge) +} + +func New() any { return &Config{} } + +// setup validates required fields and derives per-instance values. go-pdk's +// generated schema cannot mark fields required, so this is the only layer that +// can reject a half-filled config — better one loud 500 than e.g. silently +// skipping issuer validation (golang-jwt ignores WithIssuer("")). +func (conf *Config) setup() error { + conf.setupOnce.Do(func() { + var missing []string + for _, f := range []struct{ name, value string }{ + {"issuer", conf.Issuer}, + {"gateway_origin", conf.GatewayOrigin}, + {"resource_path", conf.ResourcePath}, + {"jwks_uri", conf.JWKSURI}, + } { + if f.value == "" { + missing = append(missing, f.name) + } + } + if len(missing) > 0 { + conf.setupErr = fmt.Errorf("missing required plugin config: %s", strings.Join(missing, ", ")) + return + } + + // shape checks: a non-empty but malformed path/origin would otherwise + // concatenate into a silently-broken PRM URL that no Kong route matches + // (e.g. resource_path "mcp/gitea" -> ".../oauth-protected-resourcemcp/gitea"), + // failing every request with no diagnostic. Fail loudly instead. 
+ var invalid []string + if !strings.HasPrefix(conf.ResourcePath, "/") { + invalid = append(invalid, `resource_path must start with "/"`) + } + // a trailing slash makes prmPath end in "/", which the exact match in + // Access (TrimSuffix(path,"/") == prmPath) can never satisfy, so the + // metadata route would silently never serve. + if strings.HasSuffix(conf.ResourcePath, "/") { + invalid = append(invalid, `resource_path must not end with "/"`) + } + if strings.HasSuffix(conf.GatewayOrigin, "/") { + invalid = append(invalid, `gateway_origin must not end with "/"`) + } + // issuer/gateway_origin/jwks_uri are concatenated into URLs (PRM URL, + // audience) and fetched (JWKS); a relative or schemeless value would + // otherwise surface only at traffic time as an opaque per-request 503 + // (jwks_uri) or a silent universal 401 (issuer). + for _, u := range []struct{ name, value string }{ + {"issuer", conf.Issuer}, + {"gateway_origin", conf.GatewayOrigin}, + {"jwks_uri", conf.JWKSURI}, + } { + parsed, err := url.Parse(u.value) + if err != nil || !parsed.IsAbs() || (parsed.Scheme != "http" && parsed.Scheme != "https") { + invalid = append(invalid, u.name+` must be an absolute http(s) URL`) + } + } + if conf.LeewaySeconds < 0 { + invalid = append(invalid, "leeway_seconds must not be negative") + } + if len(invalid) > 0 { + conf.setupErr = fmt.Errorf("invalid plugin config: %s", strings.Join(invalid, "; ")) + return + } + + conf.prmPath = wellKnownPrefix + conf.ResourcePath + conf.bearerMeta = fmt.Sprintf(`Bearer resource_metadata="%s"`, conf.GatewayOrigin+conf.prmPath) + conf.requiredScopeStr = strings.Join(conf.RequiredScopes, " ") + + opts := []jwt.ParserOption{ + jwt.WithValidMethods(rsMethods), + jwt.WithIssuer(conf.Issuer), + jwt.WithExpirationRequired(), + } + if conf.LeewaySeconds > 0 { + opts = append(opts, jwt.WithLeeway(time.Duration(conf.LeewaySeconds)*time.Second)) + } + if conf.RequireAudience { + opts = append(opts, jwt.WithAudience(conf.audience())) + } + 
conf.parser = jwt.NewParser(opts...) // goroutine-safe, reused across requests
	})
	return conf.setupErr
}

// audience returns the value passed to jwt.WithAudience when RequireAudience
// is set: the explicit Audience override when one is configured, otherwise the
// resource identifier GatewayOrigin + ResourcePath.
func (conf *Config) audience() string {
	if conf.Audience != "" {
		return conf.Audience
	}
	return conf.GatewayOrigin + conf.ResourcePath
}

// JWKS cache: the plugin server is long-lived, so one self-refreshing keyfunc
// per JWKS URI is shared across the whole process. Construction performs a
// synchronous initial HTTP fetch (up to jwksHTTPTimeout); it runs under a
// per-URI lock, never a process-global one, so a slow or unreachable AuthGate
// stalls only the first cold caller for that URI — not warm requests, and not
// requests for a different URI.
var (
	jwksMu     sync.RWMutex                   // guards jwksCache reads/writes
	jwksCache  = map[string]keyfunc.Keyfunc{} // built keyfuncs, keyed by URI
	jwksInitMu sync.Mutex                     // guards jwksInit
	jwksInit   = map[string]*sync.Mutex{}     // per-URI construction lock
)

// errJWKSUnavailable marks verification-infrastructure failures (as opposed to
// defects in the presented token) so Access can answer 503 instead of 401.
var errJWKSUnavailable = errors.New("JWKS unavailable")

// getJWKS builds (once per URI) a keyfunc with hourly background refresh and
// rate-limited refetch on an unknown kid. Unlike keyfunc.NewDefault, a failed
// first fetch is returned as an error — not cached as an empty key set that
// would 401 every token until the next refresh window — so the next request
// simply retries.
//
// On success the keyfunc is stored in jwksCache and returned; on any
// construction error nothing is cached and the context handed to the storage
// is cancelled before returning.
func getJWKS(uri string) (keyfunc.Keyfunc, error) {
	// warm path: a shared read lock is all a cached URI ever takes
	jwksMu.RLock()
	k, ok := jwksCache[uri]
	jwksMu.RUnlock()
	if ok {
		return k, nil
	}

	// cold path: serialize construction per URI so concurrent first callers
	// build exactly one keyfunc — but hold only this URI's lock (not jwksMu)
	// across the blocking fetch below, so other URIs and warm reads never wait.
	jwksInitMu.Lock()
	initMu, ok := jwksInit[uri]
	if !ok {
		initMu = &sync.Mutex{}
		jwksInit[uri] = initMu
	}
	jwksInitMu.Unlock()

	initMu.Lock()
	defer initMu.Unlock()

	// another caller may have built it while we waited for initMu
	jwksMu.RLock()
	k, ok = jwksCache[uri]
	jwksMu.RUnlock()
	if ok {
		return k, nil
	}

	// the context lives as long as the cached keyfunc; cancel only on
	// construction failure so the refresh goroutine doesn't leak per retry
	ctx, cancel := context.WithCancel(context.Background())
	cached := false
	defer func() {
		if !cached {
			cancel()
		}
	}()
	store, err := jwkset.NewStorageFromHTTP(uri, jwkset.HTTPClientStorageOptions{
		Ctx:             ctx,
		HTTPTimeout:     jwksHTTPTimeout,
		RefreshInterval: time.Hour,
		RefreshErrorHandler: func(ctx context.Context, err error) {
			slog.Error("failed to refresh JWK Set", "url", uri, "error", err)
		},
	})
	if err != nil {
		return nil, err
	}
	client, err := jwkset.NewHTTPClient(jwkset.HTTPClientOptions{
		HTTPURLs:          map[string]jwkset.Storage{uri: store},
		RateLimitWaitMax:  jwksHTTPTimeout,
		RefreshUnknownKID: rate.NewLimiter(rate.Every(5*time.Minute), 1),
	})
	if err != nil {
		return nil, err
	}
	k, err = keyfunc.New(keyfunc.Options{Ctx: ctx, Storage: client})
	if err != nil {
		return nil, err
	}
	jwksMu.Lock()
	jwksCache[uri] = k
	jwksMu.Unlock()
	cached = true // keep ctx alive: the deferred cancel is now a no-op
	return k, nil
}

// keyFunc is the jwt.Keyfunc handed to the parser. It resolves the signing key
// through the shared keyfunc for conf.JWKSURI; a failure to obtain that keyfunc
// is wrapped in errJWKSUnavailable so callers can tell it apart (errors.Is)
// from an error returned by the key lookup itself.
func (conf *Config) keyFunc(token *jwt.Token) (any, error) {
	kf, err := getJWKS(conf.JWKSURI)
	if err != nil {
		return nil, fmt.Errorf("%w: %v", errJWKSUnavailable, err)
	}
	return kf.Keyfunc(token)
}

// exitJSON ends the request with the given status and v encoded as the JSON
// body. Content-Type is always set to application/json; headers may be nil
// and, when non-nil, is modified in place.
//
// If v cannot be encoded, the failure is logged and a generic 500 is sent
// instead — without the caller's headers — rather than an empty body under the
// caller's status.
func exitJSON(kong *pdk.PDK, status int, v any, headers map[string][]string) {
	body, err := json.Marshal(v)
	if err != nil {
		_ = kong.Log.Err("cannot encode response body: ", err.Error())
		status = 500
		body = []byte(`{"error":"server_error","error_description":"cannot encode response"}`)
		headers = nil // caller headers (e.g. a challenge) do not belong on this 500
	}
	if headers == nil {
		headers = map[string][]string{}
	}
	headers["Content-Type"] = []string{"application/json"}
	kong.Response.Exit(status, body, headers)
}

// hasCtrl reports whether s contains a control character (incl. CR/LF).
Such a +// byte in a claim that is forwarded as a header value could split or smuggle an +// upstream header, and in a scope string would be swallowed by strings.Fields. +func hasCtrl(s string) bool { + return strings.IndexFunc(s, func(r rune) bool { return r < 0x20 || r == 0x7f }) >= 0 +} + +func hasAllScopes(scope string, required []string) bool { + have := strings.Fields(scope) + for _, r := range required { + if !slices.Contains(have, r) { + return false + } + } + return true +} + +func (conf *Config) Access(kong *pdk.PDK) { + if err := conf.setup(); err != nil { + _ = kong.Log.Crit(err.Error()) + exitJSON(kong, 500, map[string]string{ + "error": "server_error", + "error_description": "plugin misconfigured; see gateway logs", + }, nil) + return + } + + path, err := kong.Request.GetPath() + if err != nil { + exitJSON(kong, 500, map[string]string{ + "error": "server_error", + "error_description": "cannot read request path", + }, nil) + return + } + + // (3) serve Protected Resource Metadata — matched exactly (plus a + // trailing-slash variant) so a prefix route can never answer for another + // resource's metadata path; safe methods only (GET per RFC 9728 §3.1, plus + // HEAD), everything else -> 405 + if strings.TrimSuffix(path, "/") == conf.prmPath { + if method, _ := kong.Request.GetMethod(); method != "GET" && method != "HEAD" { + exitJSON(kong, 405, map[string]string{ + "error": "method_not_allowed", + "error_description": "resource metadata is served via GET", + }, map[string][]string{"Allow": {"GET, HEAD"}}) + return + } + prm := map[string]any{ + // RFC 9728 §3.3: must equal the identifier the well-known URL was + // derived from — never the aud override, which only tunes token + // validation + "resource": conf.GatewayOrigin + conf.ResourcePath, + "authorization_servers": []string{conf.Issuer}, + "bearer_methods_supported": []string{"header"}, + } + if len(conf.RequiredScopes) > 0 { // optional member: omit rather than null + prm["scopes_supported"] = 
conf.RequiredScopes + } + exitJSON(kong, 200, prm, nil) + return + } + + challenge := func(status int, wwwAuth, errCode, desc string) { + exitJSON(kong, status, + map[string]string{"error": errCode, "error_description": desc}, + map[string][]string{"WWW-Authenticate": {wwwAuth}}) + } + + // (2) challenge unless the client presented a Bearer token (RFC 6750 §2.1); + // other Authorization schemes are rejected, not parsed as a token. No + // error attribute here: a bare challenge means "no credentials yet" (§3.1). + // Read every occurrence: the request is forwarded with all of its headers, + // so validating one Authorization value while proxying others would let a + // client smuggle an unvalidated credential past the gateway. A PDK error + // is a gateway fault, not "no credentials" — answer 5xx, not a challenge. + headers, err := kong.Request.GetHeaders(1000) + if err != nil { + exitJSON(kong, 500, map[string]string{ + "error": "server_error", + "error_description": "cannot read request headers", + }, nil) + return + } + var auths []string + for name, values := range headers { + if strings.EqualFold(name, "Authorization") { + auths = append(auths, values...) 
+ } + } + if len(auths) > 1 { + challenge(400, conf.bearerMeta+`, error="invalid_request"`, + "invalid_request", "multiple Authorization headers are not allowed") + return + } + var raw string + if len(auths) == 1 { + if auth := auths[0]; len(auth) > 7 && strings.EqualFold(auth[:7], "Bearer ") { + raw = strings.TrimSpace(auth[7:]) + } + } + if raw == "" { + challenge(401, conf.bearerMeta, "unauthorized", "missing bearer token") + return + } + + // (5) validate RS256 (JWKS) + iss + aud + exp + claims := jwt.MapClaims{} + if _, err := conf.parser.ParseWithClaims(raw, claims, conf.keyFunc); err != nil { + if errors.Is(err, errJWKSUnavailable) { + // infrastructure problem, not a token problem: don't tell the + // client to re-run OAuth, and keep the details in the logs + _ = kong.Log.Err("JWKS fetch failed: ", err.Error()) + exitJSON(kong, 503, map[string]string{ + "error": "temporarily_unavailable", + "error_description": "token verification keys are unavailable", + }, nil) + return + } + _ = kong.Log.Info("rejected token: ", err.Error()) + challenge(401, conf.bearerMeta+`, error="invalid_token"`, + "invalid_token", "invalid or expired access token") + return + } + + sub, _ := claims["sub"].(string) + scope, _ := claims["scope"].(string) + + // reject anything that is not an access token: AuthGate signs refresh + // tokens with the same key, iss, aud, and scope — only the "type" claim and + // a longer exp differ — so without this check a leaked refresh token would + // be accepted as a bearer credential, defeating the short access-token TTL. + // Mirrors AuthGate's own resource-server validation. 
+ if t, _ := claims["type"].(string); t != "access" { + _ = kong.Log.Info("rejected non-access token; type=", t) + challenge(401, conf.bearerMeta+`, error="invalid_token"`, + "invalid_token", "not an access token") + return + } + + // sub/scope are forwarded as upstream headers and scope feeds the check + // below; a control char (CR/LF) could split a header or smuggle a scope + // token (strings.Fields would swallow it). A real AuthGate token never + // carries one, so reject rather than forward. + if hasCtrl(sub) || hasCtrl(scope) { + _ = kong.Log.Info("rejected token with control chars in sub/scope") + challenge(401, conf.bearerMeta+`, error="invalid_token"`, + "invalid_token", "malformed token claims") + return + } + + if len(conf.RequiredScopes) > 0 && !hasAllScopes(scope, conf.RequiredScopes) { + challenge(403, + fmt.Sprintf(`%s, error="insufficient_scope", scope="%s"`, conf.bearerMeta, conf.requiredScopeStr), + "insufficient_scope", "requires scope: "+conf.requiredScopeStr) + return + } + + // surface identity to the MCP backend — clear inbound copies first so a + // client can never smuggle its own values through the trusted headers. + // Fail closed: if a clear/set is not confirmed, proxying anyway would + // forward client-supplied values on headers the backend is told to trust. 
+ for _, h := range []struct{ name, value string }{ + {"X-MCP-Subject", sub}, + {"X-MCP-Scope", scope}, + } { + err := kong.ServiceRequest.ClearHeader(h.name) + if err == nil && h.value != "" { + err = kong.ServiceRequest.SetHeader(h.name, h.value) + } + if err != nil { + _ = kong.Log.Err("failed to set trusted header ", h.name, ": ", err.Error()) + exitJSON(kong, 500, map[string]string{ + "error": "server_error", + "error_description": "cannot set identity headers", + }, nil) + return + } + } + // fall through -> Kong forwards to upstream (Authorization preserved) +} + +// main exits non-zero on a failed start: server.StartServer returns socket +// errors without logging, and a silent exit 0 reads as a healthy pluginserver. +func main() { + if err := server.StartServer(New, Version, Priority); err != nil { + slog.Error("plugin server exited", "error", err) + os.Exit(1) + } +}