diff --git a/contrib/sni-router/Caddyfile b/contrib/sni-router/Caddyfile index d3ec52803..31b708bdd 100644 --- a/contrib/sni-router/Caddyfile +++ b/contrib/sni-router/Caddyfile @@ -10,14 +10,16 @@ # to Caddy's access log. The `tls` wrapper must follow so that TLS # is terminated on the unwrapped connection. # - # `allow` lists the networks permitted to send PROXY headers. These - # ranges cover docker compose's default bridge networks; tighten - # them if you pin a specific subnet in docker-compose.yml. + # `allow` lists the networks permitted to send PROXY headers. + # 127.0.0.1/32 covers HAProxy reaching Caddy over host loopback (HAProxy + # runs in network_mode: host and connects to the published 127.0.0.1 + # port). The RFC1918 ranges cover mtg → Caddy on the compose bridge + # (fronting path; see "Fronting loop" in README.md). servers :8443 { listener_wrappers { proxy_protocol { timeout 5s - allow 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 + allow 127.0.0.1/32 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 } tls } diff --git a/contrib/sni-router/README.md b/contrib/sni-router/README.md index e0e4e5bad..6e48dd3b1 100644 --- a/contrib/sni-router/README.md +++ b/contrib/sni-router/README.md @@ -63,6 +63,29 @@ must stay in sync: If you disable one, disable all four, otherwise the backend will fail to parse the connection. +### Why HAProxy uses `network_mode: host` + +A published port on a bridge network rewrites the source IP of inbound +connections to the bridge gateway before HAProxy sees it (Docker's +`docker-proxy`, Podman's `slirp4netns`/`pasta`), so the PROXY v2 header +HAProxy forwards downstream carries that gateway address, not the real +client. Host-mode HAProxy binds in the host netns directly, no NAT in +the path, and the rewrite never happens. mtg and Caddy stay on the +compose bridge and are published on `127.0.0.1` only — HAProxy reaches +them over host loopback. 
`mtg-config.toml` does not need to change; +fronting still uses `host = "web"` over compose-network DNS. + +**Trade-offs.** +- HAProxy owns the host's `:443` and `:80` — don't run anything else + on those ports. +- Linux host only. On Docker Desktop (macOS/Windows), "host" means + the Linux VM, not the user's machine, so external clients can't + reach the proxy. +- HAProxy must be able to bind `<1024` in the host netns, and the per-container + `net.ipv4.ip_unprivileged_port_start` sysctl is no longer set. If HAProxy runs unprivileged + (non-root image user, or in-container "root" under Docker's `userns-remap`), lower + `net.ipv4.ip_unprivileged_port_start` on the host or disable `userns-remap` for this stack. + ## Fronting loop (why `[domain-fronting]` is set explicitly) When mtg sees TLS that isn't valid Telegram (a probe or a browser diff --git a/contrib/sni-router/docker-compose.yml b/contrib/sni-router/docker-compose.yml index 54344a257..7ff735622 100644 --- a/contrib/sni-router/docker-compose.yml +++ b/contrib/sni-router/docker-compose.yml @@ -27,9 +27,10 @@ x-domain-env: &domain-env services: haproxy: image: haproxy:lts-alpine - ports: - - "443:443" - - "80:80" + # Host netns so HAProxy sees real client IPs (v4/v6) instead of the + # bridge gateway address. Linux host only; see README → "Why HAProxy + # uses network_mode: host" for the rationale and trade-offs. + network_mode: host volumes: - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro,Z environment: @@ -38,16 +39,16 @@ services: - mtg - web restart: unless-stopped - sysctls: - - net.ipv4.ip_unprivileged_port_start=80 mtg: # FIXME: :master until #480 lands in a tagged release; switch back to :2/:3 after release image: nineseconds/mtg:master volumes: - ./mtg-config.toml:/config/config.toml:ro,Z - expose: - - "3128" + # Published on host loopback only — HAProxy (host netns) reaches it via + # 127.0.0.1. 
+ ports: + - "127.0.0.1:3128:3128" restart: unless-stopped extra_hosts: - "host.containers.internal:host-gateway" @@ -58,9 +59,11 @@ services: - ./Caddyfile:/etc/caddy/Caddyfile:ro,Z - caddy_data:/data - ./www:/srv:ro,Z - expose: - - "80" - - "8443" + # Published on host loopback only — HAProxy reaches Caddy on 127.0.0.1. + # Port 8080 (not 80) on the host because HAProxy already owns host :80. + ports: + - "127.0.0.1:8080:80" + - "127.0.0.1:8443:8443" environment: <<: *domain-env restart: unless-stopped diff --git a/contrib/sni-router/haproxy.cfg b/contrib/sni-router/haproxy.cfg index 14aba963b..e3bd444ad 100644 --- a/contrib/sni-router/haproxy.cfg +++ b/contrib/sni-router/haproxy.cfg @@ -23,7 +23,9 @@ defaults # --- HTTP :80 — ACME challenges + redirect ----------------------------------- frontend http - bind *:80 + # Explicit v4 + v6 binds so IPv6 clients are accepted regardless of + # the host's net.ipv6.bindv6only sysctl. + bind :80,[::]:80 mode http # Let Caddy answer ACME HTTP-01 challenges for Let's Encrypt. @@ -35,7 +37,7 @@ frontend http # --- TLS :443 — SNI-based routing ------------------------------------------- frontend tls - bind *:443 + bind :443,[::]:443 tcp-request inspect-delay 5s tcp-request content accept if { req_ssl_hello_type 1 } @@ -46,18 +48,23 @@ frontend tls default_backend web +# Backends reach mtg and web on host loopback — they publish to 127.0.0.1 +# (see docker-compose.yml), and HAProxy runs in the host netns +# (network_mode: host). PROXY v2 still carries the real client address +# (v4 or v6) end-to-end, independent of the loopback transport. + backend mtg # send-proxy-v2 prepends a PROXY protocol v2 header so mtg sees the # real client IP instead of HAProxy's. mtg must have # `proxy-protocol-listener = true` in its config. - server mtg mtg:3128 send-proxy-v2 + server mtg 127.0.0.1:3128 send-proxy-v2 backend web # send-proxy-v2 prepends a PROXY protocol v2 header so Caddy logs the # real client IP instead of HAProxy's. 
Caddy must enable the # proxy_protocol listener wrapper on :8443 (see Caddyfile). - server web web:8443 send-proxy-v2 + server web 127.0.0.1:8443 send-proxy-v2 backend web_acme mode http - server web web:80 + server web 127.0.0.1:8080