From 4a4e001980322e869c934d851e50fbea58490db1 Mon Sep 17 00:00:00 2001 From: Alexey Dolotov Date: Mon, 18 May 2026 11:53:57 +0000 Subject: [PATCH 1/3] sni-router: switch HAProxy to host networking for real client IPs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bridge ingress (Docker's docker-proxy userland forwarder, Podman's slirp4netns/pasta) rewrites the source IP of inbound connections on a published port to the bridge gateway address. HAProxy then stamps that gateway address into the PROXY v2 header it forwards to mtg and Caddy, so neither backend ever sees a real client IP. Move HAProxy into the host netns (network_mode: host) so it binds :443/:80 directly with no NAT in the path. mtg and Caddy stay on the compose bridge and are published on 127.0.0.1 only; HAProxy reaches them via host loopback and PROXY v2 carries the real client IP (v4 or v6) end-to-end. Also accept IPv6 clients explicitly on the HAProxy frontends — `bind *:443` is IPv4-only and missed v6 clients on hosts where the previous example happened to "work" only because of dual-stack quirks. Add 127.0.0.0/8 to Caddy's PROXY allow-list to cover the new loopback hop from HAProxy. README gains a short subsection explaining the host-mode choice and its trade-off (HAProxy occupies host :443/:80). Diagnosed and tested by @bam80 on Fedora + Docker 29. Fixes #498. --- contrib/sni-router/Caddyfile | 10 ++++++---- contrib/sni-router/README.md | 20 +++++++++++++++++++ contrib/sni-router/docker-compose.yml | 28 +++++++++++++++++---------- contrib/sni-router/haproxy.cfg | 20 ++++++++++++++----- 4 files changed, 59 insertions(+), 19 deletions(-) diff --git a/contrib/sni-router/Caddyfile b/contrib/sni-router/Caddyfile index d3ec52803..d98ae568e 100644 --- a/contrib/sni-router/Caddyfile +++ b/contrib/sni-router/Caddyfile @@ -10,14 +10,16 @@ # to Caddy's access log. The `tls` wrapper must follow so that TLS # is terminated on the unwrapped connection. # - # `allow` lists the networks permitted to send PROXY headers. These - # ranges cover docker compose's default bridge networks; tighten - # them if you pin a specific subnet in docker-compose.yml. + # `allow` lists the networks permitted to send PROXY headers. + # 127.0.0.0/8 covers HAProxy reaching Caddy over host loopback (HAProxy + # runs in network_mode: host and connects to the published 127.0.0.1 + # port). The RFC1918 ranges cover mtg → Caddy on the compose bridge + # (fronting path; see "Fronting loop" in README.md). servers :8443 { listener_wrappers { proxy_protocol { timeout 5s - allow 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 + allow 127.0.0.0/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 } tls } diff --git a/contrib/sni-router/README.md b/contrib/sni-router/README.md index e0e4e5bad..54cdbbfc6 100644 --- a/contrib/sni-router/README.md +++ b/contrib/sni-router/README.md @@ -63,6 +63,26 @@ must stay in sync: If you disable one, disable all four, otherwise the backend will fail to parse the connection. +### Why HAProxy uses `network_mode: host` + +When a container is on a bridge network and a port is published with +`ports: "443:443"`, the source IP of inbound connections is rewritten +to the bridge gateway before HAProxy sees it — Docker's `docker-proxy` +userland forwarder accepts on the host and re-opens the connection +from the gateway; Podman's `slirp4netns` / `pasta` does the same in +rootless mode. The PROXY v2 header HAProxy then sends downstream +carries that gateway address (e.g. `172.x.x.1`), not the real client. + +`network_mode: host` puts HAProxy in the host network namespace, so it +binds `:443` / `:80` directly with no NAT in the path and observes the +true source address of every connection. mtg and Caddy stay on the +compose bridge and are published only on `127.0.0.1` — HAProxy reaches +them via host loopback, and the PROXY v2 header carries the real +client IP (v4 or v6) end-to-end. + +Trade-off: HAProxy occupies the host's `:443` and `:80`. Don't run +anything else on those ports on the same host. + ## Fronting loop (why `[domain-fronting]` is set explicitly) When mtg sees TLS that isn't valid Telegram (a probe or a browser diff --git a/contrib/sni-router/docker-compose.yml b/contrib/sni-router/docker-compose.yml index 54344a257..52804f5f4 100644 --- a/contrib/sni-router/docker-compose.yml +++ b/contrib/sni-router/docker-compose.yml @@ -27,9 +27,16 @@ x-domain-env: &domain-env services: haproxy: image: haproxy:lts-alpine - ports: - - "443:443" - - "80:80" + # network_mode: host lets HAProxy see real client source IPs (v4 and v6) + # instead of the docker/podman bridge gateway. Bridge ingress (docker-proxy + # userland forwarder, podman slirp4netns/pasta) rewrites the source address + # of inbound connections to the gateway; with host networking HAProxy binds + # in the host netns directly and the rewrite never happens. See the + # "Real client IPs" section of README.md. + # + # Trade-off: HAProxy occupies host :443 and :80. Don't run anything else + # on those ports. + network_mode: host volumes: - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro,Z environment: @@ -38,16 +45,16 @@ services: - mtg - web restart: unless-stopped - sysctls: - - net.ipv4.ip_unprivileged_port_start=80 mtg: # FIXME: :master until #480 lands in a tagged release; switch back to :2/:3 after release image: nineseconds/mtg:master volumes: - ./mtg-config.toml:/config/config.toml:ro,Z - expose: - - "3128" + # Published on host loopback only — HAProxy (host netns) reaches it via + # 127.0.0.1:3128. Not exposed on any public interface. + ports: + - "127.0.0.1:3128:3128" restart: unless-stopped extra_hosts: - "host.containers.internal:host-gateway" @@ -58,9 +65,10 @@ services: - ./Caddyfile:/etc/caddy/Caddyfile:ro,Z - caddy_data:/data - ./www:/srv:ro,Z - expose: - - "80" - - "8443" + # Published on host loopback only — HAProxy reaches Caddy on 127.0.0.1. + ports: + - "127.0.0.1:8080:80" + - "127.0.0.1:8443:8443" environment: <<: *domain-env restart: unless-stopped diff --git a/contrib/sni-router/haproxy.cfg b/contrib/sni-router/haproxy.cfg index 14aba963b..b6a70d220 100644 --- a/contrib/sni-router/haproxy.cfg +++ b/contrib/sni-router/haproxy.cfg @@ -23,7 +23,11 @@ defaults # --- HTTP :80 — ACME challenges + redirect ----------------------------------- frontend http - bind *:80 + # Explicit v4 + v6 binds so IPv6 clients are accepted regardless of the + # host's IPV6_V6ONLY sysctl. v6only on the v6 bind avoids the + # "address in use" overlap on dual-stack hosts. + bind 0.0.0.0:80 + bind [::]:80 v6only mode http # Let Caddy answer ACME HTTP-01 challenges for Let's Encrypt. @@ -35,7 +39,8 @@ frontend http # --- TLS :443 — SNI-based routing ------------------------------------------- frontend tls - bind *:443 + bind 0.0.0.0:443 + bind [::]:443 v6only tcp-request inspect-delay 5s tcp-request content accept if { req_ssl_hello_type 1 } @@ -46,18 +51,23 @@ frontend tls default_backend web +# Backends reach mtg and web on host loopback — they publish to 127.0.0.1 +# (see docker-compose.yml), and HAProxy runs in the host netns +# (network_mode: host). PROXY v2 still carries the real client address +# (v4 or v6) end-to-end, independent of the loopback transport. + backend mtg # send-proxy-v2 prepends a PROXY protocol v2 header so mtg sees the # real client IP instead of HAProxy's. mtg must have # `proxy-protocol-listener = true` in its config. - server mtg mtg:3128 send-proxy-v2 + server mtg 127.0.0.1:3128 send-proxy-v2 backend web # send-proxy-v2 prepends a PROXY protocol v2 header so Caddy logs the # real client IP instead of HAProxy's. Caddy must enable the # proxy_protocol listener wrapper on :8443 (see Caddyfile). - server web web:8443 send-proxy-v2 + server web 127.0.0.1:8443 send-proxy-v2 backend web_acme mode http - server web web:80 + server web 127.0.0.1:8080 From b083d75731243694950562871fecc9427747b73e Mon Sep 17 00:00:00 2001 From: Alexey Dolotov Date: Mon, 18 May 2026 12:10:36 +0000 Subject: [PATCH 2/3] sni-router: review fixups (concise comments, accurate v6only note, narrow Caddy allow) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Caddy allow: 127.0.0.0/8 → 127.0.0.1/32 (only loopback peer is HAProxy). - haproxy.cfg: rewrite v6only comment to describe what it actually does (suppresses v4-mapped accept, preventing conflict with the v4 bind), not the symptom. - docker-compose.yml: trim the 8-line haproxy comment to 3 lines and defer the rationale to README. Add one-line note explaining why web uses host port 8080 (HAProxy owns :80). - README: condense the "Why network_mode: host" subsection. Spell out trade-offs as a list: own-the-host-ports, Linux-only (Docker Desktop doesn't make this layout reachable), userns-remap incompatibility. Note that mtg-config.toml stays as-is because mtg/web remain on the compose bridge. --- contrib/sni-router/Caddyfile | 4 +-- contrib/sni-router/README.md | 37 +++++++++++++++------------ contrib/sni-router/docker-compose.yml | 15 ++++------- contrib/sni-router/haproxy.cfg | 5 ++-- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/contrib/sni-router/Caddyfile b/contrib/sni-router/Caddyfile index d98ae568e..31b708bdd 100644 --- a/contrib/sni-router/Caddyfile +++ b/contrib/sni-router/Caddyfile @@ -11,7 +11,7 @@ # is terminated on the unwrapped connection. # # `allow` lists the networks permitted to send PROXY headers. - # 127.0.0.0/8 covers HAProxy reaching Caddy over host loopback (HAProxy + # 127.0.0.1/32 covers HAProxy reaching Caddy over host loopback (HAProxy # runs in network_mode: host and connects to the published 127.0.0.1 # port). The RFC1918 ranges cover mtg → Caddy on the compose bridge # (fronting path; see "Fronting loop" in README.md). @@ -19,7 +19,7 @@ listener_wrappers { proxy_protocol { timeout 5s - allow 127.0.0.0/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 + allow 127.0.0.1/32 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 } tls } diff --git a/contrib/sni-router/README.md b/contrib/sni-router/README.md index 54cdbbfc6..6e48dd3b1 100644 --- a/contrib/sni-router/README.md +++ b/contrib/sni-router/README.md @@ -65,23 +65,26 @@ to parse the connection. ### Why HAProxy uses `network_mode: host` -When a container is on a bridge network and a port is published with -`ports: "443:443"`, the source IP of inbound connections is rewritten -to the bridge gateway before HAProxy sees it — Docker's `docker-proxy` -userland forwarder accepts on the host and re-opens the connection -from the gateway; Podman's `slirp4netns` / `pasta` does the same in -rootless mode. The PROXY v2 header HAProxy then sends downstream -carries that gateway address (e.g. `172.x.x.1`), not the real client. - -`network_mode: host` puts HAProxy in the host network namespace, so it -binds `:443` / `:80` directly with no NAT in the path and observes the -true source address of every connection. mtg and Caddy stay on the -compose bridge and are published only on `127.0.0.1` — HAProxy reaches -them via host loopback, and the PROXY v2 header carries the real -client IP (v4 or v6) end-to-end. - -Trade-off: HAProxy occupies the host's `:443` and `:80`. Don't run -anything else on those ports on the same host. +A published port on a bridge network rewrites the source IP of inbound +connections to the bridge gateway before HAProxy sees it (Docker's +`docker-proxy`, Podman's `slirp4netns`/`pasta`), so the PROXY v2 header +HAProxy forwards downstream carries that gateway address, not the real +client. Host-mode HAProxy binds in the host netns directly, no NAT in +the path, and the rewrite never happens. mtg and Caddy stay on the +compose bridge and are published on `127.0.0.1` only — HAProxy reaches +them over host loopback. `mtg-config.toml` does not need to change; +fronting still uses `host = "web"` over compose-network DNS. + +**Trade-offs.** +- HAProxy owns the host's `:443` and `:80` — don't run anything else + on those ports. +- Linux host only. On Docker Desktop (macOS/Windows), "host" means + the Linux VM, not the user's machine, so external clients can't + reach the proxy. +- If you run Docker with `userns-remap`, the in-container "root" + loses the privilege to bind `<1024` on the host; either disable + `userns-remap` for this stack or lower `net.ipv4.ip_unprivileged_port_start` + on the host. ## Fronting loop (why `[domain-fronting]` is set explicitly) diff --git a/contrib/sni-router/docker-compose.yml b/contrib/sni-router/docker-compose.yml index 52804f5f4..7ff735622 100644 --- a/contrib/sni-router/docker-compose.yml +++ b/contrib/sni-router/docker-compose.yml @@ -27,15 +27,9 @@ x-domain-env: &domain-env services: haproxy: image: haproxy:lts-alpine - # network_mode: host lets HAProxy see real client source IPs (v4 and v6) - # instead of the docker/podman bridge gateway. Bridge ingress (docker-proxy - # userland forwarder, podman slirp4netns/pasta) rewrites the source address - # of inbound connections to the gateway; with host networking HAProxy binds - # in the host netns directly and the rewrite never happens. See the - # "Real client IPs" section of README.md. - # - # Trade-off: HAProxy occupies host :443 and :80. Don't run anything else - # on those ports. + # Host netns so HAProxy sees real client IPs (v4/v6) instead of the + # bridge gateway address. Linux host only; see README → "Why HAProxy + # uses network_mode: host" for the rationale and trade-off. network_mode: host volumes: - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro,Z @@ -52,7 +46,7 @@ services: volumes: - ./mtg-config.toml:/config/config.toml:ro,Z # Published on host loopback only — HAProxy (host netns) reaches it via - # 127.0.0.1:3128. Not exposed on any public interface. + # 127.0.0.1. ports: - "127.0.0.1:3128:3128" restart: unless-stopped @@ -66,6 +60,7 @@ services: - caddy_data:/data - ./www:/srv:ro,Z # Published on host loopback only — HAProxy reaches Caddy on 127.0.0.1. + # Port 8080 (not 80) on the host because HAProxy already owns host :80. ports: - "127.0.0.1:8080:80" - "127.0.0.1:8443:8443" diff --git a/contrib/sni-router/haproxy.cfg b/contrib/sni-router/haproxy.cfg index b6a70d220..36208d2a7 100644 --- a/contrib/sni-router/haproxy.cfg +++ b/contrib/sni-router/haproxy.cfg @@ -24,8 +24,9 @@ defaults frontend http # Explicit v4 + v6 binds so IPv6 clients are accepted regardless of the - # host's IPV6_V6ONLY sysctl. v6only on the v6 bind avoids the - # "address in use" overlap on dual-stack hosts. + # host's net.ipv6.bindv6only sysctl. `v6only` on the v6 bind prevents it + # from also accepting v4-mapped connections, which would otherwise + # conflict with the explicit v4 bind on the same port. bind 0.0.0.0:80 bind [::]:80 v6only mode http From a7febc2bf2be6b81ffdcbf640f24f289016f69ee Mon Sep 17 00:00:00 2001 From: Alexey Dolotov Date: Tue, 19 May 2026 08:57:23 +0000 Subject: [PATCH 3/3] sni-router: collapse haproxy bind to comma-separated form MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch to one-line `bind :80,[::]:80` and `bind :443,[::]:443` per review feedback in #522. The v6only flag was self-documentation, not load-bearing: with SO_REUSEADDR (HAProxy's default) and bindv6only=0 the kernel routes v4 packets to the more-specific AF_INET socket regardless. Comment trimmed to match — the v6only paragraph is gone because v6only itself is gone. The shorter form also scales more cleanly when adding ports later, e.g. `bind :8080,[::]:8080` on a new line. --- contrib/sni-router/haproxy.cfg | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/contrib/sni-router/haproxy.cfg b/contrib/sni-router/haproxy.cfg index 36208d2a7..e3bd444ad 100644 --- a/contrib/sni-router/haproxy.cfg +++ b/contrib/sni-router/haproxy.cfg @@ -23,12 +23,9 @@ defaults # --- HTTP :80 — ACME challenges + redirect ----------------------------------- frontend http - # Explicit v4 + v6 binds so IPv6 clients are accepted regardless of the - # host's net.ipv6.bindv6only sysctl. `v6only` on the v6 bind prevents it - # from also accepting v4-mapped connections, which would otherwise - # conflict with the explicit v4 bind on the same port. - bind 0.0.0.0:80 - bind [::]:80 v6only + # Explicit v4 + v6 binds so IPv6 clients are accepted regardless of + # the host's net.ipv6.bindv6only sysctl. + bind :80,[::]:80 mode http # Let Caddy answer ACME HTTP-01 challenges for Let's Encrypt. @@ -40,8 +37,7 @@ frontend http # --- TLS :443 — SNI-based routing ------------------------------------------- frontend tls - bind 0.0.0.0:443 - bind [::]:443 v6only + bind :443,[::]:443 tcp-request inspect-delay 5s tcp-request content accept if { req_ssl_hello_type 1 }