2 changes: 2 additions & 0 deletions .agents/skills/debug-openshell-cluster/SKILL.md
@@ -104,6 +104,7 @@ Look for:
- k3s startup errors (certificate issues, port binding failures)
- Manifest copy errors from `/opt/openshell/manifests/`
- `iptables` or `cgroup` errors (privilege/capability issues)
- `Warning: br_netfilter does not appear to be loaded` — this is advisory only; many kernels work without the explicit module. Only act on it if you also see DNS failures or pod-to-service connectivity problems (see Common Failure Patterns).

### Step 2: Check k3s Cluster Health

@@ -308,6 +309,7 @@ If DNS is broken, all image pulls from the distribution registry will fail, as w
| Port conflict | Another service on the configured gateway host port (default 8080) | Stop conflicting service or use `--port` on `openshell gateway start` to pick a different host port |
| gRPC connect refused to `127.0.0.1:443` in CI | Docker daemon is remote (`DOCKER_HOST=tcp://...`) but metadata still points to loopback | Verify metadata endpoint host matches `DOCKER_HOST` and includes non-loopback host |
| DNS failures inside container | Entrypoint DNS detection failed | `openshell doctor exec -- cat /etc/rancher/k3s/resolv.conf` and `openshell doctor logs --lines 20` |
| Pods can't reach kube-dns / ClusterIP services | `br_netfilter` not loaded; bridge traffic bypasses iptables DNAT rules | `sudo modprobe br_netfilter` on the host, then `echo br_netfilter \| sudo tee /etc/modules-load.d/br_netfilter.conf` to persist. Known to be required on Jetson Linux 5.15-tegra; other kernels (e.g. standard x86/aarch64 Linux) may have bridge netfilter built in and work without the module. The entrypoint logs a warning when `/proc/sys/net/bridge/bridge-nf-call-iptables` is absent but does not abort — only act on it if DNS or service connectivity is actually broken. |
| Node DiskPressure / MemoryPressure / PIDPressure | Insufficient disk, memory, or PIDs on host | Free disk (`docker system prune -a --volumes`), increase memory, or expand host resources. Bootstrap auto-detects via `HEALTHCHECK_NODE_PRESSURE` marker |
| Pods evicted with "The node had condition: [DiskPressure]" | Host disk full, kubelet evicting pods | Free disk space on host, then `openshell gateway destroy <name> && openshell gateway start` |
| `metrics-server` errors in logs | Normal k3s noise, not the root cause | These errors are benign — look for the actual failing health check component |
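The br_netfilter row above can be verified from the host before touching the cluster. A minimal sketch (the sysctl path is standard Linux; the `bridge_nf_state` helper name is ours, not part of openshell):

```shell
# Report whether bridge netfilter is active on this host.
# Prints the sysctl value (0 or 1) when the module/feature is present,
# or "absent" when /proc/sys/net/bridge does not exist — the condition
# the entrypoint warns about.
bridge_nf_state() {
    if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
        cat /proc/sys/net/bridge/bridge-nf-call-iptables
    else
        echo absent
    fi
}
bridge_nf_state
```

If this prints `absent` *and* pods cannot reach ClusterIP services, apply the `modprobe br_netfilter` fix from the table; `absent` alone is not actionable.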
99 changes: 91 additions & 8 deletions crates/openshell-sandbox/src/sandbox/linux/netns.rs
@@ -262,15 +262,18 @@ impl NetworkNamespace {

info!(
namespace = %self.name,
iptables = iptables_path,
iptables = %iptables_path,
proxy_addr = %format!("{}:{}", host_ip_str, proxy_port),
"Installing bypass detection rules"
);

// Install IPv4 rules
if let Err(e) =
self.install_bypass_rules_for(iptables_path, &host_ip_str, &proxy_port_str, &log_prefix)
{
if let Err(e) = self.install_bypass_rules_for(
&iptables_path,
&host_ip_str,
&proxy_port_str,
&log_prefix,
) {
warn!(
namespace = %self.name,
error = %e,
@@ -281,7 +284,7 @@ impl NetworkNamespace {

// Install IPv6 rules — best-effort.
// Skip the proxy ACCEPT rule for IPv6 since the proxy address is IPv4.
if let Some(ip6_path) = find_ip6tables(iptables_path) {
if let Some(ip6_path) = find_ip6tables(&iptables_path) {
if let Err(e) = self.install_bypass_rules_for_v6(&ip6_path, &log_prefix) {
warn!(
namespace = %self.name,
@@ -666,12 +669,92 @@ fn run_iptables_netns(netns: &str, iptables_cmd: &str, args: &[&str]) -> Result<
const IPTABLES_SEARCH_PATHS: &[&str] =
&["/usr/sbin/iptables", "/sbin/iptables", "/usr/bin/iptables"];

/// Returns true if xt extension modules (e.g. xt_comment) cannot be used
/// via the given iptables binary.
///
/// Some kernels have nf_tables but lack the nft_compat bridge that allows
/// xt extension modules to be used through the nf_tables path (e.g. Jetson
/// Linux 5.15-tegra). This probe detects that condition by attempting to
/// insert a rule using the xt_comment extension. If it fails, xt extensions
/// are unavailable and the caller should fall back to iptables-legacy.
fn xt_extensions_unavailable(iptables_path: &str) -> bool {
// Create a temporary probe chain. If this fails (e.g. no CAP_NET_ADMIN),
// we can't determine availability — assume extensions are available.
let created = Command::new(iptables_path)
.args(["-t", "filter", "-N", "_xt_probe"])
.output()
.map(|o| o.status.success())
.unwrap_or(false);

if !created {
return false;
}

// Attempt to insert a rule using xt_comment. Failure means nft_compat
// cannot bridge xt extension modules on this kernel.
let probe_ok = Command::new(iptables_path)
.args([
"-t",
"filter",
"-A",
"_xt_probe",
"-m",
"comment",
"--comment",
"probe",
"-j",
"ACCEPT",
])
.output()
.map(|o| o.status.success())
.unwrap_or(false);

// Clean up — best-effort, ignore failures.
let _ = Command::new(iptables_path)
.args([
"-t",
"filter",
"-D",
"_xt_probe",
"-m",
"comment",
"--comment",
"probe",
"-j",
"ACCEPT",
])
.output();
let _ = Command::new(iptables_path)
.args(["-t", "filter", "-X", "_xt_probe"])
.output();

!probe_ok
}

/// Find the iptables binary path, checking well-known locations.
fn find_iptables() -> Option<&'static str> {
IPTABLES_SEARCH_PATHS
///
/// If xt extension modules are unavailable via the standard binary and
/// `iptables-legacy` is available alongside it, the legacy binary is returned
/// instead. This ensures bypass-detection rules can be installed on kernels
/// where `nft_compat` is unavailable (e.g. Jetson Linux 5.15-tegra).
fn find_iptables() -> Option<String> {
let standard_path = IPTABLES_SEARCH_PATHS
.iter()
.find(|path| std::path::Path::new(path).exists())
.copied()
.copied()?;

if xt_extensions_unavailable(standard_path) {
let legacy_path = standard_path.replace("iptables", "iptables-legacy");
if std::path::Path::new(&legacy_path).exists() {
debug!(
legacy = legacy_path,
"xt extensions unavailable; using iptables-legacy"
);
return Some(legacy_path);
}
}

Some(standard_path.to_string())
}

/// Find the ip6tables binary path, deriving it from the iptables location.
70 changes: 66 additions & 4 deletions deploy/docker/cluster-entrypoint.sh
@@ -25,6 +25,61 @@

set -e

# ---------------------------------------------------------------------------
# Select iptables backend
# ---------------------------------------------------------------------------
# Some kernels (e.g. Jetson Linux 5.15-tegra) have the nf_tables subsystem
# but lack the nft_compat bridge that allows flannel and kube-proxy to use
# xt extension modules (xt_comment, xt_conntrack). Detect this by probing
# whether xt_comment is usable via the current iptables backend. If the
# probe fails, switch to iptables-legacy. Set USE_IPTABLES_LEGACY=1
# externally to skip the probe and force the legacy backend.
# ---------------------------------------------------------------------------
# Check br_netfilter kernel module
# ---------------------------------------------------------------------------
# br_netfilter makes the kernel pass bridge (pod-to-pod) traffic through
# iptables. Without it, kube-proxy's DNAT rules for ClusterIP services are
# never applied to pod traffic, so pods cannot reach services such as
# kube-dns (10.43.0.10), breaking all in-cluster DNS resolution.
#
# The module must be loaded on the HOST before the container starts —
# containers cannot load kernel modules themselves. If it is missing, log a
# warning rather than failing hard: some kernels have bridge netfilter support
# built-in or expose it differently, and will work correctly without the module
# being explicitly loaded as a separate .ko.
if [ ! -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
echo "Warning: br_netfilter does not appear to be loaded on the host." >&2
echo " Pod-to-service networking (including kube-dns) may not work without it." >&2
echo " If the cluster fails to start or DNS is broken, try loading it on the host:" >&2
echo " sudo modprobe br_netfilter" >&2
echo " To persist across reboots:" >&2
echo " echo br_netfilter | sudo tee /etc/modules-load.d/br_netfilter.conf" >&2
fi

if [ -z "${USE_IPTABLES_LEGACY:-}" ]; then
if iptables -t filter -N _xt_probe 2>/dev/null; then
_probe_rc=0
iptables -t filter -A _xt_probe -m comment --comment "probe" -j ACCEPT \
2>/dev/null || _probe_rc=$?
iptables -t filter -D _xt_probe -m comment --comment "probe" -j ACCEPT \
2>/dev/null || true
iptables -t filter -X _xt_probe 2>/dev/null || true
[ "$_probe_rc" -ne 0 ] && USE_IPTABLES_LEGACY=1
fi
fi

if [ "${USE_IPTABLES_LEGACY:-0}" = "1" ]; then
echo "iptables nf_tables xt extension bridge unavailable — switching to iptables-legacy"
if update-alternatives --set iptables /usr/sbin/iptables-legacy 2>/dev/null && \
update-alternatives --set ip6tables /usr/sbin/ip6tables-legacy 2>/dev/null; then
echo "Now using iptables-legacy mode"
else
echo "Warning: could not switch to iptables-legacy — cluster networking may fail"
fi
fi

IPTABLES=$([ "${USE_IPTABLES_LEGACY:-0}" = "1" ] && echo iptables-legacy || echo iptables)
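The probe above relies on a status-capture idiom worth noting: the script runs under `set -e`, so a bare failing `iptables` command would abort the entrypoint, while `cmd || var=$?` records the failure status and continues. A minimal sketch of just that idiom (the variable name is illustrative):

```shell
# Under `set -e`, a failing command in an AND-OR list does not abort the
# script; `|| rc=$?` captures its exit status for later inspection.
set -e
rc=0
false || rc=$?
echo "probe exit status: $rc"   # prints: probe exit status: 1
```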

RESOLV_CONF="/etc/rancher/k3s/resolv.conf"

has_default_route() {
@@ -74,11 +129,11 @@ setup_dns_proxy() {
# Docker sets up rules like:
# -A DOCKER_OUTPUT -d 127.0.0.11/32 -p udp --dport 53 -j DNAT --to-destination 127.0.0.11:<port>
# -A DOCKER_OUTPUT -d 127.0.0.11/32 -p tcp --dport 53 -j DNAT --to-destination 127.0.0.11:<port>
UDP_PORT=$(iptables -t nat -S DOCKER_OUTPUT 2>/dev/null \
UDP_PORT=$($IPTABLES -t nat -S DOCKER_OUTPUT 2>/dev/null \
| grep -- '-p udp.*--dport 53' \
| sed -n 's/.*--to-destination 127.0.0.11:\([0-9]*\).*/\1/p' \
| head -1)
TCP_PORT=$(iptables -t nat -S DOCKER_OUTPUT 2>/dev/null \
TCP_PORT=$($IPTABLES -t nat -S DOCKER_OUTPUT 2>/dev/null \
| grep -- '-p tcp.*--dport 53' \
| sed -n 's/.*--to-destination 127.0.0.11:\([0-9]*\).*/\1/p' \
| head -1)
@@ -101,9 +156,9 @@ setup_dns_proxy() {
echo "Setting up DNS proxy: ${CONTAINER_IP}:53 -> 127.0.0.11 (udp:${UDP_PORT}, tcp:${TCP_PORT})"

# Forward DNS from pods (PREROUTING) and local processes (OUTPUT) to Docker's DNS
iptables -t nat -I PREROUTING -p udp --dport 53 -d "$CONTAINER_IP" -j DNAT \
$IPTABLES -t nat -I PREROUTING -p udp --dport 53 -d "$CONTAINER_IP" -j DNAT \
--to-destination "127.0.0.11:${UDP_PORT}"
iptables -t nat -I PREROUTING -p tcp --dport 53 -d "$CONTAINER_IP" -j DNAT \
$IPTABLES -t nat -I PREROUTING -p tcp --dport 53 -d "$CONTAINER_IP" -j DNAT \
--to-destination "127.0.0.11:${TCP_PORT}"

echo "nameserver $CONTAINER_IP" > "$RESOLV_CONF"
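The port-extraction pipeline can be exercised standalone against a sample rule shaped like the `DOCKER_OUTPUT` comment above (the rule text and port 41712 are illustrative, not taken from a real host):

```shell
# Extract the embedded-DNS UDP port from a Docker-style DNAT rule,
# using the same grep/sed pipeline as setup_dns_proxy.
rule='-A DOCKER_OUTPUT -d 127.0.0.11/32 -p udp -m udp --dport 53 -j DNAT --to-destination 127.0.0.11:41712'
port=$(printf '%s\n' "$rule" \
    | grep -- '-p udp.*--dport 53' \
    | sed -n 's/.*--to-destination 127.0.0.11:\([0-9]*\).*/\1/p' \
    | head -1)
echo "$port"   # prints 41712
```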
@@ -495,6 +550,13 @@ if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then
EXTRA_KUBELET_ARGS="--kubelet-arg=fail-cgroupv1=false"
fi

# On kernels where xt_comment is unavailable, kube-router's network policy
# controller panics at startup. Disable it when the iptables-legacy probe
# triggered; sandbox isolation is enforced by the NSSH1 HMAC handshake instead.
if [ "${USE_IPTABLES_LEGACY:-0}" = "1" ]; then
EXTRA_KUBELET_ARGS="$EXTRA_KUBELET_ARGS --disable-network-policy"
fi

# Docker Desktop can briefly start the container before its bridge default route
# is fully installed. k3s exits immediately in that state, so wait briefly for
# routing to settle first.